[ARM] Add the non-MVE instructions in Arm v8.1-M.

This adds support for the new family of conditional selection / increment / negation instructions; the low-overhead branch instructions (e.g. BF, WLS, DLS); the CLRM instruction to zero a whole list of registers at once; the new VMRS/VMSR and VLDR/VSTR instructions to get data in and out of 8.1-M system registers, particularly including the new VPR register used by MVE vector predication. To support this, we also add a register name 'zr' (used by the CSEL family to force one of the inputs to the constant 0), and operand types for lists of registers that are also allowed to include APSR or VPR (used by CLRM). The VLDR/VSTR instructions also need some new addressing modes. The low-overhead branch instructions exist in their own separate architecture extension, which we treat as enabled by default, but you can say -mattr=-lob or equivalent to turn it off. Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover Reviewed By: samparker Subscribers: miyuki, javed.absar, kristof.beyls, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D62667 llvm-svn: 362953
author: Simon Tatham <simon.tatham@arm.com> 2019-06-10 15:36:34 +0000
committer: Simon Tatham <simon.tatham@arm.com> 2019-06-10 15:36:34 +0000
commit: baeea9193370deeefb19ea7602606e262fec9be6 (patch)
tree: 89e90d76800c89b18b403f893b27799bbf1a7e90 /llvm/lib
parent: 05bf5f9328e2bcada093cc36e729621763b68823 (diff)
download: bcm5719-llvm-baeea9193370deeefb19ea7602606e262fec9be6.tar.gz
bcm5719-llvm-baeea9193370deeefb19ea7602606e262fec9be6.zip
20 files changed, 1598 insertions, 90 deletions
diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td
index 5671c0bd831..b687db12eaf 100644
--- a/llvm/lib/Target/ARM/ARM.td
+++ b/llvm/lib/Target/ARM/ARM.td
@@ -405,6 +405,12 @@ def FeatureUseAA : SubtargetFeature<"use-aa", "UseAA", "true",
 def FeatureSB       : SubtargetFeature<"sb", "HasSB", "true",
   "Enable v8.5a Speculation Barrier" >;
 
+// Armv8.1-M extensions
+
+def FeatureLOB            : SubtargetFeature<"lob", "HasLOB", "true",
+                                             "Enable Low Overhead Branch "
+                                             "extensions">;
+
 //===----------------------------------------------------------------------===//
 // ARM architecture class
 //
@@ -805,7 +811,8 @@ def ARMv81mMainline : Architecture<"armv8.1-m.main", "ARMv81mMainline",
                                                        Feature8MSecExt,
                                                        FeatureAcquireRelease,
                                                        FeatureMClass,
-                                                       FeatureRAS]>;
+                                                       FeatureRAS,
+                                                       FeatureLOB]>;
 
 // Aliases
 def IWMMXT   : Architecture<"iwmmxt",      "ARMv5te",  [ARMv5te]>;
diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
index 6bede80adaa..c00ded6e96a 100644
--- a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -762,6 +762,14 @@ void ARMAsmPrinter::emitAttributes() {
 
 //===----------------------------------------------------------------------===//
 
+/// Get or create the symbol named "<Prefix>BF<FunctionNumber>_<LabelId>",
+/// the label attached to a Branch Future instruction.
+static MCSymbol *getBFLabel(StringRef Prefix, unsigned FunctionNumber,
+                             unsigned LabelId, MCContext &Ctx) {
+  return Ctx.getOrCreateSymbol(Twine(Prefix) + "BF" + Twine(FunctionNumber) +
+                               "_" + Twine(LabelId));
+}
+
 static MCSymbol *getPICLabel(StringRef Prefix, unsigned FunctionNumber,
                              unsigned LabelId, MCContext &Ctx) {
 
@@ -1436,6 +1444,65 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     EmitToStreamer(*OutStreamer, TmpInst);
     return;
   }
+  case ARM::t2BFi:
+  case ARM::t2BFic:
+  case ARM::t2BFLi:
+  case ARM::t2BFr:
+  case ARM::t2BFLr: {
+    // Branch Future instruction. Operand 0 is the index of the BF label
+    // ($b_label); operand 1 is the branch target (register or symbol).
+    const MCExpr *BranchLabel = MCSymbolRefExpr::create(
+        getBFLabel(DL.getPrivateGlobalPrefix(), getFunctionNumber(),
+                   MI->getOperand(0).getIndex(), OutContext),
+        OutContext);
+
+    auto MCInst = MCInstBuilder(Opc).addExpr(BranchLabel);
+    const MachineOperand &Target = MI->getOperand(1);
+    if (Target.isReg()) {
+      // For BFr/BFLr
+      MCInst.addReg(Target.getReg());
+    } else {
+      // For BFi/BFLi/BFic: target is a basic block, global or external symbol.
+      const MCExpr *BranchTarget;
+      if (Target.isMBB())
+        BranchTarget =
+            MCSymbolRefExpr::create(Target.getMBB()->getSymbol(), OutContext);
+      else if (Target.isGlobal())
+        BranchTarget = MCSymbolRefExpr::create(
+            GetARMGVSymbol(Target.getGlobal(), Target.getTargetFlags()),
+            OutContext);
+      else if (Target.isSymbol())
+        BranchTarget = MCSymbolRefExpr::create(
+            GetExternalSymbolSymbol(Target.getSymbolName()), OutContext);
+      else
+        llvm_unreachable("Unhandled Branch Future target operand kind");
+      MCInst.addExpr(BranchTarget);
+    }
+
+    if (Opc == ARM::t2BFic) {
+      // BFic carries a second BF label (operand 2) and an immediate.
+      const MCExpr *ElseLabel = MCSymbolRefExpr::create(
+          getBFLabel(DL.getPrivateGlobalPrefix(), getFunctionNumber(),
+                     MI->getOperand(2).getIndex(), OutContext),
+          OutContext);
+      MCInst.addExpr(ElseLabel);
+      MCInst.addImm(MI->getOperand(3).getImm());
+    } else {
+      MCInst.addImm(MI->getOperand(2).getImm())
+          .addReg(MI->getOperand(3).getReg());
+    }
+    EmitToStreamer(*OutStreamer, MCInst);
+    return;
+  }
+  case ARM::t2BF_LabelPseudo: {
+    // Pseudo-instruction marking the label that a Branch Future instruction
+    // refers to; it emits only the label itself.
+    MCSymbol *BFLabel =
+        getBFLabel(DL.getPrivateGlobalPrefix(), getFunctionNumber(),
+                   MI->getOperand(0).getIndex(), OutContext);
+    OutStreamer->EmitLabel(BFLabel);
+    return;
+  }
   case ARM::tPICADD: {
     // This is a pseudo op for a label + instruction sequence, which looks like:
     // LPC0:
diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 96200a09109..26618b0508d 100644
--- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -203,6 +203,8 @@ getReservedRegs(const MachineFunction &MF) const {
     for (MCSubRegIterator SI(Reg, this); SI.isValid(); ++SI)
       if (Reserved.test(*SI))
         markSuperRegs(Reserved, Reg);
+  // Always reserve ZR, the Armv8.1-M register name that stands for constant 0.
+  markSuperRegs(Reserved, ARM::ZR);
 
   assert(checkAllSuperRegsMarked(Reserved));
   return Reserved;
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
index fd268609c20..2888c8b0d8f 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -367,6 +367,16 @@ def reglist : Operand<i32> {
   let DecoderMethod = "DecodeRegListOperand";
 }
 
+// A list of general purpose registers and APSR separated by comma.
+// Used by CLRM
+def RegListWithAPSRAsmOperand : AsmOperandClass { let Name = "RegListWithAPSR"; }
+def reglist_with_apsr : Operand<i32> {
+  let EncoderMethod = "getRegisterListOpValue";
+  let ParserMatchClass = RegListWithAPSRAsmOperand;
+  let PrintMethod = "printRegisterList";
+  let DecoderMethod = "DecodeRegListOperand";
+}
+
 def GPRPairOp : RegisterOperand<GPRPair, "printGPRPairOperand">;
 
 def DPRRegListAsmOperand : AsmOperandClass {
@@ -391,6 +401,21 @@ def spr_reglist : Operand<i32> {
   let DecoderMethod = "DecodeSPRRegListOperand";
 }
 
+def FPSRegListWithVPRAsmOperand : AsmOperandClass { let Name =
+    "FPSRegListWithVPR"; }
+def fp_sreglist_with_vpr : Operand<i32> {
+  let EncoderMethod = "getRegisterListOpValue";
+  let ParserMatchClass = FPSRegListWithVPRAsmOperand;
+  let PrintMethod = "printRegisterList";
+}
+def FPDRegListWithVPRAsmOperand : AsmOperandClass { let Name =
+    "FPDRegListWithVPR"; }
+def fp_dreglist_with_vpr : Operand<i32> {
+  let EncoderMethod = "getRegisterListOpValue";
+  let ParserMatchClass = FPDRegListWithVPRAsmOperand;
+  let PrintMethod = "printRegisterList";
+}
+
 // An operand for the CONSTPOOL_ENTRY pseudo-instruction.
 def cpinst_operand : Operand<i32> {
   let PrintMethod = "printCPInstOperand";
diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td
index e82cbeef43f..d8c6d3e625e 100644
--- a/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -150,6 +150,26 @@ def lo5AllOne : PatLeaf<(i32 imm), [{
 
 // Define Thumb2 specific addressing modes.
 
+// t2_addr_offset_none := reg
+def MemNoOffsetT2AsmOperand
+  : AsmOperandClass { let Name = "MemNoOffsetT2"; }
+def t2_addr_offset_none : MemOperand {
+  let PrintMethod = "printAddrMode7Operand";
+  let DecoderMethod = "DecodeGPRnopcRegisterClass";
+  let ParserMatchClass = MemNoOffsetT2AsmOperand;
+  let MIOperandInfo = (ops GPRnopc:$base);
+}
+
+// t2_nosp_addr_offset_none := reg
+def MemNoOffsetT2NoSpAsmOperand
+  : AsmOperandClass { let Name = "MemNoOffsetT2NoSp"; }
+def t2_nosp_addr_offset_none : MemOperand {
+  let PrintMethod = "printAddrMode7Operand";
+  let DecoderMethod = "Decodet2rGPRRegisterClass";
+  let ParserMatchClass = MemNoOffsetT2NoSpAsmOperand;
+  let MIOperandInfo = (ops t2rGPR:$base);
+}
+
 // t2addrmode_imm12  := reg + imm12
 def t2addrmode_imm12_asmoperand : AsmOperandClass {let Name="MemUImm12Offset";}
 def t2addrmode_imm12 : MemOperand,
@@ -247,10 +267,38 @@ def t2addrmode_imm8s4_pre : T2AddrMode_Imm8s4 {
 def t2am_imm8s4_offset_asmoperand : AsmOperandClass { let Name = "Imm8s4"; }
 def t2am_imm8s4_offset : MemOperand {
   let PrintMethod = "printT2AddrModeImm8s4OffsetOperand";
-  let EncoderMethod = "getT2Imm8s4OpValue";
+  let EncoderMethod = "getT2ScaledImmOpValue<8,2>";
   let DecoderMethod = "DecodeT2Imm8S4";
 }
 
+// t2addrmode_imm7s4  := reg +/- (imm7 << 2)
+def MemImm7s4OffsetAsmOperand : AsmOperandClass {let Name = "MemImm7s4Offset";}
+class T2AddrMode_Imm7s4 : MemOperand {
+  let EncoderMethod = "getT2AddrModeImm7s4OpValue";
+  let DecoderMethod = "DecodeT2AddrModeImm7<2,0>";
+  let ParserMatchClass = MemImm7s4OffsetAsmOperand;
+  let MIOperandInfo = (ops GPRnopc:$base, i32imm:$offsimm);
+}
+
+def t2addrmode_imm7s4 : T2AddrMode_Imm7s4 {
+  // They are printed the same way as the imm8 version
+  let PrintMethod = "printT2AddrModeImm8s4Operand<false>";
+}
+
+def t2addrmode_imm7s4_pre : T2AddrMode_Imm7s4 {
+  // They are printed the same way as the imm8 version
+  let PrintMethod = "printT2AddrModeImm8s4Operand<true>";
+}
+
+def t2am_imm7s4_offset_asmoperand : AsmOperandClass { let Name = "Imm7s4"; }
+def t2am_imm7s4_offset : MemOperand {
+  // They are printed the same way as the imm8 version
+  let PrintMethod = "printT2AddrModeImm8s4OffsetOperand";
+  let ParserMatchClass = t2am_imm7s4_offset_asmoperand;
+  let EncoderMethod = "getT2ScaledImmOpValue<7,2>";
+  let DecoderMethod = "DecodeT2Imm7S4";
+}
+
 // t2addrmode_imm0_1020s4  := reg + (imm8 << 2)
 def MemImm0_1020s4OffsetAsmOperand : AsmOperandClass {
   let Name = "MemImm0_1020s4Offset";
@@ -289,6 +337,55 @@ def addrmode_tbh : MemOperand {
   let MIOperandInfo = (ops GPR:$Rn, rGPR:$Rm);
 }
 
+// Define ARMv8.1-M specific addressing modes.
+
+// Label operands for BF/BFL/WLS/DLS/LE
+class BFLabelOp<string signed, string isNeg, string size, string fixup>
+  : Operand<OtherVT> {
+  let EncoderMethod = !strconcat("getBFTargetOpValue<", isNeg, ", ",
+                                 fixup, ">");
+  let OperandType = "OPERAND_PCREL";
+  let DecoderMethod = !strconcat("DecodeBFLabelOperand<", signed, ", ",
+                                 isNeg, ", ", size, ">");
+}
+def bflabel_u4  : BFLabelOp<"false", "false", "4",  "ARM::fixup_bf_branch">;
+def bflabel_s12 : BFLabelOp<"true",  "false", "12", "ARM::fixup_bfc_target">;
+def bflabel_s16 : BFLabelOp<"true",  "false", "16", "ARM::fixup_bf_target">;
+def bflabel_s18 : BFLabelOp<"true",  "false", "18", "ARM::fixup_bfl_target">;
+def wlslabel_u11 : BFLabelOp<"false", "false", "11", "ARM::fixup_wls">;
+def lelabel_u11 : BFLabelOp<"false", "true",  "11", "ARM::fixup_le">;
+
+def bfafter_target : Operand<OtherVT> {
+    let EncoderMethod = "getBFAfterTargetOpValue";
+    let OperandType = "OPERAND_PCREL";
+    let DecoderMethod = "DecodeBFAfterTargetOperand";
+}
+
+// pred operand excluding AL
+def pred_noal_asmoperand : AsmOperandClass {
+  let Name = "CondCodeNoAL";
+  let RenderMethod = "addITCondCodeOperands";
+  let PredicateMethod = "isITCondCodeNoAL";
+  let ParserMethod = "parseITCondCode";
+}
+def pred_noal : Operand<i32> {
+  let PrintMethod = "printMandatoryPredicateOperand";
+  let ParserMatchClass = pred_noal_asmoperand;
+  let DecoderMethod = "DecodePredNoALOperand";
+}
+
+
+// Inverted predicate operand, used by the CSEL-family aliases
+def pred_noal_inv_asmoperand : AsmOperandClass {
+  let Name = "CondCodeNoALInv";
+  let RenderMethod = "addITCondCodeInvOperands";
+  let PredicateMethod = "isITCondCodeNoAL";
+  let ParserMethod = "parseITCondCode";
+}
+def pred_noal_inv : Operand<i32> {
+  let PrintMethod = "printMandatoryInvertedPredicateOperand";
+  let ParserMatchClass = pred_noal_inv_asmoperand;
+}
 //===----------------------------------------------------------------------===//
 // Multiclass helpers...
 //
@@ -4911,3 +5008,213 @@ def : t2InstAlias<"pld${p} $addr",
 def : InstAlias<"pli${p} $addr",
                  (t2PLIpci  t2ldr_pcrel_imm12:$addr, pred:$p), 0>,
       Requires<[IsThumb2,HasV7]>;
+
+
+//===----------------------------------------------------------------------===//
+// ARMv8.1m instructions
+//
+
+class V8_1MI<dag oops, dag iops, AddrMode am, InstrItinClass itin, string asm,
+             string ops, string cstr, list<dag> pattern>
+  : Thumb2XI<oops, iops, am, 4, itin, !strconcat(asm, "\t", ops), cstr,
+             pattern>,
+    Requires<[HasV8_1MMainline]>;
+
+def t2CLRM : V8_1MI<(outs),
+                    (ins pred:$p, reglist_with_apsr:$regs, variable_ops),
+                    AddrModeNone, NoItinerary, "clrm", "${p}\t$regs", "", []> {
+  bits<16> regs;
+
+  let Inst{31-16} = 0b1110100010011111;
+  let Inst{15-14} = regs{15-14};
+  let Inst{13} = 0b0;
+  let Inst{12-0} = regs{12-0};
+}
+
+class t2BF<dag iops, string asm, string ops>
+  : V8_1MI<(outs ), iops, AddrModeNone, NoItinerary, asm, ops, "", []> {
+
+  let Inst{31-27} = 0b11110;
+  let Inst{15-14} = 0b11;
+  let Inst{12} = 0b0;
+  let Inst{0} = 0b1;
+
+  let Predicates = [IsThumb2, HasV8_1MMainline, HasLOB];
+}
+
+def t2BF_LabelPseudo
+  : t2PseudoInst<(outs ), (ins pclabel:$cp), 0, NoItinerary, []> {
+  let isTerminator = 1;
+  let Predicates = [IsThumb2, HasV8_1MMainline, HasLOB];
+}
+
+def t2BFi : t2BF<(ins bflabel_u4:$b_label, bflabel_s16:$label, pred:$p),
+                 !strconcat("bf", "${p}"), "$b_label, $label"> {
+  bits<4> b_label;
+  bits<16> label;
+
+  let Inst{26-23} = b_label{3-0};
+  let Inst{22-21} = 0b10;
+  let Inst{20-16} = label{15-11};
+  let Inst{13} = 0b1;
+  let Inst{11} = label{0};
+  let Inst{10-1} = label{10-1};
+}
+
+def t2BFic : t2BF<(ins bflabel_u4:$b_label, bflabel_s12:$label,
+                   bfafter_target:$ba_label, pred_noal:$bcond), "bfcsel",
+                  "$b_label, $label, $ba_label, $bcond"> {
+  bits<4> bcond;
+  bits<12> label;
+  bits<1> ba_label;
+  bits<4> b_label;
+
+  let Inst{26-23} = b_label{3-0};
+  let Inst{22} = 0b0;
+  let Inst{21-18} = bcond{3-0};
+  let Inst{17} = ba_label{0};
+  let Inst{16} = label{11};
+  let Inst{13} = 0b1;
+  let Inst{11} = label{0};
+  let Inst{10-1} = label{10-1};
+}
+
+def t2BFr : t2BF<(ins bflabel_u4:$b_label, rGPR:$Rn, pred:$p),
+                 !strconcat("bfx", "${p}"), "$b_label, $Rn"> {
+  bits<4> b_label;
+  bits<4> Rn;
+
+  let Inst{26-23} = b_label{3-0};
+  let Inst{22-20} = 0b110;
+  let Inst{19-16} = Rn{3-0};
+  let Inst{13-1} = 0b1000000000000;
+}
+
+def t2BFLi : t2BF<(ins bflabel_u4:$b_label, bflabel_s18:$label, pred:$p),
+                  !strconcat("bfl", "${p}"), "$b_label, $label"> {
+  bits<4> b_label;
+  bits<18> label;
+
+  let Inst{26-23} = b_label{3-0};
+  let Inst{22-16} = label{17-11};
+  let Inst{13} = 0b0;
+  let Inst{11} = label{0};
+  let Inst{10-1} = label{10-1};
+}
+
+def t2BFLr : t2BF<(ins bflabel_u4:$b_label, rGPR:$Rn, pred:$p),
+                  !strconcat("bflx", "${p}"), "$b_label, $Rn"> {
+  bits<4> b_label;
+  bits<4> Rn;
+
+  let Inst{26-23} = b_label{3-0};
+  let Inst{22-20} = 0b111;
+  let Inst{19-16} = Rn{3-0};
+  let Inst{13-1} = 0b1000000000000;
+}
+
+class t2LOL<dag oops, dag iops, string asm, string ops>
+  : V8_1MI<oops, iops, AddrModeNone, NoItinerary, asm, ops, "", [] > {
+  let Inst{31-23} = 0b111100000;
+  let Inst{15-14} = 0b11;
+  let Inst{0} = 0b1;
+  let isBranch = 1;
+  let isTerminator = 1;
+  let DecoderMethod = "DecodeLOLoop";
+  let Predicates = [IsThumb2, HasV8_1MMainline, HasLOB];
+}
+
+def t2WLS : t2LOL<(outs GPRlr:$LR),
+                  (ins rGPR:$Rn, wlslabel_u11:$label),
+                  "wls", "$LR, $Rn, $label"> {
+  bits<4> Rn;
+  bits<11> label;
+  let Inst{22-20} = 0b100;
+  let Inst{19-16} = Rn{3-0};
+  let Inst{13-12} = 0b00;
+  let Inst{11} = label{0};
+  let Inst{10-1} = label{10-1};
+  let usesCustomInserter = 1;
+}
+
+def t2DLS : t2LOL<(outs GPRlr:$LR), (ins rGPR:$Rn),
+                  "dls", "$LR, $Rn"> {
+  bits<4> Rn;
+  let isBranch = 0;
+  let isTerminator = 0;
+  let Inst{22-20} = 0b100;
+  let Inst{19-16} = Rn{3-0};
+  let Inst{13-1} = 0b1000000000000;
+  let usesCustomInserter = 1;
+}
+
+def t2LEUpdate : t2LOL<(outs GPRlr:$LRout),
+                       (ins GPRlr:$LRin, lelabel_u11:$label),
+                       "le", "$LRin, $label"> {
+  bits<11> label;
+  let Inst{22-16} = 0b0001111;
+  let Inst{13-12} = 0b00;
+  let Inst{11} = label{0};
+  let Inst{10-1} = label{10-1};
+  let usesCustomInserter = 1;
+}
+
+def t2LE : t2LOL<(outs ), (ins lelabel_u11:$label), "le", "$label"> {
+  bits<11> label;
+  let Inst{22-16} = 0b0101111;
+  let Inst{13-12} = 0b00;
+  let Inst{11} = label{0};
+  let Inst{10-1} = label{10-1};
+}
+
+let Uses = [CPSR] in {
+class CS<string iname, list<dag> pattern=[]>
+  : V8_1MI<(outs rGPR:$Rd), (ins GPRwithZR:$Rn, GPRwithZR:$Rm, pred_noal:$fcond),
+           AddrModeNone, NoItinerary, iname, "$Rd, $Rn, $Rm, $fcond", "", pattern> {
+  bits<4> Rd;
+  bits<4> Rm;
+  bits<4> Rn;
+  bits<4> fcond;
+
+  let Inst{31-20} = 0b111010100101;
+  let Inst{19-16} = Rn{3-0};
+  let Inst{11-8} = Rd{3-0};
+  let Inst{7-4} = fcond{3-0};
+  let Inst{3-0} = Rm{3-0};
+}
+}
+
+def t2CSEL : CS<"csel"> {
+  let Inst{15-12} = 0b1000;
+}
+
+def t2CSINC : CS<"csinc"> {
+  let Inst{15-12} = 0b1001;
+}
+
+def t2CSINV : CS<"csinv"> {
+  let Inst{15-12} = 0b1010;
+}
+
+def t2CSNEG : CS<"csneg"> {
+  let Inst{15-12} = 0b1011;
+}
+
+
+// CS aliases.
+let Predicates = [HasV8_1MMainline] in {
+  def : InstAlias<"csetm\t$Rd, $fcond",
+                 (t2CSINV rGPR:$Rd, ZR, ZR, pred_noal_inv:$fcond)>;
+
+  def : InstAlias<"cset\t$Rd, $fcond",
+                 (t2CSINC rGPR:$Rd, ZR, ZR, pred_noal_inv:$fcond)>;
+
+  def : InstAlias<"cinc\t$Rd, $Rn, $fcond",
+                 (t2CSINC rGPR:$Rd, GPRwithZR:$Rn, GPRwithZR:$Rn, pred_noal_inv:$fcond)>;
+
+  def : InstAlias<"cinv\t$Rd, $Rn, $fcond",
+                 (t2CSINV rGPR:$Rd, GPRwithZR:$Rn, GPRwithZR:$Rn, pred_noal_inv:$fcond)>;
+
+  def : InstAlias<"cneg\t$Rd, $Rn, $fcond",
+                 (t2CSNEG rGPR:$Rd, GPRwithZR:$Rn, GPRwithZR:$Rn, pred_noal_inv:$fcond)>;
+}
diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td
index 6498024869a..af84157cda5 100644
--- a/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -2299,14 +2299,14 @@ class MovFromVFP<bits<4> opc19_16, dag oops, dag iops, string opc, string asm,
   let Inst{3-0}   = 0b0000;
 }
 
-// APSR is the application level alias of CPSR. This FPSCR N, Z, C, V flags
-// to APSR.
-let Defs = [CPSR], Uses = [FPSCR_NZCV], Predicates = [HasFPRegs],
-    Rt = 0b1111 /* apsr_nzcv */ in
-def FMSTAT : MovFromVFP<0b0001 /* fpscr */, (outs), (ins),
-                        "vmrs", "\tAPSR_nzcv, fpscr", [(arm_fmstat)]>;
-
 let DecoderMethod = "DecodeForVMRSandVMSR" in {
+ // APSR is the application level alias of CPSR. This copies the FPSCR N, Z,
+ // C, V flags to APSR.
+ let Defs = [CPSR], Uses = [FPSCR_NZCV], Predicates = [HasFPRegs],
+     Rt = 0b1111 /* apsr_nzcv */ in
+ def FMSTAT : MovFromVFP<0b0001 /* fpscr */, (outs), (ins),
+                         "vmrs", "\tAPSR_nzcv, fpscr", [(arm_fmstat)]>;
+
  // Application level FPSCR -> GPR
  let hasSideEffects = 1, Uses = [FPSCR], Predicates = [HasFPRegs] in
  def VMRS :  MovFromVFP<0b0001 /* fpscr */, (outs GPRnopc:$Rt), (ins),
@@ -2331,6 +2331,33 @@ let DecoderMethod = "DecodeForVMRSandVMSR" in {
                                 "vmrs", "\t$Rt, fpinst", []>;
    def VMRS_FPINST2 : MovFromVFP<0b1010 /* fpinst2 */, (outs GPRnopc:$Rt),
                                  (ins), "vmrs", "\t$Rt, fpinst2", []>;
+   let Predicates = [HasV8_1MMainline, HasFPRegs] in {
+     // System level FPSCR_NZCVQC -> GPR
+     def VMRS_FPSCR_NZCVQC
+       : MovFromVFP<0b0010 /* fpscr_nzcvqc */,
+                    (outs GPR:$Rt), (ins cl_FPSCR_NZCV:$fpscr_in),
+                    "vmrs", "\t$Rt, fpscr_nzcvqc", []>;
+   }
+ }
+ let Predicates = [HasV8_1MMainline, Has8MSecExt] in {
+   // System level FPSCR -> GPR, with context saving for security extensions
+   def VMRS_FPCXTNS : MovFromVFP<0b1110 /* fpcxtns */, (outs GPR:$Rt), (ins),
+                                 "vmrs", "\t$Rt, fpcxtns", []>;
+ }
+ let Predicates = [HasV8_1MMainline, Has8MSecExt] in {
+   // System level FPSCR -> GPR, with context saving for security extensions
+   def VMRS_FPCXTS : MovFromVFP<0b1111 /* fpcxts */, (outs GPR:$Rt), (ins),
+                                "vmrs", "\t$Rt, fpcxts", []>;
+ }
+
+ let Predicates = [HasV8_1MMainline, HasMVEInt] in {
+   // System level VPR/P0 -> GPR
+   let Uses = [VPR] in
+   def VMRS_VPR : MovFromVFP<0b1100 /* vpr */, (outs GPR:$Rt), (ins),
+                             "vmrs", "\t$Rt, vpr", []>;
+
+   def VMRS_P0  : MovFromVFP<0b1101 /* p0 */, (outs GPR:$Rt), (ins VCCR:$cond),
+                             "vmrs", "\t$Rt, p0", []>;
  }
 }
 
@@ -2353,6 +2380,7 @@ class MovToVFP<bits<4> opc19_16, dag oops, dag iops, string opc, string asm,
   let Inst{11-8}  = 0b1010;
   let Inst{7}     = 0;
   let Inst{4}     = 1;
+  let Predicates = [HasVFP2];
 }
 
 let DecoderMethod = "DecodeForVMRSandVMSR" in {
@@ -2373,6 +2401,33 @@ let DecoderMethod = "DecodeForVMRSandVMSR" in {
    def VMSR_FPINST2 : MovToVFP<0b1010 /* fpinst2 */, (outs), (ins GPRnopc:$src),
                                "vmsr", "\tfpinst2, $src", []>;
  }
+ let Predicates = [HasV8_1MMainline, Has8MSecExt] in {
+   // System level GPR -> FPSCR with context saving for security extensions
+   def VMSR_FPCXTNS : MovToVFP<0b1110 /* fpcxtns */, (outs), (ins GPR:$src),
+                               "vmsr", "\tfpcxtns, $src", []>;
+ }
+ let Predicates = [HasV8_1MMainline, Has8MSecExt] in {
+   // System level GPR -> FPSCR with context saving for security extensions
+   def VMSR_FPCXTS : MovToVFP<0b1111 /* fpcxts */, (outs), (ins GPR:$src),
+                              "vmsr", "\tfpcxts, $src", []>;
+ }
+ let Predicates = [HasV8_1MMainline, HasFPRegs] in {
+   // System level GPR -> FPSCR_NZCVQC
+   def VMSR_FPSCR_NZCVQC
+     : MovToVFP<0b0010 /* fpscr_nzcvqc */,
+                (outs cl_FPSCR_NZCV:$fpscr_out), (ins GPR:$src),
+                "vmsr", "\tfpscr_nzcvqc, $src", []>;
+ }
+
+ let Predicates = [HasV8_1MMainline, HasMVEInt] in {
+   // System level GPR -> VPR/P0
+   let Defs = [VPR] in
+   def VMSR_VPR : MovToVFP<0b1100 /* vpr */, (outs), (ins GPR:$src),
+                           "vmsr", "\tvpr, $src", []>;
+
+   def VMSR_P0  : MovToVFP<0b1101 /* p0 */, (outs VCCR:$cond), (ins GPR:$src),
+                           "vmsr", "\tp0, $src", []>;
+ }
 }
 
 //===----------------------------------------------------------------------===//
@@ -2549,3 +2604,126 @@ def : VFP3InstAlias<"fconstd${p} $Dd, $val",
                     (FCONSTD DPR:$Dd, vfp_f64imm:$val, pred:$p)>;
 def : VFP3InstAlias<"fconsts${p} $Sd, $val",
                     (FCONSTS SPR:$Sd, vfp_f32imm:$val, pred:$p)>;
+
+def VSCCLRMD : VFPXI<(outs), (ins pred:$p, fp_dreglist_with_vpr:$regs, variable_ops),
+                      AddrModeNone, 4, IndexModeNone, VFPMiscFrm, NoItinerary,
+                      "vscclrm{$p}\t$regs", "", []>, Sched<[]> {
+  bits<13> regs;
+  let Inst{31-23} = 0b111011001;
+  let Inst{22} = regs{12};
+  let Inst{21-16} = 0b011111;
+  let Inst{15-12} = regs{11-8};
+  let Inst{11-8} = 0b1011;
+  let Inst{7-0} = regs{7-0};
+
+  let DecoderMethod = "DecodeVSCCLRM";
+
+  list<Predicate> Predicates = [HasV8_1MMainline, Has8MSecExt];
+}
+
+def VSCCLRMS : VFPXI<(outs), (ins pred:$p, fp_sreglist_with_vpr:$regs, variable_ops),
+                      AddrModeNone, 4, IndexModeNone, VFPMiscFrm, NoItinerary,
+                      "vscclrm{$p}\t$regs", "", []>, Sched<[]> {
+  bits<13> regs;
+  let Inst{31-23} = 0b111011001;
+  let Inst{22} = regs{8};
+  let Inst{21-16} = 0b011111;
+  let Inst{15-12} = regs{12-9};
+  let Inst{11-8} = 0b1010;
+  let Inst{7-0} = regs{7-0};
+
+  let DecoderMethod = "DecodeVSCCLRM";
+
+  list<Predicate> Predicates = [HasV8_1MMainline, Has8MSecExt];
+}
+
+//===----------------------------------------------------------------------===//
+// Load/store VFP System Register from/to memory.
+//
+
+class vfp_vstrldr<bit opc, bit P, bit W, bits<4> SysReg, string sysreg,
+                  dag oops, dag iops, IndexMode im, string Dest, string cstr>
+    : VFPI<oops, iops, AddrModeT2_i7s4, 4, im, VFPLdStFrm, IIC_fpSTAT,
+           !if(opc,"vldr","vstr"), !strconcat("\t", sysreg, ", ", Dest), cstr, []>,
+      Sched<[]> {
+  bits<12> addr;
+  let Inst{27-25} = 0b110;
+  let Inst{24} = P;
+  let Inst{23} = addr{7};
+  let Inst{22} = SysReg{3};
+  let Inst{21} = W;
+  let Inst{20} = opc;
+  let Inst{19-16} = addr{11-8};
+  let Inst{15-13} = SysReg{2-0};
+  let Inst{12-7} = 0b011111;
+  let Inst{6-0} = addr{6-0};
+  list<Predicate> Predicates = [HasFPRegs, HasV8_1MMainline];
+  let mayLoad = opc;
+  let mayStore = !if(opc, 0b0, 0b1);
+  let hasSideEffects = 1;
+}
+
+multiclass vfp_vstrldr_sysreg<bit opc, bits<4> SysReg, string sysreg,
+                              dag oops=(outs), dag iops=(ins)> {
+  def _off :
+    vfp_vstrldr<opc, 1, 0, SysReg, sysreg,
+                oops, !con(iops, (ins t2addrmode_imm7s4:$addr)),
+                IndexModePost, "$addr", "" > {
+    let DecoderMethod = "DecodeVSTRVLDR_SYSREG<false>";
+  }
+
+  def _pre :
+    vfp_vstrldr<opc, 1, 1, SysReg, sysreg,
+                !con(oops, (outs GPRnopc:$wb)),
+                !con(iops, (ins t2addrmode_imm7s4_pre:$addr)),
+                IndexModePre, "$addr!", "$addr.base = $wb"> {
+    let DecoderMethod = "DecodeVSTRVLDR_SYSREG<true>";
+  }
+
+  def _post :
+    vfp_vstrldr<opc, 0, 1, SysReg, sysreg,
+                !con(oops, (outs GPRnopc:$wb)),
+                !con(iops, (ins t2_addr_offset_none:$Rn,
+                                t2am_imm7s4_offset:$addr)),
+                IndexModePost, "$Rn$addr", "$Rn.base = $wb"> {
+   bits<4> Rn;
+   let Inst{19-16} = Rn{3-0};
+   let DecoderMethod = "DecodeVSTRVLDR_SYSREG<true>";
+ }
+}
+
+let Defs = [FPSCR] in {
+  defm VSTR_FPSCR          : vfp_vstrldr_sysreg<0b0,0b0001, "fpscr">;
+  defm VSTR_FPSCR_NZCVQC   : vfp_vstrldr_sysreg<0b0,0b0010, "fpscr_nzcvqc">;
+
+  let Predicates = [HasV8_1MMainline, Has8MSecExt] in {
+    defm VSTR_FPCXTNS      : vfp_vstrldr_sysreg<0b0,0b1110, "fpcxtns">;
+    defm VSTR_FPCXTS       : vfp_vstrldr_sysreg<0b0,0b1111, "fpcxts">;
+  }
+}
+
+let Predicates = [HasV8_1MMainline, HasMVEInt] in {
+  let Uses = [VPR] in {
+    defm VSTR_VPR          : vfp_vstrldr_sysreg<0b0,0b1100, "vpr">;
+  }
+  defm VSTR_P0             : vfp_vstrldr_sysreg<0b0,0b1101, "p0",
+                                                (outs), (ins VCCR:$P0)>;
+}
+
+let Uses = [FPSCR] in {
+  defm VLDR_FPSCR          : vfp_vstrldr_sysreg<0b1,0b0001, "fpscr">;
+  defm VLDR_FPSCR_NZCVQC   : vfp_vstrldr_sysreg<0b1,0b0010, "fpscr_nzcvqc">;
+
+  let Predicates = [HasV8_1MMainline, Has8MSecExt] in {
+    defm VLDR_FPCXTNS      : vfp_vstrldr_sysreg<0b1,0b1110, "fpcxtns">;
+    defm VLDR_FPCXTS       : vfp_vstrldr_sysreg<0b1,0b1111, "fpcxts">;
+  }
+}
+
+let Predicates = [HasV8_1MMainline, HasMVEInt] in {
+  let Defs = [VPR] in {
+    defm VLDR_VPR          : vfp_vstrldr_sysreg<0b1,0b1100, "vpr">;
+  }
+  defm VLDR_P0             : vfp_vstrldr_sysreg<0b1,0b1101, "p0",
+                                                (outs VCCR:$P0), (ins)>;
+}
diff --git a/llvm/lib/Target/ARM/ARMPredicates.td b/llvm/lib/Target/ARM/ARMPredicates.td
index 1df22e60be5..0b6b40de80d 100644
--- a/llvm/lib/Target/ARM/ARMPredicates.td
+++ b/llvm/lib/Target/ARM/ARMPredicates.td
@@ -95,6 +95,8 @@ def HasCRC           : Predicate<"Subtarget->hasCRC()">,
                                  AssemblerPredicate<"FeatureCRC", "crc">;
 def HasRAS           : Predicate<"Subtarget->hasRAS()">,
                                  AssemblerPredicate<"FeatureRAS", "ras">;
+def HasLOB           : Predicate<"Subtarget->hasLOB()">,
+                                 AssemblerPredicate<"FeatureLOB", "lob">;
 def HasFP16          : Predicate<"Subtarget->hasFP16()">,
                                  AssemblerPredicate<"FeatureFP16","half-float conversions">;
 def HasFullFP16      : Predicate<"Subtarget->hasFullFP16()">,
diff --git a/llvm/lib/Target/ARM/ARMRegisterInfo.td b/llvm/lib/Target/ARM/ARMRegisterInfo.td
index d6ac723d1cf..43e6b2aa1b9 100644
--- a/llvm/lib/Target/ARM/ARMRegisterInfo.td
+++ b/llvm/lib/Target/ARM/ARMRegisterInfo.td
@@ -197,6 +197,17 @@ def MVFR0   : ARMReg<7,  "mvfr0">;
 def FPEXC   : ARMReg<8,  "fpexc">;
 def FPINST  : ARMReg<9,  "fpinst">;
 def FPINST2 : ARMReg<10, "fpinst2">;
+// These encodings aren't actual instruction encodings; the real encoding
+// depends on the instruction they are used in. For VPR, 32 was chosen so
+// that it always comes last in spr_reglist_with_vpr.
+def VPR     : ARMReg<32, "vpr">;
+def FPSCR_NZCVQC
+            : ARMReg<2, "fpscr_nzcvqc">;
+def P0      : ARMReg<13, "p0">;
+def FPCXTNS : ARMReg<14, "fpcxtns">;
+def FPCXTS  : ARMReg<15, "fpcxts">;
+
+def ZR  : ARMReg<15, "zr">,  DwarfRegNum<[15]>;
 
 // Register classes.
 //
@@ -245,6 +256,21 @@ def GPRwithAPSR : RegisterClass<"ARM", [i32], 32, (add (sub GPR, PC), APSR_NZCV)
   let DiagnosticString = "operand must be a register in range [r0, r14] or apsr_nzcv";
 }
 
+// GPRs without the PC and SP registers but with APSR. Used by CLRM instruction.
+def GPRwithAPSRnosp : RegisterClass<"ARM", [i32], 32, (add (sequence "R%u", 0, 12), LR, APSR)> {
+  let isAllocatable = 0;
+}
+
+def GPRwithZR : RegisterClass<"ARM", [i32], 32, (add (sequence "R%u", 0, 12),
+                                              LR, ZR)> {
+
+  let AltOrders = [(add LR, GPRwithZR), (trunc GPRwithZR, 8)];
+  let AltOrderSelect = [{
+      return 1 + MF.getSubtarget<ARMSubtarget>().isThumb1Only();
+  }];
+  let DiagnosticString = "operand must be a register in range [r0, r12] or r14 or zr";
+}
+
 // GPRsp - Only the SP is legal. Used by Thumb1 instructions that want the
 // implied SP argument list.
 // FIXME: It would be better to not use this at all and refactor the
@@ -254,6 +280,10 @@ def GPRsp : RegisterClass<"ARM", [i32], 32, (add SP)> {
   let DiagnosticString = "operand must be a register sp";
 }
 
+// GPRlr - Only LR is legal. Used by ARMv8.1-M Low Overhead Loop instructions
+// where LR is the only legal loop counter register.
+def GPRlr : RegisterClass<"ARM", [i32], 32, (add LR)>;
+
 // restricted GPR register class. Many Thumb2 instructions allow the full
 // register range for operands, but have undefined behaviours when PC
 // or SP (R13 or R15) are used. The ARM ISA refers to these operands
@@ -266,6 +296,15 @@ def rGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, SP, PC)> {
   let DiagnosticType = "rGPR";
 }
 
+// t2rGPR : All Thumb 2 registers with the exception of SP and PC.
+def t2rGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, SP, PC)> {
+  let AltOrders = [(add LR, t2rGPR), (trunc t2rGPR, 8)];
+  let AltOrderSelect = [{
+      return 1 + MF.getSubtarget<ARMSubtarget>().isThumb1Only();
+  }];
+  let DiagnosticType = "rGPR";
+}
+
 // Thumb registers are R0-R7 normally. Some instructions can still use
 // the general GPR register class above (MOV, e.g.)
 def tGPR : RegisterClass<"ARM", [i32], 32, (trunc GPR, 8)> {
@@ -298,6 +337,15 @@ def CCR : RegisterClass<"ARM", [i32], 32, (add CPSR)> {
   let isAllocatable = 0;
 }
 
+// MVE Condition code register.
+def VCCR : RegisterClass<"ARM", [i32, v16i1, v8i1, v4i1], 32, (add VPR)> {
+//  let CopyCost = -1;  // Don't allow copying of status registers.
+}
+
+// FPSCR, when the flags at the top of it are used as the input or
+// output to an instruction such as MVE VADC.
+def cl_FPSCR_NZCV : RegisterClass<"ARM", [i32], 32, (add FPSCR_NZCV)>;
+
 // Scalar single precision floating point register class..
 // FIXME: Allocation order changed to s0, s2, ... or s0, s4, ... as a quick hack
 // to avoid partial-write dependencies on D or Q (depending on platform)
@@ -348,6 +396,13 @@ def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32, v4f16], 6
   let DiagnosticType = "DPR";
 }
 
+// Scalar single and double precision floating point and VPR register class.
+// This is only used for parsing; don't use it anywhere else, as the size and
+// types don't match!
+def FPWithVPR : RegisterClass<"ARM", [f32], 32, (add SPR, DPR, VPR)> {
+    let isAllocatable = 0;
+}
+
 // Subset of DPR that are accessible with VFP2 (and so that also have
 // 32-bit SPR subregs).
 def DPR_VFP2 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32, v4f16], 64,
diff --git a/llvm/lib/Target/ARM/ARMScheduleA57.td b/llvm/lib/Target/ARM/ARMScheduleA57.td
index de970ae2e17..a79f3348f33 100644
--- a/llvm/lib/Target/ARM/ARMScheduleA57.td
+++ b/llvm/lib/Target/ARM/ARMScheduleA57.td
@@ -94,6 +94,9 @@ def CortexA57Model : SchedMachineModel {
 
   // FIXME: Remove when all errors have been fixed.
   let FullInstRWOverlapCheck = 0;
+
+  let UnsupportedFeatures = [HasV8_1MMainline, HasMVEInt, HasMVEFloat,
+                             HasFPRegsV8_1M];
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h
index 8123ef6224b..83113efb8db 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -343,6 +343,9 @@ protected:
   /// HasRAS - if true, the processor supports RAS extensions
   bool HasRAS = false;
 
+  /// HasLOB - if true, the processor supports the Low Overhead Branch extension
+  bool HasLOB = false;
+
   /// If true, the instructions "vmov.i32 d0, #0" and "vmov.i32 q0, #0" are
   /// particularly effective at zeroing a VFP register.
   bool HasZeroCycleZeroing = false;
@@ -608,6 +611,7 @@ public:
   bool hasDotProd() const { return HasDotProd; }
   bool hasCRC() const { return HasCRC; }
   bool hasRAS() const { return HasRAS; }
+  bool hasLOB() const { return HasLOB; }
   bool hasVirtualization() const { return HasVirtualization; }
 
   bool useNEONForSinglePrecisionFP() const {
diff --git a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index f8a00f713e4..7451b931c84 100644
--- a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -384,7 +384,7 @@ class ARMAsmParser : public MCTargetAsmParser {
   int tryParseRegister();
   bool tryParseRegisterWithWriteBack(OperandVector &);
   int tryParseShiftRegister(OperandVector &);
-  bool parseRegisterList(OperandVector &);
+  bool parseRegisterList(OperandVector &, bool EnforceOrder = true);
   bool parseMemory(OperandVector &);
   bool parseOperand(OperandVector &, StringRef Mnemonic);
   bool parsePrefix(ARMMCExpr::VariantKind &RefKind);
@@ -479,7 +479,9 @@ class ARMAsmParser : public MCTargetAsmParser {
   bool hasV8MMainline() const {
     return getSTI().getFeatureBits()[ARM::HasV8MMainlineOps];
   }
-
+  bool hasV8_1MMainline() const {
+    return getSTI().getFeatureBits()[ARM::HasV8_1MMainlineOps];
+  }
   bool has8MSecExt() const {
     return getSTI().getFeatureBits()[ARM::Feature8MSecExt];
   }
@@ -660,8 +662,11 @@ class ARMOperand : public MCParsedAsmOperand {
     k_VectorIndex,
     k_Register,
     k_RegisterList,
+    k_RegisterListWithAPSR,
     k_DPRRegisterList,
     k_SPRRegisterList,
+    k_FPSRegisterListWithVPR,
+    k_FPDRegisterListWithVPR,
     k_VectorList,
     k_VectorListAllLanes,
     k_VectorListIndexed,
@@ -862,8 +867,11 @@ public:
   }
 
   const SmallVectorImpl<unsigned> &getRegList() const {
-    assert((Kind == k_RegisterList || Kind == k_DPRRegisterList ||
-            Kind == k_SPRRegisterList) && "Invalid access!");
+    assert((Kind == k_RegisterList || Kind == k_RegisterListWithAPSR ||
+            Kind == k_DPRRegisterList || Kind == k_SPRRegisterList ||
+            Kind == k_FPSRegisterListWithVPR ||
+            Kind == k_FPDRegisterListWithVPR) &&
+           "Invalid access!");
     return Registers;
   }
 
@@ -1027,6 +1035,9 @@ public:
   bool isImm8s4() const {
     return isImmediateS4<-1020, 1020>();
   }
+  bool isImm7s4() const {
+    return isImmediateS4<-508, 508>();
+  }
   bool isImm0_1020s4() const {
     return isImmediateS4<0, 1020>();
   }
@@ -1168,8 +1179,13 @@ public:
 
   bool isReg() const override { return Kind == k_Register; }
   bool isRegList() const { return Kind == k_RegisterList; }
+  bool isRegListWithAPSR() const {
+    return isRegList() || Kind == k_RegisterListWithAPSR;
+  }
   bool isDPRRegList() const { return Kind == k_DPRRegisterList; }
   bool isSPRRegList() const { return Kind == k_SPRRegisterList; }
+  bool isFPSRegListWithVPR() const { return Kind == k_FPSRegisterListWithVPR; }
+  bool isFPDRegListWithVPR() const { return Kind == k_FPDRegisterListWithVPR; }
   bool isToken() const override { return Kind == k_Token; }
   bool isMemBarrierOpt() const { return Kind == k_MemBarrierOpt; }
   bool isInstSyncBarrierOpt() const { return Kind == k_InstSyncBarrierOpt; }
@@ -1250,6 +1266,30 @@ public:
     return Memory.OffsetRegNum == 0 && Memory.OffsetImm == nullptr &&
      (alignOK || Memory.Alignment == Alignment);
   }
+  // True for a memory operand that has no offset of any kind and whose base
+  // register belongs to GPRnopc; alignOK skips the Alignment comparison.
+  bool isMemNoOffsetT2(bool alignOK = false, unsigned Alignment = 0) const {
+    const auto &BaseRC = ARMMCRegisterClasses[ARM::GPRnopcRegClassID];
+    if (!isMem() || !BaseRC.contains(Memory.BaseRegNum))
+      return false;
+    if (Memory.OffsetRegNum != 0 || Memory.OffsetImm != nullptr)
+      return false;
+    if (alignOK)
+      return true;
+    return Memory.Alignment == Alignment;
+  }
+  // True for an offset-free memory operand whose base register is in rGPR;
+  // when alignOK is set the operand's alignment is not compared.
+  bool isMemNoOffsetT2NoSp(bool alignOK = false, unsigned Alignment = 0) const {
+    if (!isMem())
+      return false;
+
+    const MCRegisterClass &BaseRC = ARMMCRegisterClasses[ARM::rGPRRegClassID];
+    const bool HasOffset = Memory.OffsetRegNum != 0 || Memory.OffsetImm;
+    if (HasOffset || !BaseRC.contains(Memory.BaseRegNum))
+      return false;
+    return alignOK || Memory.Alignment == Alignment;
+  }
   bool isMemPCRelImm12() const {
     if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
       return false;
@@ -1521,7 +1561,22 @@ public:
     return (Val >= -1020 && Val <= 1020 && (Val & 3) == 0) ||
            Val == std::numeric_limits<int32_t>::min();
   }
-
+  bool isMemImm7s4Offset() const {
+    // A non-constant immediate is treated as a label reference needing a fixup;
+    // a constant immediate is something else and fails the checks below.
+    if (isImm() && !isa<MCConstantExpr>(getImm()))
+      return true;
+    if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+      return false;
+    const auto &BaseRC = ARMMCRegisterClasses[ARM::GPRnopcRegClassID];
+    if (!BaseRC.contains(Memory.BaseRegNum))
+      return false;
+    // An absent offset is fine; otherwise it must be a multiple of 4 in
+    // [-508, 508], or INT32_MIN, which is how #-0 is represented.
+    if (!Memory.OffsetImm) return true;
+    const int64_t Off = Memory.OffsetImm->getValue();
+    return Off == INT32_MIN || (Off >= -508 && Off <= 508 && (Off & 3) == 0);
+  }
   bool isMemImm0_1020s4Offset() const {
     if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
       return false;
@@ -1993,6 +2048,12 @@ public:
     return (Value % Angle == Remainder && Value <= 270);
   }
 
+  bool isITCondCodeNoAL() const {
+    // Accept any IT condition code other than AL.
+    if (!isITCondCode()) return false;
+    return getCondCode() != ARMCC::AL;
+  }
+
   void addExpr(MCInst &Inst, const MCExpr *Expr) const {
     // Add as immediates when possible.  Null MCExpr = 0.
     if (!Expr)
@@ -2045,6 +2106,11 @@ public:
     Inst.addOperand(MCOperand::createImm(unsigned(getCondCode())));
   }
 
+  void addITCondCodeInvOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::createImm(unsigned(ARMCC::getOppositeCondition(getCondCode()))));
+  }
+
   void addCCOutOperands(MCInst &Inst, unsigned N) const {
     assert(N == 1 && "Invalid number of operands!");
     Inst.addOperand(MCOperand::createReg(getReg()));
@@ -2090,6 +2156,14 @@ public:
       Inst.addOperand(MCOperand::createReg(*I));
   }
 
+  // A register list that may also contain APSR is emitted exactly like any
+  // other register list (one register operand per entry), so forward to the
+  // shared helper just as the DPR/SPR/FP-with-VPR variants do rather than
+  // repeating its loop here.
+  void addRegListWithAPSROperands(MCInst &Inst, unsigned N) const {
+    addRegListOperands(Inst, N);
+  }
+
   void addDPRRegListOperands(MCInst &Inst, unsigned N) const {
     addRegListOperands(Inst, N);
   }
@@ -2098,6 +2172,14 @@ public:
     addRegListOperands(Inst, N);
   }
 
+  void addFPSRegListWithVPROperands(MCInst &Inst, unsigned N) const {
+    addRegListOperands(Inst, N);
+  }
+
+  void addFPDRegListWithVPROperands(MCInst &Inst, unsigned N) const {
+    addRegListOperands(Inst, N);
+  }
+
   void addRotImmOperands(MCInst &Inst, unsigned N) const {
     assert(N == 1 && "Invalid number of operands!");
     // Encoded as val>>3. The printer handles display as 8, 16, 24.
@@ -2185,6 +2267,14 @@ public:
     Inst.addOperand(MCOperand::createImm(CE->getValue()));
   }
 
+  void addImm7s4Operands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    // FIXME: We really want to scale the value here, but the VSTR/VLDR_VSYSR
+    // instructions don't encode operands that way yet.
+    const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
+    Inst.addOperand(MCOperand::createImm(CE->getValue()));
+  }
+
   void addImm0_1020s4Operands(MCInst &Inst, unsigned N) const {
     assert(N == 1 && "Invalid number of operands!");
     // The immediate is scaled by four in the encoding and is stored
@@ -2319,6 +2409,11 @@ public:
     Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum));
   }
 
+  void addMemNoOffsetT2Operands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum));
+  }
+
   void addMemPCRelImm12Operands(MCInst &Inst, unsigned N) const {
     assert(N == 1 && "Invalid number of operands!");
     int32_t Imm = Memory.OffsetImm->getValue();
@@ -2536,6 +2631,22 @@ public:
     Inst.addOperand(MCOperand::createImm(Val));
   }
 
+  void addMemImm7s4OffsetOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 2 && "Invalid number of operands!");
+    // An immediate operand here is a label reference that needs a fixup:
+    // emit the expression in the base slot and a zero offset.
+    if (isImm()) {
+      Inst.addOperand(MCOperand::createExpr(getImm()));
+      Inst.addOperand(MCOperand::createImm(0));
+      return;
+    }
+
+    // Otherwise emit base register + offset; a missing offset counts as 0.
+    Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum));
+    const int64_t Off = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0;
+    Inst.addOperand(MCOperand::createImm(Off));
+  }
+
   void addMemImm0_1020s4OffsetOperands(MCInst &Inst, unsigned N) const {
     assert(N == 2 && "Invalid number of operands!");
     // The lower two bits are always zero and as such are not encoded.
@@ -3045,19 +3156,31 @@ public:
     assert(Regs.size() > 0 && "RegList contains no registers?");
     KindTy Kind = k_RegisterList;
 
-    if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Regs.front().second))
-      Kind = k_DPRRegisterList;
-    else if (ARMMCRegisterClasses[ARM::SPRRegClassID].
-             contains(Regs.front().second))
-      Kind = k_SPRRegisterList;
+    if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(
+            Regs.front().second)) {
+      if (Regs.back().second == ARM::VPR)
+        Kind = k_FPDRegisterListWithVPR;
+      else
+        Kind = k_DPRRegisterList;
+    } else if (ARMMCRegisterClasses[ARM::SPRRegClassID].contains(
+                   Regs.front().second)) {
+      if (Regs.back().second == ARM::VPR)
+        Kind = k_FPSRegisterListWithVPR;
+      else
+        Kind = k_SPRRegisterList;
+    }
 
     // Sort based on the register encoding values.
     array_pod_sort(Regs.begin(), Regs.end());
 
+    if (Kind == k_RegisterList && Regs.back().second == ARM::APSR)
+      Kind = k_RegisterListWithAPSR;
+
     auto Op = make_unique<ARMOperand>(Kind);
     for (SmallVectorImpl<std::pair<unsigned, unsigned>>::const_iterator
            I = Regs.begin(), E = Regs.end(); I != E; ++I)
       Op->Registers.push_back(I->second);
+
     Op->StartLoc = StartLoc;
     Op->EndLoc = EndLoc;
     return Op;
@@ -3325,8 +3448,11 @@ void ARMOperand::print(raw_ostream &OS) const {
        << ", width: " << Bitfield.Width << ">";
     break;
   case k_RegisterList:
+  case k_RegisterListWithAPSR:
   case k_DPRRegisterList:
-  case k_SPRRegisterList: {
+  case k_SPRRegisterList:
+  case k_FPSRegisterListWithVPR:
+  case k_FPDRegisterListWithVPR: {
     OS << "<register_list ";
 
     const SmallVectorImpl<unsigned> &RegList = getRegList();
@@ -3753,7 +3879,8 @@ static unsigned getNextRegister(unsigned Reg) {
 }
 
 /// Parse a register list.
-bool ARMAsmParser::parseRegisterList(OperandVector &Operands) {
+bool ARMAsmParser::parseRegisterList(OperandVector &Operands,
+                                     bool EnforceOrder) {
   MCAsmParser &Parser = getParser();
   if (Parser.getTok().isNot(AsmToken::LCurly))
     return TokError("Token is not a Left Curly Brace");
@@ -3786,6 +3913,8 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands) {
     RC = &ARMMCRegisterClasses[ARM::DPRRegClassID];
   else if (ARMMCRegisterClasses[ARM::SPRRegClassID].contains(Reg))
     RC = &ARMMCRegisterClasses[ARM::SPRRegClassID];
+  else if (ARMMCRegisterClasses[ARM::GPRwithAPSRnospRegClassID].contains(Reg))
+    RC = &ARMMCRegisterClasses[ARM::GPRwithAPSRnospRegClassID];
   else
     return Error(RegLoc, "invalid register in register list");
 
@@ -3839,14 +3968,32 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands) {
       Reg = getDRegFromQReg(Reg);
       isQReg = true;
     }
+    if (!RC->contains(Reg) &&
+        RC->getID() == ARMMCRegisterClasses[ARM::GPRRegClassID].getID() &&
+        ARMMCRegisterClasses[ARM::GPRwithAPSRnospRegClassID].contains(Reg)) {
+      // Switch register class: GPRwithAPSRnosp is GPR minus SP and PC, plus
+      // APSR, so it can hold the list once APSR has been seen.
+      RC = &ARMMCRegisterClasses[ARM::GPRwithAPSRnospRegClassID];
+    }
+    if (Reg == ARM::VPR && (RC == &ARMMCRegisterClasses[ARM::SPRRegClassID] ||
+                            RC == &ARMMCRegisterClasses[ARM::DPRRegClassID])) {
+      RC = &ARMMCRegisterClasses[ARM::FPWithVPRRegClassID];
+      EReg = MRI->getEncodingValue(Reg);
+      Registers.push_back(std::pair<unsigned, unsigned>(EReg, Reg));
+      continue;
+    }
     // The register must be in the same register class as the first.
     if (!RC->contains(Reg))
       return Error(RegLoc, "invalid register in register list");
-    // List must be monotonically increasing.
-    if (MRI->getEncodingValue(Reg) < MRI->getEncodingValue(OldReg)) {
+    // In most cases, the list must be monotonically increasing. An
+    // exception is CLRM, which is order-independent anyway, so
+    // there's no potential for confusion if you write clrm {r2,r1}
+    // instead of clrm {r1,r2}.
+    if (EnforceOrder &&
+        MRI->getEncodingValue(Reg) < MRI->getEncodingValue(OldReg)) {
       if (ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg))
         Warning(RegLoc, "register list not in ascending order");
-      else
+      else if (!ARMMCRegisterClasses[ARM::GPRwithAPSRnospRegClassID].contains(Reg))
         return Error(RegLoc, "register list not in ascending order");
     }
     if (MRI->getEncodingValue(Reg) == MRI->getEncodingValue(OldReg)) {
@@ -3856,6 +4003,7 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands) {
     }
     // VFP register lists must also be contiguous.
     if (RC != &ARMMCRegisterClasses[ARM::GPRRegClassID] &&
+        RC != &ARMMCRegisterClasses[ARM::GPRwithAPSRnospRegClassID] &&
         Reg != OldReg + 1)
       return Error(RegLoc, "non-contiguous register range");
     EReg = MRI->getEncodingValue(Reg);
@@ -5464,7 +5612,7 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
   case AsmToken::LBrac:
     return parseMemory(Operands);
   case AsmToken::LCurly:
-    return parseRegisterList(Operands);
+    return parseRegisterList(Operands, !Mnemonic.startswith("clr"));
   case AsmToken::Dollar:
   case AsmToken::Hash:
     // #42 -> immediate.
@@ -5653,7 +5801,12 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
       Mnemonic == "bxns"  || Mnemonic == "blxns" ||
       Mnemonic == "vudot" || Mnemonic == "vsdot" ||
       Mnemonic == "vcmla" || Mnemonic == "vcadd" ||
-      Mnemonic == "vfmal" || Mnemonic == "vfmsl")
+      Mnemonic == "vfmal" || Mnemonic == "vfmsl" ||
+      Mnemonic == "wls" || Mnemonic == "le" || Mnemonic == "dls" ||
+      Mnemonic == "csel" || Mnemonic == "csinc" ||
+      Mnemonic == "csinv" || Mnemonic == "csneg" || Mnemonic == "cinc" ||
+      Mnemonic == "cinv" || Mnemonic == "cneg" || Mnemonic == "cset" ||
+      Mnemonic == "csetm")
     return Mnemonic;
 
   // First, split out any predication code. Ignore mnemonics we know aren't
@@ -5746,7 +5899,12 @@ void ARMAsmParser::getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst,
       Mnemonic == "vcmla" || Mnemonic == "vcadd" ||
       Mnemonic == "vfmal" || Mnemonic == "vfmsl" ||
       Mnemonic == "sb"    || Mnemonic == "ssbb"  ||
-      Mnemonic == "pssbb") {
+      Mnemonic == "pssbb" ||
+      Mnemonic == "bfcsel" || Mnemonic == "wls" ||
+      Mnemonic == "dls" || Mnemonic == "le" || Mnemonic == "csel" ||
+      Mnemonic == "csinc" || Mnemonic == "csinv" || Mnemonic == "csneg" ||
+      Mnemonic == "cinc" || Mnemonic == "cinv" || Mnemonic == "cneg" ||
+      Mnemonic == "cset" || Mnemonic == "csetm") {
     // These mnemonics are never predicable
     CanAcceptPredicationCode = false;
   } else if (!isThumb()) {
@@ -6478,7 +6636,8 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
   } else if (isThumbTwo() && MCID.isPredicable() &&
              Inst.getOperand(MCID.findFirstPredOperandIdx()).getImm() !=
              ARMCC::AL && Inst.getOpcode() != ARM::tBcc &&
-             Inst.getOpcode() != ARM::t2Bcc) {
+             Inst.getOpcode() != ARM::t2Bcc &&
+             Inst.getOpcode() != ARM::t2BFic) {
     return Error(Loc, "predicated instructions must be in IT block");
   } else if (!isThumb() && !useImplicitITARM() && MCID.isPredicable() &&
              Inst.getOperand(MCID.findFirstPredOperandIdx()).getImm() !=
@@ -6876,6 +7035,77 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
                                                "code specified");
     break;
   }
+  case ARM::t2WLS: {
+    int idx = Opcode == ARM::t2WLS ? 3 : 4;
+    if (!static_cast<ARMOperand &>(*Operands[idx]).isUnsignedOffset<11, 1>())
+      return Error(Operands[idx]->getStartLoc(),
+                   "loop end is out of range or not a positive multiple of 2");
+    break;
+  }
+  case ARM::t2LEUpdate: {
+    if (Inst.getOperand(2).isImm() &&
+        !(Inst.getOperand(2).getImm() < 0 &&
+          Inst.getOperand(2).getImm() >= -4094 &&
+          (Inst.getOperand(2).getImm() & 1) == 0))
+      return Error(Operands[2]->getStartLoc(),
+                   "loop start is out of range or not a negative multiple of 2");
+    break;
+  }
+  case ARM::t2BFi:
+  case ARM::t2BFr:
+  case ARM::t2BFLi:
+  case ARM::t2BFLr: {
+    if (!static_cast<ARMOperand &>(*Operands[2]).isUnsignedOffset<4, 1>() ||
+        (Inst.getOperand(0).isImm() && Inst.getOperand(0).getImm() == 0))
+      return Error(Operands[2]->getStartLoc(),
+                   "branch location out of range or not a multiple of 2");
+
+    if (Opcode == ARM::t2BFi) {
+      if (!static_cast<ARMOperand &>(*Operands[3]).isSignedOffset<16, 1>())
+        return Error(Operands[3]->getStartLoc(),
+                     "branch target out of range or not a multiple of 2");
+    } else if (Opcode == ARM::t2BFLi) {
+      if (!static_cast<ARMOperand &>(*Operands[3]).isSignedOffset<18, 1>())
+        return Error(Operands[3]->getStartLoc(),
+                     "branch target out of range or not a multiple of 2");
+    }
+    break;
+  }
+  case ARM::t2BFic: {
+    if (!static_cast<ARMOperand &>(*Operands[1]).isUnsignedOffset<4, 1>() ||
+        (Inst.getOperand(0).isImm() && Inst.getOperand(0).getImm() == 0))
+      return Error(Operands[1]->getStartLoc(),
+                   "branch location out of range or not a multiple of 2");
+
+    if (!static_cast<ARMOperand &>(*Operands[2]).isSignedOffset<16, 1>())
+      return Error(Operands[2]->getStartLoc(),
+                   "branch target out of range or not a multiple of 2");
+
+    assert(Inst.getOperand(0).isImm() == Inst.getOperand(2).isImm() &&
+           "branch location and else branch target should either both be "
+           "immediates or both labels");
+
+    if (Inst.getOperand(0).isImm() && Inst.getOperand(2).isImm()) {
+      int Diff = Inst.getOperand(2).getImm() - Inst.getOperand(0).getImm();
+      if (Diff != 4 && Diff != 2)
+        return Error(
+            Operands[3]->getStartLoc(),
+            "else branch target must be 2 or 4 greater than the branch location");
+    }
+    break;
+  }
+  case ARM::t2CLRM: {
+    for (unsigned i = 2; i < Inst.getNumOperands(); i++) {
+      if (Inst.getOperand(i).isReg() &&
+          !ARMMCRegisterClasses[ARM::GPRwithAPSRnospRegClassID].contains(
+              Inst.getOperand(i).getReg())) {
+        return Error(Operands[2]->getStartLoc(),
+                     "invalid register in register list. Valid registers are "
+                     "r0-r12, lr/r14 and APSR.");
+      }
+    }
+    break;
+  }
   case ARM::DSB:
   case ARM::t2DSB: {
 
@@ -9169,11 +9399,29 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
       return Match_RequiresV8;
   }
 
-  // Use of SP for VMRS/VMSR is only allowed in ARM mode with the exception of
-  // ARMv8-A.
-  if ((Inst.getOpcode() == ARM::VMRS || Inst.getOpcode() == ARM::VMSR) &&
-      Inst.getOperand(0).getReg() == ARM::SP && (isThumb() && !hasV8Ops()))
-    return Match_InvalidOperand;
+  switch (Inst.getOpcode()) {
+  case ARM::VMRS:
+  case ARM::VMSR:
+  case ARM::VMRS_FPCXTS:
+  case ARM::VMRS_FPCXTNS:
+  case ARM::VMSR_FPCXTS:
+  case ARM::VMSR_FPCXTNS:
+  case ARM::VMRS_FPSCR_NZCVQC:
+  case ARM::VMSR_FPSCR_NZCVQC:
+  case ARM::FMSTAT:
+  case ARM::VMRS_VPR:
+  case ARM::VMRS_P0:
+  case ARM::VMSR_VPR:
+  case ARM::VMSR_P0:
+    // Use of SP for VMRS/VMSR is only allowed in ARM mode with the exception of
+    // ARMv8-A.
+    if (Inst.getOperand(0).isReg() && Inst.getOperand(0).getReg() == ARM::SP &&
+        (isThumb() && !hasV8Ops()))
+      return Match_InvalidOperand;
+    break;
+  default:
+    break;
+  }
 
   for (unsigned I = 0; I < MCID.NumOperands; ++I)
     if (MCID.OpInfo[I].RegClass == ARM::rGPRRegClassID) {
@@ -10636,6 +10884,7 @@ bool ARMAsmParser::parseDirectiveArchExtension(SMLoc L) {
     { ARM::AEK_FP16, {Feature_HasV8_2aBit},
       {ARM::FeatureFPARMv8, ARM::FeatureFullFP16} },
     { ARM::AEK_RAS, {Feature_HasV8Bit}, {ARM::FeatureRAS} },
+    { ARM::AEK_LOB, {Feature_HasV8_1MMainlineBit}, {ARM::FeatureLOB} },
     // FIXME: Unsupported extensions.
     { ARM::AEK_OS, {}, {} },
     { ARM::AEK_IWMMXT, {}, {} },
diff --git a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 6948f7af469..df6a9a15099 100644
--- a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -144,12 +144,17 @@ static bool Check(DecodeStatus &Out, DecodeStatus In) {
 // Definitions are further down.
 static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo,
                                    uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeCLRMGPRRegisterClass(MCInst &Inst, unsigned RegNo,
+                                   uint64_t Address, const void *Decoder);
 static DecodeStatus DecodeGPRnopcRegisterClass(MCInst &Inst,
                                                unsigned RegNo, uint64_t Address,
                                                const void *Decoder);
 static DecodeStatus DecodeGPRwithAPSRRegisterClass(MCInst &Inst,
                                                unsigned RegNo, uint64_t Address,
                                                const void *Decoder);
+static DecodeStatus DecodeGPRwithZRRegisterClass(MCInst &Inst,
+                                               unsigned RegNo, uint64_t Address,
+                                               const void *Decoder);
 static DecodeStatus DecodetGPRRegisterClass(MCInst &Inst, unsigned RegNo,
                                    uint64_t Address, const void *Decoder);
 static DecodeStatus DecodetcGPRRegisterClass(MCInst &Inst, unsigned RegNo,
@@ -361,8 +366,13 @@ static DecodeStatus DecodeT2LoadLabel(MCInst &Inst, unsigned Insn,
                                uint64_t Address, const void* Decoder);
 static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val,
                                uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2Imm7S4(MCInst &Inst, unsigned Val,
+                               uint64_t Address, const void *Decoder);
 static DecodeStatus DecodeT2AddrModeImm8s4(MCInst &Inst, unsigned Val,
                                uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2AddrModeImm7s4(MCInst &Inst, unsigned Val,
+                                           uint64_t Address,
+                                           const void *Decoder);
 static DecodeStatus DecodeT2AddrModeImm0_1020s4(MCInst &Inst,unsigned Val,
                                uint64_t Address, const void *Decoder);
 static DecodeStatus DecodeT2Imm8(MCInst &Inst, unsigned Val,
@@ -411,6 +421,23 @@ static DecodeStatus DecoderForMRRC2AndMCRR2(MCInst &Inst, unsigned Val,
 static DecodeStatus DecodeForVMRSandVMSR(MCInst &Inst, unsigned Val,
                                          uint64_t Address, const void *Decoder);
 
+template <bool isSigned, bool isNeg, int size>
+static DecodeStatus DecodeBFLabelOperand(MCInst &Inst, unsigned val,
+                                         uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeBFAfterTargetOperand(MCInst &Inst, unsigned val,
+                                               uint64_t Address,
+                                               const void *Decoder);
+static DecodeStatus DecodePredNoALOperand(MCInst &Inst, unsigned Val,
+                                          uint64_t Address,
+                                          const void *Decoder);
+static DecodeStatus DecodeLOLoop(MCInst &Inst, unsigned Insn, uint64_t Address,
+                                 const void *Decoder);
+static DecodeStatus DecodeVSCCLRM(MCInst &Inst, unsigned Insn, uint64_t Address,
+                                  const void *Decoder);
+template<bool Writeback>
+static DecodeStatus DecodeVSTRVLDR_SYSREG(MCInst &Inst, unsigned Insn,
+                                          uint64_t Address,
+                                          const void *Decoder);
 #include "ARMGenDisassemblerTables.inc"
 
 static MCDisassembler *createARMDisassembler(const Target &T,
@@ -604,6 +631,10 @@ ThumbDisassembler::AddThumbPredicate(MCInst &MI) const {
     case ARM::t2CPS3p:
     case ARM::t2CPS2p:
     case ARM::t2CPS1p:
+    case ARM::t2CSEL:
+    case ARM::t2CSINC:
+    case ARM::t2CSINV:
+    case ARM::t2CSNEG:
     case ARM::tMOVSr:
     case ARM::tSETEND:
       // Some instructions (mostly conditional branches) are not
@@ -892,6 +923,13 @@ static const uint16_t GPRDecoderTable[] = {
   ARM::R12, ARM::SP, ARM::LR, ARM::PC
 };
 
+// Register named by each bit of a CLRM register list. Bit 13 has no register
+// (0 marks it invalid) and bit 15 selects APSR.
+static const uint16_t CLRMGPRDecoderTable[] = {
+  ARM::R0, ARM::R1, ARM::R2,  ARM::R3,  ARM::R4,  ARM::R5, ARM::R6, ARM::R7,
+  ARM::R8, ARM::R9, ARM::R10, ARM::R11, ARM::R12, 0,       ARM::LR, ARM::APSR
+};
+
 static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo,
                                    uint64_t Address, const void *Decoder) {
   if (RegNo > 15)
@@ -902,6 +940,20 @@ static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo,
   return MCDisassembler::Success;
 }
 
+// Map a CLRM register-list bit position to its register operand. Positions
+// above 15, and entries that CLRMGPRDecoderTable leaves as 0, fail to
+// decode and add no operand.
+static DecodeStatus DecodeCLRMGPRRegisterClass(MCInst &Inst, unsigned RegNo,
+                                               uint64_t Address,
+                                               const void *Decoder) {
+  const unsigned Reg = RegNo <= 15 ? CLRMGPRDecoderTable[RegNo] : 0;
+  if (Reg == 0)
+    return MCDisassembler::Fail;
+
+  Inst.addOperand(MCOperand::createReg(Reg));
+  return MCDisassembler::Success;
+}
+
 static DecodeStatus
 DecodeGPRnopcRegisterClass(MCInst &Inst, unsigned RegNo,
                            uint64_t Address, const void *Decoder) {
@@ -930,6 +982,24 @@ DecodeGPRwithAPSRRegisterClass(MCInst &Inst, unsigned RegNo,
   return S;
 }
 
+// Decode a GPRwithZR operand: encoding 15 denotes ZR rather than PC, and
+// encoding 13 (SP) decodes as usual but is flagged as a soft failure.
+static DecodeStatus
+DecodeGPRwithZRRegisterClass(MCInst &Inst, unsigned RegNo,
+                             uint64_t Address, const void *Decoder) {
+  // ZR occupies the slot that the plain GPR decoder would map to PC.
+  if (RegNo == 15) {
+    Inst.addOperand(MCOperand::createReg(ARM::ZR));
+    return MCDisassembler::Success;
+  }
+
+  // Everything else goes through the GPR decoder; SP starts out SoftFail.
+  DecodeStatus Status =
+      RegNo == 13 ? MCDisassembler::SoftFail : MCDisassembler::Success;
+  Check(Status, DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder));
+  return Status;
+}
+
 static DecodeStatus DecodetGPRRegisterClass(MCInst &Inst, unsigned RegNo,
                                    uint64_t Address, const void *Decoder) {
   if (RegNo > 7)
@@ -1239,6 +1309,7 @@ static DecodeStatus DecodeRegListOperand(MCInst &Inst, unsigned Val,
 
   bool NeedDisjointWriteback = false;
   unsigned WritebackReg = 0;
+  bool CLRM = false;
   switch (Inst.getOpcode()) {
   default:
     break;
@@ -1253,17 +1324,26 @@ static DecodeStatus DecodeRegListOperand(MCInst &Inst, unsigned Val,
     NeedDisjointWriteback = true;
     WritebackReg = Inst.getOperand(0).getReg();
     break;
+  case ARM::t2CLRM:
+    CLRM = true;
+    break;
   }
 
   // Empty register lists are not allowed.
   if (Val == 0) return MCDisassembler::Fail;
   for (unsigned i = 0; i < 16; ++i) {
     if (Val & (1 << i)) {
-      if (!Check(S, DecodeGPRRegisterClass(Inst, i, Address, Decoder)))
-        return MCDisassembler::Fail;
-      // Writeback not allowed if Rn is in the target list.
-      if (NeedDisjointWriteback && WritebackReg == Inst.end()[-1].getReg())
-        Check(S, MCDisassembler::SoftFail);
+      if (CLRM) {
+        if (!Check(S, DecodeCLRMGPRRegisterClass(Inst, i, Address, Decoder))) {
+          return MCDisassembler::Fail;
+        }
+      } else {
+        if (!Check(S, DecodeGPRRegisterClass(Inst, i, Address, Decoder)))
+          return MCDisassembler::Fail;
+        // Writeback not allowed if Rn is in the target list.
+        if (NeedDisjointWriteback && WritebackReg == Inst.end()[-1].getReg())
+          Check(S, MCDisassembler::SoftFail);
+      }
     }
   }
 
@@ -1356,6 +1436,8 @@ static DecodeStatus DecodeCopMemInstruction(MCInst &Inst, unsigned Insn,
   unsigned imm = fieldFromInstruction(Insn, 0, 8);
   unsigned Rn = fieldFromInstruction(Insn, 16, 4);
   unsigned U = fieldFromInstruction(Insn, 23, 1);
+  const FeatureBitset &featureBits =
+    ((const MCDisassembler*)Decoder)->getSubtargetInfo().getFeatureBits();
 
   switch (Inst.getOpcode()) {
     case ARM::LDC_OFFSET:
@@ -1390,15 +1472,42 @@ static DecodeStatus DecodeCopMemInstruction(MCInst &Inst, unsigned Insn,
     case ARM::t2STCL_PRE:
     case ARM::t2STCL_POST:
     case ARM::t2STCL_OPTION:
-      if (coproc == 0xA || coproc == 0xB)
+    case ARM::t2LDC2_OFFSET:
+    case ARM::t2LDC2L_OFFSET:
+    case ARM::t2LDC2_PRE:
+    case ARM::t2LDC2L_PRE:
+    case ARM::t2STC2_OFFSET:
+    case ARM::t2STC2L_OFFSET:
+    case ARM::t2STC2_PRE:
+    case ARM::t2STC2L_PRE:
+    case ARM::LDC2_OFFSET:
+    case ARM::LDC2L_OFFSET:
+    case ARM::LDC2_PRE:
+    case ARM::LDC2L_PRE:
+    case ARM::STC2_OFFSET:
+    case ARM::STC2L_OFFSET:
+    case ARM::STC2_PRE:
+    case ARM::STC2L_PRE:
+    case ARM::t2LDC2_OPTION:
+    case ARM::t2STC2_OPTION:
+    case ARM::t2LDC2_POST:
+    case ARM::t2LDC2L_POST:
+    case ARM::t2STC2_POST:
+    case ARM::t2STC2L_POST:
+    case ARM::LDC2_POST:
+    case ARM::LDC2L_POST:
+    case ARM::STC2_POST:
+    case ARM::STC2L_POST:
+      if (coproc == 0xA || coproc == 0xB ||
+          (featureBits[ARM::HasV8_1MMainlineOps] &&
+           (coproc == 0x8 || coproc == 0x9 || coproc == 0xA || coproc == 0xB ||
+            coproc == 0xE || coproc == 0xF)))
         return MCDisassembler::Fail;
       break;
     default:
       break;
   }
 
-  const FeatureBitset &featureBits =
-    ((const MCDisassembler*)Decoder)->getSubtargetInfo().getFeatureBits();
   if (featureBits[ARM::HasV8Ops] && (coproc != 14))
     return MCDisassembler::Fail;
 
@@ -3735,6 +3844,21 @@ static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val,
   return MCDisassembler::Success;
 }
 
+/// Decode an 8-bit field made of a 7-bit magnitude (bits 6-0) and an add flag
+/// (bit 7) into a signed immediate scaled by 4. An all-zero field is emitted
+/// as INT32_MIN. Always returns Success.
+static DecodeStatus DecodeT2Imm7S4(MCInst &Inst, unsigned Val, uint64_t Address,
+                                   const void *Decoder) {
+  if (Val == 0) {
+    // NOTE(review): INT32_MIN is presumably the marker for '#-0' - confirm
+    // against the printer.
+    Inst.addOperand(MCOperand::createImm(INT32_MIN));
+    return MCDisassembler::Success;
+  }
+
+  const int Magnitude = Val & 0x7F;
+  const bool IsAdd = (Val & 0x80) != 0;
+  // A clear add flag means the magnitude is subtracted.
+  const int Scaled = (IsAdd ? Magnitude : -Magnitude) * 4;
+  Inst.addOperand(MCOperand::createImm(Scaled));
+  return MCDisassembler::Success;
+}
+
 static DecodeStatus DecodeT2AddrModeImm8s4(MCInst &Inst, unsigned Val,
                                    uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
@@ -3750,6 +3874,22 @@ static DecodeStatus DecodeT2AddrModeImm8s4(MCInst &Inst, unsigned Val,
   return S;
 }
 
+/// Decode a 12-bit address field: bits 11-8 select the base register
+/// (decoded with DecodeGPRnopcRegisterClass) and bits 7-0 are handed to
+/// DecodeT2Imm7S4. The register operand is added before the immediate.
+static DecodeStatus DecodeT2AddrModeImm7s4(MCInst &Inst, unsigned Val,
+                                           uint64_t Address,
+                                           const void *Decoder) {
+  DecodeStatus S = MCDisassembler::Success;
+
+  const unsigned BaseReg = fieldFromInstruction(Val, 8, 4);
+  if (!Check(S, DecodeGPRnopcRegisterClass(Inst, BaseReg, Address, Decoder)))
+    return MCDisassembler::Fail;
+
+  const unsigned Offset = fieldFromInstruction(Val, 0, 8);
+  if (!Check(S, DecodeT2Imm7S4(Inst, Offset, Address, Decoder)))
+    return MCDisassembler::Fail;
+
+  return S;
+}
+
 static DecodeStatus DecodeT2AddrModeImm0_1020s4(MCInst &Inst,unsigned Val,
                                    uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
@@ -4005,6 +4145,13 @@ static DecodeStatus DecodeCoprocessor(MCInst &Inst, unsigned Val,
   if (featureBits[ARM::HasV8Ops] && !(Val == 14 || Val == 15))
     return MCDisassembler::Fail;
 
+  // For Armv8.1-M Mainline coprocessors matching 100x,101x or 111x should
+  // decode as VFP/MVE instructions.
+  if (featureBits[ARM::HasV8_1MMainlineOps] &&
+      ((Val & 0xE) == 0x8 || (Val & 0xE) == 0xA ||
+       (Val & 0xE) == 0xE))
+    return MCDisassembler::Fail;
+
   Inst.addOperand(MCOperand::createImm(Val));
   return MCDisassembler::Success;
 }
@@ -5370,14 +5517,37 @@ static DecodeStatus DecodeForVMRSandVMSR(MCInst &Inst, unsigned Val,
       ((const MCDisassembler *)Decoder)->getSubtargetInfo().getFeatureBits();
   DecodeStatus S = MCDisassembler::Success;
 
-  unsigned Rt = fieldFromInstruction(Val, 12, 4);
+  // Add explicit operand for the destination sysreg, for cases where
+  // we have to model it for code generation purposes.
+  switch (Inst.getOpcode()) {
+  case ARM::VMSR_FPSCR_NZCVQC:
+    Inst.addOperand(MCOperand::createReg(ARM::FPSCR_NZCV));
+    break;
+  case ARM::VMSR_P0:
+    Inst.addOperand(MCOperand::createReg(ARM::VPR));
+    break;
+  }
 
-  if (featureBits[ARM::ModeThumb] && !featureBits[ARM::HasV8Ops]) {
-    if (Rt == 13 || Rt == 15)
-      S = MCDisassembler::SoftFail;
-    Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder));
-  } else
-    Check(S, DecodeGPRnopcRegisterClass(Inst, Rt, Address, Decoder));
+  if (Inst.getOpcode() != ARM::FMSTAT) {
+    unsigned Rt = fieldFromInstruction(Val, 12, 4);
+
+    if (featureBits[ARM::ModeThumb] && !featureBits[ARM::HasV8Ops]) {
+      if (Rt == 13 || Rt == 15)
+        S = MCDisassembler::SoftFail;
+      Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder));
+    } else
+      Check(S, DecodeGPRnopcRegisterClass(Inst, Rt, Address, Decoder));
+  }
+
+  // Add explicit operand for the source sysreg, similarly to above.
+  switch (Inst.getOpcode()) {
+  case ARM::VMRS_FPSCR_NZCVQC:
+    Inst.addOperand(MCOperand::createReg(ARM::FPSCR_NZCV));
+    break;
+  case ARM::VMRS_P0:
+    Inst.addOperand(MCOperand::createReg(ARM::VPR));
+    break;
+  }
 
   if (featureBits[ARM::ModeThumb]) {
     Inst.addOperand(MCOperand::createImm(ARMCC::AL));
@@ -5390,3 +5560,169 @@ static DecodeStatus DecodeForVMRSandVMSR(MCInst &Inst, unsigned Val,
 
   return S;
 }
+
+/// Decode a label field. Val is doubled and, when isSigned, sign-extended
+/// from (size + 1) bits. The result is emitted as a symbolic operand when
+/// tryAddingSymbolicOperand succeeds, otherwise as an immediate (negated when
+/// isNeg). A zero field yields SoftFail; any other value yields Success.
+template <bool isSigned, bool isNeg, int size>
+static DecodeStatus DecodeBFLabelOperand(MCInst &Inst, unsigned Val,
+                                         uint64_t Address,
+                                         const void *Decoder) {
+  // Kept as two separate assignments (not a ?: expression) so that each
+  // value is converted to uint64_t on its own.
+  uint64_t DecVal = (Val << 1);
+  if (isSigned)
+    DecVal = SignExtend32<size + 1>(Val << 1);
+
+  const bool IsSymbolic = tryAddingSymbolicOperand(
+      Address, Address + DecVal + 4, true, 4, Inst, Decoder);
+  if (!IsSymbolic)
+    Inst.addOperand(MCOperand::createImm(isNeg ? -DecVal : DecVal));
+
+  return Val == 0 ? MCDisassembler::SoftFail : MCDisassembler::Success;
+}
+
+/// Decode an operand whose value is the immediate already held in operand 0
+/// of Inst plus (2 << Val). It is emitted as a symbolic operand when
+/// tryAddingSymbolicOperand succeeds, otherwise as an immediate. Always
+/// returns Success.
+static DecodeStatus DecodeBFAfterTargetOperand(MCInst &Inst, unsigned Val,
+                                               uint64_t Address,
+                                               const void *Decoder) {
+  const uint64_t LocImm = Inst.getOperand(0).getImm();
+  // The sum is narrowed to unsigned before use.
+  const unsigned Target = LocImm + (2 << Val);
+
+  const bool IsSymbolic = tryAddingSymbolicOperand(
+      Address, Address + Target + 4, true, 4, Inst, Decoder);
+  if (!IsSymbolic)
+    Inst.addOperand(MCOperand::createImm(Target));
+  return MCDisassembler::Success;
+}
+
+/// Decode a condition-code field that must be strictly below ARMCC::AL.
+/// Accepted values are appended to Inst as an immediate; AL and anything
+/// above it (which also covers the non-condition NV) fail to decode.
+static DecodeStatus DecodePredNoALOperand(MCInst &Inst, unsigned Val,
+                                          uint64_t Address,
+                                          const void *Decoder) {
+  if (Val < ARMCC::AL) {
+    Inst.addOperand(MCOperand::createImm(Val));
+    return MCDisassembler::Success;
+  }
+  return MCDisassembler::Fail;
+}
+
+/// Decode the operands of t2LE, t2LEUpdate, t2WLS and t2DLS.
+///  - t2LEUpdate adds LR twice and then decodes the label like t2LE.
+///  - t2LE decodes the label as an unsigned, negated 11-bit field.
+///  - t2WLS adds LR, the register in Insn bits 19-16, then the label
+///    (unsigned, not negated).
+///  - t2DLS adds LR and the register in Insn bits 19-16, and fails when that
+///    register field is 0xF.
+/// Any other opcode adds nothing and returns Success.
+static DecodeStatus DecodeLOLoop(MCInst &Inst, unsigned Insn, uint64_t Address,
+                                 const void *Decoder) {
+  DecodeStatus S = MCDisassembler::Success;
+
+  // Label field: Insn bit 11 supplies bit 0, Insn bits 10-1 supply bits 10-1.
+  const unsigned Imm = fieldFromInstruction(Insn, 11, 1) |
+                       fieldFromInstruction(Insn, 1, 10) << 1;
+  const unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+
+  switch (Inst.getOpcode()) {
+  case ARM::t2LEUpdate:
+    Inst.addOperand(MCOperand::createReg(ARM::LR));
+    Inst.addOperand(MCOperand::createReg(ARM::LR));
+    LLVM_FALLTHROUGH;
+  case ARM::t2LE:
+    if (!Check(S, DecodeBFLabelOperand<false, true, 11>(Inst, Imm, Address,
+                                                        Decoder)))
+      return MCDisassembler::Fail;
+    break;
+  case ARM::t2WLS:
+    Inst.addOperand(MCOperand::createReg(ARM::LR));
+    if (!Check(S, DecoderGPRRegisterClass(Inst, Rn, Address, Decoder)))
+      return MCDisassembler::Fail;
+    if (!Check(S, DecodeBFLabelOperand<false, false, 11>(Inst, Imm, Address,
+                                                         Decoder)))
+      return MCDisassembler::Fail;
+    break;
+  case ARM::t2DLS:
+    if (Rn == 0xF)
+      return MCDisassembler::Fail;
+    Inst.addOperand(MCOperand::createReg(ARM::LR));
+    if (!Check(S, DecoderGPRRegisterClass(Inst, Rn, Address, Decoder)))
+      return MCDisassembler::Fail;
+    break;
+  }
+  return S;
+}
+
+/// Decode VSCCLRMD / VSCCLRMS. Operands are appended in this order: an AL
+/// predicate immediate, register 0, the register list, and finally VPR.
+/// The register-list value is assembled from Insn and decoded with
+/// DecodeDPRRegListOperand for VSCCLRMD, DecodeSPRRegListOperand otherwise.
+static DecodeStatus DecodeVSCCLRM(MCInst &Inst, unsigned Insn, uint64_t Address,
+                                  const void *Decoder) {
+  DecodeStatus S = MCDisassembler::Success;
+
+  Inst.addOperand(MCOperand::createImm(ARMCC::AL));
+  Inst.addOperand(MCOperand::createReg(0));
+
+  // Both variants use Insn bits 15-12 and bit 22, placed differently.
+  const unsigned Bits15_12 = fieldFromInstruction(Insn, 12, 4);
+  const unsigned Bit22 = fieldFromInstruction(Insn, 22, 1);
+
+  DecodeStatus ListStatus;
+  if (Inst.getOpcode() == ARM::VSCCLRMD) {
+    // Insn[7:1] -> bits 7-1, Insn[15:12] -> bits 11-8, Insn[22] -> bit 12.
+    const unsigned RegList = (fieldFromInstruction(Insn, 1, 7) << 1) |
+                             (Bits15_12 << 8) | (Bit22 << 12);
+    ListStatus = DecodeDPRRegListOperand(Inst, RegList, Address, Decoder);
+  } else {
+    // Insn[7:0] -> bits 7-0, Insn[22] -> bit 8, Insn[15:12] -> bits 12-9.
+    const unsigned RegList = fieldFromInstruction(Insn, 0, 8) |
+                             (Bit22 << 8) | (Bits15_12 << 9);
+    ListStatus = DecodeSPRRegListOperand(Inst, RegList, Address, Decoder);
+  }
+  if (!Check(S, ListStatus))
+    return MCDisassembler::Fail;
+
+  Inst.addOperand(MCOperand::createReg(ARM::VPR));
+
+  return S;
+}
+
+/// Return the fixed register operand implied by a VSTR/VLDR system-register
+/// opcode: ARM::P0 for the six *_P0_{off,pre,post} opcodes, 0 for any other
+/// opcode.
+static unsigned FixedRegForVSTRVLDR_SYSREG(unsigned Opcode) {
+  unsigned FixedReg = 0;
+  switch (Opcode) {
+  case ARM::VSTR_P0_off:
+  case ARM::VSTR_P0_pre:
+  case ARM::VSTR_P0_post:
+  case ARM::VLDR_P0_off:
+  case ARM::VLDR_P0_pre:
+  case ARM::VLDR_P0_post:
+    FixedReg = ARM::P0;
+    break;
+  default:
+    break;
+  }
+  return FixedReg;
+}
+
+/// Decode the operands of the VSTR/VLDR system-register instructions.
+/// The FPSCR and FPSCR_NZCVQC forms fail to decode unless the subtarget has
+/// HasMVEIntegerOps or FeatureVFP2. Operands are then appended in this order:
+/// the fixed system register (if FixedRegForVSTRVLDR_SYSREG returns one), the
+/// writeback base register (only when Writeback), the imm7s4 address, an AL
+/// predicate immediate and register 0.
+template<bool Writeback>
+static DecodeStatus DecodeVSTRVLDR_SYSREG(MCInst &Inst, unsigned Val,
+                                          uint64_t Address,
+                                          const void *Decoder) {
+  switch (Inst.getOpcode()) {
+  case ARM::VSTR_FPSCR_pre:
+  case ARM::VSTR_FPSCR_NZCVQC_pre:
+  case ARM::VLDR_FPSCR_pre:
+  case ARM::VLDR_FPSCR_NZCVQC_pre:
+  case ARM::VSTR_FPSCR_off:
+  case ARM::VSTR_FPSCR_NZCVQC_off:
+  case ARM::VLDR_FPSCR_off:
+  case ARM::VLDR_FPSCR_NZCVQC_off:
+  case ARM::VSTR_FPSCR_post:
+  case ARM::VSTR_FPSCR_NZCVQC_post:
+  case ARM::VLDR_FPSCR_post:
+  case ARM::VLDR_FPSCR_NZCVQC_post: {
+    const FeatureBitset &Features =
+        ((const MCDisassembler *)Decoder)->getSubtargetInfo().getFeatureBits();
+
+    // At least one of the two features must be present.
+    if (!(Features[ARM::HasMVEIntegerOps] || Features[ARM::FeatureVFP2]))
+      return MCDisassembler::Fail;
+    break;
+  }
+  default:
+    break;
+  }
+
+  DecodeStatus S = MCDisassembler::Success;
+  const unsigned Sysreg = FixedRegForVSTRVLDR_SYSREG(Inst.getOpcode());
+  if (Sysreg != 0)
+    Inst.addOperand(MCOperand::createReg(Sysreg));
+
+  // Repack into the layout DecodeT2AddrModeImm7s4 expects: Val[6:0] in bits
+  // 6-0, Val[23] in bit 7, and the base register Val[19:16] in bits 11-8.
+  const unsigned Rn = fieldFromInstruction(Val, 16, 4);
+  const unsigned addr = fieldFromInstruction(Val, 0, 7) |
+                        (fieldFromInstruction(Val, 23, 1) << 7) | (Rn << 8);
+
+  if (Writeback &&
+      !Check(S, DecodeGPRnopcRegisterClass(Inst, Rn, Address, Decoder)))
+    return MCDisassembler::Fail;
+  if (!Check(S, DecodeT2AddrModeImm7s4(Inst, addr, Address, Decoder)))
+    return MCDisassembler::Fail;
+
+  Inst.addOperand(MCOperand::createImm(ARMCC::AL));
+  Inst.addOperand(MCOperand::createReg(0));
+
+  return S;
+}
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index 538bc2594c3..d1c891bb3be 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -29,6 +29,7 @@
 #include "llvm/MC/MCSectionMachO.h"
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/MCValue.h"
+#include "llvm/MC/MCAsmLayout.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/EndianStream.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -104,6 +105,13 @@ const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
       {"fixup_t2_movw_lo16", 0, 20, 0},
       {"fixup_arm_mod_imm", 0, 12, 0},
       {"fixup_t2_so_imm", 0, 26, 0},
+      {"fixup_bf_branch", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+      {"fixup_bf_target", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+      {"fixup_bfl_target", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+      {"fixup_bfc_target", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+      {"fixup_bfcsel_else_target", 0, 32, 0},
+      {"fixup_wls", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+      {"fixup_le", 0, 32, MCFixupKindInfo::FKF_IsPCRel}
   };
   const static MCFixupKindInfo InfosBE[ARM::NumTargetFixupKinds] = {
       // This table *must* be in the order that the fixup_* kinds are defined in
@@ -155,6 +163,13 @@ const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
       {"fixup_t2_movw_lo16", 12, 20, 0},
       {"fixup_arm_mod_imm", 20, 12, 0},
       {"fixup_t2_so_imm", 26, 6, 0},
+      {"fixup_bf_branch", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+      {"fixup_bf_target", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+      {"fixup_bfl_target", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+      {"fixup_bfc_target", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+      {"fixup_bfcsel_else_target", 0, 32, 0},
+      {"fixup_wls", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+      {"fixup_le", 0, 32, MCFixupKindInfo::FKF_IsPCRel}
   };
 
   if (Kind < FirstTargetFixupKind)
@@ -256,6 +271,56 @@ const char *ARMAsmBackend::reasonForFixupRelaxation(const MCFixup &Fixup,
       return "will be converted to nop";
     break;
   }
+  case ARM::fixup_bf_branch: {
+    int64_t Offset = int64_t(Value) - 4;
+    if (Offset < 0 || Offset > 30)
+      return "out of range pc-relative fixup value";
+    break;
+  }
+  case ARM::fixup_bf_target: {
+    int64_t Offset = int64_t(Value) - 4;
+    if (Offset > 65534 || Offset < -65536)
+      return "out of range pc-relative fixup value";
+    break;
+  }
+  case ARM::fixup_bfl_target: {
+    int64_t Offset = int64_t(Value) - 4;
+    if (Offset > 262142 || Offset < -262144)
+      return "out of range pc-relative fixup value";
+    break;
+  }
+  case ARM::fixup_bfc_target: {
+    int64_t Offset = int64_t(Value) - 4;
+    if (Offset > 4094 || Offset < -4096)
+      return "out of range pc-relative fixup value";
+    break;
+  }
+  case ARM::fixup_bfcsel_else_target: {
+    if (Value != 2 && Value != 4)
+      return "out of range label-relative fixup value";
+    break;
+  }
+  case ARM::fixup_wls: {
+    uint64_t Offset = Value - 4;
+    if (Offset > 4094)
+      return "out of range pc-relative fixup value";
+    break;
+  }
+  case ARM::fixup_le: {
+    // The offset field in the LE and LETP instructions is an 11-bit
+    // value shifted left by 1 (i.e. 0,2,4,...,4094), and it is
+    // interpreted as a negative offset from the value read from pc,
+    // i.e. from instruction_address+4.
+    //
+    // So an LE instruction can in principle address the instruction
+    // immediately after itself, or (not very usefully) the address
+    // half way through the 4-byte LE.
+    uint64_t Offset = -Value + 4;
+    if (Offset > 4094)
+      return "out of range pc-relative fixup value";
+    break;
+  }
+
   default:
     llvm_unreachable("Unexpected fixup kind in reasonForFixupRelaxation()!");
   }
@@ -760,6 +825,60 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
     EncValue |= (Value & 0xff);
     return swapHalfWords(EncValue, Endian == support::little);
   }
+  case ARM::fixup_bf_branch: {
+    const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value);
+    if (FixupDiagnostic) {
+      Ctx.reportError(Fixup.getLoc(), FixupDiagnostic);
+      return 0;
+    }
+    uint32_t out = (((Value - 4) >> 1) & 0xf) << 23;
+    return swapHalfWords(out, Endian == support::little);
+  }
+  case ARM::fixup_bf_target:
+  case ARM::fixup_bfl_target:
+  case ARM::fixup_bfc_target: {
+    const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value);
+    if (FixupDiagnostic) {
+      Ctx.reportError(Fixup.getLoc(), FixupDiagnostic);
+      return 0;
+    }
+    uint32_t out = 0;
+    uint32_t HighBitMask = (Kind == ARM::fixup_bf_target ? 0xf800 :
+                            Kind == ARM::fixup_bfl_target ? 0x3f800 : 0x800);
+    out |= (((Value - 4) >> 1) & 0x1) << 11;
+    out |= (((Value - 4) >> 1) & 0x7fe);
+    out |= (((Value - 4) >> 1) & HighBitMask) << 5;
+    return swapHalfWords(out, Endian == support::little);
+  }
+  case ARM::fixup_bfcsel_else_target: {
+    // If this is a fixup of a branch future's else target then it should be a
+    // constant MCExpr representing the distance between the branch targeted
+    // and the instruction after that same branch.
+    Value = Target.getConstant();
+
+    const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value);
+    if (FixupDiagnostic) {
+      Ctx.reportError(Fixup.getLoc(), FixupDiagnostic);
+      return 0;
+    }
+    uint32_t out = ((Value >> 2) & 1) << 17;
+    return swapHalfWords(out, Endian == support::little);
+  }
+  case ARM::fixup_wls:
+  case ARM::fixup_le: {
+    const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value);
+    if (FixupDiagnostic) {
+      Ctx.reportError(Fixup.getLoc(), FixupDiagnostic);
+      return 0;
+    }
+    uint64_t real_value = Value - 4;
+    uint32_t out = 0;
+    if (Kind == ARM::fixup_le)
+      real_value = -real_value;
+    out |= ((real_value >> 1) & 0x1) << 11;
+    out |= ((real_value >> 1) & 0x7fe);
+    return swapHalfWords(out, Endian == support::little);
+  }
   }
 }
 
@@ -854,6 +973,13 @@ static unsigned getFixupKindNumBytes(unsigned Kind) {
   case ARM::fixup_t2_movt_hi16:
   case ARM::fixup_t2_movw_lo16:
   case ARM::fixup_t2_so_imm:
+  case ARM::fixup_bf_branch:
+  case ARM::fixup_bf_target:
+  case ARM::fixup_bfl_target:
+  case ARM::fixup_bfc_target:
+  case ARM::fixup_bfcsel_else_target:
+  case ARM::fixup_wls:
+  case ARM::fixup_le:
     return 4;
 
   case FK_SecRel_2:
@@ -910,6 +1036,13 @@ static unsigned getFixupKindContainerSizeBytes(unsigned Kind) {
   case ARM::fixup_t2_movw_lo16:
   case ARM::fixup_arm_mod_imm:
   case ARM::fixup_t2_so_imm:
+  case ARM::fixup_bf_branch:
+  case ARM::fixup_bf_target:
+  case ARM::fixup_bfl_target:
+  case ARM::fixup_bfc_target:
+  case ARM::fixup_bfcsel_else_target:
+  case ARM::fixup_wls:
+  case ARM::fixup_le:
     // Instruction size is 4 bytes.
     return 4;
   }
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
index 1d51a9696cd..fda19eea1de 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
@@ -137,6 +137,12 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
       default:
         return ELF::R_ARM_THM_CALL;
       }
+    case ARM::fixup_bf_target:
+      return ELF::R_ARM_THM_BF16;
+    case ARM::fixup_bfc_target:
+      return ELF::R_ARM_THM_BF12;
+    case ARM::fixup_bfl_target:
+      return ELF::R_ARM_THM_BF18;
     }
   }
   switch ((unsigned)Fixup.getKind()) {
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
index 533bf6d4df9..bdf04a208b2 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
@@ -103,6 +103,15 @@ enum Fixups {
   // Fixup for Thumb2 8-bit rotated operand
   fixup_t2_so_imm,
 
+  // Fixups for Branch Future.
+  fixup_bf_branch,
+  fixup_bf_target,
+  fixup_bfl_target,
+  fixup_bfc_target,
+  fixup_bfcsel_else_target,
+  fixup_wls,
+  fixup_le,
+
   // Marker
   LastTargetFixupKind,
   NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp
index ec5fd16cd7d..271959c38de 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp
@@ -771,11 +771,13 @@ void ARMInstPrinter::printPKHASRShiftImm(const MCInst *MI, unsigned OpNum,
 void ARMInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum,
                                        const MCSubtargetInfo &STI,
                                        raw_ostream &O) {
-  assert(std::is_sorted(MI->begin() + OpNum, MI->end(),
-                        [&](const MCOperand &LHS, const MCOperand &RHS) {
-                          return MRI.getEncodingValue(LHS.getReg()) <
-                                 MRI.getEncodingValue(RHS.getReg());
-                        }));
+  if (MI->getOpcode() != ARM::t2CLRM) {
+    assert(std::is_sorted(MI->begin() + OpNum, MI->end(),
+                          [&](const MCOperand &LHS, const MCOperand &RHS) {
+                            return MRI.getEncodingValue(LHS.getReg()) <
+                                   MRI.getEncodingValue(RHS.getReg());
+                          }));
+  }
 
   O << "{";
   for (unsigned i = OpNum, e = MI->getNumOperands(); i != e; ++i) {
@@ -930,6 +932,15 @@ void ARMInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNum,
     O << ARMCondCodeToString(CC);
 }
 
+/// Print a mandatory predicate operand, spelling ARMCC::HS as "cs". Every
+/// other condition is delegated to printMandatoryPredicateOperand.
+void ARMInstPrinter::printMandatoryRestrictedPredicateOperand(
+    const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI,
+    raw_ostream &O) {
+  const ARMCC::CondCodes CC =
+      (ARMCC::CondCodes)MI->getOperand(OpNum).getImm();
+  if (CC != ARMCC::HS) {
+    printMandatoryPredicateOperand(MI, OpNum, STI, O);
+    return;
+  }
+  O << "cs";
+}
+
 void ARMInstPrinter::printMandatoryPredicateOperand(const MCInst *MI,
                                                     unsigned OpNum,
                                                     const MCSubtargetInfo &STI,
@@ -938,6 +949,14 @@ void ARMInstPrinter::printMandatoryPredicateOperand(const MCInst *MI,
   O << ARMCondCodeToString(CC);
 }
 
+/// Print the opposite of the condition code stored in operand OpNum, as
+/// computed by ARMCC::getOppositeCondition.
+void ARMInstPrinter::printMandatoryInvertedPredicateOperand(const MCInst *MI,
+                                                            unsigned OpNum,
+                                                            const MCSubtargetInfo &STI,
+                                                            raw_ostream &O) {
+  const ARMCC::CondCodes Stored =
+      (ARMCC::CondCodes)MI->getOperand(OpNum).getImm();
+  const ARMCC::CondCodes Inverted = ARMCC::getOppositeCondition(Stored);
+  O << ARMCondCodeToString(Inverted);
+}
+
 void ARMInstPrinter::printSBitModifierOperand(const MCInst *MI, unsigned OpNum,
                                               const MCSubtargetInfo &STI,
                                               raw_ostream &O) {
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h
index d07daa7bd15..ea65cde81a9 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h
@@ -170,6 +170,13 @@ public:
   void printMandatoryPredicateOperand(const MCInst *MI, unsigned OpNum,
                                       const MCSubtargetInfo &STI,
                                       raw_ostream &O);
+  void printMandatoryRestrictedPredicateOperand(const MCInst *MI,
+                                                unsigned OpNum,
+                                                const MCSubtargetInfo &STI,
+                                                raw_ostream &O);
+  void printMandatoryInvertedPredicateOperand(const MCInst *MI, unsigned OpNum,
+                                      const MCSubtargetInfo &STI,
+                                      raw_ostream &O);
   void printSBitModifierOperand(const MCInst *MI, unsigned OpNum,
                                 const MCSubtargetInfo &STI, raw_ostream &O);
   void printRegisterList(const MCInst *MI, unsigned OpNum,
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index acc00c70c02..e84fe35ee20 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -49,7 +49,7 @@ namespace {
 
 class ARMMCCodeEmitter : public MCCodeEmitter {
   const MCInstrInfo &MCII;
-  const MCContext &CTX;
+  MCContext &CTX;
   bool IsLittleEndian;
 
 public:
@@ -180,18 +180,24 @@ public:
                                    SmallVectorImpl<MCFixup> &Fixups,
                                    const MCSubtargetInfo &STI) const;
 
+  /// getT2AddrModeImm7s4OpValue - Return encoding info for 'reg +/- imm7<<2'
+  /// operand.
+  uint32_t getT2AddrModeImm7s4OpValue(const MCInst &MI, unsigned OpIdx,
+                                      SmallVectorImpl<MCFixup> &Fixups,
+                                      const MCSubtargetInfo &STI) const;
+
   /// getT2AddrModeImm0_1020s4OpValue - Return encoding info for 'reg + imm8<<2'
   /// operand.
   uint32_t getT2AddrModeImm0_1020s4OpValue(const MCInst &MI, unsigned OpIdx,
                                    SmallVectorImpl<MCFixup> &Fixups,
                                    const MCSubtargetInfo &STI) const;
 
-  /// getT2Imm8s4OpValue - Return encoding info for '+/- imm8<<2'
+  /// getT2ScaledImmOpValue - Return encoding info for '+/- immX<<Y'
   /// operand.
-  uint32_t getT2Imm8s4OpValue(const MCInst &MI, unsigned OpIdx,
-                              SmallVectorImpl<MCFixup> &Fixups,
-                              const MCSubtargetInfo &STI) const;
-
+  template<unsigned Bits, unsigned Shift>
+  uint32_t getT2ScaledImmOpValue(const MCInst &MI, unsigned OpIdx,
+                                 SmallVectorImpl<MCFixup> &Fixups,
+                                 const MCSubtargetInfo &STI) const;
 
   /// getLdStSORegOpValue - Return encoding info for 'reg +/- reg shop imm'
   /// operand as needed by load/store instructions.
@@ -416,6 +422,15 @@ public:
   void encodeInstruction(const MCInst &MI, raw_ostream &OS,
                          SmallVectorImpl<MCFixup> &Fixups,
                          const MCSubtargetInfo &STI) const override;
+
+  template <bool isNeg, ARM::Fixups fixup>
+  uint32_t getBFTargetOpValue(const MCInst &MI, unsigned OpIdx,
+                              SmallVectorImpl<MCFixup> &Fixups,
+                              const MCSubtargetInfo &STI) const;
+
+  uint32_t getBFAfterTargetOpValue(const MCInst &MI, unsigned OpIdx,
+                                   SmallVectorImpl<MCFixup> &Fixups,
+                                   const MCSubtargetInfo &STI) const;
 };
 
 } // end anonymous namespace
@@ -894,12 +909,11 @@ getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx,
   return Binary;
 }
 
-/// getT2Imm8s4OpValue - Return encoding info for
-/// '+/- imm8<<2' operand.
+template<unsigned Bits, unsigned Shift>
 uint32_t ARMMCCodeEmitter::
-getT2Imm8s4OpValue(const MCInst &MI, unsigned OpIdx,
-                   SmallVectorImpl<MCFixup> &Fixups,
-                   const MCSubtargetInfo &STI) const {
+getT2ScaledImmOpValue(const MCInst &MI, unsigned OpIdx,
+                      SmallVectorImpl<MCFixup> &Fixups,
+                      const MCSubtargetInfo &STI) const {
   // FIXME: The immediate operand should have already been encoded like this
   // before ever getting here. The encoder method should just need to combine
   // the MI operands for the register and the offset into a single
@@ -907,25 +921,23 @@ getT2Imm8s4OpValue(const MCInst &MI, unsigned OpIdx,
   // style, unfortunately. As-is, we can't represent the distinct encoding
   // for #-0.
 
-  // {8}    = (U)nsigned (add == '1', sub == '0')
-  // {7-0}  = imm8
-  int32_t Imm8 = MI.getOperand(OpIdx).getImm();
-  bool isAdd = Imm8 >= 0;
+  // {Bits}    = (U)nsigned (add == '1', sub == '0')
+  // {(Bits-1)-0}  = immediate
+  int32_t Imm = MI.getOperand(OpIdx).getImm();
+  bool isAdd = Imm >= 0;
 
   // Immediate is always encoded as positive. The 'U' bit controls add vs sub.
-  if (Imm8 < 0)
-    Imm8 = -(uint32_t)Imm8;
+  if (Imm < 0)
+    Imm = -(uint32_t)Imm;
 
-  // Scaled by 4.
-  Imm8 /= 4;
+  Imm >>= Shift;
 
-  uint32_t Binary = Imm8 & 0xff;
+  uint32_t Binary = Imm & ((1U << Bits) - 1);
   // Immediate is always encoded as positive. The 'U' bit controls add vs sub.
   if (isAdd)
-    Binary |= (1 << 8);
+    Binary |= (1U << Bits);
   return Binary;
 }
-
 /// getT2AddrModeImm8s4OpValue - Return encoding info for
 /// 'reg +/- imm8<<2' operand.
 uint32_t ARMMCCodeEmitter::
@@ -967,6 +979,33 @@ getT2AddrModeImm8s4OpValue(const MCInst &MI, unsigned OpIdx,
   return Binary;
 }
 
+/// getT2AddrModeImm7s4OpValue - Return encoding info for
+/// 'reg +/- imm7<<2' operand.
+///
+/// The returned value packs the base register into bits 11-8, the add/sub
+/// flag into bit 7 and the offset magnitude divided by 4 into bits 6-0.
+uint32_t
+ARMMCCodeEmitter::getT2AddrModeImm7s4OpValue(const MCInst &MI, unsigned OpIdx,
+                                             SmallVectorImpl<MCFixup> &Fixups,
+                                             const MCSubtargetInfo &STI) const {
+  // {11-8} = reg
+  // {7}    = (A)dd (add == '1', sub == '0')
+  // {6-0}  = imm7
+  unsigned Reg, Imm7;
+  // If the first operand isn't a register, we have a label reference.
+  bool isAdd = EncodeAddrModeOpValues(MI, OpIdx, Reg, Imm7, Fixups, STI);
+
+  // FIXME: The immediate operand should have already been encoded like this
+  // before ever getting here. The encoder method should just need to combine
+  // the MI operands for the register and the offset into a single
+  // representation for the complex operand in the .td file. This isn't just
+  // style, unfortunately. As-is, we can't represent the distinct encoding
+  // for #-0.
+  //
+  // Mask to the seven bits of the imm7 field so that a magnitude wider than
+  // the field can never spill into the 'A' bit at position 7.
+  uint32_t Binary = (Imm7 >> 2) & 0x7f;
+  // Immediate is always encoded as positive. The 'A' bit controls add vs sub.
+  if (isAdd)
+    Binary |= (1 << 7);
+  Binary |= (Reg << 8);
+  return Binary;
+}
+
 /// getT2AddrModeImm0_1020s4OpValue - Return encoding info for
 /// 'reg + imm8<<2' operand.
 uint32_t ARMMCCodeEmitter::
@@ -1499,7 +1538,7 @@ unsigned ARMMCCodeEmitter::
 getRegisterListOpValue(const MCInst &MI, unsigned Op,
                        SmallVectorImpl<MCFixup> &Fixups,
                        const MCSubtargetInfo &STI) const {
-  // VLDM/VSTM:
+  // VLDM/VSTM/VSCCLRM:
   //   {12-8} = Vd
   //   {7-0}  = Number of registers
   //
@@ -1508,28 +1547,40 @@ getRegisterListOpValue(const MCInst &MI, unsigned Op,
   unsigned Reg = MI.getOperand(Op).getReg();
   bool SPRRegs = ARMMCRegisterClasses[ARM::SPRRegClassID].contains(Reg);
   bool DPRRegs = ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg);
+  bool CLRMRegs = MI.getOpcode() == ARM::t2CLRM;
 
   unsigned Binary = 0;
 
   if (SPRRegs || DPRRegs) {
-    // VLDM/VSTM
+    // VLDM/VSTM/VSCCLRM
     unsigned RegNo = CTX.getRegisterInfo()->getEncodingValue(Reg);
     unsigned NumRegs = (MI.getNumOperands() - Op) & 0xff;
     Binary |= (RegNo & 0x1f) << 8;
+
+    // Ignore VPR
+    if (MI.getOpcode() == ARM::VSCCLRMD || MI.getOpcode() == ARM::VSCCLRMS)
+      --NumRegs;
     if (SPRRegs)
       Binary |= NumRegs;
     else
       Binary |= NumRegs * 2;
   } else {
     const MCRegisterInfo &MRI = *CTX.getRegisterInfo();
-    assert(std::is_sorted(MI.begin() + Op, MI.end(),
-                          [&](const MCOperand &LHS, const MCOperand &RHS) {
-                            return MRI.getEncodingValue(LHS.getReg()) <
-                                   MRI.getEncodingValue(RHS.getReg());
-                          }));
+    if (!CLRMRegs) {
+      assert(std::is_sorted(MI.begin() + Op, MI.end(),
+                            [&](const MCOperand &LHS, const MCOperand &RHS) {
+                              return MRI.getEncodingValue(LHS.getReg()) <
+                                     MRI.getEncodingValue(RHS.getReg());
+                            }));
+    }
 
     for (unsigned I = Op, E = MI.getNumOperands(); I < E; ++I) {
-      unsigned RegNo = MRI.getEncodingValue(MI.getOperand(I).getReg());
+      unsigned RegNo;
+      if (CLRMRegs && MI.getOperand(I).getReg() == ARM::APSR) {
+        RegNo = 15;
+      } else {
+        RegNo = MRI.getEncodingValue(MI.getOperand(I).getReg());
+      }
       Binary |= 1 << RegNo;
     }
   }
@@ -1675,6 +1726,39 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS,
   ++MCNumEmitted;  // Keep track of the # of mi's emitted.
 }
 
+template <bool isNeg, ARM::Fixups fixup>
+uint32_t
+ARMMCCodeEmitter::getBFTargetOpValue(const MCInst &MI, unsigned OpIdx,
+                                     SmallVectorImpl<MCFixup> &Fixups,
+                                     const MCSubtargetInfo &STI) const {
+  const MCOperand &Target = MI.getOperand(OpIdx);
+  if (!Target.isExpr()) // Immediate: drop the low bit, negate when isNeg.
+    return isNeg ? -(Target.getImm() >> 1) : (Target.getImm() >> 1);
+  return ::getBranchTargetOpValue(MI, OpIdx, fixup, Fixups, STI);
+}
+
+uint32_t
+ARMMCCodeEmitter::getBFAfterTargetOpValue(const MCInst &MI, unsigned OpIdx,
+                                          SmallVectorImpl<MCFixup> &Fixups,
+                                          const MCSubtargetInfo &STI) const {
+  const MCOperand &AfterOp = MI.getOperand(OpIdx);
+  const MCOperand &BranchOp = MI.getOperand(0);
+  if (!AfterOp.isExpr()) {
+    // Immediate form: encode 1 for a distance of 4 and 0 for a distance of 2.
+    assert(AfterOp.isImm() && BranchOp.isImm());
+    int Distance = AfterOp.getImm() - BranchOp.getImm();
+    assert(Distance == 4 || Distance == 2);
+    return Distance == 4;
+  }
+
+  // Symbolic form: encode 0 now and record a fixup on (operand - operand 0).
+  assert(BranchOp.isExpr());
+  const MCExpr *DistanceExpr =
+      MCBinaryExpr::createSub(AfterOp.getExpr(), BranchOp.getExpr(), CTX);
+  Fixups.push_back(llvm::MCFixup::create(
+      0, DistanceExpr, MCFixupKind(ARM::fixup_bfcsel_else_target), MI.getLoc()));
+  return 0;
+}
 #include "ARMGenMCCodeEmitter.inc"
 
 MCCodeEmitter *llvm::createARMLEMCCodeEmitter(const MCInstrInfo &MCII,
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index 24491b35fa5..90022a8d88a 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -277,14 +277,29 @@ class ThumbMCInstrAnalysis : public ARMMCInstrAnalysis {
 public:
   ThumbMCInstrAnalysis(const MCInstrInfo *Info) : ARMMCInstrAnalysis(Info) {}
 
-  bool evaluateBranch(const MCInst &Inst, uint64_t Addr,
-                      uint64_t Size, uint64_t &Target) const override {
+  bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size,
+                      uint64_t &Target) const override {
+    unsigned OpId; // Index of the operand examined as the branch target.
+    switch (Inst.getOpcode()) {
+    default: // Every other opcode: examine operand 0.
+      OpId = 0;
+      break;
+    case ARM::t2WLS:
+    case ARM::t2LEUpdate: // t2WLS and t2LEUpdate: examine operand 2.
+      OpId = 2;
+      break;
+    case ARM::t2LE: // t2LE: examine operand 1.
+      OpId = 1;
+      break;
+    }
+
     // We only handle PCRel branches for now.
-    if (Info->get(Inst.getOpcode()).OpInfo[0].OperandType!=MCOI::OPERAND_PCREL)
+    if (Info->get(Inst.getOpcode()).OpInfo[OpId].OperandType !=
+        MCOI::OPERAND_PCREL)
       return false;
 
-    int64_t Imm = Inst.getOperand(0).getImm();
-    Target = Addr+Imm+4; // In Thumb mode the PC is always off by 4 bytes.
+    // In Thumb mode the PC is always off by 4 bytes.
+    Target = Addr + Inst.getOperand(OpId).getImm() + 4;
     return true;
   }
 };
diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
index 9a79089a960..abc870aecde 100644
--- a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -161,7 +161,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
     // otherwise).
     if (TargetRegisterInfo::isVirtualRegister(SrcReg)) {
       MachineRegisterInfo *MRI = &MF.getRegInfo();
-      MRI->constrainRegClass(SrcReg, &ARM::GPRPair_with_gsub_1_in_rGPRRegClass);
+      MRI->constrainRegClass(SrcReg, &ARM::GPRPair_with_gsub_1_in_GPRwithAPSRnospRegClass);
     }
 
     MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2STRDi8));
@@ -203,7 +203,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
     if (TargetRegisterInfo::isVirtualRegister(DestReg)) {
       MachineRegisterInfo *MRI = &MF.getRegInfo();
       MRI->constrainRegClass(DestReg,
-                             &ARM::GPRPair_with_gsub_1_in_rGPRRegClass);
+                             &ARM::GPRPair_with_gsub_1_in_GPRwithAPSRnospRegClass);
     }
 
     MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2LDRDi8));
author	Simon Tatham <simon.tatham@arm.com>	2019-06-10 15:36:34 +0000
committer	Simon Tatham <simon.tatham@arm.com>	2019-06-10 15:36:34 +0000
commit	baeea9193370deeefb19ea7602606e262fec9be6 (patch)
tree	89e90d76800c89b18b403f893b27799bbf1a7e90 /llvm/lib
parent	05bf5f9328e2bcada093cc36e729621763b68823 (diff)
download	bcm5719-llvm-baeea9193370deeefb19ea7602606e262fec9be6.tar.gz bcm5719-llvm-baeea9193370deeefb19ea7602606e262fec9be6.zip