Diffstat (limited to 'llvm/lib/Target/AArch64')
74 files changed, 0 insertions, 38972 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64.h b/llvm/lib/Target/AArch64/AArch64.h deleted file mode 100644 index 0297de120cb..00000000000 --- a/llvm/lib/Target/AArch64/AArch64.h +++ /dev/null @@ -1,46 +0,0 @@ -//==-- AArch64.h - Top-level interface for AArch64 representation -*- C++ -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the entry points for global functions defined in the LLVM -// AArch64 back-end. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TARGET_AARCH64_H -#define LLVM_TARGET_AARCH64_H - -#include "MCTargetDesc/AArch64MCTargetDesc.h" -#include "llvm/Target/TargetMachine.h" - -namespace llvm { - -class AArch64AsmPrinter; -class FunctionPass; -class AArch64TargetMachine; -class MachineInstr; -class MCInst; - -FunctionPass *createAArch64ISelDAG(AArch64TargetMachine &TM, - CodeGenOpt::Level OptLevel); - -FunctionPass *createAArch64CleanupLocalDynamicTLSPass(); - -FunctionPass *createAArch64BranchFixupPass(); - -/// \brief Creates an AArch64-specific Target Transformation Info pass. -ImmutablePass *createAArch64TargetTransformInfoPass( - const AArch64TargetMachine *TM); - -void LowerAArch64MachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, - AArch64AsmPrinter &AP); - - -} - -#endif diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td deleted file mode 100644 index e49afd60c8e..00000000000 --- a/llvm/lib/Target/AArch64/AArch64.td +++ /dev/null @@ -1,83 +0,0 @@ -//===- AArch64.td - Describe the AArch64 Target Machine -------*- tblgen -*-==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This is the top level entry point for the AArch64 target. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Target-independent interfaces -//===----------------------------------------------------------------------===// - -include "llvm/Target/Target.td" - -//===----------------------------------------------------------------------===// -// AArch64 Subtarget features. 
-// - -def FeatureFPARMv8 : SubtargetFeature<"fp-armv8", "HasFPARMv8", "true", - "Enable ARMv8 FP">; - -def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true", - "Enable Advanced SIMD instructions", [FeatureFPARMv8]>; - -def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true", - "Enable cryptographic instructions">; - -//===----------------------------------------------------------------------===// -// AArch64 Processors -// - -include "AArch64Schedule.td" - -class ProcNoItin<string Name, list<SubtargetFeature> Features> - : Processor<Name, NoItineraries, Features>; - -def : Processor<"generic", GenericItineraries, [FeatureFPARMv8, FeatureNEON]>; - -def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53", - "Cortex-A53 ARM processors", - [FeatureFPARMv8, - FeatureNEON, - FeatureCrypto]>; - -def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57", - "Cortex-A57 ARM processors", - [FeatureFPARMv8, - FeatureNEON, - FeatureCrypto]>; - -def : ProcessorModel<"cortex-a53", CortexA53Model, [ProcA53]>; -def : Processor<"cortex-a57", NoItineraries, [ProcA57]>; - -//===----------------------------------------------------------------------===// -// Register File Description -//===----------------------------------------------------------------------===// - -include "AArch64RegisterInfo.td" - -include "AArch64CallingConv.td" - -//===----------------------------------------------------------------------===// -// Instruction Descriptions -//===----------------------------------------------------------------------===// - -include "AArch64InstrInfo.td" - -def AArch64InstrInfo : InstrInfo { - let noNamedPositionallyEncodedOperands = 1; -} - -//===----------------------------------------------------------------------===// -// Declare the target which we are implementing -//===----------------------------------------------------------------------===// - -def AArch64 : Target { - let InstructionSet = AArch64InstrInfo; -} diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp deleted file mode 100644 index 5b5148351fa..00000000000 --- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ /dev/null @@ -1,303 +0,0 @@ -//===-- AArch64AsmPrinter.cpp - Print machine code to an AArch64 .s file --===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains a printer that converts from our internal representation -// of machine-dependent LLVM code to GAS-format AArch64 assembly language. -// -//===----------------------------------------------------------------------===// - -#include "AArch64AsmPrinter.h" -#include "InstPrinter/AArch64InstPrinter.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/CodeGen/MachineModuleInfoImpls.h" -#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" -#include "llvm/IR/DebugInfo.h" -#include "llvm/IR/Mangler.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/Support/TargetRegistry.h" - -using namespace llvm; - -#define DEBUG_TYPE "asm-printer" - -/// Try to print a floating-point register as if it belonged to a specified -/// register-class. For example the inline asm operand modifier "b" requires its -/// argument to be printed as "bN". 
-static bool printModifiedFPRAsmOperand(const MachineOperand &MO, - const TargetRegisterInfo *TRI, - char RegType, raw_ostream &O) { - if (!MO.isReg()) - return true; - - for (MCRegAliasIterator AR(MO.getReg(), TRI, true); AR.isValid(); ++AR) { - if (AArch64::FPR8RegClass.contains(*AR)) { - O << RegType << TRI->getEncodingValue(MO.getReg()); - return false; - } - } - - // The register doesn't correspond to anything floating-point like. - return true; -} - -/// Implements the 'w' and 'x' inline asm operand modifiers, which print a GPR -/// with the obvious type and an immediate 0 as either wzr or xzr. -static bool printModifiedGPRAsmOperand(const MachineOperand &MO, - const TargetRegisterInfo *TRI, - const TargetRegisterClass &RegClass, - raw_ostream &O) { - char Prefix = &RegClass == &AArch64::GPR32RegClass ? 'w' : 'x'; - - if (MO.isImm() && MO.getImm() == 0) { - O << Prefix << "zr"; - return false; - } else if (MO.isReg()) { - if (MO.getReg() == AArch64::XSP || MO.getReg() == AArch64::WSP) { - O << (Prefix == 'x' ? "sp" : "wsp"); - return false; - } - - for (MCRegAliasIterator AR(MO.getReg(), TRI, true); AR.isValid(); ++AR) { - if (RegClass.contains(*AR)) { - O << AArch64InstPrinter::getRegisterName(*AR); - return false; - } - } - } - - return true; -} - -bool AArch64AsmPrinter::printSymbolicAddress(const MachineOperand &MO, - bool PrintImmediatePrefix, - StringRef Suffix, raw_ostream &O) { - StringRef Name; - StringRef Modifier; - switch (MO.getType()) { - default: - return true; - case MachineOperand::MO_GlobalAddress: - Name = getSymbol(MO.getGlobal())->getName(); - - // Global variables may be accessed either via a GOT or in various fun and - // interesting TLS-model specific ways. Set the prefix modifier as - // appropriate here. - if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(MO.getGlobal())) { - Reloc::Model RelocM = TM.getRelocationModel(); - if (GV->isThreadLocal()) { - switch (TM.getTLSModel(GV)) { - case TLSModel::GeneralDynamic: - Modifier = "tlsdesc"; - break; - case TLSModel::LocalDynamic: - Modifier = "dtprel"; - break; - case TLSModel::InitialExec: - Modifier = "gottprel"; - break; - case TLSModel::LocalExec: - Modifier = "tprel"; - break; - } - } else if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) { - Modifier = "got"; - } - } - break; - case MachineOperand::MO_BlockAddress: - Name = GetBlockAddressSymbol(MO.getBlockAddress())->getName(); - break; - case MachineOperand::MO_ConstantPoolIndex: - Name = GetCPISymbol(MO.getIndex())->getName(); - break; - } - - // Some instructions (notably ADRP) don't take the # prefix for - // immediates. Only print it if asked to. - if (PrintImmediatePrefix) - O << '#'; - - // Only need the joining "_" if both the prefix and the suffix are - // non-null. This little block simply takes care of the four possibly - // combinations involved there. 
- if (Modifier == "" && Suffix == "") - O << Name; - else if (Modifier == "" && Suffix != "") - O << ":" << Suffix << ':' << Name; - else if (Modifier != "" && Suffix == "") - O << ":" << Modifier << ':' << Name; - else - O << ":" << Modifier << '_' << Suffix << ':' << Name; - - return false; -} - -bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, - unsigned AsmVariant, - const char *ExtraCode, raw_ostream &O) { - const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); - - if (!ExtraCode) - ExtraCode = ""; - - switch(ExtraCode[0]) { - default: - if (!AsmPrinter::PrintAsmOperand(MI, OpNum, AsmVariant, ExtraCode, O)) - return false; - break; - case 'w': - // Output 32-bit general register operand, constant zero as wzr, or stack - // pointer as wsp. Ignored when used with other operand types. - if (!printModifiedGPRAsmOperand(MI->getOperand(OpNum), TRI, - AArch64::GPR32RegClass, O)) - return false; - break; - case 'x': - // Output 64-bit general register operand, constant zero as xzr, or stack - // pointer as sp. Ignored when used with other operand types. - if (!printModifiedGPRAsmOperand(MI->getOperand(OpNum), TRI, - AArch64::GPR64RegClass, O)) - return false; - break; - case 'H': - // Output higher numbered of a 64-bit general register pair - case 'Q': - // Output least significant register of a 64-bit general register pair - case 'R': - // Output most significant register of a 64-bit general register pair - - // FIXME note: these three operand modifiers will require, to some extent, - // adding a paired GPR64 register class. Initial investigation suggests that - // assertions are hit unless it has a type and is made legal for that type - // in ISelLowering. After that step is made, the number of modifications - // needed explodes (operation legality, calling conventions, stores, reg - // copies ...). - llvm_unreachable("FIXME: Unimplemented register pairs"); - case 'b': - case 'h': - case 's': - case 'd': - case 'q': - if (!printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI, - ExtraCode[0], O)) - return false; - break; - case 'A': - // Output symbolic address with appropriate relocation modifier (also - // suitable for ADRP). - if (!printSymbolicAddress(MI->getOperand(OpNum), false, "", O)) - return false; - break; - case 'L': - // Output bits 11:0 of symbolic address with appropriate :lo12: relocation - // modifier. - if (!printSymbolicAddress(MI->getOperand(OpNum), true, "lo12", O)) - return false; - break; - case 'G': - // Output bits 23:12 of symbolic address with appropriate :hi12: relocation - // modifier (currently only for TLS local exec). - if (!printSymbolicAddress(MI->getOperand(OpNum), true, "hi12", O)) - return false; - break; - case 'a': - return PrintAsmMemoryOperand(MI, OpNum, AsmVariant, ExtraCode, O); - } - - // There's actually no operand modifier, which leads to a slightly eclectic - // set of behaviour which we have to handle here. - const MachineOperand &MO = MI->getOperand(OpNum); - switch (MO.getType()) { - default: - llvm_unreachable("Unexpected operand for inline assembly"); - case MachineOperand::MO_Register: - // GCC prints the unmodified operand of a 'w' constraint as the vector - // register. Technically, we could allocate the argument as a VPR128, but - // that leads to extremely dodgy copies being generated to get the data - // there. 
- if (printModifiedFPRAsmOperand(MO, TRI, 'v', O)) - O << AArch64InstPrinter::getRegisterName(MO.getReg()); - break; - case MachineOperand::MO_Immediate: - O << '#' << MO.getImm(); - break; - case MachineOperand::MO_FPImmediate: - assert(MO.getFPImm()->isExactlyValue(0.0) && "Only FP 0.0 expected"); - O << "#0.0"; - break; - case MachineOperand::MO_BlockAddress: - case MachineOperand::MO_ConstantPoolIndex: - case MachineOperand::MO_GlobalAddress: - return printSymbolicAddress(MO, false, "", O); - } - - return false; -} - -bool AArch64AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, - unsigned OpNum, - unsigned AsmVariant, - const char *ExtraCode, - raw_ostream &O) { - // Currently both the memory constraints (m and Q) behave the same and amount - // to the address as a single register. In future, we may allow "m" to provide - // both a base and an offset. - const MachineOperand &MO = MI->getOperand(OpNum); - assert(MO.isReg() && "unexpected inline assembly memory operand"); - O << '[' << AArch64InstPrinter::getRegisterName(MO.getReg()) << ']'; - return false; -} - -#include "AArch64GenMCPseudoLowering.inc" - -void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) { - // Do any auto-generated pseudo lowerings. - if (emitPseudoExpansionLowering(OutStreamer, MI)) - return; - - MCInst TmpInst; - LowerAArch64MachineInstrToMCInst(MI, TmpInst, *this); - EmitToStreamer(OutStreamer, TmpInst); -} - -void AArch64AsmPrinter::EmitEndOfAsmFile(Module &M) { - if (Subtarget->isTargetELF()) { - const TargetLoweringObjectFileELF &TLOFELF = - static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering()); - - MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>(); - - // Output stubs for external and common global variables. - MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList(); - if (!Stubs.empty()) { - OutStreamer.SwitchSection(TLOFELF.getDataRelSection()); - const DataLayout *TD = TM.getDataLayout(); - - for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { - OutStreamer.EmitLabel(Stubs[i].first); - OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(), - TD->getPointerSize(0)); - } - Stubs.clear(); - } - } -} - -bool AArch64AsmPrinter::runOnMachineFunction(MachineFunction &MF) { - return AsmPrinter::runOnMachineFunction(MF); -} - -// Force static initialization. -extern "C" void LLVMInitializeAArch64AsmPrinter() { - RegisterAsmPrinter<AArch64AsmPrinter> X(TheAArch64leTarget); - RegisterAsmPrinter<AArch64AsmPrinter> Y(TheAArch64beTarget); -} - diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.h b/llvm/lib/Target/AArch64/AArch64AsmPrinter.h deleted file mode 100644 index f77553c7b8b..00000000000 --- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.h +++ /dev/null @@ -1,76 +0,0 @@ -// AArch64AsmPrinter.h - Print machine code to an AArch64 .s file -*- C++ -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the AArch64 assembly printer class. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_AARCH64ASMPRINTER_H -#define LLVM_AARCH64ASMPRINTER_H - -#include "AArch64.h" -#include "AArch64TargetMachine.h" -#include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/Support/Compiler.h" - -namespace llvm { - -class MCOperand; - -class LLVM_LIBRARY_VISIBILITY AArch64AsmPrinter : public AsmPrinter { - - /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can - /// make the right decision when printing asm code for different targets. - const AArch64Subtarget *Subtarget; - - // emitPseudoExpansionLowering - tblgen'erated. - bool emitPseudoExpansionLowering(MCStreamer &OutStreamer, - const MachineInstr *MI); - - public: - explicit AArch64AsmPrinter(TargetMachine &TM, MCStreamer &Streamer) - : AsmPrinter(TM, Streamer) { - Subtarget = &TM.getSubtarget<AArch64Subtarget>(); - } - - bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const; - - MCOperand lowerSymbolOperand(const MachineOperand &MO, - const MCSymbol *Sym) const; - - void EmitInstruction(const MachineInstr *MI) override; - void EmitEndOfAsmFile(Module &M) override; - - bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, - unsigned AsmVariant, const char *ExtraCode, - raw_ostream &O) override; - bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum, - unsigned AsmVariant, const char *ExtraCode, - raw_ostream &O) override; - - /// printSymbolicAddress - Given some kind of reasonably bare symbolic - /// reference, print out the appropriate asm string to represent it. If - /// appropriate, a relocation-specifier will be produced, composed of a - /// general class derived from the MO parameter and an instruction-specific - /// suffix, provided in Suffix. E.g. ":got_lo12:" if a Suffix of "lo12" is - /// given. - bool printSymbolicAddress(const MachineOperand &MO, - bool PrintImmediatePrefix, - StringRef Suffix, raw_ostream &O); - - const char *getPassName() const override { - return "AArch64 Assembly Printer"; - } - - bool runOnMachineFunction(MachineFunction &MF) override; -}; -} // end namespace llvm - -#endif diff --git a/llvm/lib/Target/AArch64/AArch64BranchFixupPass.cpp b/llvm/lib/Target/AArch64/AArch64BranchFixupPass.cpp deleted file mode 100644 index 585cbee9966..00000000000 --- a/llvm/lib/Target/AArch64/AArch64BranchFixupPass.cpp +++ /dev/null @@ -1,601 +0,0 @@ -//===-- AArch64BranchFixupPass.cpp - AArch64 branch fixup -----------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains a pass that fixes AArch64 branches which have ended up out -// of range for their immediate operands. 
-// -//===----------------------------------------------------------------------===// - -#include "AArch64.h" -#include "AArch64InstrInfo.h" -#include "Utils/AArch64BaseInfo.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -#define DEBUG_TYPE "aarch64-branch-fixup" - -STATISTIC(NumSplit, "Number of uncond branches inserted"); -STATISTIC(NumCBrFixed, "Number of cond branches fixed"); - -/// Return the worst case padding that could result from unknown offset bits. -/// This does not include alignment padding caused by known offset bits. -/// -/// @param LogAlign log2(alignment) -/// @param KnownBits Number of known low offset bits. -static inline unsigned UnknownPadding(unsigned LogAlign, unsigned KnownBits) { - if (KnownBits < LogAlign) - return (1u << LogAlign) - (1u << KnownBits); - return 0; -} - -namespace { - /// Due to limited PC-relative displacements, conditional branches to distant - /// blocks may need converting into an unconditional equivalent. For example: - /// tbz w1, #0, far_away - /// becomes - /// tbnz w1, #0, skip - /// b far_away - /// skip: - class AArch64BranchFixup : public MachineFunctionPass { - /// Information about the offset and size of a single basic block. - struct BasicBlockInfo { - /// Distance from the beginning of the function to the beginning of this - /// basic block. - /// - /// Offsets are computed assuming worst case padding before an aligned - /// block. This means that subtracting basic block offsets always gives a - /// conservative estimate of the real distance which may be smaller. - /// - /// Because worst case padding is used, the computed offset of an aligned - /// block may not actually be aligned. - unsigned Offset; - - /// Size of the basic block in bytes. If the block contains inline - /// assembly, this is a worst case estimate. - /// - /// The size does not include any alignment padding whether from the - /// beginning of the block, or from an aligned jump table at the end. - unsigned Size; - - /// The number of low bits in Offset that are known to be exact. The - /// remaining bits of Offset are an upper bound. - uint8_t KnownBits; - - /// When non-zero, the block contains instructions (inline asm) of unknown - /// size. The real size may be smaller than Size bytes by a multiple of 1 - /// << Unalign. - uint8_t Unalign; - - BasicBlockInfo() : Offset(0), Size(0), KnownBits(0), Unalign(0) {} - - /// Compute the number of known offset bits internally to this block. - /// This number should be used to predict worst case padding when - /// splitting the block. - unsigned internalKnownBits() const { - unsigned Bits = Unalign ? Unalign : KnownBits; - // If the block size isn't a multiple of the known bits, assume the - // worst case padding. - if (Size & ((1u << Bits) - 1)) - Bits = countTrailingZeros(Size); - return Bits; - } - - /// Compute the offset immediately following this block. If LogAlign is - /// specified, return the offset the successor block will get if it has - /// this alignment. - unsigned postOffset(unsigned LogAlign = 0) const { - unsigned PO = Offset + Size; - if (!LogAlign) - return PO; - // Add alignment padding from the terminator. - return PO + UnknownPadding(LogAlign, internalKnownBits()); - } - - /// Compute the number of known low bits of postOffset. 
If this block - /// contains inline asm, the number of known bits drops to the - /// instruction alignment. An aligned terminator may increase the number - /// of know bits. - /// If LogAlign is given, also consider the alignment of the next block. - unsigned postKnownBits(unsigned LogAlign = 0) const { - return std::max(LogAlign, internalKnownBits()); - } - }; - - std::vector<BasicBlockInfo> BBInfo; - - /// One per immediate branch, keeping the machine instruction pointer, - /// conditional or unconditional, the max displacement, and (if IsCond is - /// true) the corresponding inverted branch opcode. - struct ImmBranch { - MachineInstr *MI; - unsigned OffsetBits : 31; - bool IsCond : 1; - ImmBranch(MachineInstr *mi, unsigned offsetbits, bool cond) - : MI(mi), OffsetBits(offsetbits), IsCond(cond) {} - }; - - /// Keep track of all the immediate branch instructions. - /// - std::vector<ImmBranch> ImmBranches; - - MachineFunction *MF; - const AArch64InstrInfo *TII; - public: - static char ID; - AArch64BranchFixup() : MachineFunctionPass(ID) {} - - bool runOnMachineFunction(MachineFunction &MF) override; - - const char *getPassName() const override { - return "AArch64 branch fixup pass"; - } - - private: - void initializeFunctionInfo(); - MachineBasicBlock *splitBlockBeforeInstr(MachineInstr *MI); - void adjustBBOffsetsAfter(MachineBasicBlock *BB); - bool isBBInRange(MachineInstr *MI, MachineBasicBlock *BB, - unsigned OffsetBits); - bool fixupImmediateBr(ImmBranch &Br); - bool fixupConditionalBr(ImmBranch &Br); - - void computeBlockSize(MachineBasicBlock *MBB); - unsigned getOffsetOf(MachineInstr *MI) const; - void dumpBBs(); - void verify(); - }; - char AArch64BranchFixup::ID = 0; -} - -/// check BBOffsets -void AArch64BranchFixup::verify() { -#ifndef NDEBUG - for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); - MBBI != E; ++MBBI) { - MachineBasicBlock *MBB = MBBI; - unsigned MBBId = MBB->getNumber(); - assert(!MBBId || BBInfo[MBBId - 1].postOffset() <= BBInfo[MBBId].Offset); - } -#endif -} - -/// print block size and offset information - debugging -void AArch64BranchFixup::dumpBBs() { - DEBUG({ - for (unsigned J = 0, E = BBInfo.size(); J !=E; ++J) { - const BasicBlockInfo &BBI = BBInfo[J]; - dbgs() << format("%08x BB#%u\t", BBI.Offset, J) - << " kb=" << unsigned(BBI.KnownBits) - << " ua=" << unsigned(BBI.Unalign) - << format(" size=%#x\n", BBInfo[J].Size); - } - }); -} - -/// Returns an instance of the branch fixup pass. -FunctionPass *llvm::createAArch64BranchFixupPass() { - return new AArch64BranchFixup(); -} - -bool AArch64BranchFixup::runOnMachineFunction(MachineFunction &mf) { - MF = &mf; - DEBUG(dbgs() << "***** AArch64BranchFixup ******"); - TII = (const AArch64InstrInfo*)MF->getTarget().getInstrInfo(); - - // This pass invalidates liveness information when it splits basic blocks. - MF->getRegInfo().invalidateLiveness(); - - // Renumber all of the machine basic blocks in the function, guaranteeing that - // the numbers agree with the position of the block in the function. - MF->RenumberBlocks(); - - // Do the initial scan of the function, building up information about the - // sizes of each block and location of each immediate branch. - initializeFunctionInfo(); - - // Iteratively fix up branches until there is no change. 
- unsigned NoBRIters = 0; - bool MadeChange = false; - while (true) { - DEBUG(dbgs() << "Beginning iteration #" << NoBRIters << '\n'); - bool BRChange = false; - for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i) - BRChange |= fixupImmediateBr(ImmBranches[i]); - if (BRChange && ++NoBRIters > 30) - report_fatal_error("Branch Fix Up pass failed to converge!"); - DEBUG(dumpBBs()); - - if (!BRChange) - break; - MadeChange = true; - } - - // After a while, this might be made debug-only, but it is not expensive. - verify(); - - DEBUG(dbgs() << '\n'; dumpBBs()); - - BBInfo.clear(); - ImmBranches.clear(); - - return MadeChange; -} - -/// Return true if the specified basic block can fallthrough into the block -/// immediately after it. -static bool BBHasFallthrough(MachineBasicBlock *MBB) { - // Get the next machine basic block in the function. - MachineFunction::iterator MBBI = MBB; - // Can't fall off end of function. - if (std::next(MBBI) == MBB->getParent()->end()) - return false; - - MachineBasicBlock *NextBB = std::next(MBBI); - for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(), - E = MBB->succ_end(); I != E; ++I) - if (*I == NextBB) - return true; - - return false; -} - -/// Do the initial scan of the function, building up information about the sizes -/// of each block, and each immediate branch. -void AArch64BranchFixup::initializeFunctionInfo() { - BBInfo.clear(); - BBInfo.resize(MF->getNumBlockIDs()); - - // First thing, compute the size of all basic blocks, and see if the function - // has any inline assembly in it. If so, we have to be conservative about - // alignment assumptions, as we don't know for sure the size of any - // instructions in the inline assembly. - for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) - computeBlockSize(I); - - // The known bits of the entry block offset are determined by the function - // alignment. - BBInfo.front().KnownBits = MF->getAlignment(); - - // Compute block offsets and known bits. - adjustBBOffsetsAfter(MF->begin()); - - // Now go back through the instructions and build up our data structures. - for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); - MBBI != E; ++MBBI) { - MachineBasicBlock &MBB = *MBBI; - - for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); - I != E; ++I) { - if (I->isDebugValue()) - continue; - - int Opc = I->getOpcode(); - if (I->isBranch()) { - bool IsCond = false; - - // The offsets encoded in instructions here scale by the instruction - // size (4 bytes), effectively increasing their range by 2 bits. - unsigned Bits = 0; - switch (Opc) { - default: - continue; // Ignore other JT branches - case AArch64::TBZxii: - case AArch64::TBZwii: - case AArch64::TBNZxii: - case AArch64::TBNZwii: - IsCond = true; - Bits = 14 + 2; - break; - case AArch64::Bcc: - case AArch64::CBZx: - case AArch64::CBZw: - case AArch64::CBNZx: - case AArch64::CBNZw: - IsCond = true; - Bits = 19 + 2; - break; - case AArch64::Bimm: - Bits = 26 + 2; - break; - } - - // Record this immediate branch. - ImmBranches.push_back(ImmBranch(I, Bits, IsCond)); - } - } - } -} - -/// Compute the size and some alignment information for MBB. This function -/// updates BBInfo directly. 
-void AArch64BranchFixup::computeBlockSize(MachineBasicBlock *MBB) { - BasicBlockInfo &BBI = BBInfo[MBB->getNumber()]; - BBI.Size = 0; - BBI.Unalign = 0; - - for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; - ++I) { - BBI.Size += TII->getInstSizeInBytes(*I); - // For inline asm, GetInstSizeInBytes returns a conservative estimate. - // The actual size may be smaller, but still a multiple of the instr size. - if (I->isInlineAsm()) - BBI.Unalign = 2; - } -} - -/// Return the current offset of the specified machine instruction from the -/// start of the function. This offset changes as stuff is moved around inside -/// the function. -unsigned AArch64BranchFixup::getOffsetOf(MachineInstr *MI) const { - MachineBasicBlock *MBB = MI->getParent(); - - // The offset is composed of two things: the sum of the sizes of all MBB's - // before this instruction's block, and the offset from the start of the block - // it is in. - unsigned Offset = BBInfo[MBB->getNumber()].Offset; - - // Sum instructions before MI in MBB. - for (MachineBasicBlock::iterator I = MBB->begin(); &*I != MI; ++I) { - assert(I != MBB->end() && "Didn't find MI in its own basic block?"); - Offset += TII->getInstSizeInBytes(*I); - } - return Offset; -} - -/// Split the basic block containing MI into two blocks, which are joined by -/// an unconditional branch. Update data structures and renumber blocks to -/// account for this change and returns the newly created block. -MachineBasicBlock * -AArch64BranchFixup::splitBlockBeforeInstr(MachineInstr *MI) { - MachineBasicBlock *OrigBB = MI->getParent(); - - // Create a new MBB for the code after the OrigBB. - MachineBasicBlock *NewBB = - MF->CreateMachineBasicBlock(OrigBB->getBasicBlock()); - MachineFunction::iterator MBBI = OrigBB; ++MBBI; - MF->insert(MBBI, NewBB); - - // Splice the instructions starting with MI over to NewBB. - NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end()); - - // Add an unconditional branch from OrigBB to NewBB. - // Note the new unconditional branch is not being recorded. - // There doesn't seem to be meaningful DebugInfo available; this doesn't - // correspond to anything in the source. - BuildMI(OrigBB, DebugLoc(), TII->get(AArch64::Bimm)).addMBB(NewBB); - ++NumSplit; - - // Update the CFG. All succs of OrigBB are now succs of NewBB. - NewBB->transferSuccessors(OrigBB); - - // OrigBB branches to NewBB. - OrigBB->addSuccessor(NewBB); - - // Update internal data structures to account for the newly inserted MBB. - MF->RenumberBlocks(NewBB); - - // Insert an entry into BBInfo to align it properly with the (newly - // renumbered) block numbers. - BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo()); - - // Figure out how large the OrigBB is. As the first half of the original - // block, it cannot contain a tablejump. The size includes - // the new jump we added. (It should be possible to do this without - // recounting everything, but it's very confusing, and this is rarely - // executed.) - computeBlockSize(OrigBB); - - // Figure out how large the NewMBB is. As the second half of the original - // block, it may contain a tablejump. - computeBlockSize(NewBB); - - // All BBOffsets following these blocks must be modified. - adjustBBOffsetsAfter(OrigBB); - - return NewBB; -} - -void AArch64BranchFixup::adjustBBOffsetsAfter(MachineBasicBlock *BB) { - unsigned BBNum = BB->getNumber(); - for(unsigned i = BBNum + 1, e = MF->getNumBlockIDs(); i < e; ++i) { - // Get the offset and known bits at the end of the layout predecessor. 
- // Include the alignment of the current block. - unsigned LogAlign = MF->getBlockNumbered(i)->getAlignment(); - unsigned Offset = BBInfo[i - 1].postOffset(LogAlign); - unsigned KnownBits = BBInfo[i - 1].postKnownBits(LogAlign); - - // This is where block i begins. Stop if the offset is already correct, - // and we have updated 2 blocks. This is the maximum number of blocks - // changed before calling this function. - if (i > BBNum + 2 && - BBInfo[i].Offset == Offset && - BBInfo[i].KnownBits == KnownBits) - break; - - BBInfo[i].Offset = Offset; - BBInfo[i].KnownBits = KnownBits; - } -} - -/// Returns true if the distance between specific MI and specific BB can fit in -/// MI's displacement field. -bool AArch64BranchFixup::isBBInRange(MachineInstr *MI, - MachineBasicBlock *DestBB, - unsigned OffsetBits) { - int64_t BrOffset = getOffsetOf(MI); - int64_t DestOffset = BBInfo[DestBB->getNumber()].Offset; - - DEBUG(dbgs() << "Branch of destination BB#" << DestBB->getNumber() - << " from BB#" << MI->getParent()->getNumber() - << " bits available=" << OffsetBits - << " from " << getOffsetOf(MI) << " to " << DestOffset - << " offset " << int(DestOffset-BrOffset) << "\t" << *MI); - - return isIntN(OffsetBits, DestOffset - BrOffset); -} - -/// Fix up an immediate branch whose destination is too far away to fit in its -/// displacement field. -bool AArch64BranchFixup::fixupImmediateBr(ImmBranch &Br) { - MachineInstr *MI = Br.MI; - MachineBasicBlock *DestBB = nullptr; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - if (MI->getOperand(i).isMBB()) { - DestBB = MI->getOperand(i).getMBB(); - break; - } - } - assert(DestBB && "Branch with no destination BB?"); - - // Check to see if the DestBB is already in-range. - if (isBBInRange(MI, DestBB, Br.OffsetBits)) - return false; - - assert(Br.IsCond && "Only conditional branches should need fixup"); - return fixupConditionalBr(Br); -} - -/// Fix up a conditional branch whose destination is too far away to fit in its -/// displacement field. It is converted to an inverse conditional branch + an -/// unconditional branch to the destination. -bool -AArch64BranchFixup::fixupConditionalBr(ImmBranch &Br) { - MachineInstr *MI = Br.MI; - MachineBasicBlock *MBB = MI->getParent(); - unsigned CondBrMBBOperand = 0; - - // The general idea is to add an unconditional branch to the destination and - // invert the conditional branch to jump over it. Complications occur around - // fallthrough and unreachable ends to the block. - // b.lt L1 - // => - // b.ge L2 - // b L1 - // L2: - - // First we invert the conditional branch, by creating a replacement if - // necessary. This if statement contains all the special handling of different - // branch types. 
- if (MI->getOpcode() == AArch64::Bcc) { - // The basic block is operand number 1 for Bcc - CondBrMBBOperand = 1; - - A64CC::CondCodes CC = (A64CC::CondCodes)MI->getOperand(0).getImm(); - CC = A64InvertCondCode(CC); - MI->getOperand(0).setImm(CC); - } else { - MachineInstrBuilder InvertedMI; - int InvertedOpcode; - switch (MI->getOpcode()) { - default: llvm_unreachable("Unknown branch type"); - case AArch64::TBZxii: InvertedOpcode = AArch64::TBNZxii; break; - case AArch64::TBZwii: InvertedOpcode = AArch64::TBNZwii; break; - case AArch64::TBNZxii: InvertedOpcode = AArch64::TBZxii; break; - case AArch64::TBNZwii: InvertedOpcode = AArch64::TBZwii; break; - case AArch64::CBZx: InvertedOpcode = AArch64::CBNZx; break; - case AArch64::CBZw: InvertedOpcode = AArch64::CBNZw; break; - case AArch64::CBNZx: InvertedOpcode = AArch64::CBZx; break; - case AArch64::CBNZw: InvertedOpcode = AArch64::CBZw; break; - } - - InvertedMI = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(InvertedOpcode)); - for (unsigned i = 0, e= MI->getNumOperands(); i != e; ++i) { - InvertedMI.addOperand(MI->getOperand(i)); - if (MI->getOperand(i).isMBB()) - CondBrMBBOperand = i; - } - - MI->eraseFromParent(); - MI = Br.MI = InvertedMI; - } - - // If the branch is at the end of its MBB and that has a fall-through block, - // direct the updated conditional branch to the fall-through - // block. Otherwise, split the MBB before the next instruction. - MachineInstr *BMI = &MBB->back(); - bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB); - - ++NumCBrFixed; - if (BMI != MI) { - if (std::next(MachineBasicBlock::iterator(MI)) == std::prev(MBB->end()) && - BMI->getOpcode() == AArch64::Bimm) { - // Last MI in the BB is an unconditional branch. We can swap destinations: - // b.eq L1 (temporarily b.ne L1 after first change) - // b L2 - // => - // b.ne L2 - // b L1 - MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB(); - if (isBBInRange(MI, NewDest, Br.OffsetBits)) { - DEBUG(dbgs() << " Invert Bcc condition and swap its destination with " - << *BMI); - MachineBasicBlock *DestBB = MI->getOperand(CondBrMBBOperand).getMBB(); - BMI->getOperand(0).setMBB(DestBB); - MI->getOperand(CondBrMBBOperand).setMBB(NewDest); - return true; - } - } - } - - if (NeedSplit) { - MachineBasicBlock::iterator MBBI = MI; ++MBBI; - splitBlockBeforeInstr(MBBI); - // No need for the branch to the next block. We're adding an unconditional - // branch to the destination. - int delta = TII->getInstSizeInBytes(MBB->back()); - BBInfo[MBB->getNumber()].Size -= delta; - MBB->back().eraseFromParent(); - // BBInfo[SplitBB].Offset is wrong temporarily, fixed below - } - - // After splitting and removing the unconditional branch from the original BB, - // the structure is now: - // oldbb: - // [things] - // b.invertedCC L1 - // splitbb/fallthroughbb: - // [old b L2/real continuation] - // - // We now have to change the conditional branch to point to splitbb and add an - // unconditional branch after it to L1, giving the final structure: - // oldbb: - // [things] - // b.invertedCC splitbb - // b L1 - // splitbb/fallthroughbb: - // [old b L2/real continuation] - MachineBasicBlock *NextBB = std::next(MachineFunction::iterator(MBB)); - - DEBUG(dbgs() << " Insert B to BB#" - << MI->getOperand(CondBrMBBOperand).getMBB()->getNumber() - << " also invert condition and change dest. to BB#" - << NextBB->getNumber() << "\n"); - - // Insert a new unconditional branch and fixup the destination of the - // conditional one. 
Also update the ImmBranch as well as adding a new entry - // for the new branch. - BuildMI(MBB, DebugLoc(), TII->get(AArch64::Bimm)) - .addMBB(MI->getOperand(CondBrMBBOperand).getMBB()); - MI->getOperand(CondBrMBBOperand).setMBB(NextBB); - - BBInfo[MBB->getNumber()].Size += TII->getInstSizeInBytes(MBB->back()); - - // 26 bits written down in Bimm, specifying a multiple of 4. - unsigned OffsetBits = 26 + 2; - ImmBranches.push_back(ImmBranch(&MBB->back(), OffsetBits, false)); - - adjustBBOffsetsAfter(MBB); - return true; -} diff --git a/llvm/lib/Target/AArch64/AArch64CallingConv.td b/llvm/lib/Target/AArch64/AArch64CallingConv.td deleted file mode 100644 index 9fe6aae2e32..00000000000 --- a/llvm/lib/Target/AArch64/AArch64CallingConv.td +++ /dev/null @@ -1,197 +0,0 @@ -//==-- AArch64CallingConv.td - Calling Conventions for ARM ----*- tblgen -*-==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// This describes the calling conventions for AArch64 architecture. -//===----------------------------------------------------------------------===// - - -// The AArch64 Procedure Call Standard is unfortunately specified at a slightly -// higher level of abstraction than LLVM's target interface presents. In -// particular, it refers (like other ABIs, in fact) directly to -// structs. However, generic LLVM code takes the liberty of lowering structure -// arguments to the component fields before we see them. -// -// As a result, the obvious direct map from LLVM IR to PCS concepts can't be -// implemented, so the goals of this calling convention are, in decreasing -// priority order: -// 1. Expose *some* way to express the concepts required to implement the -// generic PCS from a front-end. -// 2. Provide a sane ABI for pure LLVM. -// 3. Follow the generic PCS as closely as is naturally possible. -// -// The suggested front-end implementation of PCS features is: -// * Integer, float and vector arguments of all sizes which end up in -// registers are passed and returned via the natural LLVM type. -// * Structure arguments with size <= 16 bytes are passed and returned in -// registers as similar integer or composite types. For example: -// [1 x i64], [2 x i64] or [1 x i128] (if alignment 16 needed). -// * HFAs in registers follow rules similar to small structs: appropriate -// composite types. -// * Structure arguments with size > 16 bytes are passed via a pointer, -// handled completely by the front-end. -// * Structure return values > 16 bytes via an sret pointer argument. -// * Other stack-based arguments (not large structs) are passed using byval -// pointers. Padding arguments are added beforehand to guarantee a large -// struct doesn't later use integer registers. -// -// N.b. this means that it is the front-end's responsibility (if it cares about -// PCS compliance) to check whether enough registers are available for an -// argument when deciding how to pass it. - -class CCIfAlign<int Align, CCAction A>: - CCIf<"ArgFlags.getOrigAlign() == " # Align, A>; - -def CC_A64_APCS : CallingConv<[ - // SRet is an LLVM-specific concept, so it takes precedence over general ABI - // concerns. However, this rule will be used by C/C++ frontends to implement - // structure return. - CCIfSRet<CCAssignToReg<[X8]>>, - - // Put ByVal arguments directly on the stack. Minimum size and alignment of a - // slot is 64-bit. 
- CCIfByVal<CCPassByVal<8, 8>>, - - // Canonicalise the various types that live in different floating-point - // registers. This makes sense because the PCS does not distinguish Short - // Vectors and Floating-point types. - CCIfType<[v1i16, v2i8], CCBitConvertToType<f16>>, - CCIfType<[v1i32, v4i8, v2i16], CCBitConvertToType<f32>>, - CCIfType<[v8i8, v4i16, v2i32, v2f32, v1i64, v1f64], CCBitConvertToType<f64>>, - CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], - CCBitConvertToType<f128>>, - - // PCS: "C.1: If the argument is a Half-, Single-, Double- or Quad- precision - // Floating-point or Short Vector Type and the NSRN is less than 8, then the - // argument is allocated to the least significant bits of register - // v[NSRN]. The NSRN is incremented by one. The argument has now been - // allocated." - CCIfType<[v1i8], CCAssignToReg<[B0, B1, B2, B3, B4, B5, B6, B7]>>, - CCIfType<[f16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>, - CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>, - CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, - CCIfType<[f128], CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, - - // PCS: "C.2: If the argument is an HFA and there are sufficient unallocated - // SIMD and Floating-point registers (NSRN - number of elements < 8), then the - // argument is allocated to SIMD and Floating-point registers (with one - // register per element of the HFA). The NSRN is incremented by the number of - // registers used. The argument has now been allocated." - // - // N.b. As above, this rule is the responsibility of the front-end. - - // "C.3: If the argument is an HFA then the NSRN is set to 8 and the size of - // the argument is rounded up to the nearest multiple of 8 bytes." - // - // "C.4: If the argument is an HFA, a Quad-precision Floating-point or Short - // Vector Type then the NSAA is rounded up to the larger of 8 or the Natural - // Alignment of the Argument's type." - // - // It is expected that these will be satisfied by adding dummy arguments to - // the prototype. - - // PCS: "C.5: If the argument is a Half- or Single- precision Floating-point - // type then the size of the argument is set to 8 bytes. The effect is as if - // the argument had been copied to the least significant bits of a 64-bit - // register and the remaining bits filled with unspecified values." - CCIfType<[f16, f32], CCPromoteToType<f64>>, - - // PCS: "C.6: If the argument is an HFA, a Half-, Single-, Double- or Quad- - // precision Floating-point or Short Vector Type, then the argument is copied - // to memory at the adjusted NSAA. The NSAA is incremented by the size of the - // argument. The argument has now been allocated." - CCIfType<[f64], CCAssignToStack<8, 8>>, - CCIfType<[f128], CCAssignToStack<16, 16>>, - - // PCS: "C.7: If the argument is an Integral Type, the size of the argument is - // less than or equal to 8 bytes and the NGRN is less than 8, the argument is - // copied to the least significant bits of x[NGRN]. The NGRN is incremented by - // one. The argument has now been allocated." - - // First we implement C.8 and C.9 (128-bit types get even registers). i128 is - // represented as two i64s, the first one being split. If we delayed this - // operation C.8 would never be reached. - CCIfType<[i64], - CCIfSplit<CCAssignToRegWithShadow<[X0, X2, X4, X6], [X0, X1, X3, X5]>>>, - - // Note: the promotion also implements C.14. 
- CCIfType<[i8, i16, i32], CCPromoteToType<i64>>, - - // And now the real implementation of C.7 - CCIfType<[i64], CCAssignToReg<[X0, X1, X2, X3, X4, X5, X6, X7]>>, - - // PCS: "C.8: If the argument has an alignment of 16 then the NGRN is rounded - // up to the next even number." - // - // "C.9: If the argument is an Integral Type, the size of the argument is - // equal to 16 and the NGRN is less than 7, the argument is copied to x[NGRN] - // and x[NGRN+1], x[NGRN] shall contain the lower addressed double-word of the - // memory representation of the argument. The NGRN is incremented by two. The - // argument has now been allocated." - // - // Subtlety here: what if alignment is 16 but it is not an integral type? All - // floating-point types have been allocated already, which leaves composite - // types: this is why a front-end may need to produce i128 for a struct <= 16 - // bytes. - - // PCS: "C.10 If the argument is a Composite Type and the size in double-words - // of the argument is not more than 8 minus NGRN, then the argument is copied - // into consecutive general-purpose registers, starting at x[NGRN]. The - // argument is passed as though it had been loaded into the registers from a - // double-word aligned address with an appropriate sequence of LDR - // instructions loading consecutive registers from memory (the contents of any - // unused parts of the registers are unspecified by this standard). The NGRN - // is incremented by the number of registers used. The argument has now been - // allocated." - // - // Another one that's the responsibility of the front-end (sigh). - - // PCS: "C.11: The NGRN is set to 8." - CCCustom<"CC_AArch64NoMoreRegs">, - - // PCS: "C.12: The NSAA is rounded up to the larger of 8 or the Natural - // Alignment of the argument's type." - // - // PCS: "C.13: If the argument is a composite type then the argument is copied - // to memory at the adjusted NSAA. The NSAA is by the size of the - // argument. The argument has now been allocated." - // - // Note that the effect of this corresponds to a memcpy rather than register - // stores so that the struct ends up correctly addressable at the adjusted - // NSAA. - - // PCS: "C.14: If the size of the argument is less than 8 bytes then the size - // of the argument is set to 8 bytes. The effect is as if the argument was - // copied to the least significant bits of a 64-bit register and the remaining - // bits filled with unspecified values." - // - // Integer types were widened above. Floating-point and composite types have - // already been allocated completely. Nothing to do. - - // PCS: "C.15: The argument is copied to memory at the adjusted NSAA. The NSAA - // is incremented by the size of the argument. The argument has now been - // allocated." - CCIfType<[i64], CCIfSplit<CCAssignToStack<8, 16>>>, - CCIfType<[i64], CCAssignToStack<8, 8>> - -]>; - -// According to the PCS, X19-X30 are callee-saved, however only the low 64-bits -// of vector registers (8-15) are callee-saved. The order here is is picked up -// by PrologEpilogInserter.cpp to allocate stack slots, starting from top of -// stack upon entry. This gives the customary layout of x30 at [sp-8], x29 at -// [sp-16], ... -def CSR_PCS : CalleeSavedRegs<(add (sequence "X%u", 30, 19), - (sequence "D%u", 15, 8))>; - - -// TLS descriptor calls are extremely restricted in their changes, to allow -// optimisations in the (hopefully) more common fast path where no real action -// is needed. 
They actually have to preserve all registers, except for the -// unavoidable X30 and the return register X0. -def TLSDesc : CalleeSavedRegs<(add (sequence "X%u", 29, 1), - (sequence "Q%u", 31, 0))>; diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp deleted file mode 100644 index 972e6f7617b..00000000000 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ /dev/null @@ -1,626 +0,0 @@ -//===- AArch64FrameLowering.cpp - AArch64 Frame Information ---------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the AArch64 implementation of TargetFrameLowering class. -// -//===----------------------------------------------------------------------===// - -#include "AArch64.h" -#include "AArch64FrameLowering.h" -#include "AArch64InstrInfo.h" -#include "AArch64MachineFunctionInfo.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/IR/Function.h" -#include "llvm/MC/MachineLocation.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" - -using namespace llvm; - -void AArch64FrameLowering::splitSPAdjustments(uint64_t Total, - uint64_t &Initial, - uint64_t &Residual) const { - // 0x1f0 here is a pessimistic (i.e. realistic) boundary: x-register LDP - // instructions have a 7-bit signed immediate scaled by 8, giving a reach of - // 0x1f8, but stack adjustment should always be a multiple of 16. - if (Total <= 0x1f0) { - Initial = Total; - Residual = 0; - } else { - Initial = 0x1f0; - Residual = Total - Initial; - } -} - -void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const { - AArch64MachineFunctionInfo *FuncInfo = - MF.getInfo<AArch64MachineFunctionInfo>(); - MachineBasicBlock &MBB = MF.front(); - MachineBasicBlock::iterator MBBI = MBB.begin(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); - DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); - - MachineModuleInfo &MMI = MF.getMMI(); - const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); - bool NeedsFrameMoves = MMI.hasDebugInfo() - || MF.getFunction()->needsUnwindTableEntry(); - - uint64_t NumInitialBytes, NumResidualBytes; - - // Currently we expect the stack to be laid out by - // sub sp, sp, #initial - // stp x29, x30, [sp, #offset] - // ... - // str xxx, [sp, #offset] - // sub sp, sp, #rest (possibly via extra instructions). - if (MFI->getCalleeSavedInfo().size()) { - // If there are callee-saved registers, we want to store them efficiently as - // a block, and virtual base assignment happens too early to do it for us so - // we adjust the stack in two phases: first just for callee-saved fiddling, - // then to allocate the rest of the frame. - splitSPAdjustments(MFI->getStackSize(), NumInitialBytes, NumResidualBytes); - } else { - // If there aren't any callee-saved registers, two-phase adjustment is - // inefficient. 
It's more efficient to adjust with NumInitialBytes too - // because when we're in a "callee pops argument space" situation, that pop - // must be tacked onto Initial for correctness. - NumInitialBytes = MFI->getStackSize(); - NumResidualBytes = 0; - } - - // Tell everyone else how much adjustment we're expecting them to use. In - // particular if an adjustment is required for a tail call the epilogue could - // have a different view of things. - FuncInfo->setInitialStackAdjust(NumInitialBytes); - - emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16, -NumInitialBytes, - MachineInstr::FrameSetup); - - if (NeedsFrameMoves && NumInitialBytes) { - // We emit this update even if the CFA is set from a frame pointer later so - // that the CFA is valid in the interim. - MachineLocation Dst(MachineLocation::VirtualFP); - unsigned Reg = MRI->getDwarfRegNum(AArch64::XSP, true); - unsigned CFIIndex = MMI.addFrameInst( - MCCFIInstruction::createDefCfa(nullptr, Reg, -NumInitialBytes)); - BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - } - - // Otherwise we need to set the frame pointer and/or add a second stack - // adjustment. - - bool FPNeedsSetting = hasFP(MF); - for (; MBBI != MBB.end(); ++MBBI) { - // Note that this search makes strong assumptions about the operation used - // to store the frame-pointer: it must be "STP x29, x30, ...". This could - // change in future, but until then there's no point in implementing - // untestable more generic cases. - if (FPNeedsSetting && MBBI->getOpcode() == AArch64::LSPair64_STR - && MBBI->getOperand(0).getReg() == AArch64::X29) { - int64_t X29FrameIdx = MBBI->getOperand(2).getIndex(); - FuncInfo->setFramePointerOffset(MFI->getObjectOffset(X29FrameIdx)); - - ++MBBI; - emitRegUpdate(MBB, MBBI, DL, TII, AArch64::X29, AArch64::XSP, - AArch64::X29, - NumInitialBytes + MFI->getObjectOffset(X29FrameIdx), - MachineInstr::FrameSetup); - - // The offset adjustment used when emitting debugging locations relative - // to whatever frame base is set. AArch64 uses the default frame base (FP - // or SP) and this adjusts the calculations to be correct. - MFI->setOffsetAdjustment(- MFI->getObjectOffset(X29FrameIdx) - - MFI->getStackSize()); - - if (NeedsFrameMoves) { - unsigned Reg = MRI->getDwarfRegNum(AArch64::X29, true); - unsigned Offset = MFI->getObjectOffset(X29FrameIdx); - unsigned CFIIndex = MMI.addFrameInst( - MCCFIInstruction::createDefCfa(nullptr, Reg, Offset)); - BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - } - - FPNeedsSetting = false; - } - - if (!MBBI->getFlag(MachineInstr::FrameSetup)) - break; - } - - assert(!FPNeedsSetting && "Frame pointer couldn't be set"); - - emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16, -NumResidualBytes, - MachineInstr::FrameSetup); - - // Now we emit the rest of the frame setup information, if necessary: we've - // already noted the FP and initial SP moves so we're left with the prologue's - // final SP update and callee-saved register locations. 
- if (!NeedsFrameMoves) - return; - - // The rest of the stack adjustment - if (!hasFP(MF) && NumResidualBytes) { - MachineLocation Dst(MachineLocation::VirtualFP); - unsigned Reg = MRI->getDwarfRegNum(AArch64::XSP, true); - unsigned Offset = NumResidualBytes + NumInitialBytes; - unsigned CFIIndex = - MMI.addFrameInst(MCCFIInstruction::createDefCfa(nullptr, Reg, -Offset)); - BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - } - - // And any callee-saved registers (it's fine to leave them to the end here, - // because the old values are still valid at this point. - const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); - if (CSI.size()) { - for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(), - E = CSI.end(); I != E; ++I) { - unsigned Offset = MFI->getObjectOffset(I->getFrameIdx()); - unsigned Reg = MRI->getDwarfRegNum(I->getReg(), true); - unsigned CFIIndex = MMI.addFrameInst( - MCCFIInstruction::createOffset(nullptr, Reg, Offset)); - BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - } - } -} - -void -AArch64FrameLowering::emitEpilogue(MachineFunction &MF, - MachineBasicBlock &MBB) const { - AArch64MachineFunctionInfo *FuncInfo = - MF.getInfo<AArch64MachineFunctionInfo>(); - - MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); - DebugLoc DL = MBBI->getDebugLoc(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); - MachineFrameInfo &MFI = *MF.getFrameInfo(); - unsigned RetOpcode = MBBI->getOpcode(); - - // Initial and residual are named for consitency with the prologue. Note that - // in the epilogue, the residual adjustment is executed first. - uint64_t NumInitialBytes = FuncInfo->getInitialStackAdjust(); - uint64_t NumResidualBytes = MFI.getStackSize() - NumInitialBytes; - uint64_t ArgumentPopSize = 0; - if (RetOpcode == AArch64::TC_RETURNdi || - RetOpcode == AArch64::TC_RETURNxi) { - MachineOperand &JumpTarget = MBBI->getOperand(0); - MachineOperand &StackAdjust = MBBI->getOperand(1); - - MachineInstrBuilder MIB; - if (RetOpcode == AArch64::TC_RETURNdi) { - MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::TAIL_Bimm)); - if (JumpTarget.isGlobal()) { - MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), - JumpTarget.getTargetFlags()); - } else { - assert(JumpTarget.isSymbol() && "unexpected tail call destination"); - MIB.addExternalSymbol(JumpTarget.getSymbolName(), - JumpTarget.getTargetFlags()); - } - } else { - assert(RetOpcode == AArch64::TC_RETURNxi && JumpTarget.isReg() - && "Unexpected tail call"); - - MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::TAIL_BRx)); - MIB.addReg(JumpTarget.getReg(), RegState::Kill); - } - - // Add the extra operands onto the new tail call instruction even though - // they're not used directly (so that liveness is tracked properly etc). - for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i) - MIB->addOperand(MBBI->getOperand(i)); - - - // Delete the pseudo instruction TC_RETURN. - MachineInstr *NewMI = std::prev(MBBI); - MBB.erase(MBBI); - MBBI = NewMI; - - // For a tail-call in a callee-pops-arguments environment, some or all of - // the stack may actually be in use for the call's arguments, this is - // calculated during LowerCall and consumed here... - ArgumentPopSize = StackAdjust.getImm(); - } else { - // ... otherwise the amount to pop is *all* of the argument space, - // conveniently stored in the MachineFunctionInfo by - // LowerFormalArguments. 
This will, of course, be zero for the C calling - // convention. - ArgumentPopSize = FuncInfo->getArgumentStackToRestore(); - } - - assert(NumInitialBytes % 16 == 0 && NumResidualBytes % 16 == 0 - && "refusing to adjust stack by misaligned amt"); - - // We may need to address callee-saved registers differently, so find out the - // bound on the frame indices. - const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); - int MinCSFI = 0; - int MaxCSFI = -1; - - if (CSI.size()) { - MinCSFI = CSI[0].getFrameIdx(); - MaxCSFI = CSI[CSI.size() - 1].getFrameIdx(); - } - - // The "residual" stack update comes first from this direction and guarantees - // that SP is NumInitialBytes below its value on function entry, either by a - // direct update or restoring it from the frame pointer. - if (NumInitialBytes + ArgumentPopSize != 0) { - emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16, - NumInitialBytes + ArgumentPopSize); - --MBBI; - } - - - // MBBI now points to the instruction just past the last callee-saved - // restoration (either RET/B if NumInitialBytes == 0, or the "ADD sp, sp" - // otherwise). - - // Now we need to find out where to put the bulk of the stack adjustment - MachineBasicBlock::iterator FirstEpilogue = MBBI; - while (MBBI != MBB.begin()) { - --MBBI; - - unsigned FrameOp; - for (FrameOp = 0; FrameOp < MBBI->getNumOperands(); ++FrameOp) { - if (MBBI->getOperand(FrameOp).isFI()) - break; - } - - // If this instruction doesn't have a frame index we've reached the end of - // the callee-save restoration. - if (FrameOp == MBBI->getNumOperands()) - break; - - // Likewise if it *is* a local reference, but not to a callee-saved object. - int FrameIdx = MBBI->getOperand(FrameOp).getIndex(); - if (FrameIdx < MinCSFI || FrameIdx > MaxCSFI) - break; - - FirstEpilogue = MBBI; - } - - if (MF.getFrameInfo()->hasVarSizedObjects()) { - int64_t StaticFrameBase; - StaticFrameBase = -(NumInitialBytes + FuncInfo->getFramePointerOffset()); - emitRegUpdate(MBB, FirstEpilogue, DL, TII, - AArch64::XSP, AArch64::X29, AArch64::NoRegister, - StaticFrameBase); - } else { - emitSPUpdate(MBB, FirstEpilogue, DL,TII, AArch64::X16, NumResidualBytes); - } -} - -int64_t -AArch64FrameLowering::resolveFrameIndexReference(MachineFunction &MF, - int FrameIndex, - unsigned &FrameReg, - int SPAdj, - bool IsCalleeSaveOp) const { - AArch64MachineFunctionInfo *FuncInfo = - MF.getInfo<AArch64MachineFunctionInfo>(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - - int64_t TopOfFrameOffset = MFI->getObjectOffset(FrameIndex); - - assert(!(IsCalleeSaveOp && FuncInfo->getInitialStackAdjust() == 0) - && "callee-saved register in unexpected place"); - - // If the frame for this function is particularly large, we adjust the stack - // in two phases which means the callee-save related operations see a - // different (intermediate) stack size. - int64_t FrameRegPos; - if (IsCalleeSaveOp) { - FrameReg = AArch64::XSP; - FrameRegPos = -static_cast<int64_t>(FuncInfo->getInitialStackAdjust()); - } else if (useFPForAddressing(MF)) { - // Have to use the frame pointer since we have no idea where SP is. 
- FrameReg = AArch64::X29; - FrameRegPos = FuncInfo->getFramePointerOffset(); - } else { - FrameReg = AArch64::XSP; - FrameRegPos = -static_cast<int64_t>(MFI->getStackSize()) + SPAdj; - } - - return TopOfFrameOffset - FrameRegPos; -} - -void -AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const { - const AArch64RegisterInfo *RegInfo = - static_cast<const AArch64RegisterInfo *>(MF.getTarget().getRegisterInfo()); - MachineFrameInfo *MFI = MF.getFrameInfo(); - const AArch64InstrInfo &TII = - *static_cast<const AArch64InstrInfo *>(MF.getTarget().getInstrInfo()); - - if (hasFP(MF)) { - MF.getRegInfo().setPhysRegUsed(AArch64::X29); - MF.getRegInfo().setPhysRegUsed(AArch64::X30); - } - - // If addressing of local variables is going to be more complicated than - // shoving a base register and an offset into the instruction then we may well - // need to scavenge registers. We should either specifically add an - // callee-save register for this purpose or allocate an extra spill slot. - bool BigStack = - MFI->estimateStackSize(MF) >= TII.estimateRSStackLimit(MF) - || MFI->hasVarSizedObjects() // Access will be from X29: messes things up - || (MFI->adjustsStack() && !hasReservedCallFrame(MF)); - - if (!BigStack) - return; - - // We certainly need some slack space for the scavenger, preferably an extra - // register. - const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(); - MCPhysReg ExtraReg = AArch64::NoRegister; - - for (unsigned i = 0; CSRegs[i]; ++i) { - if (AArch64::GPR64RegClass.contains(CSRegs[i]) && - !MF.getRegInfo().isPhysRegUsed(CSRegs[i])) { - ExtraReg = CSRegs[i]; - break; - } - } - - if (ExtraReg != 0) { - MF.getRegInfo().setPhysRegUsed(ExtraReg); - } else { - assert(RS && "Expect register scavenger to be available"); - - // Create a stack slot for scavenging purposes. PrologEpilogInserter - // helpfully places it near either SP or FP for us to avoid - // infinitely-regression during scavenging. - const TargetRegisterClass *RC = &AArch64::GPR64RegClass; - RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), - RC->getAlignment(), - false)); - } -} - -bool AArch64FrameLowering::determinePrologueDeath(MachineBasicBlock &MBB, - unsigned Reg) const { - // If @llvm.returnaddress is called then it will refer to X30 by some means; - // the prologue store does not kill the register. - if (Reg == AArch64::X30) { - if (MBB.getParent()->getFrameInfo()->isReturnAddressTaken() - && MBB.getParent()->getRegInfo().isLiveIn(Reg)) - return false; - } - - // In all other cases, physical registers are dead after they've been saved - // but live at the beginning of the prologue block. - MBB.addLiveIn(Reg); - return true; -} - -void -AArch64FrameLowering::emitFrameMemOps(bool isPrologue, MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI, - const LoadStoreMethod PossClasses[], - unsigned NumClasses) const { - DebugLoc DL = MBB.findDebugLoc(MBBI); - MachineFunction &MF = *MBB.getParent(); - MachineFrameInfo &MFI = *MF.getFrameInfo(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); - - // A certain amount of implicit contract is present here. The actual stack - // offsets haven't been allocated officially yet, so for strictly correct code - // we rely on the fact that the elements of CSI are allocated in order - // starting at SP, purely as dictated by size and alignment. 
In practice since - // this function handles the only accesses to those slots it's not quite so - // important. - // - // We have also ordered the Callee-saved register list in AArch64CallingConv - // so that the above scheme puts registers in order: in particular we want - // &X30 to be &X29+8 for an ABI-correct frame record (PCS 5.2.2) - for (unsigned i = 0, e = CSI.size(); i < e; ++i) { - unsigned Reg = CSI[i].getReg(); - - // First we need to find out which register class the register belongs to so - // that we can use the correct load/store instructions. - unsigned ClassIdx; - for (ClassIdx = 0; ClassIdx < NumClasses; ++ClassIdx) { - if (PossClasses[ClassIdx].RegClass->contains(Reg)) - break; - } - assert(ClassIdx != NumClasses - && "Asked to store register in unexpected class"); - const TargetRegisterClass &TheClass = *PossClasses[ClassIdx].RegClass; - - // Now we need to decide whether it's possible to emit a paired instruction: - // for this we want the next register to be in the same class. - MachineInstrBuilder NewMI; - bool Pair = false; - if (i + 1 < CSI.size() && TheClass.contains(CSI[i+1].getReg())) { - Pair = true; - unsigned StLow = 0, StHigh = 0; - if (isPrologue) { - // Most of these registers will be live-in to the MBB and killed by our - // store, though there are exceptions (see determinePrologueDeath). - StLow = getKillRegState(determinePrologueDeath(MBB, CSI[i+1].getReg())); - StHigh = getKillRegState(determinePrologueDeath(MBB, CSI[i].getReg())); - } else { - StLow = RegState::Define; - StHigh = RegState::Define; - } - - NewMI = BuildMI(MBB, MBBI, DL, TII.get(PossClasses[ClassIdx].PairOpcode)) - .addReg(CSI[i+1].getReg(), StLow) - .addReg(CSI[i].getReg(), StHigh); - - // If it's a paired op, we've consumed two registers - ++i; - } else { - unsigned State; - if (isPrologue) { - State = getKillRegState(determinePrologueDeath(MBB, CSI[i].getReg())); - } else { - State = RegState::Define; - } - - NewMI = BuildMI(MBB, MBBI, DL, - TII.get(PossClasses[ClassIdx].SingleOpcode)) - .addReg(CSI[i].getReg(), State); - } - - // Note that the FrameIdx refers to the second register in a pair: it will - // be allocated the smaller numeric address and so is the one an LDP/STP - // address must use. - int FrameIdx = CSI[i].getFrameIdx(); - MachineMemOperand::MemOperandFlags Flags; - Flags = isPrologue ? MachineMemOperand::MOStore : MachineMemOperand::MOLoad; - MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx), - Flags, - Pair ? TheClass.getSize() * 2 : TheClass.getSize(), - MFI.getObjectAlignment(FrameIdx)); - - NewMI.addFrameIndex(FrameIdx) - .addImm(0) // address-register offset - .addMemOperand(MMO); - - if (isPrologue) - NewMI.setMIFlags(MachineInstr::FrameSetup); - - // For aesthetic reasons, during an epilogue we want to emit complementary - // operations to the prologue, but in the opposite order. So we still - // iterate through the CalleeSavedInfo list in order, but we put the - // instructions successively earlier in the MBB.
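(Editorial illustration, not part of the original source: if x29/x30 and x19/x20 each end up paired by the loop above, the prologue emits

    stp x29, x30, [sp, #N]
    stp x19, x20, [sp, #M]

while the epilogue iterates the same CalleeSavedInfo list but steps the insertion point backwards after each instruction, so the complementary loads appear in the opposite order:

    ldp x19, x20, [sp, #M]
    ldp x29, x30, [sp, #N]

The offsets #N and #M stand in for the frame-index offsets resolved later.)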
- if (!isPrologue) - --MBBI; - } -} - -bool -AArch64FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI) const { - if (CSI.empty()) - return false; - - static const LoadStoreMethod PossibleClasses[] = { - {&AArch64::GPR64RegClass, AArch64::LSPair64_STR, AArch64::LS64_STR}, - {&AArch64::FPR64RegClass, AArch64::LSFPPair64_STR, AArch64::LSFP64_STR}, - }; - const unsigned NumClasses = llvm::array_lengthof(PossibleClasses); - - emitFrameMemOps(/* isPrologue = */ true, MBB, MBBI, CSI, TRI, - PossibleClasses, NumClasses); - - return true; -} - -bool -AArch64FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI) const { - - if (CSI.empty()) - return false; - - static const LoadStoreMethod PossibleClasses[] = { - {&AArch64::GPR64RegClass, AArch64::LSPair64_LDR, AArch64::LS64_LDR}, - {&AArch64::FPR64RegClass, AArch64::LSFPPair64_LDR, AArch64::LSFP64_LDR}, - }; - const unsigned NumClasses = llvm::array_lengthof(PossibleClasses); - - emitFrameMemOps(/* isPrologue = */ false, MBB, MBBI, CSI, TRI, - PossibleClasses, NumClasses); - - return true; -} - -bool -AArch64FrameLowering::hasFP(const MachineFunction &MF) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - const TargetRegisterInfo *RI = MF.getTarget().getRegisterInfo(); - - // This is a decision of ABI compliance. The AArch64 PCS gives various options - // for conformance, and even at the most stringent level more or less permits - // elimination for leaf functions because there's no loss of functionality - // (for debugging etc).. - if (MF.getTarget().Options.DisableFramePointerElim(MF) && MFI->hasCalls()) - return true; - - // The following are hard-limits: incorrect code will be generated if we try - // to omit the frame. - return (RI->needsStackRealignment(MF) || - MFI->hasVarSizedObjects() || - MFI->isFrameAddressTaken()); -} - -bool -AArch64FrameLowering::useFPForAddressing(const MachineFunction &MF) const { - return MF.getFrameInfo()->hasVarSizedObjects(); -} - -bool -AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - - // Of the various reasons for having a frame pointer, it's actually only - // variable-sized objects that prevent reservation of a call frame. - return !(hasFP(MF) && MFI->hasVarSizedObjects()); -} - -void -AArch64FrameLowering::eliminateCallFramePseudoInstr( - MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI) const { - const AArch64InstrInfo &TII = - *static_cast<const AArch64InstrInfo *>(MF.getTarget().getInstrInfo()); - DebugLoc dl = MI->getDebugLoc(); - int Opcode = MI->getOpcode(); - bool IsDestroy = Opcode == TII.getCallFrameDestroyOpcode(); - uint64_t CalleePopAmount = IsDestroy ? MI->getOperand(1).getImm() : 0; - - if (!hasReservedCallFrame(MF)) { - unsigned Align = getStackAlignment(); - - int64_t Amount = MI->getOperand(0).getImm(); - Amount = RoundUpToAlignment(Amount, Align); - if (!IsDestroy) Amount = -Amount; - - // N.b. if CalleePopAmount is valid but zero (i.e. callee would pop, but it - // doesn't have to pop anything), then the first operand will be zero too so - // this adjustment is a no-op. 
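A standalone restatement of the sign convention used here, offered as an editorial sketch rather than code from this file: a frame-setup pseudo moves SP down, a frame-destroy pseudo moves it back up, and the requested amount is first rounded up to the stack alignment.

    // Editorial sketch only; mirrors the Amount handling above.
    #include <cstdint>
    static int64_t callFrameSPDelta(int64_t Amount, bool IsDestroy,
                                    int64_t Align) {
      Amount = (Amount + Align - 1) / Align * Align; // round up to alignment
      return IsDestroy ? Amount : -Amount;           // negative delta on setup
    }

For example, a 24-byte argument area with the default 16-byte stack alignment becomes an adjustment of -32 on setup and +32 on destroy.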
- if (CalleePopAmount == 0) { - // FIXME: in-function stack adjustment for calls is limited to 12-bits - // because there's no guaranteed temporary register available. Mostly call - // frames will be allocated at the start of a function so this is OK, but - // it is a limitation that needs dealing with. - assert(Amount > -0xfff && Amount < 0xfff && "call frame too large"); - emitSPUpdate(MBB, MI, dl, TII, AArch64::NoRegister, Amount); - } - } else if (CalleePopAmount != 0) { - // If the calling convention demands that the callee pops arguments from the - // stack, we want to add it back if we have a reserved call frame. - assert(CalleePopAmount < 0xfff && "call frame too large"); - emitSPUpdate(MBB, MI, dl, TII, AArch64::NoRegister, -CalleePopAmount); - } - - MBB.erase(MI); -} diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h deleted file mode 100644 index 6ec27e3104f..00000000000 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h +++ /dev/null @@ -1,108 +0,0 @@ -//==- AArch64FrameLowering.h - Define frame lowering for AArch64 -*- C++ -*--=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This class implements the AArch64-specific parts of the TargetFrameLowering -// class. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_AARCH64_FRAMEINFO_H -#define LLVM_AARCH64_FRAMEINFO_H - -#include "AArch64Subtarget.h" -#include "llvm/Target/TargetFrameLowering.h" - -namespace llvm { -class AArch64Subtarget; - -class AArch64FrameLowering : public TargetFrameLowering { -private: - // In order to unify the spilling and restoring of callee-saved registers into - // emitFrameMemOps, we need to be able to specify which instructions to use - // for the relevant memory operations on each register class. An array of the - // following struct is populated and passed in to achieve this. - struct LoadStoreMethod { - const TargetRegisterClass *RegClass; // E.g. GPR64RegClass - - // The preferred instruction. - unsigned PairOpcode; // E.g. LSPair64_STR - - // Sometimes only a single register can be handled at once. - unsigned SingleOpcode; // E.g. LS64_STR - }; -protected: - const AArch64Subtarget &STI; - -public: - explicit AArch64FrameLowering(const AArch64Subtarget &sti) - : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 16, 0, 16), - STI(sti) { - } - - /// emitProlog/emitEpilog - These methods insert prolog and epilog code into - /// the function. - void emitPrologue(MachineFunction &MF) const override; - void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; - - /// Decides how much stack adjustment to perform in each phase of the prologue - /// and epilogue. 
- void splitSPAdjustments(uint64_t Total, uint64_t &Initial, - uint64_t &Residual) const; - - int64_t resolveFrameIndexReference(MachineFunction &MF, int FrameIndex, - unsigned &FrameReg, int SPAdj, - bool IsCalleeSaveOp) const; - - void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const override; - - bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI) const override; - bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI) const override; - - void - eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI) const override; - - /// If the register is X30 (i.e. LR) and the return address is used in the - /// function then the callee-save store doesn't actually kill the register, - /// otherwise it does. - bool determinePrologueDeath(MachineBasicBlock &MBB, unsigned Reg) const; - - /// This function emits the loads or stores required during prologue and - /// epilogue as efficiently as possible. - /// - /// The operations involved in setting up and tearing down the frame are - /// similar enough to warrant a shared function, particularly as discrepancies - /// between the two would be disastrous. - void emitFrameMemOps(bool isStore, MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI, - const LoadStoreMethod PossibleClasses[], - unsigned NumClasses) const; - - - bool hasFP(const MachineFunction &MF) const override; - - bool useFPForAddressing(const MachineFunction &MF) const; - - /// On AA - bool hasReservedCallFrame(const MachineFunction &MF) const override; - -}; - -} // End llvm namespace - -#endif diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp deleted file mode 100644 index d1d89af6e04..00000000000 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ /dev/null @@ -1,1576 +0,0 @@ -//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines an instruction selector for the AArch64 target. -// -//===----------------------------------------------------------------------===// - -#include "AArch64.h" -#include "AArch64InstrInfo.h" -#include "AArch64Subtarget.h" -#include "AArch64TargetMachine.h" -#include "Utils/AArch64BaseInfo.h" -#include "llvm/ADT/APSInt.h" -#include "llvm/CodeGen/SelectionDAGISel.h" -#include "llvm/IR/GlobalValue.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -#define DEBUG_TYPE "aarch64-isel" - -//===--------------------------------------------------------------------===// -/// AArch64 specific code to select AArch64 machine instructions for -/// SelectionDAG operations. -/// -namespace { - -class AArch64DAGToDAGISel : public SelectionDAGISel { - AArch64TargetMachine &TM; - - /// Keep a pointer to the AArch64Subtarget around so that we can - /// make the right decision when generating code for different targets. 
- const AArch64Subtarget *Subtarget; - -public: - explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm, - CodeGenOpt::Level OptLevel) - : SelectionDAGISel(tm, OptLevel), TM(tm), - Subtarget(&TM.getSubtarget<AArch64Subtarget>()) { - } - - const char *getPassName() const override { - return "AArch64 Instruction Selection"; - } - - // Include the pieces autogenerated from the target description. -#include "AArch64GenDAGISel.inc" - - template<unsigned MemSize> - bool SelectOffsetUImm12(SDValue N, SDValue &UImm12) { - const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N); - if (!CN || CN->getZExtValue() % MemSize != 0 - || CN->getZExtValue() / MemSize > 0xfff) - return false; - - UImm12 = CurDAG->getTargetConstant(CN->getZExtValue() / MemSize, MVT::i64); - return true; - } - - template<unsigned RegWidth> - bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) { - return SelectCVTFixedPosOperand(N, FixedPos, RegWidth); - } - - /// Used for pre-lowered address-reference nodes, so we already know - /// the fields match. This operand's job is simply to add an - /// appropriate shift operand to the MOVZ/MOVK instruction. - template<unsigned LogShift> - bool SelectMOVWAddressRef(SDValue N, SDValue &Imm, SDValue &Shift) { - Imm = N; - Shift = CurDAG->getTargetConstant(LogShift, MVT::i32); - return true; - } - - bool SelectFPZeroOperand(SDValue N, SDValue &Dummy); - - bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, - unsigned RegWidth); - - bool SelectInlineAsmMemoryOperand(const SDValue &Op, - char ConstraintCode, - std::vector<SDValue> &OutOps) override; - - bool SelectLogicalImm(SDValue N, SDValue &Imm); - - template<unsigned RegWidth> - bool SelectTSTBOperand(SDValue N, SDValue &FixedPos) { - return SelectTSTBOperand(N, FixedPos, RegWidth); - } - - bool SelectTSTBOperand(SDValue N, SDValue &FixedPos, unsigned RegWidth); - - SDNode *SelectAtomic(SDNode *N, unsigned Op8, unsigned Op16, unsigned Op32, - unsigned Op64); - - /// Put the given constant into a pool and return a DAG which will give its - /// address. - SDValue getConstantPoolItemAddress(SDLoc DL, const Constant *CV); - - SDNode *TrySelectToMoveImm(SDNode *N); - SDNode *LowerToFPLitPool(SDNode *Node); - SDNode *SelectToLitPool(SDNode *N); - - SDNode* Select(SDNode*) override; -private: - /// Get the opcode for table lookup instruction - unsigned getTBLOpc(bool IsExt, bool Is64Bit, unsigned NumOfVec); - - /// Select NEON table lookup intrinsics. NumVecs should be 1, 2, 3 or 4. - /// IsExt is to indicate if the result will be extended with an argument. - SDNode *SelectVTBL(SDNode *N, unsigned NumVecs, bool IsExt); - - /// Select NEON load intrinsics. NumVecs should be 1, 2, 3 or 4. - SDNode *SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, - const uint16_t *Opcode); - - /// Select NEON store intrinsics. NumVecs should be 1, 2, 3 or 4. - SDNode *SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, - const uint16_t *Opcodes); - - /// Form sequences of consecutive 64/128-bit registers for use in NEON - /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have - /// between 1 and 4 elements. If it contains a single element that is returned - /// unchanged; otherwise a REG_SEQUENCE value is returned. - SDValue createDTuple(ArrayRef<SDValue> Vecs); - SDValue createQTuple(ArrayRef<SDValue> Vecs); - - /// Generic helper for the createDTuple/createQTuple - /// functions. Those should almost always be called instead. 
- SDValue createTuple(ArrayRef<SDValue> Vecs, unsigned RegClassIDs[], - unsigned SubRegs[]); - - /// Select NEON load-duplicate intrinsics. NumVecs should be 2, 3 or 4. - /// The opcode array specifies the instructions used for load. - SDNode *SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs, - const uint16_t *Opcodes); - - /// Select NEON load/store lane intrinsics. NumVecs should be 2, 3 or 4. - /// The opcode arrays specify the instructions used for load/store. - SDNode *SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating, - unsigned NumVecs, const uint16_t *Opcodes); - - SDValue getTargetSubregToReg(int SRIdx, SDLoc DL, EVT VT, EVT VTD, - SDValue Operand); -}; -} - -bool -AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, - unsigned RegWidth) { - const ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N); - if (!CN) return false; - - // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits - // is between 1 and 32 for a destination w-register, or 1 and 64 for an - // x-register. - // - // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we - // want THIS_NODE to be 2^fbits. This is much easier to deal with using - // integers. - bool IsExact; - - // fbits is between 1 and 64 in the worst-case, which means the fmul - // could have 2^64 as an actual operand. Need 65 bits of precision. - APSInt IntVal(65, true); - CN->getValueAPF().convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact); - - // N.b. isPowerOf2 also checks for > 0. - if (!IsExact || !IntVal.isPowerOf2()) return false; - unsigned FBits = IntVal.logBase2(); - - // Checks above should have guaranteed that we haven't lost information in - // finding FBits, but it must still be in range. - if (FBits == 0 || FBits > RegWidth) return false; - - FixedPos = CurDAG->getTargetConstant(64 - FBits, MVT::i32); - return true; -} - -bool -AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op, - char ConstraintCode, - std::vector<SDValue> &OutOps) { - switch (ConstraintCode) { - default: llvm_unreachable("Unrecognised AArch64 memory constraint"); - case 'm': - // FIXME: more freedom is actually permitted for 'm'. We can go - // hunting for a base and an offset if we want. Of course, since - // we don't really know how the operand is going to be used we're - // probably restricted to the load/store pair's simm7 as an offset - // range anyway. - case 'Q': - OutOps.push_back(Op); - } - - return false; -} - -bool -AArch64DAGToDAGISel::SelectFPZeroOperand(SDValue N, SDValue &Dummy) { - ConstantFPSDNode *Imm = dyn_cast<ConstantFPSDNode>(N); - if (!Imm || !Imm->getValueAPF().isPosZero()) - return false; - - // Doesn't actually carry any information, but keeps TableGen quiet. 
- Dummy = CurDAG->getTargetConstant(0, MVT::i32); - return true; -} - -bool AArch64DAGToDAGISel::SelectLogicalImm(SDValue N, SDValue &Imm) { - uint32_t Bits; - uint32_t RegWidth = N.getValueType().getSizeInBits(); - - ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N); - if (!CN) return false; - - if (!A64Imms::isLogicalImm(RegWidth, CN->getZExtValue(), Bits)) - return false; - - Imm = CurDAG->getTargetConstant(Bits, MVT::i32); - return true; -} - -SDNode *AArch64DAGToDAGISel::TrySelectToMoveImm(SDNode *Node) { - SDNode *ResNode; - SDLoc dl(Node); - EVT DestType = Node->getValueType(0); - unsigned DestWidth = DestType.getSizeInBits(); - - unsigned MOVOpcode; - EVT MOVType; - int UImm16, Shift; - uint32_t LogicalBits; - - uint64_t BitPat = cast<ConstantSDNode>(Node)->getZExtValue(); - if (A64Imms::isMOVZImm(DestWidth, BitPat, UImm16, Shift)) { - MOVType = DestType; - MOVOpcode = DestWidth == 64 ? AArch64::MOVZxii : AArch64::MOVZwii; - } else if (A64Imms::isMOVNImm(DestWidth, BitPat, UImm16, Shift)) { - MOVType = DestType; - MOVOpcode = DestWidth == 64 ? AArch64::MOVNxii : AArch64::MOVNwii; - } else if (DestWidth == 64 && A64Imms::isMOVNImm(32, BitPat, UImm16, Shift)) { - // To get something like 0x0000_0000_ffff_1234 into a 64-bit register we can - // use a 32-bit instruction: "movn w0, 0xedbc". - MOVType = MVT::i32; - MOVOpcode = AArch64::MOVNwii; - } else if (A64Imms::isLogicalImm(DestWidth, BitPat, LogicalBits)) { - MOVOpcode = DestWidth == 64 ? AArch64::ORRxxi : AArch64::ORRwwi; - uint16_t ZR = DestWidth == 64 ? AArch64::XZR : AArch64::WZR; - - return CurDAG->getMachineNode(MOVOpcode, dl, DestType, - CurDAG->getRegister(ZR, DestType), - CurDAG->getTargetConstant(LogicalBits, MVT::i32)); - } else { - // Can't handle it in one instruction. There's scope for permitting two (or - // more) instructions, but that'll need more thought. 
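(Editorial worked example, not from the original source: a value such as 0x0000123400005678 has two non-zero 16-bit chunks and is not a valid logical immediate, so no single MOVZ, MOVN or ORR-immediate can produce it; materialising it directly would need a sequence along the lines of

    movz x0, #0x5678
    movk x0, #0x1234, lsl #32

which is the kind of multi-instruction case that currently falls through to the constant-pool path instead.)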
- return nullptr; - } - - ResNode = CurDAG->getMachineNode(MOVOpcode, dl, MOVType, - CurDAG->getTargetConstant(UImm16, MVT::i32), - CurDAG->getTargetConstant(Shift, MVT::i32)); - - if (MOVType != DestType) { - ResNode = CurDAG->getMachineNode(TargetOpcode::SUBREG_TO_REG, dl, - MVT::i64, MVT::i32, MVT::Other, - CurDAG->getTargetConstant(0, MVT::i64), - SDValue(ResNode, 0), - CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32)); - } - - return ResNode; -} - -SDValue -AArch64DAGToDAGISel::getConstantPoolItemAddress(SDLoc DL, - const Constant *CV) { - EVT PtrVT = getTargetLowering()->getPointerTy(); - - switch (getTargetLowering()->getTargetMachine().getCodeModel()) { - case CodeModel::Small: { - unsigned Alignment = - getTargetLowering()->getDataLayout()->getABITypeAlignment(CV->getType()); - return CurDAG->getNode( - AArch64ISD::WrapperSmall, DL, PtrVT, - CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_NO_FLAG), - CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_LO12), - CurDAG->getConstant(Alignment, MVT::i32)); - } - case CodeModel::Large: { - SDNode *LitAddr; - LitAddr = CurDAG->getMachineNode( - AArch64::MOVZxii, DL, PtrVT, - CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G3), - CurDAG->getTargetConstant(3, MVT::i32)); - LitAddr = CurDAG->getMachineNode( - AArch64::MOVKxii, DL, PtrVT, SDValue(LitAddr, 0), - CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G2_NC), - CurDAG->getTargetConstant(2, MVT::i32)); - LitAddr = CurDAG->getMachineNode( - AArch64::MOVKxii, DL, PtrVT, SDValue(LitAddr, 0), - CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G1_NC), - CurDAG->getTargetConstant(1, MVT::i32)); - LitAddr = CurDAG->getMachineNode( - AArch64::MOVKxii, DL, PtrVT, SDValue(LitAddr, 0), - CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G0_NC), - CurDAG->getTargetConstant(0, MVT::i32)); - return SDValue(LitAddr, 0); - } - default: - llvm_unreachable("Only small and large code models supported now"); - } -} - -SDNode *AArch64DAGToDAGISel::SelectToLitPool(SDNode *Node) { - SDLoc DL(Node); - uint64_t UnsignedVal = cast<ConstantSDNode>(Node)->getZExtValue(); - int64_t SignedVal = cast<ConstantSDNode>(Node)->getSExtValue(); - EVT DestType = Node->getValueType(0); - - // Since we may end up loading a 64-bit constant from a 32-bit entry the - // constant in the pool may have a different type to the eventual node. 
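As an editorial sketch (not part of the original file), the classification performed below for a 64-bit destination can be written standalone like this; the example values in the comments are chosen purely for illustration.

    #include <cstdint>
    enum class PoolEntry { ZExt32, SExt32, Full64 };
    static PoolEntry classifyPoolEntry(uint64_t U) {
      int64_t S = static_cast<int64_t>(U);
      if (U <= UINT32_MAX)
        return PoolEntry::ZExt32; // e.g. 0x00000000fffffff0: 32-bit entry, zero-extending load
      if (S >= INT32_MIN && S <= INT32_MAX)
        return PoolEntry::SExt32; // e.g. -16 (0xfffffffffffffff0): 32-bit entry, sign-extending load
      return PoolEntry::Full64;   // anything else needs a full 64-bit entry
    }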
- ISD::LoadExtType Extension; - EVT MemType; - - assert((DestType == MVT::i64 || DestType == MVT::i32) - && "Only expect integer constants at the moment"); - - if (DestType == MVT::i32) { - Extension = ISD::NON_EXTLOAD; - MemType = MVT::i32; - } else if (UnsignedVal <= UINT32_MAX) { - Extension = ISD::ZEXTLOAD; - MemType = MVT::i32; - } else if (SignedVal >= INT32_MIN && SignedVal <= INT32_MAX) { - Extension = ISD::SEXTLOAD; - MemType = MVT::i32; - } else { - Extension = ISD::NON_EXTLOAD; - MemType = MVT::i64; - } - - Constant *CV = ConstantInt::get(Type::getIntNTy(*CurDAG->getContext(), - MemType.getSizeInBits()), - UnsignedVal); - SDValue PoolAddr = getConstantPoolItemAddress(DL, CV); - unsigned Alignment = - getTargetLowering()->getDataLayout()->getABITypeAlignment(CV->getType()); - - return CurDAG->getExtLoad(Extension, DL, DestType, CurDAG->getEntryNode(), - PoolAddr, - MachinePointerInfo::getConstantPool(), MemType, - /* isVolatile = */ false, - /* isNonTemporal = */ false, - Alignment).getNode(); -} - -SDNode *AArch64DAGToDAGISel::LowerToFPLitPool(SDNode *Node) { - SDLoc DL(Node); - const ConstantFP *FV = cast<ConstantFPSDNode>(Node)->getConstantFPValue(); - EVT DestType = Node->getValueType(0); - - unsigned Alignment = - getTargetLowering()->getDataLayout()->getABITypeAlignment(FV->getType()); - SDValue PoolAddr = getConstantPoolItemAddress(DL, FV); - - return CurDAG->getLoad(DestType, DL, CurDAG->getEntryNode(), PoolAddr, - MachinePointerInfo::getConstantPool(), - /* isVolatile = */ false, - /* isNonTemporal = */ false, - /* isInvariant = */ true, - Alignment).getNode(); -} - -bool -AArch64DAGToDAGISel::SelectTSTBOperand(SDValue N, SDValue &FixedPos, - unsigned RegWidth) { - const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N); - if (!CN) return false; - - uint64_t Val = CN->getZExtValue(); - - if (!isPowerOf2_64(Val)) return false; - - unsigned TestedBit = Log2_64(Val); - // Checks above should have guaranteed that we haven't lost information in - // finding TestedBit, but it must still be in range. - if (TestedBit >= RegWidth) return false; - - FixedPos = CurDAG->getTargetConstant(TestedBit, MVT::i64); - return true; -} - -SDNode *AArch64DAGToDAGISel::SelectAtomic(SDNode *Node, unsigned Op8, - unsigned Op16,unsigned Op32, - unsigned Op64) { - // Mostly direct translation to the given operations, except that we preserve - // the AtomicOrdering for use later on. 
- AtomicSDNode *AN = cast<AtomicSDNode>(Node); - EVT VT = AN->getMemoryVT(); - - unsigned Op; - if (VT == MVT::i8) - Op = Op8; - else if (VT == MVT::i16) - Op = Op16; - else if (VT == MVT::i32) - Op = Op32; - else if (VT == MVT::i64) - Op = Op64; - else - llvm_unreachable("Unexpected atomic operation"); - - SmallVector<SDValue, 4> Ops; - for (unsigned i = 1; i < AN->getNumOperands(); ++i) - Ops.push_back(AN->getOperand(i)); - - Ops.push_back(CurDAG->getTargetConstant(AN->getOrdering(), MVT::i32)); - Ops.push_back(AN->getOperand(0)); // Chain moves to the end - - return CurDAG->SelectNodeTo(Node, Op, AN->getValueType(0), MVT::Other, Ops); -} - -SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) { - static unsigned RegClassIDs[] = { AArch64::DPairRegClassID, - AArch64::DTripleRegClassID, - AArch64::DQuadRegClassID }; - static unsigned SubRegs[] = { AArch64::dsub_0, AArch64::dsub_1, - AArch64::dsub_2, AArch64::dsub_3 }; - - return createTuple(Regs, RegClassIDs, SubRegs); -} - -SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) { - static unsigned RegClassIDs[] = { AArch64::QPairRegClassID, - AArch64::QTripleRegClassID, - AArch64::QQuadRegClassID }; - static unsigned SubRegs[] = { AArch64::qsub_0, AArch64::qsub_1, - AArch64::qsub_2, AArch64::qsub_3 }; - - return createTuple(Regs, RegClassIDs, SubRegs); -} - -SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs, - unsigned RegClassIDs[], - unsigned SubRegs[]) { - // There's no special register-class for a vector-list of 1 element: it's just - // a vector. - if (Regs.size() == 1) - return Regs[0]; - - assert(Regs.size() >= 2 && Regs.size() <= 4); - - SDLoc DL(Regs[0].getNode()); - - SmallVector<SDValue, 4> Ops; - - // First operand of REG_SEQUENCE is the desired RegClass. - Ops.push_back( - CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], MVT::i32)); - - // Then we get pairs of source & subregister-position for the components. - for (unsigned i = 0; i < Regs.size(); ++i) { - Ops.push_back(Regs[i]); - Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], MVT::i32)); - } - - SDNode *N = - CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops); - return SDValue(N, 0); -} - - -// Get the register stride update opcode of a VLD/VST instruction that -// is otherwise equivalent to the given fixed stride updating instruction. 
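(Editorial illustration, not from the original source: the distinction handled by the helper below is between a post-indexed access whose increment is an immediate equal to the transfer size and one whose increment lives in a register, e.g.

    ld1 {v0.4s}, [x0], #16    // fixed increment    -> ..._fixed opcode
    ld1 {v0.4s}, [x0], x2     // register increment -> ..._register opcode

The selection code switches to the _register form whenever the increment operand is not a constant.)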
-static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) { - switch (Opc) { - default: break; - case AArch64::LD1WB_8B_fixed: return AArch64::LD1WB_8B_register; - case AArch64::LD1WB_4H_fixed: return AArch64::LD1WB_4H_register; - case AArch64::LD1WB_2S_fixed: return AArch64::LD1WB_2S_register; - case AArch64::LD1WB_1D_fixed: return AArch64::LD1WB_1D_register; - case AArch64::LD1WB_16B_fixed: return AArch64::LD1WB_16B_register; - case AArch64::LD1WB_8H_fixed: return AArch64::LD1WB_8H_register; - case AArch64::LD1WB_4S_fixed: return AArch64::LD1WB_4S_register; - case AArch64::LD1WB_2D_fixed: return AArch64::LD1WB_2D_register; - - case AArch64::LD2WB_8B_fixed: return AArch64::LD2WB_8B_register; - case AArch64::LD2WB_4H_fixed: return AArch64::LD2WB_4H_register; - case AArch64::LD2WB_2S_fixed: return AArch64::LD2WB_2S_register; - case AArch64::LD2WB_16B_fixed: return AArch64::LD2WB_16B_register; - case AArch64::LD2WB_8H_fixed: return AArch64::LD2WB_8H_register; - case AArch64::LD2WB_4S_fixed: return AArch64::LD2WB_4S_register; - case AArch64::LD2WB_2D_fixed: return AArch64::LD2WB_2D_register; - - case AArch64::LD3WB_8B_fixed: return AArch64::LD3WB_8B_register; - case AArch64::LD3WB_4H_fixed: return AArch64::LD3WB_4H_register; - case AArch64::LD3WB_2S_fixed: return AArch64::LD3WB_2S_register; - case AArch64::LD3WB_16B_fixed: return AArch64::LD3WB_16B_register; - case AArch64::LD3WB_8H_fixed: return AArch64::LD3WB_8H_register; - case AArch64::LD3WB_4S_fixed: return AArch64::LD3WB_4S_register; - case AArch64::LD3WB_2D_fixed: return AArch64::LD3WB_2D_register; - - case AArch64::LD4WB_8B_fixed: return AArch64::LD4WB_8B_register; - case AArch64::LD4WB_4H_fixed: return AArch64::LD4WB_4H_register; - case AArch64::LD4WB_2S_fixed: return AArch64::LD4WB_2S_register; - case AArch64::LD4WB_16B_fixed: return AArch64::LD4WB_16B_register; - case AArch64::LD4WB_8H_fixed: return AArch64::LD4WB_8H_register; - case AArch64::LD4WB_4S_fixed: return AArch64::LD4WB_4S_register; - case AArch64::LD4WB_2D_fixed: return AArch64::LD4WB_2D_register; - - case AArch64::LD1x2WB_8B_fixed: return AArch64::LD1x2WB_8B_register; - case AArch64::LD1x2WB_4H_fixed: return AArch64::LD1x2WB_4H_register; - case AArch64::LD1x2WB_2S_fixed: return AArch64::LD1x2WB_2S_register; - case AArch64::LD1x2WB_1D_fixed: return AArch64::LD1x2WB_1D_register; - case AArch64::LD1x2WB_16B_fixed: return AArch64::LD1x2WB_16B_register; - case AArch64::LD1x2WB_8H_fixed: return AArch64::LD1x2WB_8H_register; - case AArch64::LD1x2WB_4S_fixed: return AArch64::LD1x2WB_4S_register; - case AArch64::LD1x2WB_2D_fixed: return AArch64::LD1x2WB_2D_register; - - case AArch64::LD1x3WB_8B_fixed: return AArch64::LD1x3WB_8B_register; - case AArch64::LD1x3WB_4H_fixed: return AArch64::LD1x3WB_4H_register; - case AArch64::LD1x3WB_2S_fixed: return AArch64::LD1x3WB_2S_register; - case AArch64::LD1x3WB_1D_fixed: return AArch64::LD1x3WB_1D_register; - case AArch64::LD1x3WB_16B_fixed: return AArch64::LD1x3WB_16B_register; - case AArch64::LD1x3WB_8H_fixed: return AArch64::LD1x3WB_8H_register; - case AArch64::LD1x3WB_4S_fixed: return AArch64::LD1x3WB_4S_register; - case AArch64::LD1x3WB_2D_fixed: return AArch64::LD1x3WB_2D_register; - - case AArch64::LD1x4WB_8B_fixed: return AArch64::LD1x4WB_8B_register; - case AArch64::LD1x4WB_4H_fixed: return AArch64::LD1x4WB_4H_register; - case AArch64::LD1x4WB_2S_fixed: return AArch64::LD1x4WB_2S_register; - case AArch64::LD1x4WB_1D_fixed: return AArch64::LD1x4WB_1D_register; - case AArch64::LD1x4WB_16B_fixed: return 
AArch64::LD1x4WB_16B_register; - case AArch64::LD1x4WB_8H_fixed: return AArch64::LD1x4WB_8H_register; - case AArch64::LD1x4WB_4S_fixed: return AArch64::LD1x4WB_4S_register; - case AArch64::LD1x4WB_2D_fixed: return AArch64::LD1x4WB_2D_register; - - case AArch64::ST1WB_8B_fixed: return AArch64::ST1WB_8B_register; - case AArch64::ST1WB_4H_fixed: return AArch64::ST1WB_4H_register; - case AArch64::ST1WB_2S_fixed: return AArch64::ST1WB_2S_register; - case AArch64::ST1WB_1D_fixed: return AArch64::ST1WB_1D_register; - case AArch64::ST1WB_16B_fixed: return AArch64::ST1WB_16B_register; - case AArch64::ST1WB_8H_fixed: return AArch64::ST1WB_8H_register; - case AArch64::ST1WB_4S_fixed: return AArch64::ST1WB_4S_register; - case AArch64::ST1WB_2D_fixed: return AArch64::ST1WB_2D_register; - - case AArch64::ST2WB_8B_fixed: return AArch64::ST2WB_8B_register; - case AArch64::ST2WB_4H_fixed: return AArch64::ST2WB_4H_register; - case AArch64::ST2WB_2S_fixed: return AArch64::ST2WB_2S_register; - case AArch64::ST2WB_16B_fixed: return AArch64::ST2WB_16B_register; - case AArch64::ST2WB_8H_fixed: return AArch64::ST2WB_8H_register; - case AArch64::ST2WB_4S_fixed: return AArch64::ST2WB_4S_register; - case AArch64::ST2WB_2D_fixed: return AArch64::ST2WB_2D_register; - - case AArch64::ST3WB_8B_fixed: return AArch64::ST3WB_8B_register; - case AArch64::ST3WB_4H_fixed: return AArch64::ST3WB_4H_register; - case AArch64::ST3WB_2S_fixed: return AArch64::ST3WB_2S_register; - case AArch64::ST3WB_16B_fixed: return AArch64::ST3WB_16B_register; - case AArch64::ST3WB_8H_fixed: return AArch64::ST3WB_8H_register; - case AArch64::ST3WB_4S_fixed: return AArch64::ST3WB_4S_register; - case AArch64::ST3WB_2D_fixed: return AArch64::ST3WB_2D_register; - - case AArch64::ST4WB_8B_fixed: return AArch64::ST4WB_8B_register; - case AArch64::ST4WB_4H_fixed: return AArch64::ST4WB_4H_register; - case AArch64::ST4WB_2S_fixed: return AArch64::ST4WB_2S_register; - case AArch64::ST4WB_16B_fixed: return AArch64::ST4WB_16B_register; - case AArch64::ST4WB_8H_fixed: return AArch64::ST4WB_8H_register; - case AArch64::ST4WB_4S_fixed: return AArch64::ST4WB_4S_register; - case AArch64::ST4WB_2D_fixed: return AArch64::ST4WB_2D_register; - - case AArch64::ST1x2WB_8B_fixed: return AArch64::ST1x2WB_8B_register; - case AArch64::ST1x2WB_4H_fixed: return AArch64::ST1x2WB_4H_register; - case AArch64::ST1x2WB_2S_fixed: return AArch64::ST1x2WB_2S_register; - case AArch64::ST1x2WB_1D_fixed: return AArch64::ST1x2WB_1D_register; - case AArch64::ST1x2WB_16B_fixed: return AArch64::ST1x2WB_16B_register; - case AArch64::ST1x2WB_8H_fixed: return AArch64::ST1x2WB_8H_register; - case AArch64::ST1x2WB_4S_fixed: return AArch64::ST1x2WB_4S_register; - case AArch64::ST1x2WB_2D_fixed: return AArch64::ST1x2WB_2D_register; - - case AArch64::ST1x3WB_8B_fixed: return AArch64::ST1x3WB_8B_register; - case AArch64::ST1x3WB_4H_fixed: return AArch64::ST1x3WB_4H_register; - case AArch64::ST1x3WB_2S_fixed: return AArch64::ST1x3WB_2S_register; - case AArch64::ST1x3WB_1D_fixed: return AArch64::ST1x3WB_1D_register; - case AArch64::ST1x3WB_16B_fixed: return AArch64::ST1x3WB_16B_register; - case AArch64::ST1x3WB_8H_fixed: return AArch64::ST1x3WB_8H_register; - case AArch64::ST1x3WB_4S_fixed: return AArch64::ST1x3WB_4S_register; - case AArch64::ST1x3WB_2D_fixed: return AArch64::ST1x3WB_2D_register; - - case AArch64::ST1x4WB_8B_fixed: return AArch64::ST1x4WB_8B_register; - case AArch64::ST1x4WB_4H_fixed: return AArch64::ST1x4WB_4H_register; - case AArch64::ST1x4WB_2S_fixed: return 
AArch64::ST1x4WB_2S_register; - case AArch64::ST1x4WB_1D_fixed: return AArch64::ST1x4WB_1D_register; - case AArch64::ST1x4WB_16B_fixed: return AArch64::ST1x4WB_16B_register; - case AArch64::ST1x4WB_8H_fixed: return AArch64::ST1x4WB_8H_register; - case AArch64::ST1x4WB_4S_fixed: return AArch64::ST1x4WB_4S_register; - case AArch64::ST1x4WB_2D_fixed: return AArch64::ST1x4WB_2D_register; - - // Post-index of duplicate loads - case AArch64::LD2R_WB_8B_fixed: return AArch64::LD2R_WB_8B_register; - case AArch64::LD2R_WB_4H_fixed: return AArch64::LD2R_WB_4H_register; - case AArch64::LD2R_WB_2S_fixed: return AArch64::LD2R_WB_2S_register; - case AArch64::LD2R_WB_1D_fixed: return AArch64::LD2R_WB_1D_register; - case AArch64::LD2R_WB_16B_fixed: return AArch64::LD2R_WB_16B_register; - case AArch64::LD2R_WB_8H_fixed: return AArch64::LD2R_WB_8H_register; - case AArch64::LD2R_WB_4S_fixed: return AArch64::LD2R_WB_4S_register; - case AArch64::LD2R_WB_2D_fixed: return AArch64::LD2R_WB_2D_register; - - case AArch64::LD3R_WB_8B_fixed: return AArch64::LD3R_WB_8B_register; - case AArch64::LD3R_WB_4H_fixed: return AArch64::LD3R_WB_4H_register; - case AArch64::LD3R_WB_2S_fixed: return AArch64::LD3R_WB_2S_register; - case AArch64::LD3R_WB_1D_fixed: return AArch64::LD3R_WB_1D_register; - case AArch64::LD3R_WB_16B_fixed: return AArch64::LD3R_WB_16B_register; - case AArch64::LD3R_WB_8H_fixed: return AArch64::LD3R_WB_8H_register; - case AArch64::LD3R_WB_4S_fixed: return AArch64::LD3R_WB_4S_register; - case AArch64::LD3R_WB_2D_fixed: return AArch64::LD3R_WB_2D_register; - - case AArch64::LD4R_WB_8B_fixed: return AArch64::LD4R_WB_8B_register; - case AArch64::LD4R_WB_4H_fixed: return AArch64::LD4R_WB_4H_register; - case AArch64::LD4R_WB_2S_fixed: return AArch64::LD4R_WB_2S_register; - case AArch64::LD4R_WB_1D_fixed: return AArch64::LD4R_WB_1D_register; - case AArch64::LD4R_WB_16B_fixed: return AArch64::LD4R_WB_16B_register; - case AArch64::LD4R_WB_8H_fixed: return AArch64::LD4R_WB_8H_register; - case AArch64::LD4R_WB_4S_fixed: return AArch64::LD4R_WB_4S_register; - case AArch64::LD4R_WB_2D_fixed: return AArch64::LD4R_WB_2D_register; - - // Post-index of lane loads - case AArch64::LD2LN_WB_B_fixed: return AArch64::LD2LN_WB_B_register; - case AArch64::LD2LN_WB_H_fixed: return AArch64::LD2LN_WB_H_register; - case AArch64::LD2LN_WB_S_fixed: return AArch64::LD2LN_WB_S_register; - case AArch64::LD2LN_WB_D_fixed: return AArch64::LD2LN_WB_D_register; - - case AArch64::LD3LN_WB_B_fixed: return AArch64::LD3LN_WB_B_register; - case AArch64::LD3LN_WB_H_fixed: return AArch64::LD3LN_WB_H_register; - case AArch64::LD3LN_WB_S_fixed: return AArch64::LD3LN_WB_S_register; - case AArch64::LD3LN_WB_D_fixed: return AArch64::LD3LN_WB_D_register; - - case AArch64::LD4LN_WB_B_fixed: return AArch64::LD4LN_WB_B_register; - case AArch64::LD4LN_WB_H_fixed: return AArch64::LD4LN_WB_H_register; - case AArch64::LD4LN_WB_S_fixed: return AArch64::LD4LN_WB_S_register; - case AArch64::LD4LN_WB_D_fixed: return AArch64::LD4LN_WB_D_register; - - // Post-index of lane stores - case AArch64::ST2LN_WB_B_fixed: return AArch64::ST2LN_WB_B_register; - case AArch64::ST2LN_WB_H_fixed: return AArch64::ST2LN_WB_H_register; - case AArch64::ST2LN_WB_S_fixed: return AArch64::ST2LN_WB_S_register; - case AArch64::ST2LN_WB_D_fixed: return AArch64::ST2LN_WB_D_register; - - case AArch64::ST3LN_WB_B_fixed: return AArch64::ST3LN_WB_B_register; - case AArch64::ST3LN_WB_H_fixed: return AArch64::ST3LN_WB_H_register; - case AArch64::ST3LN_WB_S_fixed: return 
AArch64::ST3LN_WB_S_register; - case AArch64::ST3LN_WB_D_fixed: return AArch64::ST3LN_WB_D_register; - - case AArch64::ST4LN_WB_B_fixed: return AArch64::ST4LN_WB_B_register; - case AArch64::ST4LN_WB_H_fixed: return AArch64::ST4LN_WB_H_register; - case AArch64::ST4LN_WB_S_fixed: return AArch64::ST4LN_WB_S_register; - case AArch64::ST4LN_WB_D_fixed: return AArch64::ST4LN_WB_D_register; - } - return Opc; // If not one we handle, return it unchanged. -} - -SDNode *AArch64DAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, - unsigned NumVecs, - const uint16_t *Opcodes) { - assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range"); - - EVT VT = N->getValueType(0); - unsigned OpcodeIndex; - bool is64BitVector = VT.is64BitVector(); - switch (VT.getScalarType().getSizeInBits()) { - case 8: OpcodeIndex = is64BitVector ? 0 : 4; break; - case 16: OpcodeIndex = is64BitVector ? 1 : 5; break; - case 32: OpcodeIndex = is64BitVector ? 2 : 6; break; - case 64: OpcodeIndex = is64BitVector ? 3 : 7; break; - default: llvm_unreachable("unhandled vector load type"); - } - unsigned Opc = Opcodes[OpcodeIndex]; - - SmallVector<SDValue, 2> Ops; - unsigned AddrOpIdx = isUpdating ? 1 : 2; - Ops.push_back(N->getOperand(AddrOpIdx)); // Push back the Memory Address - - if (isUpdating) { - SDValue Inc = N->getOperand(AddrOpIdx + 1); - if (!isa<ConstantSDNode>(Inc.getNode())) // Increment in Register - Opc = getVLDSTRegisterUpdateOpcode(Opc); - Ops.push_back(Inc); - } - - Ops.push_back(N->getOperand(0)); // Push back the Chain - - SmallVector<EVT, 3> ResTys; - // Push back the type of return super register - if (NumVecs == 1) - ResTys.push_back(VT); - else if (NumVecs == 3) - ResTys.push_back(MVT::Untyped); - else { - EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, - is64BitVector ? NumVecs : NumVecs * 2); - ResTys.push_back(ResTy); - } - - if (isUpdating) - ResTys.push_back(MVT::i64); // Type of the updated register - ResTys.push_back(MVT::Other); // Type of the Chain - SDLoc dl(N); - SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); - - // Transfer memoperands. - MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); - cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1); - - if (NumVecs == 1) - return VLd; - - // If NumVecs > 1, the return result is a super register containing 2-4 - // consecutive vector registers. - SDValue SuperReg = SDValue(VLd, 0); - - unsigned Sub0 = is64BitVector ? AArch64::dsub_0 : AArch64::qsub_0; - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - ReplaceUses(SDValue(N, Vec), - CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg)); - // Update users of the Chain - ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1)); - if (isUpdating) - ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2)); - - return nullptr; -} - -SDNode *AArch64DAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, - unsigned NumVecs, - const uint16_t *Opcodes) { - assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range"); - SDLoc dl(N); - - MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); - - unsigned AddrOpIdx = isUpdating ? 1 : 2; - unsigned Vec0Idx = 3; - EVT VT = N->getOperand(Vec0Idx).getValueType(); - unsigned OpcodeIndex; - bool is64BitVector = VT.is64BitVector(); - switch (VT.getScalarType().getSizeInBits()) { - case 8: OpcodeIndex = is64BitVector ? 0 : 4; break; - case 16: OpcodeIndex = is64BitVector ? 
1 : 5; break; - case 32: OpcodeIndex = is64BitVector ? 2 : 6; break; - case 64: OpcodeIndex = is64BitVector ? 3 : 7; break; - default: llvm_unreachable("unhandled vector store type"); - } - unsigned Opc = Opcodes[OpcodeIndex]; - - SmallVector<EVT, 2> ResTys; - if (isUpdating) - ResTys.push_back(MVT::i64); - ResTys.push_back(MVT::Other); // Type for the Chain - - SmallVector<SDValue, 6> Ops; - Ops.push_back(N->getOperand(AddrOpIdx)); // Push back the Memory Address - - if (isUpdating) { - SDValue Inc = N->getOperand(AddrOpIdx + 1); - if (!isa<ConstantSDNode>(Inc.getNode())) // Increment in Register - Opc = getVLDSTRegisterUpdateOpcode(Opc); - Ops.push_back(Inc); - } - - SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Idx, - N->op_begin() + Vec0Idx + NumVecs); - SDValue SrcReg = is64BitVector ? createDTuple(Regs) : createQTuple(Regs); - Ops.push_back(SrcReg); - - // Push back the Chain - Ops.push_back(N->getOperand(0)); - - // Transfer memoperands. - SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); - cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1); - - return VSt; -} - -SDValue -AArch64DAGToDAGISel::getTargetSubregToReg(int SRIdx, SDLoc DL, EVT VT, EVT VTD, - SDValue Operand) { - SDNode *Reg = CurDAG->getMachineNode(TargetOpcode::SUBREG_TO_REG, DL, - VT, VTD, MVT::Other, - CurDAG->getTargetConstant(0, MVT::i64), - Operand, - CurDAG->getTargetConstant(AArch64::sub_64, MVT::i32)); - return SDValue(Reg, 0); -} - -SDNode *AArch64DAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, - unsigned NumVecs, - const uint16_t *Opcodes) { - assert(NumVecs >=2 && NumVecs <= 4 && "Load Dup NumVecs out-of-range"); - SDLoc dl(N); - - EVT VT = N->getValueType(0); - unsigned OpcodeIndex; - bool is64BitVector = VT.is64BitVector(); - switch (VT.getScalarType().getSizeInBits()) { - case 8: OpcodeIndex = is64BitVector ? 0 : 4; break; - case 16: OpcodeIndex = is64BitVector ? 1 : 5; break; - case 32: OpcodeIndex = is64BitVector ? 2 : 6; break; - case 64: OpcodeIndex = is64BitVector ? 3 : 7; break; - default: llvm_unreachable("unhandled vector duplicate lane load type"); - } - unsigned Opc = Opcodes[OpcodeIndex]; - - SDValue SuperReg; - SmallVector<SDValue, 6> Ops; - Ops.push_back(N->getOperand(1)); // Push back the Memory Address - if (isUpdating) { - SDValue Inc = N->getOperand(2); - if (!isa<ConstantSDNode>(Inc.getNode())) // Increment in Register - Opc = getVLDSTRegisterUpdateOpcode(Opc); - Ops.push_back(Inc); - } - Ops.push_back(N->getOperand(0)); // Push back the Chain - - SmallVector<EVT, 3> ResTys; - // Push back the type of return super register - if (NumVecs == 3) - ResTys.push_back(MVT::Untyped); - else { - EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, - is64BitVector ? NumVecs : NumVecs * 2); - ResTys.push_back(ResTy); - } - if (isUpdating) - ResTys.push_back(MVT::i64); // Type of the updated register - ResTys.push_back(MVT::Other); // Type of the Chain - SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); - - // Transfer memoperands. - MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); - cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1); - - SuperReg = SDValue(VLdDup, 0); - unsigned Sub0 = is64BitVector ? 
AArch64::dsub_0 : AArch64::qsub_0; - // Update uses of each registers in super register - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - ReplaceUses(SDValue(N, Vec), - CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg)); - // Update uses of the Chain - ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1)); - if (isUpdating) - ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2)); - return nullptr; -} - -// We only have 128-bit vector type of load/store lane instructions. -// If it is 64-bit vector, we also select it to the 128-bit instructions. -// Just use SUBREG_TO_REG to adapt the input to 128-bit vector and -// EXTRACT_SUBREG to get the 64-bit vector from the 128-bit vector output. -SDNode *AArch64DAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, - bool isUpdating, unsigned NumVecs, - const uint16_t *Opcodes) { - assert(NumVecs >= 2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range"); - SDLoc dl(N); - unsigned AddrOpIdx = isUpdating ? 1 : 2; - unsigned Vec0Idx = 3; - - SDValue Chain = N->getOperand(0); - unsigned Lane = - cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue(); - EVT VT = N->getOperand(Vec0Idx).getValueType(); - bool is64BitVector = VT.is64BitVector(); - EVT VT64; // 64-bit Vector Type - - if (is64BitVector) { - VT64 = VT; - VT = EVT::getVectorVT(*CurDAG->getContext(), VT.getVectorElementType(), - VT.getVectorNumElements() * 2); - } - - unsigned OpcodeIndex; - switch (VT.getScalarType().getSizeInBits()) { - case 8: OpcodeIndex = 0; break; - case 16: OpcodeIndex = 1; break; - case 32: OpcodeIndex = 2; break; - case 64: OpcodeIndex = 3; break; - default: llvm_unreachable("unhandled vector lane load/store type"); - } - unsigned Opc = Opcodes[OpcodeIndex]; - - SmallVector<EVT, 3> ResTys; - if (IsLoad) { - // Push back the type of return super register - if (NumVecs == 3) - ResTys.push_back(MVT::Untyped); - else { - EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, - is64BitVector ? NumVecs : NumVecs * 2); - ResTys.push_back(ResTy); - } - } - if (isUpdating) - ResTys.push_back(MVT::i64); // Type of the updated register - ResTys.push_back(MVT::Other); // Type of Chain - SmallVector<SDValue, 5> Ops; - Ops.push_back(N->getOperand(AddrOpIdx)); // Push back the Memory Address - if (isUpdating) { - SDValue Inc = N->getOperand(AddrOpIdx + 1); - if (!isa<ConstantSDNode>(Inc.getNode())) // Increment in Register - Opc = getVLDSTRegisterUpdateOpcode(Opc); - Ops.push_back(Inc); - } - - SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Idx, - N->op_begin() + Vec0Idx + NumVecs); - if (is64BitVector) - for (unsigned i = 0; i < Regs.size(); i++) - Regs[i] = getTargetSubregToReg(AArch64::sub_64, dl, VT, VT64, Regs[i]); - SDValue SuperReg = createQTuple(Regs); - - Ops.push_back(SuperReg); // Source Reg - SDValue LaneValue = CurDAG->getTargetConstant(Lane, MVT::i32); - Ops.push_back(LaneValue); - Ops.push_back(Chain); // Push back the Chain - - SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); - MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); - cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1); - if (!IsLoad) - return VLdLn; - - // Extract the subregisters. 
- SuperReg = SDValue(VLdLn, 0); - unsigned Sub0 = AArch64::qsub_0; - // Update uses of each registers in super register - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { - SDValue SUB0 = CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg); - if (is64BitVector) { - SUB0 = CurDAG->getTargetExtractSubreg(AArch64::sub_64, dl, VT64, SUB0); - } - ReplaceUses(SDValue(N, Vec), SUB0); - } - ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1)); - if (isUpdating) - ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2)); - return nullptr; -} - -unsigned AArch64DAGToDAGISel::getTBLOpc(bool IsExt, bool Is64Bit, - unsigned NumOfVec) { - assert(NumOfVec >= 1 && NumOfVec <= 4 && "VST NumVecs out-of-range"); - - unsigned Opc = 0; - switch (NumOfVec) { - default: - break; - case 1: - if (IsExt) - Opc = Is64Bit ? AArch64::TBX1_8b : AArch64::TBX1_16b; - else - Opc = Is64Bit ? AArch64::TBL1_8b : AArch64::TBL1_16b; - break; - case 2: - if (IsExt) - Opc = Is64Bit ? AArch64::TBX2_8b : AArch64::TBX2_16b; - else - Opc = Is64Bit ? AArch64::TBL2_8b : AArch64::TBL2_16b; - break; - case 3: - if (IsExt) - Opc = Is64Bit ? AArch64::TBX3_8b : AArch64::TBX3_16b; - else - Opc = Is64Bit ? AArch64::TBL3_8b : AArch64::TBL3_16b; - break; - case 4: - if (IsExt) - Opc = Is64Bit ? AArch64::TBX4_8b : AArch64::TBX4_16b; - else - Opc = Is64Bit ? AArch64::TBL4_8b : AArch64::TBL4_16b; - break; - } - - return Opc; -} - -SDNode *AArch64DAGToDAGISel::SelectVTBL(SDNode *N, unsigned NumVecs, - bool IsExt) { - assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range"); - SDLoc dl(N); - - // Check the element of look up table is 64-bit or not - unsigned Vec0Idx = IsExt ? 2 : 1; - assert(!N->getOperand(Vec0Idx + 0).getValueType().is64BitVector() && - "The element of lookup table for vtbl and vtbx must be 128-bit"); - - // Check the return value type is 64-bit or not - EVT ResVT = N->getValueType(0); - bool is64BitRes = ResVT.is64BitVector(); - - // Create new SDValue for vector list - SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Idx, - N->op_begin() + Vec0Idx + NumVecs); - SDValue TblReg = createQTuple(Regs); - unsigned Opc = getTBLOpc(IsExt, is64BitRes, NumVecs); - - SmallVector<SDValue, 3> Ops; - if (IsExt) - Ops.push_back(N->getOperand(1)); - Ops.push_back(TblReg); - Ops.push_back(N->getOperand(Vec0Idx + NumVecs)); - return CurDAG->getMachineNode(Opc, dl, ResVT, Ops); -} - -SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { - // Dump information about the Node being selected - DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << "\n"); - - if (Node->isMachineOpcode()) { - DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n"); - Node->setNodeId(-1); - return nullptr; - } - - switch (Node->getOpcode()) { - case ISD::ATOMIC_LOAD_ADD: - return SelectAtomic(Node, - AArch64::ATOMIC_LOAD_ADD_I8, - AArch64::ATOMIC_LOAD_ADD_I16, - AArch64::ATOMIC_LOAD_ADD_I32, - AArch64::ATOMIC_LOAD_ADD_I64); - case ISD::ATOMIC_LOAD_SUB: - return SelectAtomic(Node, - AArch64::ATOMIC_LOAD_SUB_I8, - AArch64::ATOMIC_LOAD_SUB_I16, - AArch64::ATOMIC_LOAD_SUB_I32, - AArch64::ATOMIC_LOAD_SUB_I64); - case ISD::ATOMIC_LOAD_AND: - return SelectAtomic(Node, - AArch64::ATOMIC_LOAD_AND_I8, - AArch64::ATOMIC_LOAD_AND_I16, - AArch64::ATOMIC_LOAD_AND_I32, - AArch64::ATOMIC_LOAD_AND_I64); - case ISD::ATOMIC_LOAD_OR: - return SelectAtomic(Node, - AArch64::ATOMIC_LOAD_OR_I8, - AArch64::ATOMIC_LOAD_OR_I16, - AArch64::ATOMIC_LOAD_OR_I32, - AArch64::ATOMIC_LOAD_OR_I64); - case ISD::ATOMIC_LOAD_XOR: - return SelectAtomic(Node, - AArch64::ATOMIC_LOAD_XOR_I8, 
- AArch64::ATOMIC_LOAD_XOR_I16, - AArch64::ATOMIC_LOAD_XOR_I32, - AArch64::ATOMIC_LOAD_XOR_I64); - case ISD::ATOMIC_LOAD_NAND: - return SelectAtomic(Node, - AArch64::ATOMIC_LOAD_NAND_I8, - AArch64::ATOMIC_LOAD_NAND_I16, - AArch64::ATOMIC_LOAD_NAND_I32, - AArch64::ATOMIC_LOAD_NAND_I64); - case ISD::ATOMIC_LOAD_MIN: - return SelectAtomic(Node, - AArch64::ATOMIC_LOAD_MIN_I8, - AArch64::ATOMIC_LOAD_MIN_I16, - AArch64::ATOMIC_LOAD_MIN_I32, - AArch64::ATOMIC_LOAD_MIN_I64); - case ISD::ATOMIC_LOAD_MAX: - return SelectAtomic(Node, - AArch64::ATOMIC_LOAD_MAX_I8, - AArch64::ATOMIC_LOAD_MAX_I16, - AArch64::ATOMIC_LOAD_MAX_I32, - AArch64::ATOMIC_LOAD_MAX_I64); - case ISD::ATOMIC_LOAD_UMIN: - return SelectAtomic(Node, - AArch64::ATOMIC_LOAD_UMIN_I8, - AArch64::ATOMIC_LOAD_UMIN_I16, - AArch64::ATOMIC_LOAD_UMIN_I32, - AArch64::ATOMIC_LOAD_UMIN_I64); - case ISD::ATOMIC_LOAD_UMAX: - return SelectAtomic(Node, - AArch64::ATOMIC_LOAD_UMAX_I8, - AArch64::ATOMIC_LOAD_UMAX_I16, - AArch64::ATOMIC_LOAD_UMAX_I32, - AArch64::ATOMIC_LOAD_UMAX_I64); - case ISD::ATOMIC_SWAP: - return SelectAtomic(Node, - AArch64::ATOMIC_SWAP_I8, - AArch64::ATOMIC_SWAP_I16, - AArch64::ATOMIC_SWAP_I32, - AArch64::ATOMIC_SWAP_I64); - case ISD::ATOMIC_CMP_SWAP: - return SelectAtomic(Node, - AArch64::ATOMIC_CMP_SWAP_I8, - AArch64::ATOMIC_CMP_SWAP_I16, - AArch64::ATOMIC_CMP_SWAP_I32, - AArch64::ATOMIC_CMP_SWAP_I64); - case ISD::FrameIndex: { - int FI = cast<FrameIndexSDNode>(Node)->getIndex(); - EVT PtrTy = getTargetLowering()->getPointerTy(); - SDValue TFI = CurDAG->getTargetFrameIndex(FI, PtrTy); - return CurDAG->SelectNodeTo(Node, AArch64::ADDxxi_lsl0_s, PtrTy, - TFI, CurDAG->getTargetConstant(0, PtrTy)); - } - case ISD::Constant: { - SDNode *ResNode = nullptr; - if (cast<ConstantSDNode>(Node)->getZExtValue() == 0) { - // XZR and WZR are probably even better than an actual move: most of the - // time they can be folded into another instruction with *no* cost. - - EVT Ty = Node->getValueType(0); - assert((Ty == MVT::i32 || Ty == MVT::i64) && "unexpected type"); - uint16_t Register = Ty == MVT::i32 ? AArch64::WZR : AArch64::XZR; - ResNode = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), - SDLoc(Node), - Register, Ty).getNode(); - } - - // Next best option is a move-immediate, see if we can do that. - if (!ResNode) { - ResNode = TrySelectToMoveImm(Node); - } - - if (ResNode) - return ResNode; - - // If even that fails we fall back to a lit-pool entry at the moment. Future - // tuning may change this to a sequence of MOVZ/MOVN/MOVK instructions. - ResNode = SelectToLitPool(Node); - assert(ResNode && "We need *some* way to materialise a constant"); - - // We want to continue selection at this point since the litpool access - // generated used generic nodes for simplicity. - ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0)); - Node = ResNode; - break; - } - case ISD::ConstantFP: { - if (A64Imms::isFPImm(cast<ConstantFPSDNode>(Node)->getValueAPF())) { - // FMOV will take care of it from TableGen - break; - } - - SDNode *ResNode = LowerToFPLitPool(Node); - ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0)); - - // We want to continue selection at this point since the litpool access - // generated used generic nodes for simplicity. 
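The constant-selection order above (zero register first, then a move-immediate, then a literal-pool load) can be sketched with a small standalone helper. The code below only illustrates that preference order under simplified assumptions; it is not the backend's TrySelectToMoveImm or SelectToLitPool logic.

#include <cstdint>

// Illustrative preference order for materialising an integer constant:
// 1. zero register (XZR/WZR copy, usually folds away for free),
// 2. a single MOVZ/MOVN-style move-immediate,
// 3. a literal-pool load as the last resort.
enum class ConstMatStrategy { ZeroRegister, MoveImmediate, LiteralPool };

inline ConstMatStrategy classifyConstant(uint64_t Imm) {
  if (Imm == 0)
    return ConstMatStrategy::ZeroRegister;
  for (unsigned Shift = 0; Shift < 64; Shift += 16) {
    uint64_t Chunk = 0xFFFFull << Shift;
    if ((Imm & ~Chunk) == 0 || (~Imm & ~Chunk) == 0)
      return ConstMatStrategy::MoveImmediate;   // representable by one MOVZ/MOVN
  }
  return ConstMatStrategy::LiteralPool;
}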
- Node = ResNode; - break; - } - case AArch64ISD::NEON_LD1_UPD: { - static const uint16_t Opcodes[] = { - AArch64::LD1WB_8B_fixed, AArch64::LD1WB_4H_fixed, - AArch64::LD1WB_2S_fixed, AArch64::LD1WB_1D_fixed, - AArch64::LD1WB_16B_fixed, AArch64::LD1WB_8H_fixed, - AArch64::LD1WB_4S_fixed, AArch64::LD1WB_2D_fixed - }; - return SelectVLD(Node, true, 1, Opcodes); - } - case AArch64ISD::NEON_LD2_UPD: { - static const uint16_t Opcodes[] = { - AArch64::LD2WB_8B_fixed, AArch64::LD2WB_4H_fixed, - AArch64::LD2WB_2S_fixed, AArch64::LD1x2WB_1D_fixed, - AArch64::LD2WB_16B_fixed, AArch64::LD2WB_8H_fixed, - AArch64::LD2WB_4S_fixed, AArch64::LD2WB_2D_fixed - }; - return SelectVLD(Node, true, 2, Opcodes); - } - case AArch64ISD::NEON_LD3_UPD: { - static const uint16_t Opcodes[] = { - AArch64::LD3WB_8B_fixed, AArch64::LD3WB_4H_fixed, - AArch64::LD3WB_2S_fixed, AArch64::LD1x3WB_1D_fixed, - AArch64::LD3WB_16B_fixed, AArch64::LD3WB_8H_fixed, - AArch64::LD3WB_4S_fixed, AArch64::LD3WB_2D_fixed - }; - return SelectVLD(Node, true, 3, Opcodes); - } - case AArch64ISD::NEON_LD4_UPD: { - static const uint16_t Opcodes[] = { - AArch64::LD4WB_8B_fixed, AArch64::LD4WB_4H_fixed, - AArch64::LD4WB_2S_fixed, AArch64::LD1x4WB_1D_fixed, - AArch64::LD4WB_16B_fixed, AArch64::LD4WB_8H_fixed, - AArch64::LD4WB_4S_fixed, AArch64::LD4WB_2D_fixed - }; - return SelectVLD(Node, true, 4, Opcodes); - } - case AArch64ISD::NEON_LD1x2_UPD: { - static const uint16_t Opcodes[] = { - AArch64::LD1x2WB_8B_fixed, AArch64::LD1x2WB_4H_fixed, - AArch64::LD1x2WB_2S_fixed, AArch64::LD1x2WB_1D_fixed, - AArch64::LD1x2WB_16B_fixed, AArch64::LD1x2WB_8H_fixed, - AArch64::LD1x2WB_4S_fixed, AArch64::LD1x2WB_2D_fixed - }; - return SelectVLD(Node, true, 2, Opcodes); - } - case AArch64ISD::NEON_LD1x3_UPD: { - static const uint16_t Opcodes[] = { - AArch64::LD1x3WB_8B_fixed, AArch64::LD1x3WB_4H_fixed, - AArch64::LD1x3WB_2S_fixed, AArch64::LD1x3WB_1D_fixed, - AArch64::LD1x3WB_16B_fixed, AArch64::LD1x3WB_8H_fixed, - AArch64::LD1x3WB_4S_fixed, AArch64::LD1x3WB_2D_fixed - }; - return SelectVLD(Node, true, 3, Opcodes); - } - case AArch64ISD::NEON_LD1x4_UPD: { - static const uint16_t Opcodes[] = { - AArch64::LD1x4WB_8B_fixed, AArch64::LD1x4WB_4H_fixed, - AArch64::LD1x4WB_2S_fixed, AArch64::LD1x4WB_1D_fixed, - AArch64::LD1x4WB_16B_fixed, AArch64::LD1x4WB_8H_fixed, - AArch64::LD1x4WB_4S_fixed, AArch64::LD1x4WB_2D_fixed - }; - return SelectVLD(Node, true, 4, Opcodes); - } - case AArch64ISD::NEON_ST1_UPD: { - static const uint16_t Opcodes[] = { - AArch64::ST1WB_8B_fixed, AArch64::ST1WB_4H_fixed, - AArch64::ST1WB_2S_fixed, AArch64::ST1WB_1D_fixed, - AArch64::ST1WB_16B_fixed, AArch64::ST1WB_8H_fixed, - AArch64::ST1WB_4S_fixed, AArch64::ST1WB_2D_fixed - }; - return SelectVST(Node, true, 1, Opcodes); - } - case AArch64ISD::NEON_ST2_UPD: { - static const uint16_t Opcodes[] = { - AArch64::ST2WB_8B_fixed, AArch64::ST2WB_4H_fixed, - AArch64::ST2WB_2S_fixed, AArch64::ST1x2WB_1D_fixed, - AArch64::ST2WB_16B_fixed, AArch64::ST2WB_8H_fixed, - AArch64::ST2WB_4S_fixed, AArch64::ST2WB_2D_fixed - }; - return SelectVST(Node, true, 2, Opcodes); - } - case AArch64ISD::NEON_ST3_UPD: { - static const uint16_t Opcodes[] = { - AArch64::ST3WB_8B_fixed, AArch64::ST3WB_4H_fixed, - AArch64::ST3WB_2S_fixed, AArch64::ST1x3WB_1D_fixed, - AArch64::ST3WB_16B_fixed, AArch64::ST3WB_8H_fixed, - AArch64::ST3WB_4S_fixed, AArch64::ST3WB_2D_fixed - }; - return SelectVST(Node, true, 3, Opcodes); - } - case AArch64ISD::NEON_ST4_UPD: { - static const uint16_t Opcodes[] = { - AArch64::ST4WB_8B_fixed, 
AArch64::ST4WB_4H_fixed, - AArch64::ST4WB_2S_fixed, AArch64::ST1x4WB_1D_fixed, - AArch64::ST4WB_16B_fixed, AArch64::ST4WB_8H_fixed, - AArch64::ST4WB_4S_fixed, AArch64::ST4WB_2D_fixed - }; - return SelectVST(Node, true, 4, Opcodes); - } - case AArch64ISD::NEON_LD2DUP: { - static const uint16_t Opcodes[] = { - AArch64::LD2R_8B, AArch64::LD2R_4H, AArch64::LD2R_2S, - AArch64::LD2R_1D, AArch64::LD2R_16B, AArch64::LD2R_8H, - AArch64::LD2R_4S, AArch64::LD2R_2D - }; - return SelectVLDDup(Node, false, 2, Opcodes); - } - case AArch64ISD::NEON_LD3DUP: { - static const uint16_t Opcodes[] = { - AArch64::LD3R_8B, AArch64::LD3R_4H, AArch64::LD3R_2S, - AArch64::LD3R_1D, AArch64::LD3R_16B, AArch64::LD3R_8H, - AArch64::LD3R_4S, AArch64::LD3R_2D - }; - return SelectVLDDup(Node, false, 3, Opcodes); - } - case AArch64ISD::NEON_LD4DUP: { - static const uint16_t Opcodes[] = { - AArch64::LD4R_8B, AArch64::LD4R_4H, AArch64::LD4R_2S, - AArch64::LD4R_1D, AArch64::LD4R_16B, AArch64::LD4R_8H, - AArch64::LD4R_4S, AArch64::LD4R_2D - }; - return SelectVLDDup(Node, false, 4, Opcodes); - } - case AArch64ISD::NEON_LD2DUP_UPD: { - static const uint16_t Opcodes[] = { - AArch64::LD2R_WB_8B_fixed, AArch64::LD2R_WB_4H_fixed, - AArch64::LD2R_WB_2S_fixed, AArch64::LD2R_WB_1D_fixed, - AArch64::LD2R_WB_16B_fixed, AArch64::LD2R_WB_8H_fixed, - AArch64::LD2R_WB_4S_fixed, AArch64::LD2R_WB_2D_fixed - }; - return SelectVLDDup(Node, true, 2, Opcodes); - } - case AArch64ISD::NEON_LD3DUP_UPD: { - static const uint16_t Opcodes[] = { - AArch64::LD3R_WB_8B_fixed, AArch64::LD3R_WB_4H_fixed, - AArch64::LD3R_WB_2S_fixed, AArch64::LD3R_WB_1D_fixed, - AArch64::LD3R_WB_16B_fixed, AArch64::LD3R_WB_8H_fixed, - AArch64::LD3R_WB_4S_fixed, AArch64::LD3R_WB_2D_fixed - }; - return SelectVLDDup(Node, true, 3, Opcodes); - } - case AArch64ISD::NEON_LD4DUP_UPD: { - static const uint16_t Opcodes[] = { - AArch64::LD4R_WB_8B_fixed, AArch64::LD4R_WB_4H_fixed, - AArch64::LD4R_WB_2S_fixed, AArch64::LD4R_WB_1D_fixed, - AArch64::LD4R_WB_16B_fixed, AArch64::LD4R_WB_8H_fixed, - AArch64::LD4R_WB_4S_fixed, AArch64::LD4R_WB_2D_fixed - }; - return SelectVLDDup(Node, true, 4, Opcodes); - } - case AArch64ISD::NEON_LD2LN_UPD: { - static const uint16_t Opcodes[] = { - AArch64::LD2LN_WB_B_fixed, AArch64::LD2LN_WB_H_fixed, - AArch64::LD2LN_WB_S_fixed, AArch64::LD2LN_WB_D_fixed - }; - return SelectVLDSTLane(Node, true, true, 2, Opcodes); - } - case AArch64ISD::NEON_LD3LN_UPD: { - static const uint16_t Opcodes[] = { - AArch64::LD3LN_WB_B_fixed, AArch64::LD3LN_WB_H_fixed, - AArch64::LD3LN_WB_S_fixed, AArch64::LD3LN_WB_D_fixed - }; - return SelectVLDSTLane(Node, true, true, 3, Opcodes); - } - case AArch64ISD::NEON_LD4LN_UPD: { - static const uint16_t Opcodes[] = { - AArch64::LD4LN_WB_B_fixed, AArch64::LD4LN_WB_H_fixed, - AArch64::LD4LN_WB_S_fixed, AArch64::LD4LN_WB_D_fixed - }; - return SelectVLDSTLane(Node, true, true, 4, Opcodes); - } - case AArch64ISD::NEON_ST2LN_UPD: { - static const uint16_t Opcodes[] = { - AArch64::ST2LN_WB_B_fixed, AArch64::ST2LN_WB_H_fixed, - AArch64::ST2LN_WB_S_fixed, AArch64::ST2LN_WB_D_fixed - }; - return SelectVLDSTLane(Node, false, true, 2, Opcodes); - } - case AArch64ISD::NEON_ST3LN_UPD: { - static const uint16_t Opcodes[] = { - AArch64::ST3LN_WB_B_fixed, AArch64::ST3LN_WB_H_fixed, - AArch64::ST3LN_WB_S_fixed, AArch64::ST3LN_WB_D_fixed - }; - return SelectVLDSTLane(Node, false, true, 3, Opcodes); - } - case AArch64ISD::NEON_ST4LN_UPD: { - static const uint16_t Opcodes[] = { - AArch64::ST4LN_WB_B_fixed, AArch64::ST4LN_WB_H_fixed, - 
AArch64::ST4LN_WB_S_fixed, AArch64::ST4LN_WB_D_fixed - }; - return SelectVLDSTLane(Node, false, true, 4, Opcodes); - } - case AArch64ISD::NEON_ST1x2_UPD: { - static const uint16_t Opcodes[] = { - AArch64::ST1x2WB_8B_fixed, AArch64::ST1x2WB_4H_fixed, - AArch64::ST1x2WB_2S_fixed, AArch64::ST1x2WB_1D_fixed, - AArch64::ST1x2WB_16B_fixed, AArch64::ST1x2WB_8H_fixed, - AArch64::ST1x2WB_4S_fixed, AArch64::ST1x2WB_2D_fixed - }; - return SelectVST(Node, true, 2, Opcodes); - } - case AArch64ISD::NEON_ST1x3_UPD: { - static const uint16_t Opcodes[] = { - AArch64::ST1x3WB_8B_fixed, AArch64::ST1x3WB_4H_fixed, - AArch64::ST1x3WB_2S_fixed, AArch64::ST1x3WB_1D_fixed, - AArch64::ST1x3WB_16B_fixed, AArch64::ST1x3WB_8H_fixed, - AArch64::ST1x3WB_4S_fixed, AArch64::ST1x3WB_2D_fixed - }; - return SelectVST(Node, true, 3, Opcodes); - } - case AArch64ISD::NEON_ST1x4_UPD: { - static const uint16_t Opcodes[] = { - AArch64::ST1x4WB_8B_fixed, AArch64::ST1x4WB_4H_fixed, - AArch64::ST1x4WB_2S_fixed, AArch64::ST1x4WB_1D_fixed, - AArch64::ST1x4WB_16B_fixed, AArch64::ST1x4WB_8H_fixed, - AArch64::ST1x4WB_4S_fixed, AArch64::ST1x4WB_2D_fixed - }; - return SelectVST(Node, true, 4, Opcodes); - } - case ISD::INTRINSIC_WO_CHAIN: { - unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue(); - bool IsExt = false; - switch (IntNo) { - default: - break; - case Intrinsic::aarch64_neon_vtbx1: - IsExt = true; - case Intrinsic::aarch64_neon_vtbl1: - return SelectVTBL(Node, 1, IsExt); - case Intrinsic::aarch64_neon_vtbx2: - IsExt = true; - case Intrinsic::aarch64_neon_vtbl2: - return SelectVTBL(Node, 2, IsExt); - case Intrinsic::aarch64_neon_vtbx3: - IsExt = true; - case Intrinsic::aarch64_neon_vtbl3: - return SelectVTBL(Node, 3, IsExt); - case Intrinsic::aarch64_neon_vtbx4: - IsExt = true; - case Intrinsic::aarch64_neon_vtbl4: - return SelectVTBL(Node, 4, IsExt); - } - break; - } - case ISD::INTRINSIC_VOID: - case ISD::INTRINSIC_W_CHAIN: { - unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); - switch (IntNo) { - default: - break; - case Intrinsic::arm_neon_vld1: { - static const uint16_t Opcodes[] = { - AArch64::LD1_8B, AArch64::LD1_4H, AArch64::LD1_2S, AArch64::LD1_1D, - AArch64::LD1_16B, AArch64::LD1_8H, AArch64::LD1_4S, AArch64::LD1_2D - }; - return SelectVLD(Node, false, 1, Opcodes); - } - case Intrinsic::arm_neon_vld2: { - static const uint16_t Opcodes[] = { - AArch64::LD2_8B, AArch64::LD2_4H, AArch64::LD2_2S, AArch64::LD1x2_1D, - AArch64::LD2_16B, AArch64::LD2_8H, AArch64::LD2_4S, AArch64::LD2_2D - }; - return SelectVLD(Node, false, 2, Opcodes); - } - case Intrinsic::arm_neon_vld3: { - static const uint16_t Opcodes[] = { - AArch64::LD3_8B, AArch64::LD3_4H, AArch64::LD3_2S, AArch64::LD1x3_1D, - AArch64::LD3_16B, AArch64::LD3_8H, AArch64::LD3_4S, AArch64::LD3_2D - }; - return SelectVLD(Node, false, 3, Opcodes); - } - case Intrinsic::arm_neon_vld4: { - static const uint16_t Opcodes[] = { - AArch64::LD4_8B, AArch64::LD4_4H, AArch64::LD4_2S, AArch64::LD1x4_1D, - AArch64::LD4_16B, AArch64::LD4_8H, AArch64::LD4_4S, AArch64::LD4_2D - }; - return SelectVLD(Node, false, 4, Opcodes); - } - case Intrinsic::aarch64_neon_vld1x2: { - static const uint16_t Opcodes[] = { - AArch64::LD1x2_8B, AArch64::LD1x2_4H, AArch64::LD1x2_2S, - AArch64::LD1x2_1D, AArch64::LD1x2_16B, AArch64::LD1x2_8H, - AArch64::LD1x2_4S, AArch64::LD1x2_2D - }; - return SelectVLD(Node, false, 2, Opcodes); - } - case Intrinsic::aarch64_neon_vld1x3: { - static const uint16_t Opcodes[] = { - AArch64::LD1x3_8B, AArch64::LD1x3_4H, 
AArch64::LD1x3_2S, - AArch64::LD1x3_1D, AArch64::LD1x3_16B, AArch64::LD1x3_8H, - AArch64::LD1x3_4S, AArch64::LD1x3_2D - }; - return SelectVLD(Node, false, 3, Opcodes); - } - case Intrinsic::aarch64_neon_vld1x4: { - static const uint16_t Opcodes[] = { - AArch64::LD1x4_8B, AArch64::LD1x4_4H, AArch64::LD1x4_2S, - AArch64::LD1x4_1D, AArch64::LD1x4_16B, AArch64::LD1x4_8H, - AArch64::LD1x4_4S, AArch64::LD1x4_2D - }; - return SelectVLD(Node, false, 4, Opcodes); - } - case Intrinsic::arm_neon_vst1: { - static const uint16_t Opcodes[] = { - AArch64::ST1_8B, AArch64::ST1_4H, AArch64::ST1_2S, AArch64::ST1_1D, - AArch64::ST1_16B, AArch64::ST1_8H, AArch64::ST1_4S, AArch64::ST1_2D - }; - return SelectVST(Node, false, 1, Opcodes); - } - case Intrinsic::arm_neon_vst2: { - static const uint16_t Opcodes[] = { - AArch64::ST2_8B, AArch64::ST2_4H, AArch64::ST2_2S, AArch64::ST1x2_1D, - AArch64::ST2_16B, AArch64::ST2_8H, AArch64::ST2_4S, AArch64::ST2_2D - }; - return SelectVST(Node, false, 2, Opcodes); - } - case Intrinsic::arm_neon_vst3: { - static const uint16_t Opcodes[] = { - AArch64::ST3_8B, AArch64::ST3_4H, AArch64::ST3_2S, AArch64::ST1x3_1D, - AArch64::ST3_16B, AArch64::ST3_8H, AArch64::ST3_4S, AArch64::ST3_2D - }; - return SelectVST(Node, false, 3, Opcodes); - } - case Intrinsic::arm_neon_vst4: { - static const uint16_t Opcodes[] = { - AArch64::ST4_8B, AArch64::ST4_4H, AArch64::ST4_2S, AArch64::ST1x4_1D, - AArch64::ST4_16B, AArch64::ST4_8H, AArch64::ST4_4S, AArch64::ST4_2D - }; - return SelectVST(Node, false, 4, Opcodes); - } - case Intrinsic::aarch64_neon_vst1x2: { - static const uint16_t Opcodes[] = { - AArch64::ST1x2_8B, AArch64::ST1x2_4H, AArch64::ST1x2_2S, - AArch64::ST1x2_1D, AArch64::ST1x2_16B, AArch64::ST1x2_8H, - AArch64::ST1x2_4S, AArch64::ST1x2_2D - }; - return SelectVST(Node, false, 2, Opcodes); - } - case Intrinsic::aarch64_neon_vst1x3: { - static const uint16_t Opcodes[] = { - AArch64::ST1x3_8B, AArch64::ST1x3_4H, AArch64::ST1x3_2S, - AArch64::ST1x3_1D, AArch64::ST1x3_16B, AArch64::ST1x3_8H, - AArch64::ST1x3_4S, AArch64::ST1x3_2D - }; - return SelectVST(Node, false, 3, Opcodes); - } - case Intrinsic::aarch64_neon_vst1x4: { - static const uint16_t Opcodes[] = { - AArch64::ST1x4_8B, AArch64::ST1x4_4H, AArch64::ST1x4_2S, - AArch64::ST1x4_1D, AArch64::ST1x4_16B, AArch64::ST1x4_8H, - AArch64::ST1x4_4S, AArch64::ST1x4_2D - }; - return SelectVST(Node, false, 4, Opcodes); - } - case Intrinsic::arm_neon_vld2lane: { - static const uint16_t Opcodes[] = { - AArch64::LD2LN_B, AArch64::LD2LN_H, AArch64::LD2LN_S, AArch64::LD2LN_D - }; - return SelectVLDSTLane(Node, true, false, 2, Opcodes); - } - case Intrinsic::arm_neon_vld3lane: { - static const uint16_t Opcodes[] = { - AArch64::LD3LN_B, AArch64::LD3LN_H, AArch64::LD3LN_S, AArch64::LD3LN_D - }; - return SelectVLDSTLane(Node, true, false, 3, Opcodes); - } - case Intrinsic::arm_neon_vld4lane: { - static const uint16_t Opcodes[] = { - AArch64::LD4LN_B, AArch64::LD4LN_H, AArch64::LD4LN_S, AArch64::LD4LN_D - }; - return SelectVLDSTLane(Node, true, false, 4, Opcodes); - } - case Intrinsic::arm_neon_vst2lane: { - static const uint16_t Opcodes[] = { - AArch64::ST2LN_B, AArch64::ST2LN_H, AArch64::ST2LN_S, AArch64::ST2LN_D - }; - return SelectVLDSTLane(Node, false, false, 2, Opcodes); - } - case Intrinsic::arm_neon_vst3lane: { - static const uint16_t Opcodes[] = { - AArch64::ST3LN_B, AArch64::ST3LN_H, AArch64::ST3LN_S, AArch64::ST3LN_D - }; - return SelectVLDSTLane(Node, false, false, 3, Opcodes); - } - case Intrinsic::arm_neon_vst4lane: { - static const uint16_t 
Opcodes[] = { - AArch64::ST4LN_B, AArch64::ST4LN_H, AArch64::ST4LN_S, AArch64::ST4LN_D - }; - return SelectVLDSTLane(Node, false, false, 4, Opcodes); - } - } // End of switch IntNo - break; - } // End of case ISD::INTRINSIC_VOID and :ISD::INTRINSIC_W_CHAIN - default: - break; // Let generic code handle it - } - - SDNode *ResNode = SelectCode(Node); - - DEBUG(dbgs() << "=> "; - if (ResNode == nullptr || ResNode == Node) - Node->dump(CurDAG); - else - ResNode->dump(CurDAG); - dbgs() << "\n"); - - return ResNode; -} - -/// This pass converts a legalized DAG into a AArch64-specific DAG, ready for -/// instruction scheduling. -FunctionPass *llvm::createAArch64ISelDAG(AArch64TargetMachine &TM, - CodeGenOpt::Level OptLevel) { - return new AArch64DAGToDAGISel(TM, OptLevel); -} diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp deleted file mode 100644 index d02a03ccb2a..00000000000 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ /dev/null @@ -1,5564 +0,0 @@ -//===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation -----===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the interfaces that AArch64 uses to lower LLVM code into a -// selection DAG. -// -//===----------------------------------------------------------------------===// - -#include "AArch64.h" -#include "AArch64ISelLowering.h" -#include "AArch64MachineFunctionInfo.h" -#include "AArch64Subtarget.h" -#include "AArch64TargetMachine.h" -#include "AArch64TargetObjectFile.h" -#include "Utils/AArch64BaseInfo.h" -#include "llvm/CodeGen/Analysis.h" -#include "llvm/CodeGen/CallingConvLower.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" -#include "llvm/IR/CallingConv.h" -#include "llvm/Support/MathExtras.h" - -using namespace llvm; - -#define DEBUG_TYPE "aarch64-isel" - -static TargetLoweringObjectFile *createTLOF(AArch64TargetMachine &TM) { - assert (TM.getSubtarget<AArch64Subtarget>().isTargetELF() && - "unknown subtarget type"); - return new AArch64ElfTargetObjectFile(); -} - -AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM) - : TargetLowering(TM, createTLOF(TM)), Itins(TM.getInstrItineraryData()) { - - const AArch64Subtarget *Subtarget = &TM.getSubtarget<AArch64Subtarget>(); - - // SIMD compares set the entire lane's bits to 1 - setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); - - // Scalar register <-> type mapping - addRegisterClass(MVT::i32, &AArch64::GPR32RegClass); - addRegisterClass(MVT::i64, &AArch64::GPR64RegClass); - - if (Subtarget->hasFPARMv8()) { - addRegisterClass(MVT::f16, &AArch64::FPR16RegClass); - addRegisterClass(MVT::f32, &AArch64::FPR32RegClass); - addRegisterClass(MVT::f64, &AArch64::FPR64RegClass); - addRegisterClass(MVT::f128, &AArch64::FPR128RegClass); - } - - if (Subtarget->hasNEON()) { - // And the vectors - addRegisterClass(MVT::v1i8, &AArch64::FPR8RegClass); - addRegisterClass(MVT::v1i16, &AArch64::FPR16RegClass); - addRegisterClass(MVT::v1i32, &AArch64::FPR32RegClass); - addRegisterClass(MVT::v1i64, &AArch64::FPR64RegClass); - addRegisterClass(MVT::v1f64, &AArch64::FPR64RegClass); - addRegisterClass(MVT::v8i8, 
&AArch64::FPR64RegClass); - addRegisterClass(MVT::v4i16, &AArch64::FPR64RegClass); - addRegisterClass(MVT::v2i32, &AArch64::FPR64RegClass); - addRegisterClass(MVT::v1i64, &AArch64::FPR64RegClass); - addRegisterClass(MVT::v2f32, &AArch64::FPR64RegClass); - addRegisterClass(MVT::v16i8, &AArch64::FPR128RegClass); - addRegisterClass(MVT::v8i16, &AArch64::FPR128RegClass); - addRegisterClass(MVT::v4i32, &AArch64::FPR128RegClass); - addRegisterClass(MVT::v2i64, &AArch64::FPR128RegClass); - addRegisterClass(MVT::v4f32, &AArch64::FPR128RegClass); - addRegisterClass(MVT::v2f64, &AArch64::FPR128RegClass); - } - - computeRegisterProperties(); - - // We combine OR nodes for bitfield and NEON BSL operations. - setTargetDAGCombine(ISD::OR); - - setTargetDAGCombine(ISD::AND); - setTargetDAGCombine(ISD::SRA); - setTargetDAGCombine(ISD::SRL); - setTargetDAGCombine(ISD::SHL); - - setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); - setTargetDAGCombine(ISD::INTRINSIC_VOID); - setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); - - // AArch64 does not have i1 loads, or much of anything for i1 really. - setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); - setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); - setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote); - - setStackPointerRegisterToSaveRestore(AArch64::XSP); - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand); - setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); - setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); - - // We'll lower globals to wrappers for selection. - setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); - setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom); - - // A64 instructions have the comparison predicate attached to the user of the - // result, but having a separate comparison is valuable for matching. 
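To illustrate why the comparison is kept as its own node, one source-level comparison frequently has several predicated users; each A64 consumer (a conditional branch or select) carries the condition in its own encoding while sharing a single flag-setting compare. A minimal standalone example, assuming nothing beyond standard C++:

#include <cstdint>

// One comparison, two users: a conditional branch and a conditional select.
// On A64 this typically lowers to a single "cmp" setting NZCV, consumed by a
// "b.lt" and a "csel" that each encode the predicate themselves, which is why
// the DAG models the comparison separately from its users.
int64_t minAndCount(int64_t A, int64_t B, int64_t &Count) {
  bool Less = A < B;     // shared comparison
  if (Less)              // user 1: conditional branch
    ++Count;
  return Less ? A : B;   // user 2: conditional select
}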
- setOperationAction(ISD::BR_CC, MVT::i32, Custom); - setOperationAction(ISD::BR_CC, MVT::i64, Custom); - setOperationAction(ISD::BR_CC, MVT::f32, Custom); - setOperationAction(ISD::BR_CC, MVT::f64, Custom); - - setOperationAction(ISD::SELECT, MVT::i32, Custom); - setOperationAction(ISD::SELECT, MVT::i64, Custom); - setOperationAction(ISD::SELECT, MVT::f32, Custom); - setOperationAction(ISD::SELECT, MVT::f64, Custom); - - setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); - setOperationAction(ISD::SELECT_CC, MVT::i64, Custom); - setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); - setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); - - setOperationAction(ISD::BRCOND, MVT::Other, Custom); - - setOperationAction(ISD::SETCC, MVT::i32, Custom); - setOperationAction(ISD::SETCC, MVT::i64, Custom); - setOperationAction(ISD::SETCC, MVT::f32, Custom); - setOperationAction(ISD::SETCC, MVT::f64, Custom); - - setOperationAction(ISD::BR_JT, MVT::Other, Expand); - setOperationAction(ISD::JumpTable, MVT::i32, Custom); - setOperationAction(ISD::JumpTable, MVT::i64, Custom); - - setOperationAction(ISD::VASTART, MVT::Other, Custom); - setOperationAction(ISD::VACOPY, MVT::Other, Custom); - setOperationAction(ISD::VAEND, MVT::Other, Expand); - setOperationAction(ISD::VAARG, MVT::Other, Expand); - - setOperationAction(ISD::BlockAddress, MVT::i64, Custom); - setOperationAction(ISD::ConstantPool, MVT::i64, Custom); - - setOperationAction(ISD::ROTL, MVT::i32, Expand); - setOperationAction(ISD::ROTL, MVT::i64, Expand); - - setOperationAction(ISD::UREM, MVT::i32, Expand); - setOperationAction(ISD::UREM, MVT::i64, Expand); - setOperationAction(ISD::UDIVREM, MVT::i32, Expand); - setOperationAction(ISD::UDIVREM, MVT::i64, Expand); - - setOperationAction(ISD::SREM, MVT::i32, Expand); - setOperationAction(ISD::SREM, MVT::i64, Expand); - setOperationAction(ISD::SDIVREM, MVT::i32, Expand); - setOperationAction(ISD::SDIVREM, MVT::i64, Expand); - - setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); - setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); - setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); - setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); - - setOperationAction(ISD::CTPOP, MVT::i32, Expand); - setOperationAction(ISD::CTPOP, MVT::i64, Expand); - - // Legal floating-point operations. - setOperationAction(ISD::FABS, MVT::f32, Legal); - setOperationAction(ISD::FABS, MVT::f64, Legal); - - setOperationAction(ISD::FCEIL, MVT::f32, Legal); - setOperationAction(ISD::FCEIL, MVT::f64, Legal); - - setOperationAction(ISD::FFLOOR, MVT::f32, Legal); - setOperationAction(ISD::FFLOOR, MVT::f64, Legal); - - setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal); - setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal); - - setOperationAction(ISD::FNEG, MVT::f32, Legal); - setOperationAction(ISD::FNEG, MVT::f64, Legal); - - setOperationAction(ISD::FRINT, MVT::f32, Legal); - setOperationAction(ISD::FRINT, MVT::f64, Legal); - - setOperationAction(ISD::FSQRT, MVT::f32, Legal); - setOperationAction(ISD::FSQRT, MVT::f64, Legal); - - setOperationAction(ISD::FTRUNC, MVT::f32, Legal); - setOperationAction(ISD::FTRUNC, MVT::f64, Legal); - - setOperationAction(ISD::ConstantFP, MVT::f32, Legal); - setOperationAction(ISD::ConstantFP, MVT::f64, Legal); - setOperationAction(ISD::ConstantFP, MVT::f128, Legal); - - // Illegal floating-point operations. 
- setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); - setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); - - setOperationAction(ISD::FCOS, MVT::f32, Expand); - setOperationAction(ISD::FCOS, MVT::f64, Expand); - - setOperationAction(ISD::FEXP, MVT::f32, Expand); - setOperationAction(ISD::FEXP, MVT::f64, Expand); - - setOperationAction(ISD::FEXP2, MVT::f32, Expand); - setOperationAction(ISD::FEXP2, MVT::f64, Expand); - - setOperationAction(ISD::FLOG, MVT::f32, Expand); - setOperationAction(ISD::FLOG, MVT::f64, Expand); - - setOperationAction(ISD::FLOG2, MVT::f32, Expand); - setOperationAction(ISD::FLOG2, MVT::f64, Expand); - - setOperationAction(ISD::FLOG10, MVT::f32, Expand); - setOperationAction(ISD::FLOG10, MVT::f64, Expand); - - setOperationAction(ISD::FPOW, MVT::f32, Expand); - setOperationAction(ISD::FPOW, MVT::f64, Expand); - - setOperationAction(ISD::FPOWI, MVT::f32, Expand); - setOperationAction(ISD::FPOWI, MVT::f64, Expand); - - setOperationAction(ISD::FREM, MVT::f32, Expand); - setOperationAction(ISD::FREM, MVT::f64, Expand); - - setOperationAction(ISD::FSIN, MVT::f32, Expand); - setOperationAction(ISD::FSIN, MVT::f64, Expand); - - setOperationAction(ISD::FSINCOS, MVT::f32, Expand); - setOperationAction(ISD::FSINCOS, MVT::f64, Expand); - - // Virtually no operation on f128 is legal, but LLVM can't expand them when - // there's a valid register class, so we need custom operations in most cases. - setOperationAction(ISD::FABS, MVT::f128, Expand); - setOperationAction(ISD::FADD, MVT::f128, Custom); - setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand); - setOperationAction(ISD::FCOS, MVT::f128, Expand); - setOperationAction(ISD::FDIV, MVT::f128, Custom); - setOperationAction(ISD::FMA, MVT::f128, Expand); - setOperationAction(ISD::FMUL, MVT::f128, Custom); - setOperationAction(ISD::FNEG, MVT::f128, Expand); - setOperationAction(ISD::FP_EXTEND, MVT::f128, Expand); - setOperationAction(ISD::FP_ROUND, MVT::f128, Expand); - setOperationAction(ISD::FPOW, MVT::f128, Expand); - setOperationAction(ISD::FREM, MVT::f128, Expand); - setOperationAction(ISD::FRINT, MVT::f128, Expand); - setOperationAction(ISD::FSIN, MVT::f128, Expand); - setOperationAction(ISD::FSINCOS, MVT::f128, Expand); - setOperationAction(ISD::FSQRT, MVT::f128, Expand); - setOperationAction(ISD::FSUB, MVT::f128, Custom); - setOperationAction(ISD::FTRUNC, MVT::f128, Expand); - setOperationAction(ISD::SETCC, MVT::f128, Custom); - setOperationAction(ISD::BR_CC, MVT::f128, Custom); - setOperationAction(ISD::SELECT, MVT::f128, Expand); - setOperationAction(ISD::SELECT_CC, MVT::f128, Custom); - setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom); - - // Lowering for many of the conversions is actually specified by the non-f128 - // type. The LowerXXX function will be trivial when f128 isn't involved. 
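A minimal sketch of the idea that only the f128 cases need real work, with invented names purely for illustration (this is not the actual LowerFP_TO_SINT or its signature):

// Hypothetical decision helper mirroring the comment above: conversions that
// involve f128 are rewritten as runtime-library calls; everything else is
// left to the ordinary instruction-selection patterns, so the custom hook
// is trivial when f128 is not involved.
enum class Ty { I32, I64, I128, F32, F64, F128 };

inline bool conversionNeedsLibcall(Ty Src, Ty Dst) {
  return Src == Ty::F128 || Dst == Ty::F128;
}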
- setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); - setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); - setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom); - setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); - setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); - setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom); - setOperationAction(ISD::FP_ROUND, MVT::f32, Custom); - setOperationAction(ISD::FP_ROUND, MVT::f64, Custom); - - // i128 shift operation support - setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom); - setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom); - setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom); - - // This prevents LLVM trying to compress double constants into a floating - // constant-pool entry and trying to load from there. It's of doubtful benefit - // for A64: we'd need LDR followed by FCVT, I believe. - setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand); - setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); - setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand); - - setTruncStoreAction(MVT::f128, MVT::f64, Expand); - setTruncStoreAction(MVT::f128, MVT::f32, Expand); - setTruncStoreAction(MVT::f128, MVT::f16, Expand); - setTruncStoreAction(MVT::f64, MVT::f32, Expand); - setTruncStoreAction(MVT::f64, MVT::f16, Expand); - setTruncStoreAction(MVT::f32, MVT::f16, Expand); - - setExceptionPointerRegister(AArch64::X0); - setExceptionSelectorRegister(AArch64::X1); - - if (Subtarget->hasNEON()) { - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i8, Expand); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v1i64, Expand); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v16i8, Expand); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i16, Expand); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Expand); - - setOperationAction(ISD::BUILD_VECTOR, MVT::v1i8, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v1i16, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v1i32, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v2f32, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v1f64, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom); - - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i16, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, 
MVT::v2i32, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i32, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1i64, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f32, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1f64, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom); - - setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i32, Legal); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i8, Legal); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i16, Legal); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Legal); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64, Legal); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Legal); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f64, Legal); - - setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i8, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i16, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i8, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i16, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom); - - setOperationAction(ISD::SETCC, MVT::v8i8, Custom); - setOperationAction(ISD::SETCC, MVT::v16i8, Custom); - setOperationAction(ISD::SETCC, MVT::v4i16, Custom); - setOperationAction(ISD::SETCC, MVT::v8i16, Custom); - setOperationAction(ISD::SETCC, MVT::v2i32, Custom); - setOperationAction(ISD::SETCC, MVT::v4i32, Custom); - setOperationAction(ISD::SETCC, MVT::v1i64, Custom); - setOperationAction(ISD::SETCC, MVT::v2i64, Custom); - setOperationAction(ISD::SETCC, MVT::v2f32, Custom); - setOperationAction(ISD::SETCC, MVT::v4f32, Custom); - setOperationAction(ISD::SETCC, MVT::v1f64, Custom); - setOperationAction(ISD::SETCC, MVT::v2f64, Custom); - - setOperationAction(ISD::FFLOOR, MVT::v2f32, Legal); - setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal); - setOperationAction(ISD::FFLOOR, MVT::v1f64, Legal); - setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal); - - setOperationAction(ISD::FCEIL, MVT::v2f32, Legal); - setOperationAction(ISD::FCEIL, MVT::v4f32, Legal); - setOperationAction(ISD::FCEIL, MVT::v1f64, Legal); - setOperationAction(ISD::FCEIL, MVT::v2f64, Legal); - - setOperationAction(ISD::FTRUNC, MVT::v2f32, Legal); - setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal); - setOperationAction(ISD::FTRUNC, MVT::v1f64, Legal); - setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal); - - setOperationAction(ISD::FRINT, MVT::v2f32, Legal); - setOperationAction(ISD::FRINT, MVT::v4f32, Legal); - setOperationAction(ISD::FRINT, MVT::v1f64, Legal); - setOperationAction(ISD::FRINT, MVT::v2f64, Legal); - - setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Legal); - setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal); - setOperationAction(ISD::FNEARBYINT, MVT::v1f64, Legal); - setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal); - - setOperationAction(ISD::FROUND, MVT::v2f32, Legal); - setOperationAction(ISD::FROUND, MVT::v4f32, Legal); - setOperationAction(ISD::FROUND, MVT::v1f64, Legal); - setOperationAction(ISD::FROUND, MVT::v2f64, Legal); - - setOperationAction(ISD::SINT_TO_FP, MVT::v1i8, Custom); - setOperationAction(ISD::SINT_TO_FP, MVT::v1i16, Custom); - setOperationAction(ISD::SINT_TO_FP, MVT::v1i32, Custom); - setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom); - setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom); - setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Custom); - - 
setOperationAction(ISD::UINT_TO_FP, MVT::v1i8, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::v1i16, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::v1i32, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Custom); - - setOperationAction(ISD::FP_TO_SINT, MVT::v1i8, Custom); - setOperationAction(ISD::FP_TO_SINT, MVT::v1i16, Custom); - setOperationAction(ISD::FP_TO_SINT, MVT::v1i32, Custom); - setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom); - setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom); - setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Custom); - - setOperationAction(ISD::FP_TO_UINT, MVT::v1i8, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::v1i16, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::v1i32, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Custom); - - // Neon does not support vector divide/remainder operations except - // floating-point divide. - setOperationAction(ISD::SDIV, MVT::v1i8, Expand); - setOperationAction(ISD::SDIV, MVT::v8i8, Expand); - setOperationAction(ISD::SDIV, MVT::v16i8, Expand); - setOperationAction(ISD::SDIV, MVT::v1i16, Expand); - setOperationAction(ISD::SDIV, MVT::v4i16, Expand); - setOperationAction(ISD::SDIV, MVT::v8i16, Expand); - setOperationAction(ISD::SDIV, MVT::v1i32, Expand); - setOperationAction(ISD::SDIV, MVT::v2i32, Expand); - setOperationAction(ISD::SDIV, MVT::v4i32, Expand); - setOperationAction(ISD::SDIV, MVT::v1i64, Expand); - setOperationAction(ISD::SDIV, MVT::v2i64, Expand); - - setOperationAction(ISD::UDIV, MVT::v1i8, Expand); - setOperationAction(ISD::UDIV, MVT::v8i8, Expand); - setOperationAction(ISD::UDIV, MVT::v16i8, Expand); - setOperationAction(ISD::UDIV, MVT::v1i16, Expand); - setOperationAction(ISD::UDIV, MVT::v4i16, Expand); - setOperationAction(ISD::UDIV, MVT::v8i16, Expand); - setOperationAction(ISD::UDIV, MVT::v1i32, Expand); - setOperationAction(ISD::UDIV, MVT::v2i32, Expand); - setOperationAction(ISD::UDIV, MVT::v4i32, Expand); - setOperationAction(ISD::UDIV, MVT::v1i64, Expand); - setOperationAction(ISD::UDIV, MVT::v2i64, Expand); - - setOperationAction(ISD::SREM, MVT::v1i8, Expand); - setOperationAction(ISD::SREM, MVT::v8i8, Expand); - setOperationAction(ISD::SREM, MVT::v16i8, Expand); - setOperationAction(ISD::SREM, MVT::v1i16, Expand); - setOperationAction(ISD::SREM, MVT::v4i16, Expand); - setOperationAction(ISD::SREM, MVT::v8i16, Expand); - setOperationAction(ISD::SREM, MVT::v1i32, Expand); - setOperationAction(ISD::SREM, MVT::v2i32, Expand); - setOperationAction(ISD::SREM, MVT::v4i32, Expand); - setOperationAction(ISD::SREM, MVT::v1i64, Expand); - setOperationAction(ISD::SREM, MVT::v2i64, Expand); - - setOperationAction(ISD::UREM, MVT::v1i8, Expand); - setOperationAction(ISD::UREM, MVT::v8i8, Expand); - setOperationAction(ISD::UREM, MVT::v16i8, Expand); - setOperationAction(ISD::UREM, MVT::v1i16, Expand); - setOperationAction(ISD::UREM, MVT::v4i16, Expand); - setOperationAction(ISD::UREM, MVT::v8i16, Expand); - setOperationAction(ISD::UREM, MVT::v1i32, Expand); - setOperationAction(ISD::UREM, MVT::v2i32, Expand); - setOperationAction(ISD::UREM, MVT::v4i32, Expand); - setOperationAction(ISD::UREM, MVT::v1i64, Expand); - setOperationAction(ISD::UREM, MVT::v2i64, Expand); - - setOperationAction(ISD::FREM, MVT::v2f32, Expand); - 
setOperationAction(ISD::FREM, MVT::v4f32, Expand); - setOperationAction(ISD::FREM, MVT::v1f64, Expand); - setOperationAction(ISD::FREM, MVT::v2f64, Expand); - - setOperationAction(ISD::SELECT, MVT::v8i8, Expand); - setOperationAction(ISD::SELECT, MVT::v16i8, Expand); - setOperationAction(ISD::SELECT, MVT::v4i16, Expand); - setOperationAction(ISD::SELECT, MVT::v8i16, Expand); - setOperationAction(ISD::SELECT, MVT::v2i32, Expand); - setOperationAction(ISD::SELECT, MVT::v4i32, Expand); - setOperationAction(ISD::SELECT, MVT::v1i64, Expand); - setOperationAction(ISD::SELECT, MVT::v2i64, Expand); - setOperationAction(ISD::SELECT, MVT::v2f32, Expand); - setOperationAction(ISD::SELECT, MVT::v4f32, Expand); - setOperationAction(ISD::SELECT, MVT::v1f64, Expand); - setOperationAction(ISD::SELECT, MVT::v2f64, Expand); - - setOperationAction(ISD::SELECT_CC, MVT::v8i8, Custom); - setOperationAction(ISD::SELECT_CC, MVT::v16i8, Custom); - setOperationAction(ISD::SELECT_CC, MVT::v4i16, Custom); - setOperationAction(ISD::SELECT_CC, MVT::v8i16, Custom); - setOperationAction(ISD::SELECT_CC, MVT::v2i32, Custom); - setOperationAction(ISD::SELECT_CC, MVT::v4i32, Custom); - setOperationAction(ISD::SELECT_CC, MVT::v1i64, Custom); - setOperationAction(ISD::SELECT_CC, MVT::v2i64, Custom); - setOperationAction(ISD::SELECT_CC, MVT::v2f32, Custom); - setOperationAction(ISD::SELECT_CC, MVT::v4f32, Custom); - setOperationAction(ISD::SELECT_CC, MVT::v1f64, Custom); - setOperationAction(ISD::SELECT_CC, MVT::v2f64, Custom); - - // Vector ExtLoad and TruncStore are expanded. - for (unsigned I = MVT::FIRST_VECTOR_VALUETYPE; - I <= MVT::LAST_VECTOR_VALUETYPE; ++I) { - MVT VT = (MVT::SimpleValueType) I; - setLoadExtAction(ISD::SEXTLOAD, VT, Expand); - setLoadExtAction(ISD::ZEXTLOAD, VT, Expand); - setLoadExtAction(ISD::EXTLOAD, VT, Expand); - for (unsigned II = MVT::FIRST_VECTOR_VALUETYPE; - II <= MVT::LAST_VECTOR_VALUETYPE; ++II) { - MVT VT1 = (MVT::SimpleValueType) II; - // A TruncStore has two vector types of the same number of elements - // and different element sizes. - if (VT.getVectorNumElements() == VT1.getVectorNumElements() && - VT.getVectorElementType().getSizeInBits() - > VT1.getVectorElementType().getSizeInBits()) - setTruncStoreAction(VT, VT1, Expand); - } - - setOperationAction(ISD::MULHS, VT, Expand); - setOperationAction(ISD::SMUL_LOHI, VT, Expand); - setOperationAction(ISD::MULHU, VT, Expand); - setOperationAction(ISD::UMUL_LOHI, VT, Expand); - - setOperationAction(ISD::BSWAP, VT, Expand); - } - - // There is no v1i64/v2i64 multiply, expand v1i64/v2i64 to GPR i64 multiply. - // FIXME: For a v2i64 multiply, we copy VPR to GPR and do 2 i64 multiplies, - // and then copy back to VPR. This solution may be optimized by Following 3 - // NEON instructions: - // pmull v2.1q, v0.1d, v1.1d - // pmull2 v3.1q, v0.2d, v1.2d - // ins v2.d[1], v3.d[0] - // As currently we can't verify the correctness of such assumption, we can - // do such optimization in the future. 
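What marking these multiplies as Expand amounts to can be shown with a small standalone sketch of the scalarised form; the struct and function below are illustrative only, not the code the backend emits.

#include <cstdint>

// Illustrative scalarisation of a v2i64 multiply: each lane is moved to a
// general-purpose register, multiplied with the ordinary 64-bit MUL, and the
// results are reassembled, since there is no v2i64 vector multiply.
struct V2i64 { uint64_t Lane[2]; };

inline V2i64 mulV2i64(const V2i64 &A, const V2i64 &B) {
  V2i64 R;
  R.Lane[0] = A.Lane[0] * B.Lane[0];  // lane 0 via GPR multiply
  R.Lane[1] = A.Lane[1] * B.Lane[1];  // lane 1 via GPR multiply
  return R;
}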
- setOperationAction(ISD::MUL, MVT::v1i64, Expand); - setOperationAction(ISD::MUL, MVT::v2i64, Expand); - - setOperationAction(ISD::FCOS, MVT::v2f64, Expand); - setOperationAction(ISD::FCOS, MVT::v4f32, Expand); - setOperationAction(ISD::FCOS, MVT::v2f32, Expand); - setOperationAction(ISD::FSIN, MVT::v2f64, Expand); - setOperationAction(ISD::FSIN, MVT::v4f32, Expand); - setOperationAction(ISD::FSIN, MVT::v2f32, Expand); - setOperationAction(ISD::FPOW, MVT::v2f64, Expand); - setOperationAction(ISD::FPOW, MVT::v4f32, Expand); - setOperationAction(ISD::FPOW, MVT::v2f32, Expand); - } - - setTargetDAGCombine(ISD::SIGN_EXTEND); - setTargetDAGCombine(ISD::VSELECT); - - MaskAndBranchFoldingIsLegal = true; -} - -EVT AArch64TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { - // It's reasonably important that this value matches the "natural" legal - // promotion from i1 for scalar types. Otherwise LegalizeTypes can get itself - // in a twist (e.g. inserting an any_extend which then becomes i64 -> i64). - if (!VT.isVector()) return MVT::i32; - return VT.changeVectorElementTypeToInteger(); -} - -static void getExclusiveOperation(unsigned Size, AtomicOrdering Ord, - unsigned &LdrOpc, - unsigned &StrOpc) { - static const unsigned LoadBares[] = {AArch64::LDXR_byte, AArch64::LDXR_hword, - AArch64::LDXR_word, AArch64::LDXR_dword}; - static const unsigned LoadAcqs[] = {AArch64::LDAXR_byte, AArch64::LDAXR_hword, - AArch64::LDAXR_word, AArch64::LDAXR_dword}; - static const unsigned StoreBares[] = {AArch64::STXR_byte, AArch64::STXR_hword, - AArch64::STXR_word, AArch64::STXR_dword}; - static const unsigned StoreRels[] = {AArch64::STLXR_byte,AArch64::STLXR_hword, - AArch64::STLXR_word, AArch64::STLXR_dword}; - - const unsigned *LoadOps, *StoreOps; - if (Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent) - LoadOps = LoadAcqs; - else - LoadOps = LoadBares; - - if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent) - StoreOps = StoreRels; - else - StoreOps = StoreBares; - - assert(isPowerOf2_32(Size) && Size <= 8 && - "unsupported size for atomic binary op!"); - - LdrOpc = LoadOps[Log2_32(Size)]; - StrOpc = StoreOps[Log2_32(Size)]; -} - -// FIXME: AArch64::DTripleRegClass and AArch64::QTripleRegClass don't really -// have value type mapped, and they are both being defined as MVT::untyped. -// Without knowing the MVT type, MachineLICM::getRegisterClassIDAndCost -// would fail to figure out the register pressure correctly. -std::pair<const TargetRegisterClass*, uint8_t> -AArch64TargetLowering::findRepresentativeClass(MVT VT) const{ - const TargetRegisterClass *RRC = nullptr; - uint8_t Cost = 1; - switch (VT.SimpleTy) { - default: - return TargetLowering::findRepresentativeClass(VT); - case MVT::v4i64: - RRC = &AArch64::QPairRegClass; - Cost = 2; - break; - case MVT::v8i64: - RRC = &AArch64::QQuadRegClass; - Cost = 4; - break; - } - return std::make_pair(RRC, Cost); -} - -MachineBasicBlock * -AArch64TargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, - unsigned Size, - unsigned BinOpcode) const { - // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. 
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction *MF = BB->getParent(); - MachineFunction::iterator It = BB; - ++It; - - unsigned dest = MI->getOperand(0).getReg(); - unsigned ptr = MI->getOperand(1).getReg(); - unsigned incr = MI->getOperand(2).getReg(); - AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm()); - DebugLoc dl = MI->getDebugLoc(); - - MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); - - unsigned ldrOpc, strOpc; - getExclusiveOperation(Size, Ord, ldrOpc, strOpc); - - MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MF->insert(It, loopMBB); - MF->insert(It, exitMBB); - - // Transfer the remainder of BB and its successor edges to exitMBB. - exitMBB->splice(exitMBB->begin(), BB, - std::next(MachineBasicBlock::iterator(MI)), BB->end()); - exitMBB->transferSuccessorsAndUpdatePHIs(BB); - - const TargetRegisterClass *TRC - = Size == 8 ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; - unsigned scratch = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC); - - // thisMBB: - // ... - // fallthrough --> loopMBB - BB->addSuccessor(loopMBB); - - // loopMBB: - // ldxr dest, ptr - // <binop> scratch, dest, incr - // stxr stxr_status, scratch, ptr - // cbnz stxr_status, loopMBB - // fallthrough --> exitMBB - BB = loopMBB; - BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); - if (BinOpcode) { - // All arithmetic operations we'll be creating are designed to take an extra - // shift or extend operand, which we can conveniently set to zero. - - // Operand order needs to go the other way for NAND. - if (BinOpcode == AArch64::BICwww_lsl || BinOpcode == AArch64::BICxxx_lsl) - BuildMI(BB, dl, TII->get(BinOpcode), scratch) - .addReg(incr).addReg(dest).addImm(0); - else - BuildMI(BB, dl, TII->get(BinOpcode), scratch) - .addReg(dest).addReg(incr).addImm(0); - } - - // From the stxr, the register is GPR32; from the cmp it's GPR32wsp - unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass); - MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass); - - BuildMI(BB, dl, TII->get(strOpc), stxr_status).addReg(scratch).addReg(ptr); - BuildMI(BB, dl, TII->get(AArch64::CBNZw)) - .addReg(stxr_status).addMBB(loopMBB); - - BB->addSuccessor(loopMBB); - BB->addSuccessor(exitMBB); - - // exitMBB: - // ... - BB = exitMBB; - - MI->eraseFromParent(); // The instruction is gone now. 
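For reference, the block structure built here corresponds to a load-exclusive/store-exclusive retry loop. The snippet below is an illustrative C-level equivalent for a 64-bit fetch-and-add, assuming a Clang compiler targeting AArch64 that provides the ARM exclusive-access builtins; the real expansion additionally selects LDAXR/STLXR variants according to the atomic ordering.

#include <cstdint>

// Illustrative equivalent of the emitted loop:
//   loop: ldxr  old, [ptr]
//         add   tmp, old, incr
//         stxr  status, tmp, [ptr]
//         cbnz  status, loop
uint64_t atomicFetchAdd64(uint64_t *Ptr, uint64_t Incr) {
  uint64_t Old;
  do {
    Old = __builtin_arm_ldrex(Ptr);                 // load-exclusive
  } while (__builtin_arm_strex(Old + Incr, Ptr));   // store-exclusive, retry on failure
  return Old;
}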
- - return BB; -} - -MachineBasicBlock * -AArch64TargetLowering::emitAtomicBinaryMinMax(MachineInstr *MI, - MachineBasicBlock *BB, - unsigned Size, - unsigned CmpOp, - A64CC::CondCodes Cond) const { - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction *MF = BB->getParent(); - MachineFunction::iterator It = BB; - ++It; - - unsigned dest = MI->getOperand(0).getReg(); - unsigned ptr = MI->getOperand(1).getReg(); - unsigned incr = MI->getOperand(2).getReg(); - AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm()); - - unsigned oldval = dest; - DebugLoc dl = MI->getDebugLoc(); - - MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); - const TargetRegisterClass *TRC, *TRCsp; - if (Size == 8) { - TRC = &AArch64::GPR64RegClass; - TRCsp = &AArch64::GPR64xspRegClass; - } else { - TRC = &AArch64::GPR32RegClass; - TRCsp = &AArch64::GPR32wspRegClass; - } - - unsigned ldrOpc, strOpc; - getExclusiveOperation(Size, Ord, ldrOpc, strOpc); - - MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MF->insert(It, loopMBB); - MF->insert(It, exitMBB); - - // Transfer the remainder of BB and its successor edges to exitMBB. - exitMBB->splice(exitMBB->begin(), BB, - std::next(MachineBasicBlock::iterator(MI)), BB->end()); - exitMBB->transferSuccessorsAndUpdatePHIs(BB); - - unsigned scratch = MRI.createVirtualRegister(TRC); - MRI.constrainRegClass(scratch, TRCsp); - - // thisMBB: - // ... - // fallthrough --> loopMBB - BB->addSuccessor(loopMBB); - - // loopMBB: - // ldxr dest, ptr - // cmp incr, dest (, sign extend if necessary) - // csel scratch, dest, incr, cond - // stxr stxr_status, scratch, ptr - // cbnz stxr_status, loopMBB - // fallthrough --> exitMBB - BB = loopMBB; - BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); - - // Build compare and cmov instructions. - MRI.constrainRegClass(incr, TRCsp); - BuildMI(BB, dl, TII->get(CmpOp)) - .addReg(incr).addReg(oldval).addImm(0); - - BuildMI(BB, dl, TII->get(Size == 8 ? AArch64::CSELxxxc : AArch64::CSELwwwc), - scratch) - .addReg(oldval).addReg(incr).addImm(Cond); - - unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass); - MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass); - - BuildMI(BB, dl, TII->get(strOpc), stxr_status) - .addReg(scratch).addReg(ptr); - BuildMI(BB, dl, TII->get(AArch64::CBNZw)) - .addReg(stxr_status).addMBB(loopMBB); - - BB->addSuccessor(loopMBB); - BB->addSuccessor(exitMBB); - - // exitMBB: - // ... - BB = exitMBB; - - MI->eraseFromParent(); // The instruction is gone now. - - return BB; -} - -MachineBasicBlock * -AArch64TargetLowering::emitAtomicCmpSwap(MachineInstr *MI, - MachineBasicBlock *BB, - unsigned Size) const { - unsigned dest = MI->getOperand(0).getReg(); - unsigned ptr = MI->getOperand(1).getReg(); - unsigned oldval = MI->getOperand(2).getReg(); - unsigned newval = MI->getOperand(3).getReg(); - AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(4).getImm()); - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - DebugLoc dl = MI->getDebugLoc(); - - MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); - const TargetRegisterClass *TRCsp; - TRCsp = Size == 8 ? 
&AArch64::GPR64xspRegClass : &AArch64::GPR32wspRegClass; - - unsigned ldrOpc, strOpc; - getExclusiveOperation(Size, Ord, ldrOpc, strOpc); - - MachineFunction *MF = BB->getParent(); - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = BB; - ++It; // insert the new blocks after the current block - - MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MF->insert(It, loop1MBB); - MF->insert(It, loop2MBB); - MF->insert(It, exitMBB); - - // Transfer the remainder of BB and its successor edges to exitMBB. - exitMBB->splice(exitMBB->begin(), BB, - std::next(MachineBasicBlock::iterator(MI)), BB->end()); - exitMBB->transferSuccessorsAndUpdatePHIs(BB); - - // thisMBB: - // ... - // fallthrough --> loop1MBB - BB->addSuccessor(loop1MBB); - - // loop1MBB: - // ldxr dest, [ptr] - // cmp dest, oldval - // b.ne exitMBB - BB = loop1MBB; - BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); - - unsigned CmpOp = Size == 8 ? AArch64::CMPxx_lsl : AArch64::CMPww_lsl; - MRI.constrainRegClass(dest, TRCsp); - BuildMI(BB, dl, TII->get(CmpOp)) - .addReg(dest).addReg(oldval).addImm(0); - BuildMI(BB, dl, TII->get(AArch64::Bcc)) - .addImm(A64CC::NE).addMBB(exitMBB); - BB->addSuccessor(loop2MBB); - BB->addSuccessor(exitMBB); - - // loop2MBB: - // strex stxr_status, newval, [ptr] - // cbnz stxr_status, loop1MBB - BB = loop2MBB; - unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass); - MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass); - - BuildMI(BB, dl, TII->get(strOpc), stxr_status).addReg(newval).addReg(ptr); - BuildMI(BB, dl, TII->get(AArch64::CBNZw)) - .addReg(stxr_status).addMBB(loop1MBB); - BB->addSuccessor(loop1MBB); - BB->addSuccessor(exitMBB); - - // exitMBB: - // ... - BB = exitMBB; - - MI->eraseFromParent(); // The instruction is gone now. - - return BB; -} - -MachineBasicBlock * -AArch64TargetLowering::EmitF128CSEL(MachineInstr *MI, - MachineBasicBlock *MBB) const { - // We materialise the F128CSEL pseudo-instruction using conditional branches - // and loads, giving an instruciton sequence like: - // str q0, [sp] - // b.ne IfTrue - // b Finish - // IfTrue: - // str q1, [sp] - // Finish: - // ldr q0, [sp] - // - // Using virtual registers would probably not be beneficial since COPY - // instructions are expensive for f128 (there's no actual instruction to - // implement them). - // - // An alternative would be to do an integer-CSEL on some address. E.g.: - // mov x0, sp - // add x1, sp, #16 - // str q0, [x0] - // str q1, [x1] - // csel x0, x0, x1, ne - // ldr q0, [x0] - // - // It's unclear which approach is actually optimal. 
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - MachineFunction *MF = MBB->getParent(); - const BasicBlock *LLVM_BB = MBB->getBasicBlock(); - DebugLoc DL = MI->getDebugLoc(); - MachineFunction::iterator It = MBB; - ++It; - - unsigned DestReg = MI->getOperand(0).getReg(); - unsigned IfTrueReg = MI->getOperand(1).getReg(); - unsigned IfFalseReg = MI->getOperand(2).getReg(); - unsigned CondCode = MI->getOperand(3).getImm(); - bool NZCVKilled = MI->getOperand(4).isKill(); - - MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB); - MF->insert(It, TrueBB); - MF->insert(It, EndBB); - - // Transfer rest of current basic-block to EndBB - EndBB->splice(EndBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)), - MBB->end()); - EndBB->transferSuccessorsAndUpdatePHIs(MBB); - - // We need somewhere to store the f128 value needed. - int ScratchFI = MF->getFrameInfo()->CreateSpillStackObject(16, 16); - - // [... start of incoming MBB ...] - // str qIFFALSE, [sp] - // b.cc IfTrue - // b Done - BuildMI(MBB, DL, TII->get(AArch64::LSFP128_STR)) - .addReg(IfFalseReg) - .addFrameIndex(ScratchFI) - .addImm(0); - BuildMI(MBB, DL, TII->get(AArch64::Bcc)) - .addImm(CondCode) - .addMBB(TrueBB); - BuildMI(MBB, DL, TII->get(AArch64::Bimm)) - .addMBB(EndBB); - MBB->addSuccessor(TrueBB); - MBB->addSuccessor(EndBB); - - if (!NZCVKilled) { - // NZCV is live-through TrueBB. - TrueBB->addLiveIn(AArch64::NZCV); - EndBB->addLiveIn(AArch64::NZCV); - } - - // IfTrue: - // str qIFTRUE, [sp] - BuildMI(TrueBB, DL, TII->get(AArch64::LSFP128_STR)) - .addReg(IfTrueReg) - .addFrameIndex(ScratchFI) - .addImm(0); - - // Note: fallthrough. We can rely on LLVM adding a branch if it reorders the - // blocks. - TrueBB->addSuccessor(EndBB); - - // Done: - // ldr qDEST, [sp] - // [... rest of incoming MBB ...] 
- MachineInstr *StartOfEnd = EndBB->begin(); - BuildMI(*EndBB, StartOfEnd, DL, TII->get(AArch64::LSFP128_LDR), DestReg) - .addFrameIndex(ScratchFI) - .addImm(0); - - MI->eraseFromParent(); - return EndBB; -} - -MachineBasicBlock * -AArch64TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *MBB) const { - switch (MI->getOpcode()) { - default: llvm_unreachable("Unhandled instruction with custom inserter"); - case AArch64::F128CSEL: - return EmitF128CSEL(MI, MBB); - case AArch64::ATOMIC_LOAD_ADD_I8: - return emitAtomicBinary(MI, MBB, 1, AArch64::ADDwww_lsl); - case AArch64::ATOMIC_LOAD_ADD_I16: - return emitAtomicBinary(MI, MBB, 2, AArch64::ADDwww_lsl); - case AArch64::ATOMIC_LOAD_ADD_I32: - return emitAtomicBinary(MI, MBB, 4, AArch64::ADDwww_lsl); - case AArch64::ATOMIC_LOAD_ADD_I64: - return emitAtomicBinary(MI, MBB, 8, AArch64::ADDxxx_lsl); - - case AArch64::ATOMIC_LOAD_SUB_I8: - return emitAtomicBinary(MI, MBB, 1, AArch64::SUBwww_lsl); - case AArch64::ATOMIC_LOAD_SUB_I16: - return emitAtomicBinary(MI, MBB, 2, AArch64::SUBwww_lsl); - case AArch64::ATOMIC_LOAD_SUB_I32: - return emitAtomicBinary(MI, MBB, 4, AArch64::SUBwww_lsl); - case AArch64::ATOMIC_LOAD_SUB_I64: - return emitAtomicBinary(MI, MBB, 8, AArch64::SUBxxx_lsl); - - case AArch64::ATOMIC_LOAD_AND_I8: - return emitAtomicBinary(MI, MBB, 1, AArch64::ANDwww_lsl); - case AArch64::ATOMIC_LOAD_AND_I16: - return emitAtomicBinary(MI, MBB, 2, AArch64::ANDwww_lsl); - case AArch64::ATOMIC_LOAD_AND_I32: - return emitAtomicBinary(MI, MBB, 4, AArch64::ANDwww_lsl); - case AArch64::ATOMIC_LOAD_AND_I64: - return emitAtomicBinary(MI, MBB, 8, AArch64::ANDxxx_lsl); - - case AArch64::ATOMIC_LOAD_OR_I8: - return emitAtomicBinary(MI, MBB, 1, AArch64::ORRwww_lsl); - case AArch64::ATOMIC_LOAD_OR_I16: - return emitAtomicBinary(MI, MBB, 2, AArch64::ORRwww_lsl); - case AArch64::ATOMIC_LOAD_OR_I32: - return emitAtomicBinary(MI, MBB, 4, AArch64::ORRwww_lsl); - case AArch64::ATOMIC_LOAD_OR_I64: - return emitAtomicBinary(MI, MBB, 8, AArch64::ORRxxx_lsl); - - case AArch64::ATOMIC_LOAD_XOR_I8: - return emitAtomicBinary(MI, MBB, 1, AArch64::EORwww_lsl); - case AArch64::ATOMIC_LOAD_XOR_I16: - return emitAtomicBinary(MI, MBB, 2, AArch64::EORwww_lsl); - case AArch64::ATOMIC_LOAD_XOR_I32: - return emitAtomicBinary(MI, MBB, 4, AArch64::EORwww_lsl); - case AArch64::ATOMIC_LOAD_XOR_I64: - return emitAtomicBinary(MI, MBB, 8, AArch64::EORxxx_lsl); - - case AArch64::ATOMIC_LOAD_NAND_I8: - return emitAtomicBinary(MI, MBB, 1, AArch64::BICwww_lsl); - case AArch64::ATOMIC_LOAD_NAND_I16: - return emitAtomicBinary(MI, MBB, 2, AArch64::BICwww_lsl); - case AArch64::ATOMIC_LOAD_NAND_I32: - return emitAtomicBinary(MI, MBB, 4, AArch64::BICwww_lsl); - case AArch64::ATOMIC_LOAD_NAND_I64: - return emitAtomicBinary(MI, MBB, 8, AArch64::BICxxx_lsl); - - case AArch64::ATOMIC_LOAD_MIN_I8: - return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_sxtb, A64CC::GT); - case AArch64::ATOMIC_LOAD_MIN_I16: - return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_sxth, A64CC::GT); - case AArch64::ATOMIC_LOAD_MIN_I32: - return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::GT); - case AArch64::ATOMIC_LOAD_MIN_I64: - return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::GT); - - case AArch64::ATOMIC_LOAD_MAX_I8: - return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_sxtb, A64CC::LT); - case AArch64::ATOMIC_LOAD_MAX_I16: - return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_sxth, A64CC::LT); - case AArch64::ATOMIC_LOAD_MAX_I32: - 
return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::LT); - case AArch64::ATOMIC_LOAD_MAX_I64: - return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::LT); - - case AArch64::ATOMIC_LOAD_UMIN_I8: - return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_uxtb, A64CC::HI); - case AArch64::ATOMIC_LOAD_UMIN_I16: - return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_uxth, A64CC::HI); - case AArch64::ATOMIC_LOAD_UMIN_I32: - return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::HI); - case AArch64::ATOMIC_LOAD_UMIN_I64: - return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::HI); - - case AArch64::ATOMIC_LOAD_UMAX_I8: - return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_uxtb, A64CC::LO); - case AArch64::ATOMIC_LOAD_UMAX_I16: - return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_uxth, A64CC::LO); - case AArch64::ATOMIC_LOAD_UMAX_I32: - return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::LO); - case AArch64::ATOMIC_LOAD_UMAX_I64: - return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::LO); - - case AArch64::ATOMIC_SWAP_I8: - return emitAtomicBinary(MI, MBB, 1, 0); - case AArch64::ATOMIC_SWAP_I16: - return emitAtomicBinary(MI, MBB, 2, 0); - case AArch64::ATOMIC_SWAP_I32: - return emitAtomicBinary(MI, MBB, 4, 0); - case AArch64::ATOMIC_SWAP_I64: - return emitAtomicBinary(MI, MBB, 8, 0); - - case AArch64::ATOMIC_CMP_SWAP_I8: - return emitAtomicCmpSwap(MI, MBB, 1); - case AArch64::ATOMIC_CMP_SWAP_I16: - return emitAtomicCmpSwap(MI, MBB, 2); - case AArch64::ATOMIC_CMP_SWAP_I32: - return emitAtomicCmpSwap(MI, MBB, 4); - case AArch64::ATOMIC_CMP_SWAP_I64: - return emitAtomicCmpSwap(MI, MBB, 8); - } -} - - -const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { - switch (Opcode) { - case AArch64ISD::BR_CC: return "AArch64ISD::BR_CC"; - case AArch64ISD::Call: return "AArch64ISD::Call"; - case AArch64ISD::FPMOV: return "AArch64ISD::FPMOV"; - case AArch64ISD::GOTLoad: return "AArch64ISD::GOTLoad"; - case AArch64ISD::BFI: return "AArch64ISD::BFI"; - case AArch64ISD::EXTR: return "AArch64ISD::EXTR"; - case AArch64ISD::Ret: return "AArch64ISD::Ret"; - case AArch64ISD::SBFX: return "AArch64ISD::SBFX"; - case AArch64ISD::SELECT_CC: return "AArch64ISD::SELECT_CC"; - case AArch64ISD::SETCC: return "AArch64ISD::SETCC"; - case AArch64ISD::TC_RETURN: return "AArch64ISD::TC_RETURN"; - case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER"; - case AArch64ISD::TLSDESCCALL: return "AArch64ISD::TLSDESCCALL"; - case AArch64ISD::WrapperLarge: return "AArch64ISD::WrapperLarge"; - case AArch64ISD::WrapperSmall: return "AArch64ISD::WrapperSmall"; - - case AArch64ISD::NEON_MOVIMM: - return "AArch64ISD::NEON_MOVIMM"; - case AArch64ISD::NEON_MVNIMM: - return "AArch64ISD::NEON_MVNIMM"; - case AArch64ISD::NEON_FMOVIMM: - return "AArch64ISD::NEON_FMOVIMM"; - case AArch64ISD::NEON_CMP: - return "AArch64ISD::NEON_CMP"; - case AArch64ISD::NEON_CMPZ: - return "AArch64ISD::NEON_CMPZ"; - case AArch64ISD::NEON_TST: - return "AArch64ISD::NEON_TST"; - case AArch64ISD::NEON_QSHLs: - return "AArch64ISD::NEON_QSHLs"; - case AArch64ISD::NEON_QSHLu: - return "AArch64ISD::NEON_QSHLu"; - case AArch64ISD::NEON_VDUP: - return "AArch64ISD::NEON_VDUP"; - case AArch64ISD::NEON_VDUPLANE: - return "AArch64ISD::NEON_VDUPLANE"; - case AArch64ISD::NEON_REV16: - return "AArch64ISD::NEON_REV16"; - case AArch64ISD::NEON_REV32: - return "AArch64ISD::NEON_REV32"; - case AArch64ISD::NEON_REV64: - return 
"AArch64ISD::NEON_REV64"; - case AArch64ISD::NEON_UZP1: - return "AArch64ISD::NEON_UZP1"; - case AArch64ISD::NEON_UZP2: - return "AArch64ISD::NEON_UZP2"; - case AArch64ISD::NEON_ZIP1: - return "AArch64ISD::NEON_ZIP1"; - case AArch64ISD::NEON_ZIP2: - return "AArch64ISD::NEON_ZIP2"; - case AArch64ISD::NEON_TRN1: - return "AArch64ISD::NEON_TRN1"; - case AArch64ISD::NEON_TRN2: - return "AArch64ISD::NEON_TRN2"; - case AArch64ISD::NEON_LD1_UPD: - return "AArch64ISD::NEON_LD1_UPD"; - case AArch64ISD::NEON_LD2_UPD: - return "AArch64ISD::NEON_LD2_UPD"; - case AArch64ISD::NEON_LD3_UPD: - return "AArch64ISD::NEON_LD3_UPD"; - case AArch64ISD::NEON_LD4_UPD: - return "AArch64ISD::NEON_LD4_UPD"; - case AArch64ISD::NEON_ST1_UPD: - return "AArch64ISD::NEON_ST1_UPD"; - case AArch64ISD::NEON_ST2_UPD: - return "AArch64ISD::NEON_ST2_UPD"; - case AArch64ISD::NEON_ST3_UPD: - return "AArch64ISD::NEON_ST3_UPD"; - case AArch64ISD::NEON_ST4_UPD: - return "AArch64ISD::NEON_ST4_UPD"; - case AArch64ISD::NEON_LD1x2_UPD: - return "AArch64ISD::NEON_LD1x2_UPD"; - case AArch64ISD::NEON_LD1x3_UPD: - return "AArch64ISD::NEON_LD1x3_UPD"; - case AArch64ISD::NEON_LD1x4_UPD: - return "AArch64ISD::NEON_LD1x4_UPD"; - case AArch64ISD::NEON_ST1x2_UPD: - return "AArch64ISD::NEON_ST1x2_UPD"; - case AArch64ISD::NEON_ST1x3_UPD: - return "AArch64ISD::NEON_ST1x3_UPD"; - case AArch64ISD::NEON_ST1x4_UPD: - return "AArch64ISD::NEON_ST1x4_UPD"; - case AArch64ISD::NEON_LD2DUP: - return "AArch64ISD::NEON_LD2DUP"; - case AArch64ISD::NEON_LD3DUP: - return "AArch64ISD::NEON_LD3DUP"; - case AArch64ISD::NEON_LD4DUP: - return "AArch64ISD::NEON_LD4DUP"; - case AArch64ISD::NEON_LD2DUP_UPD: - return "AArch64ISD::NEON_LD2DUP_UPD"; - case AArch64ISD::NEON_LD3DUP_UPD: - return "AArch64ISD::NEON_LD3DUP_UPD"; - case AArch64ISD::NEON_LD4DUP_UPD: - return "AArch64ISD::NEON_LD4DUP_UPD"; - case AArch64ISD::NEON_LD2LN_UPD: - return "AArch64ISD::NEON_LD2LN_UPD"; - case AArch64ISD::NEON_LD3LN_UPD: - return "AArch64ISD::NEON_LD3LN_UPD"; - case AArch64ISD::NEON_LD4LN_UPD: - return "AArch64ISD::NEON_LD4LN_UPD"; - case AArch64ISD::NEON_ST2LN_UPD: - return "AArch64ISD::NEON_ST2LN_UPD"; - case AArch64ISD::NEON_ST3LN_UPD: - return "AArch64ISD::NEON_ST3LN_UPD"; - case AArch64ISD::NEON_ST4LN_UPD: - return "AArch64ISD::NEON_ST4LN_UPD"; - case AArch64ISD::NEON_VEXTRACT: - return "AArch64ISD::NEON_VEXTRACT"; - default: - return nullptr; - } -} - -static const MCPhysReg AArch64FPRArgRegs[] = { - AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, - AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7 -}; -static const unsigned NumFPRArgRegs = llvm::array_lengthof(AArch64FPRArgRegs); - -static const MCPhysReg AArch64ArgRegs[] = { - AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, - AArch64::X4, AArch64::X5, AArch64::X6, AArch64::X7 -}; -static const unsigned NumArgRegs = llvm::array_lengthof(AArch64ArgRegs); - -static bool CC_AArch64NoMoreRegs(unsigned ValNo, MVT ValVT, MVT LocVT, - CCValAssign::LocInfo LocInfo, - ISD::ArgFlagsTy ArgFlags, CCState &State) { - // Mark all remaining general purpose registers as allocated. We don't - // backtrack: if (for example) an i128 gets put on the stack, no subsequent - // i64 will go in registers (C.11). 
- for (unsigned i = 0; i < NumArgRegs; ++i) - State.AllocateReg(AArch64ArgRegs[i]); - - return false; -} - -#include "AArch64GenCallingConv.inc" - -CCAssignFn *AArch64TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const { - - switch(CC) { - default: llvm_unreachable("Unsupported calling convention"); - case CallingConv::Fast: - case CallingConv::C: - return CC_A64_APCS; - } -} - -void -AArch64TargetLowering::SaveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, - SDLoc DL, SDValue &Chain) const { - MachineFunction &MF = DAG.getMachineFunction(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - AArch64MachineFunctionInfo *FuncInfo - = MF.getInfo<AArch64MachineFunctionInfo>(); - - SmallVector<SDValue, 8> MemOps; - - unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(AArch64ArgRegs, - NumArgRegs); - unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(AArch64FPRArgRegs, - NumFPRArgRegs); - - unsigned GPRSaveSize = 8 * (NumArgRegs - FirstVariadicGPR); - int GPRIdx = 0; - if (GPRSaveSize != 0) { - GPRIdx = MFI->CreateStackObject(GPRSaveSize, 8, false); - - SDValue FIN = DAG.getFrameIndex(GPRIdx, getPointerTy()); - - for (unsigned i = FirstVariadicGPR; i < NumArgRegs; ++i) { - unsigned VReg = MF.addLiveIn(AArch64ArgRegs[i], &AArch64::GPR64RegClass); - SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64); - SDValue Store = DAG.getStore(Val.getValue(1), DL, Val, FIN, - MachinePointerInfo::getStack(i * 8), - false, false, 0); - MemOps.push_back(Store); - FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN, - DAG.getConstant(8, getPointerTy())); - } - } - - if (getSubtarget()->hasFPARMv8()) { - unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR); - int FPRIdx = 0; - // According to the AArch64 Procedure Call Standard, section B.1/B.3, we - // can omit a register save area if we know we'll never use registers of - // that class. 
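A worked sizing example for the save areas being built here (illustrative numbers, not from the patch): for a variadic callee like the hypothetical declaration below, two GPRs are consumed by named arguments, so FirstVariadicGPR is 2 and GPRSaveSize is 8 * (8 - 2) = 48 bytes; with no named FP arguments, FirstVariadicFPR is 0 and FPRSaveSize is 16 * 8 = 128 bytes when FP/SIMD is available.

    // Hypothetical callee, used only to make the sizing above concrete:
    // X0 and X1 carry the named arguments, so X2..X7 (48 bytes) and, when FP
    // registers exist, Q0..Q7 (128 bytes) are spilled for later va_arg use.
    int sum_some(int a, int b, ...);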
- if (FPRSaveSize != 0) { - FPRIdx = MFI->CreateStackObject(FPRSaveSize, 16, false); - - SDValue FIN = DAG.getFrameIndex(FPRIdx, getPointerTy()); - - for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) { - unsigned VReg = MF.addLiveIn(AArch64FPRArgRegs[i], - &AArch64::FPR128RegClass); - SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128); - SDValue Store = DAG.getStore(Val.getValue(1), DL, Val, FIN, - MachinePointerInfo::getStack(i * 16), - false, false, 0); - MemOps.push_back(Store); - FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN, - DAG.getConstant(16, getPointerTy())); - } - } - FuncInfo->setVariadicFPRIdx(FPRIdx); - FuncInfo->setVariadicFPRSize(FPRSaveSize); - } - - unsigned StackOffset = RoundUpToAlignment(CCInfo.getNextStackOffset(), 8); - int StackIdx = MFI->CreateFixedObject(8, StackOffset, true); - - FuncInfo->setVariadicStackIdx(StackIdx); - FuncInfo->setVariadicGPRIdx(GPRIdx); - FuncInfo->setVariadicGPRSize(GPRSaveSize); - - if (!MemOps.empty()) { - Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps); - } -} - - -SDValue -AArch64TargetLowering::LowerFormalArguments(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl<ISD::InputArg> &Ins, - SDLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) const { - MachineFunction &MF = DAG.getMachineFunction(); - AArch64MachineFunctionInfo *FuncInfo - = MF.getInfo<AArch64MachineFunctionInfo>(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt; - - SmallVector<CCValAssign, 16> ArgLocs; - CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), ArgLocs, *DAG.getContext()); - CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForNode(CallConv)); - - SmallVector<SDValue, 16> ArgValues; - - SDValue ArgValue; - for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { - CCValAssign &VA = ArgLocs[i]; - ISD::ArgFlagsTy Flags = Ins[i].Flags; - - if (Flags.isByVal()) { - // Byval is used for small structs and HFAs in the PCS, but the system - // should work in a non-compliant manner for larger structs. 
- EVT PtrTy = getPointerTy(); - int Size = Flags.getByValSize(); - unsigned NumRegs = (Size + 7) / 8; - - uint32_t BEAlign = 0; - if (Size < 8 && !getSubtarget()->isLittle()) - BEAlign = 8-Size; - unsigned FrameIdx = MFI->CreateFixedObject(8 * NumRegs, - VA.getLocMemOffset() + BEAlign, - false); - SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrTy); - InVals.push_back(FrameIdxN); - - continue; - } else if (VA.isRegLoc()) { - MVT RegVT = VA.getLocVT(); - const TargetRegisterClass *RC = getRegClassFor(RegVT); - unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); - - ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); - } else { // VA.isRegLoc() - assert(VA.isMemLoc()); - - int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8, - VA.getLocMemOffset(), true); - - SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); - ArgValue = DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, - MachinePointerInfo::getFixedStack(FI), - false, false, false, 0); - - - } - - switch (VA.getLocInfo()) { - default: llvm_unreachable("Unknown loc info!"); - case CCValAssign::Full: break; - case CCValAssign::BCvt: - ArgValue = DAG.getNode(ISD::BITCAST,dl, VA.getValVT(), ArgValue); - break; - case CCValAssign::SExt: - case CCValAssign::ZExt: - case CCValAssign::AExt: - case CCValAssign::FPExt: { - unsigned DestSize = VA.getValVT().getSizeInBits(); - unsigned DestSubReg; - - switch (DestSize) { - case 8: DestSubReg = AArch64::sub_8; break; - case 16: DestSubReg = AArch64::sub_16; break; - case 32: DestSubReg = AArch64::sub_32; break; - case 64: DestSubReg = AArch64::sub_64; break; - default: llvm_unreachable("Unexpected argument promotion"); - } - - ArgValue = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, - VA.getValVT(), ArgValue, - DAG.getTargetConstant(DestSubReg, MVT::i32)), - 0); - break; - } - } - - InVals.push_back(ArgValue); - } - - if (isVarArg) - SaveVarArgRegisters(CCInfo, DAG, dl, Chain); - - unsigned StackArgSize = CCInfo.getNextStackOffset(); - if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) { - // This is a non-standard ABI so by fiat I say we're allowed to make full - // use of the stack area to be popped, which must be aligned to 16 bytes in - // any case: - StackArgSize = RoundUpToAlignment(StackArgSize, 16); - - // If we're expected to restore the stack (e.g. fastcc) then we'll be adding - // a multiple of 16. - FuncInfo->setArgumentStackToRestore(StackArgSize); - - // This realignment carries over to the available bytes below. Our own - // callers will guarantee the space is free by giving an aligned value to - // CALLSEQ_START. - } - // Even if we're not expected to free up the space, it's useful to know how - // much is there while considering tail calls (because we can reuse it). - FuncInfo->setBytesInStackArgArea(StackArgSize); - - return Chain; -} - -SDValue -AArch64TargetLowering::LowerReturn(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl<ISD::OutputArg> &Outs, - const SmallVectorImpl<SDValue> &OutVals, - SDLoc dl, SelectionDAG &DAG) const { - // CCValAssign - represent the assignment of the return value to a location. - SmallVector<CCValAssign, 16> RVLocs; - - // CCState - Info about the registers and stack slots. - CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext()); - - // Analyze outgoing return values. 
- CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv)); - - SDValue Flag; - SmallVector<SDValue, 4> RetOps(1, Chain); - - for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { - // PCS: "If the type, T, of the result of a function is such that - // void func(T arg) would require that arg be passed as a value in a - // register (or set of registers) according to the rules in 5.4, then the - // result is returned in the same registers as would be used for such an - // argument. - // - // Otherwise, the caller shall reserve a block of memory of sufficient - // size and alignment to hold the result. The address of the memory block - // shall be passed as an additional argument to the function in x8." - // - // This is implemented in two places. The register-return values are dealt - // with here, more complex returns are passed as an sret parameter, which - // means we don't have to worry about it during actual return. - CCValAssign &VA = RVLocs[i]; - assert(VA.isRegLoc() && "Only register-returns should be created by PCS"); - - - SDValue Arg = OutVals[i]; - - // There's no convenient note in the ABI about this as there is for normal - // arguments, but it says return values are passed in the same registers as - // an argument would be. I believe that includes the comments about - // unspecified higher bits, putting the burden of widening on the *caller* - // for return values. - switch (VA.getLocInfo()) { - default: llvm_unreachable("Unknown loc info"); - case CCValAssign::Full: break; - case CCValAssign::SExt: - case CCValAssign::ZExt: - case CCValAssign::AExt: - // Floating-point values should only be extended when they're going into - // memory, which can't happen here so an integer extend is acceptable. - Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); - break; - case CCValAssign::BCvt: - Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); - break; - } - - Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); - Flag = Chain.getValue(1); - RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); - } - - RetOps[0] = Chain; // Update chain. - - // Add the flag if we have it. - if (Flag.getNode()) - RetOps.push_back(Flag); - - return DAG.getNode(AArch64ISD::Ret, dl, MVT::Other, RetOps); -} - -unsigned AArch64TargetLowering::getByValTypeAlignment(Type *Ty) const { - // This is a new backend. For anything more precise than this a FE should - // set an explicit alignment. 
- return 4; -} - -SDValue -AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, - SmallVectorImpl<SDValue> &InVals) const { - SelectionDAG &DAG = CLI.DAG; - SDLoc &dl = CLI.DL; - SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; - SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; - SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; - SDValue Chain = CLI.Chain; - SDValue Callee = CLI.Callee; - bool &IsTailCall = CLI.IsTailCall; - CallingConv::ID CallConv = CLI.CallConv; - bool IsVarArg = CLI.IsVarArg; - - MachineFunction &MF = DAG.getMachineFunction(); - AArch64MachineFunctionInfo *FuncInfo - = MF.getInfo<AArch64MachineFunctionInfo>(); - bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt; - bool IsStructRet = !Outs.empty() && Outs[0].Flags.isSRet(); - bool IsSibCall = false; - - if (IsTailCall) { - IsTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, - IsVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(), - Outs, OutVals, Ins, DAG); - - if (!IsTailCall && CLI.CS && CLI.CS->isMustTailCall()) - report_fatal_error("failed to perform tail call elimination on a call " - "site marked musttail"); - - // A sibling call is one where we're under the usual C ABI and not planning - // to change that but can still do a tail call: - if (!TailCallOpt && IsTailCall) - IsSibCall = true; - } - - SmallVector<CCValAssign, 16> ArgLocs; - CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), - getTargetMachine(), ArgLocs, *DAG.getContext()); - CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv)); - - // On AArch64 (and all other architectures I'm aware of) the most this has to - // do is adjust the stack pointer. - unsigned NumBytes = RoundUpToAlignment(CCInfo.getNextStackOffset(), 16); - if (IsSibCall) { - // Since we're not changing the ABI to make this a tail call, the memory - // operands are already available in the caller's incoming argument space. - NumBytes = 0; - } - - // FPDiff is the byte offset of the call's argument area from the callee's. - // Stores to callee stack arguments will be placed in FixedStackSlots offset - // by this amount for a tail call. In a sibling call it must be 0 because the - // caller will deallocate the entire stack and the callee still expects its - // arguments to begin at SP+0. Completely unused for non-tail calls. - int FPDiff = 0; - - if (IsTailCall && !IsSibCall) { - unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea(); - - // FPDiff will be negative if this tail call requires more space than we - // would automatically have in our incoming argument space. Positive if we - // can actually shrink the stack. - FPDiff = NumReusableBytes - NumBytes; - - // The stack pointer must be 16-byte aligned at all times it's used for a - // memory operation, which in practice means at *all* times and in - // particular across call boundaries. Therefore our own arguments started at - // a 16-byte aligned SP and the delta applied for the tail call should - // satisfy the same constraint. 
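A small worked example of the FPDiff computation just above (illustrative values, not from the patch): if the caller's own incoming argument area holds 32 bytes (NumReusableBytes) and the tail call needs 16 bytes of outgoing arguments (NumBytes), FPDiff is 32 - 16 = +16, so callee stack arguments land 16 bytes into the reusable area; if the call instead needed 48 bytes, FPDiff would be -16 and the frame has to grow. Either way both quantities are multiples of 16, which is what the assertion that follows checks.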
- assert(FPDiff % 16 == 0 && "unaligned stack on tail call"); - } - - if (!IsSibCall) - Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true), - dl); - - SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, AArch64::XSP, - getPointerTy()); - - SmallVector<SDValue, 8> MemOpChains; - SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; - - for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { - CCValAssign &VA = ArgLocs[i]; - ISD::ArgFlagsTy Flags = Outs[i].Flags; - SDValue Arg = OutVals[i]; - - // Callee does the actual widening, so all extensions just use an implicit - // definition of the rest of the Loc. Aesthetically, this would be nicer as - // an ANY_EXTEND, but that isn't valid for floating-point types and this - // alternative works on integer types too. - switch (VA.getLocInfo()) { - default: llvm_unreachable("Unknown loc info!"); - case CCValAssign::Full: break; - case CCValAssign::SExt: - case CCValAssign::ZExt: - case CCValAssign::AExt: - case CCValAssign::FPExt: { - unsigned SrcSize = VA.getValVT().getSizeInBits(); - unsigned SrcSubReg; - - switch (SrcSize) { - case 8: SrcSubReg = AArch64::sub_8; break; - case 16: SrcSubReg = AArch64::sub_16; break; - case 32: SrcSubReg = AArch64::sub_32; break; - case 64: SrcSubReg = AArch64::sub_64; break; - default: llvm_unreachable("Unexpected argument promotion"); - } - - Arg = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, dl, - VA.getLocVT(), - DAG.getUNDEF(VA.getLocVT()), - Arg, - DAG.getTargetConstant(SrcSubReg, MVT::i32)), - 0); - - break; - } - case CCValAssign::BCvt: - Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); - break; - } - - if (VA.isRegLoc()) { - // A normal register (sub-) argument. For now we just note it down because - // we want to copy things into registers as late as possible to avoid - // register-pressure (and possibly worse). - RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); - continue; - } - - assert(VA.isMemLoc() && "unexpected argument location"); - - SDValue DstAddr; - MachinePointerInfo DstInfo; - if (IsTailCall) { - uint32_t OpSize = Flags.isByVal() ? Flags.getByValSize() : - VA.getLocVT().getSizeInBits(); - OpSize = (OpSize + 7) / 8; - int32_t Offset = VA.getLocMemOffset() + FPDiff; - int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true); - - DstAddr = DAG.getFrameIndex(FI, getPointerTy()); - DstInfo = MachinePointerInfo::getFixedStack(FI); - - // Make sure any stack arguments overlapping with where we're storing are - // loaded before this eventual operation. Otherwise they'll be clobbered. - Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI); - } else { - uint32_t OpSize = Flags.isByVal() ? Flags.getByValSize()*8 : - VA.getLocVT().getSizeInBits(); - OpSize = (OpSize + 7) / 8; - uint32_t BEAlign = 0; - if (OpSize < 8 && !getSubtarget()->isLittle()) - BEAlign = 8-OpSize; - SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset() + BEAlign); - - DstAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); - DstInfo = MachinePointerInfo::getStack(VA.getLocMemOffset()); - } - - if (Flags.isByVal()) { - SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i64); - SDValue Cpy = DAG.getMemcpy(Chain, dl, DstAddr, Arg, SizeNode, - Flags.getByValAlign(), - /*isVolatile = */ false, - /*alwaysInline = */ false, - DstInfo, MachinePointerInfo()); - MemOpChains.push_back(Cpy); - } else { - // Normal stack argument, put it where it's needed. 
- SDValue Store = DAG.getStore(Chain, dl, Arg, DstAddr, DstInfo, - false, false, 0); - MemOpChains.push_back(Store); - } - } - - // The loads and stores generated above shouldn't clash with each - // other. Combining them with this TokenFactor notes that fact for the rest of - // the backend. - if (!MemOpChains.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); - - // Most of the rest of the instructions need to be glued together; we don't - // want assignments to actual registers used by a call to be rearranged by a - // well-meaning scheduler. - SDValue InFlag; - - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { - Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, - RegsToPass[i].second, InFlag); - InFlag = Chain.getValue(1); - } - - // The linker is responsible for inserting veneers when necessary to put a - // function call destination in range, so we don't need to bother with a - // wrapper here. - if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { - const GlobalValue *GV = G->getGlobal(); - Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy()); - } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { - const char *Sym = S->getSymbol(); - Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy()); - } - - // We don't usually want to end the call-sequence here because we would tidy - // the frame up *after* the call, however in the ABI-changing tail-call case - // we've carefully laid out the parameters so that when sp is reset they'll be - // in the correct location. - if (IsTailCall && !IsSibCall) { - Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), - DAG.getIntPtrConstant(0, true), InFlag, dl); - InFlag = Chain.getValue(1); - } - - // We produce the following DAG scheme for the actual call instruction: - // (AArch64Call Chain, Callee, reg1, ..., regn, preserveMask, inflag? - // - // Most arguments aren't going to be used and just keep the values live as - // far as LLVM is concerned. It's expected to be selected as simply "bl - // callee" (for a direct, non-tail call). - std::vector<SDValue> Ops; - Ops.push_back(Chain); - Ops.push_back(Callee); - - if (IsTailCall) { - // Each tail call may have to adjust the stack by a different amount, so - // this information must travel along with the operation for eventual - // consumption by emitEpilogue. - Ops.push_back(DAG.getTargetConstant(FPDiff, MVT::i32)); - } - - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) - Ops.push_back(DAG.getRegister(RegsToPass[i].first, - RegsToPass[i].second.getValueType())); - - - // Add a register mask operand representing the call-preserved registers. This - // is used later in codegen to constrain register-allocation. - const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); - const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); - assert(Mask && "Missing call preserved mask for calling convention"); - Ops.push_back(DAG.getRegisterMask(Mask)); - - // If we needed glue, put it in as the last argument. - if (InFlag.getNode()) - Ops.push_back(InFlag); - - SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); - - if (IsTailCall) { - return DAG.getNode(AArch64ISD::TC_RETURN, dl, NodeTys, Ops); - } - - Chain = DAG.getNode(AArch64ISD::Call, dl, NodeTys, Ops); - InFlag = Chain.getValue(1); - - // Now we can reclaim the stack, just as well do it before working out where - // our return value is. 
- if (!IsSibCall) { - uint64_t CalleePopBytes - = DoesCalleeRestoreStack(CallConv, TailCallOpt) ? NumBytes : 0; - - Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), - DAG.getIntPtrConstant(CalleePopBytes, true), - InFlag, dl); - InFlag = Chain.getValue(1); - } - - return LowerCallResult(Chain, InFlag, CallConv, - IsVarArg, Ins, dl, DAG, InVals); -} - -SDValue -AArch64TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, - CallingConv::ID CallConv, bool IsVarArg, - const SmallVectorImpl<ISD::InputArg> &Ins, - SDLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) const { - // Assign locations to each value returned by this call. - SmallVector<CCValAssign, 16> RVLocs; - CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext()); - CCInfo.AnalyzeCallResult(Ins, CCAssignFnForNode(CallConv)); - - for (unsigned i = 0; i != RVLocs.size(); ++i) { - CCValAssign VA = RVLocs[i]; - - // Return values that are too big to fit into registers should use an sret - // pointer, so this can be a lot simpler than the main argument code. - assert(VA.isRegLoc() && "Memory locations not expected for call return"); - - SDValue Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(), - InFlag); - Chain = Val.getValue(1); - InFlag = Val.getValue(2); - - switch (VA.getLocInfo()) { - default: llvm_unreachable("Unknown loc info!"); - case CCValAssign::Full: break; - case CCValAssign::BCvt: - Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val); - break; - case CCValAssign::ZExt: - case CCValAssign::SExt: - case CCValAssign::AExt: - // Floating-point arguments only get extended/truncated if they're going - // in memory, so using the integer operation is acceptable here. - Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); - break; - } - - InVals.push_back(Val); - } - - return Chain; -} - -bool -AArch64TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, - CallingConv::ID CalleeCC, - bool IsVarArg, - bool IsCalleeStructRet, - bool IsCallerStructRet, - const SmallVectorImpl<ISD::OutputArg> &Outs, - const SmallVectorImpl<SDValue> &OutVals, - const SmallVectorImpl<ISD::InputArg> &Ins, - SelectionDAG& DAG) const { - - // For CallingConv::C this function knows whether the ABI needs - // changing. That's not true for other conventions so they will have to opt in - // manually. - if (!IsTailCallConvention(CalleeCC) && CalleeCC != CallingConv::C) - return false; - - const MachineFunction &MF = DAG.getMachineFunction(); - const Function *CallerF = MF.getFunction(); - CallingConv::ID CallerCC = CallerF->getCallingConv(); - bool CCMatch = CallerCC == CalleeCC; - - // Byval parameters hand the function a pointer directly into the stack area - // we want to reuse during a tail call. Working around this *is* possible (see - // X86) but less efficient and uglier in LowerCall. - for (Function::const_arg_iterator i = CallerF->arg_begin(), - e = CallerF->arg_end(); i != e; ++i) - if (i->hasByValAttr()) - return false; - - if (getTargetMachine().Options.GuaranteedTailCallOpt) { - if (IsTailCallConvention(CalleeCC) && CCMatch) - return true; - return false; - } - - // Now we search for cases where we can use a tail call without changing the - // ABI. Sibcall is used in some places (particularly gcc) to refer to this - // concept. - - // I want anyone implementing a new calling convention to think long and hard - // about this assert. 
- assert((!IsVarArg || CalleeCC == CallingConv::C) - && "Unexpected variadic calling convention"); - - if (IsVarArg && !Outs.empty()) { - // At least two cases here: if caller is fastcc then we can't have any - // memory arguments (we'd be expected to clean up the stack afterwards). If - // caller is C then we could potentially use its argument area. - - // FIXME: for now we take the most conservative of these in both cases: - // disallow all variadic memory operands. - SmallVector<CCValAssign, 16> ArgLocs; - CCState CCInfo(CalleeCC, IsVarArg, DAG.getMachineFunction(), - getTargetMachine(), ArgLocs, *DAG.getContext()); - - CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CalleeCC)); - for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) - if (!ArgLocs[i].isRegLoc()) - return false; - } - - // If the calling conventions do not match, then we'd better make sure the - // results are returned in the same way as what the caller expects. - if (!CCMatch) { - SmallVector<CCValAssign, 16> RVLocs1; - CCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(), - getTargetMachine(), RVLocs1, *DAG.getContext()); - CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC)); - - SmallVector<CCValAssign, 16> RVLocs2; - CCState CCInfo2(CallerCC, false, DAG.getMachineFunction(), - getTargetMachine(), RVLocs2, *DAG.getContext()); - CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC)); - - if (RVLocs1.size() != RVLocs2.size()) - return false; - for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) { - if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc()) - return false; - if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo()) - return false; - if (RVLocs1[i].isRegLoc()) { - if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg()) - return false; - } else { - if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset()) - return false; - } - } - } - - // Nothing more to check if the callee is taking no arguments - if (Outs.empty()) - return true; - - SmallVector<CCValAssign, 16> ArgLocs; - CCState CCInfo(CalleeCC, IsVarArg, DAG.getMachineFunction(), - getTargetMachine(), ArgLocs, *DAG.getContext()); - - CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CalleeCC)); - - const AArch64MachineFunctionInfo *FuncInfo - = MF.getInfo<AArch64MachineFunctionInfo>(); - - // If the stack arguments for this call would fit into our own save area then - // the call can be made tail. - return CCInfo.getNextStackOffset() <= FuncInfo->getBytesInStackArgArea(); -} - -bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC, - bool TailCallOpt) const { - return CallCC == CallingConv::Fast && TailCallOpt; -} - -bool AArch64TargetLowering::IsTailCallConvention(CallingConv::ID CallCC) const { - return CallCC == CallingConv::Fast; -} - -SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain, - SelectionDAG &DAG, - MachineFrameInfo *MFI, - int ClobberedFI) const { - SmallVector<SDValue, 8> ArgChains; - int64_t FirstByte = MFI->getObjectOffset(ClobberedFI); - int64_t LastByte = FirstByte + MFI->getObjectSize(ClobberedFI) - 1; - - // Include the original chain at the beginning of the list. When this is - // used by target LowerCall hooks, this helps legalize find the - // CALLSEQ_BEGIN node. 
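The loop that follows walks loads from fixed (incoming-argument) frame indices and chains in any whose byte range overlaps the slot about to be clobbered. A minimal sketch of that interval test is below; the helper name is made up and the offsets/sizes are assumed to come from MachineFrameInfo exactly as in the loop.

    #include <cstdint>

    // Hypothetical helper mirroring the interval test in the loop that follows:
    // two byte ranges overlap iff either one's first byte lies inside the other.
    static bool bytesOverlap(int64_t FirstA, int64_t LastA,
                             int64_t FirstB, int64_t LastB) {
      return (FirstB <= FirstA && FirstA <= LastB) ||
             (FirstA <= FirstB && FirstB <= LastA);
    }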
- ArgChains.push_back(Chain); - - // Add a chain value for each stack argument corresponding - for (SDNode::use_iterator U = DAG.getEntryNode().getNode()->use_begin(), - UE = DAG.getEntryNode().getNode()->use_end(); U != UE; ++U) - if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U)) - if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr())) - if (FI->getIndex() < 0) { - int64_t InFirstByte = MFI->getObjectOffset(FI->getIndex()); - int64_t InLastByte = InFirstByte; - InLastByte += MFI->getObjectSize(FI->getIndex()) - 1; - - if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) || - (FirstByte <= InFirstByte && InFirstByte <= LastByte)) - ArgChains.push_back(SDValue(L, 1)); - } - - // Build a tokenfactor for all the chains. - return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains); -} - -static A64CC::CondCodes IntCCToA64CC(ISD::CondCode CC) { - switch (CC) { - case ISD::SETEQ: return A64CC::EQ; - case ISD::SETGT: return A64CC::GT; - case ISD::SETGE: return A64CC::GE; - case ISD::SETLT: return A64CC::LT; - case ISD::SETLE: return A64CC::LE; - case ISD::SETNE: return A64CC::NE; - case ISD::SETUGT: return A64CC::HI; - case ISD::SETUGE: return A64CC::HS; - case ISD::SETULT: return A64CC::LO; - case ISD::SETULE: return A64CC::LS; - default: llvm_unreachable("Unexpected condition code"); - } -} - -bool AArch64TargetLowering::isLegalICmpImmediate(int64_t Val) const { - // icmp is implemented using adds/subs immediate, which take an unsigned - // 12-bit immediate, optionally shifted left by 12 bits. - - // Symmetric by using adds/subs - if (Val < 0) - Val = -Val; - - return (Val & ~0xfff) == 0 || (Val & ~0xfff000) == 0; -} - -SDValue AArch64TargetLowering::getSelectableIntSetCC(SDValue LHS, SDValue RHS, - ISD::CondCode CC, SDValue &A64cc, - SelectionDAG &DAG, SDLoc &dl) const { - if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) { - int64_t C = 0; - EVT VT = RHSC->getValueType(0); - bool knownInvalid = false; - - // I'm not convinced the rest of LLVM handles these edge cases properly, but - // we can at least get it right. - if (isSignedIntSetCC(CC)) { - C = RHSC->getSExtValue(); - } else if (RHSC->getZExtValue() > INT64_MAX) { - // A 64-bit constant not representable by a signed 64-bit integer is far - // too big to fit into a SUBS immediate anyway. - knownInvalid = true; - } else { - C = RHSC->getZExtValue(); - } - - if (!knownInvalid && !isLegalICmpImmediate(C)) { - // Constant does not fit, try adjusting it by one? - switch (CC) { - default: break; - case ISD::SETLT: - case ISD::SETGE: - if (isLegalICmpImmediate(C-1)) { - CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT; - RHS = DAG.getConstant(C-1, VT); - } - break; - case ISD::SETULT: - case ISD::SETUGE: - if (isLegalICmpImmediate(C-1)) { - CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT; - RHS = DAG.getConstant(C-1, VT); - } - break; - case ISD::SETLE: - case ISD::SETGT: - if (isLegalICmpImmediate(C+1)) { - CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE; - RHS = DAG.getConstant(C+1, VT); - } - break; - case ISD::SETULE: - case ISD::SETUGT: - if (isLegalICmpImmediate(C+1)) { - CC = (CC == ISD::SETULE) ? 
ISD::SETULT : ISD::SETUGE; - RHS = DAG.getConstant(C+1, VT); - } - break; - } - } - } - - A64CC::CondCodes CondCode = IntCCToA64CC(CC); - A64cc = DAG.getConstant(CondCode, MVT::i32); - return DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS, - DAG.getCondCode(CC)); -} - -static A64CC::CondCodes FPCCToA64CC(ISD::CondCode CC, - A64CC::CondCodes &Alternative) { - A64CC::CondCodes CondCode = A64CC::Invalid; - Alternative = A64CC::Invalid; - - switch (CC) { - default: llvm_unreachable("Unknown FP condition!"); - case ISD::SETEQ: - case ISD::SETOEQ: CondCode = A64CC::EQ; break; - case ISD::SETGT: - case ISD::SETOGT: CondCode = A64CC::GT; break; - case ISD::SETGE: - case ISD::SETOGE: CondCode = A64CC::GE; break; - case ISD::SETOLT: CondCode = A64CC::MI; break; - case ISD::SETOLE: CondCode = A64CC::LS; break; - case ISD::SETONE: CondCode = A64CC::MI; Alternative = A64CC::GT; break; - case ISD::SETO: CondCode = A64CC::VC; break; - case ISD::SETUO: CondCode = A64CC::VS; break; - case ISD::SETUEQ: CondCode = A64CC::EQ; Alternative = A64CC::VS; break; - case ISD::SETUGT: CondCode = A64CC::HI; break; - case ISD::SETUGE: CondCode = A64CC::PL; break; - case ISD::SETLT: - case ISD::SETULT: CondCode = A64CC::LT; break; - case ISD::SETLE: - case ISD::SETULE: CondCode = A64CC::LE; break; - case ISD::SETNE: - case ISD::SETUNE: CondCode = A64CC::NE; break; - } - return CondCode; -} - -SDValue -AArch64TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { - SDLoc DL(Op); - EVT PtrVT = getPointerTy(); - const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); - - switch(getTargetMachine().getCodeModel()) { - case CodeModel::Small: - // The most efficient code is PC-relative anyway for the small memory model, - // so we don't need to worry about relocation model. - return DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT, - DAG.getTargetBlockAddress(BA, PtrVT, 0, - AArch64II::MO_NO_FLAG), - DAG.getTargetBlockAddress(BA, PtrVT, 0, - AArch64II::MO_LO12), - DAG.getConstant(/*Alignment=*/ 4, MVT::i32)); - case CodeModel::Large: - return DAG.getNode( - AArch64ISD::WrapperLarge, DL, PtrVT, - DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G3), - DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G2_NC), - DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G1_NC), - DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G0_NC)); - default: - llvm_unreachable("Only small and large code models supported now"); - } -} - - -// (BRCOND chain, val, dest) -SDValue -AArch64TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { - SDLoc dl(Op); - SDValue Chain = Op.getOperand(0); - SDValue TheBit = Op.getOperand(1); - SDValue DestBB = Op.getOperand(2); - - // AArch64 BooleanContents is the default UndefinedBooleanContent, which means - // that as the consumer we are responsible for ignoring rubbish in higher - // bits. 
- TheBit = DAG.getNode(ISD::AND, dl, MVT::i32, TheBit, - DAG.getConstant(1, MVT::i32)); - - SDValue A64CMP = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, TheBit, - DAG.getConstant(0, TheBit.getValueType()), - DAG.getCondCode(ISD::SETNE)); - - return DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other, Chain, - A64CMP, DAG.getConstant(A64CC::NE, MVT::i32), - DestBB); -} - -// (BR_CC chain, condcode, lhs, rhs, dest) -SDValue -AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { - SDLoc dl(Op); - SDValue Chain = Op.getOperand(0); - ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); - SDValue LHS = Op.getOperand(2); - SDValue RHS = Op.getOperand(3); - SDValue DestBB = Op.getOperand(4); - - if (LHS.getValueType() == MVT::f128) { - // f128 comparisons are lowered to runtime calls by a routine which sets - // LHS, RHS and CC appropriately for the rest of this function to continue. - softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl); - - // If softenSetCCOperands returned a scalar, we need to compare the result - // against zero to select between true and false values. - if (!RHS.getNode()) { - RHS = DAG.getConstant(0, LHS.getValueType()); - CC = ISD::SETNE; - } - } - - if (LHS.getValueType().isInteger()) { - SDValue A64cc; - - // Integers are handled in a separate function because the combinations of - // immediates and tests can get hairy and we may want to fiddle things. - SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl); - - return DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other, - Chain, CmpOp, A64cc, DestBB); - } - - // Note that some LLVM floating-point CondCodes can't be lowered to a single - // conditional branch, hence FPCCToA64CC can set a second test, where either - // passing is sufficient. - A64CC::CondCodes CondCode, Alternative = A64CC::Invalid; - CondCode = FPCCToA64CC(CC, Alternative); - SDValue A64cc = DAG.getConstant(CondCode, MVT::i32); - SDValue SetCC = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS, - DAG.getCondCode(CC)); - SDValue A64BR_CC = DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other, - Chain, SetCC, A64cc, DestBB); - - if (Alternative != A64CC::Invalid) { - A64cc = DAG.getConstant(Alternative, MVT::i32); - A64BR_CC = DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other, - A64BR_CC, SetCC, A64cc, DestBB); - - } - - return A64BR_CC; -} - -SDValue -AArch64TargetLowering::LowerF128ToCall(SDValue Op, SelectionDAG &DAG, - RTLIB::Libcall Call) const { - ArgListTy Args; - ArgListEntry Entry; - for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) { - EVT ArgVT = Op.getOperand(i).getValueType(); - Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); - Entry.Node = Op.getOperand(i); Entry.Ty = ArgTy; - Entry.isSExt = false; - Entry.isZExt = false; - Args.push_back(Entry); - } - SDValue Callee = DAG.getExternalSymbol(getLibcallName(Call), getPointerTy()); - - Type *RetTy = Op.getValueType().getTypeForEVT(*DAG.getContext()); - - // By default, the input chain to this libcall is the entry node of the - // function. If the libcall is going to be emitted as a tail call then - // isUsedByReturnOnly will change it to the right chain if the return - // node which is being folded has a non-entry input chain. - SDValue InChain = DAG.getEntryNode(); - - // isTailCall may be true since the callee does not reference caller stack - // frame. Check if it's in the right position. 
- SDValue TCChain = InChain; - bool isTailCall = isInTailCallPosition(DAG, Op.getNode(), TCChain); - if (isTailCall) - InChain = TCChain; - - TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(SDLoc(Op)).setChain(InChain) - .setCallee(getLibcallCallingConv(Call), RetTy, Callee, &Args, 0) - .setTailCall(isTailCall); - - std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI); - - if (!CallInfo.second.getNode()) - // It's a tailcall, return the chain (which is the DAG root). - return DAG.getRoot(); - - return CallInfo.first; -} - -SDValue -AArch64TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { - if (Op.getOperand(0).getValueType() != MVT::f128) { - // It's legal except when f128 is involved - return Op; - } - - RTLIB::Libcall LC; - LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType()); - - SDValue SrcVal = Op.getOperand(0); - return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1, - /*isSigned*/ false, SDLoc(Op)).first; -} - -SDValue -AArch64TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { - assert(Op.getValueType() == MVT::f128 && "Unexpected lowering"); - - RTLIB::Libcall LC; - LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType()); - - return LowerF128ToCall(Op, DAG, LC); -} - -static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG, - bool IsSigned) { - SDLoc dl(Op); - EVT VT = Op.getValueType(); - SDValue Vec = Op.getOperand(0); - EVT OpVT = Vec.getValueType(); - unsigned Opc = IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT; - - if (VT.getVectorNumElements() == 1) { - assert(OpVT == MVT::v1f64 && "Unexpected vector type!"); - if (VT.getSizeInBits() == OpVT.getSizeInBits()) - return Op; - return DAG.UnrollVectorOp(Op.getNode()); - } - - if (VT.getSizeInBits() > OpVT.getSizeInBits()) { - assert(Vec.getValueType() == MVT::v2f32 && VT == MVT::v2i64 && - "Unexpected vector type!"); - Vec = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v2f64, Vec); - return DAG.getNode(Opc, dl, VT, Vec); - } else if (VT.getSizeInBits() < OpVT.getSizeInBits()) { - EVT CastVT = EVT::getIntegerVT(*DAG.getContext(), - OpVT.getVectorElementType().getSizeInBits()); - CastVT = - EVT::getVectorVT(*DAG.getContext(), CastVT, VT.getVectorNumElements()); - Vec = DAG.getNode(Opc, dl, CastVT, Vec); - return DAG.getNode(ISD::TRUNCATE, dl, VT, Vec); - } - return DAG.getNode(Opc, dl, VT, Vec); -} - -static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { - // We custom lower concat_vectors with 4, 8, or 16 operands that are all the - // same operand and of type v1* using the DUP instruction. - unsigned NumOps = Op->getNumOperands(); - if (NumOps == 2) { - assert(Op.getValueType().getSizeInBits() == 128 && "unexpected concat"); - return Op; - } - - if (NumOps != 4 && NumOps != 8 && NumOps != 16) - return SDValue(); - - // Must be a single value for VDUP. - SDValue Op0 = Op.getOperand(0); - for (unsigned i = 1; i < NumOps; ++i) { - SDValue OpN = Op.getOperand(i); - if (Op0 != OpN) - return SDValue(); - } - - // Verify the value type. - EVT EltVT = Op0.getValueType(); - switch (NumOps) { - default: llvm_unreachable("Unexpected number of operands"); - case 4: - if (EltVT != MVT::v1i16 && EltVT != MVT::v1i32) - return SDValue(); - break; - case 8: - if (EltVT != MVT::v1i8 && EltVT != MVT::v1i16) - return SDValue(); - break; - case 16: - if (EltVT != MVT::v1i8) - return SDValue(); - break; - } - - SDLoc DL(Op); - EVT VT = Op.getValueType(); - // VDUP produces better code for constants. 
- if (Op0->getOpcode() == ISD::BUILD_VECTOR) - return DAG.getNode(AArch64ISD::NEON_VDUP, DL, VT, Op0->getOperand(0)); - return DAG.getNode(AArch64ISD::NEON_VDUPLANE, DL, VT, Op0, - DAG.getConstant(0, MVT::i64)); -} - -SDValue -AArch64TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, - bool IsSigned) const { - if (Op.getValueType().isVector()) - return LowerVectorFP_TO_INT(Op, DAG, IsSigned); - if (Op.getOperand(0).getValueType() != MVT::f128) { - // It's legal except when f128 is involved - return Op; - } - - RTLIB::Libcall LC; - if (IsSigned) - LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(), Op.getValueType()); - else - LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType()); - - return LowerF128ToCall(Op, DAG, LC); -} - -SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{ - MachineFunction &MF = DAG.getMachineFunction(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - MFI->setReturnAddressIsTaken(true); - - if (verifyReturnAddressArgumentIsConstant(Op, DAG)) - return SDValue(); - - EVT VT = Op.getValueType(); - SDLoc dl(Op); - unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); - if (Depth) { - SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); - SDValue Offset = DAG.getConstant(8, MVT::i64); - return DAG.getLoad(VT, dl, DAG.getEntryNode(), - DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset), - MachinePointerInfo(), false, false, false, 0); - } - - // Return X30, which contains the return address. Mark it an implicit live-in. - unsigned Reg = MF.addLiveIn(AArch64::X30, getRegClassFor(MVT::i64)); - return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, MVT::i64); -} - - -SDValue AArch64TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) - const { - MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); - MFI->setFrameAddressIsTaken(true); - - EVT VT = Op.getValueType(); - SDLoc dl(Op); - unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); - unsigned FrameReg = AArch64::X29; - SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); - while (Depth--) - FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, - MachinePointerInfo(), - false, false, false, 0); - return FrameAddr; -} - -// FIXME? Maybe this could be a TableGen attribute on some registers and -// this table could be generated automatically from RegInfo. 
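The getRegisterByName routine that follows recognises only "sp"; it services the read_register/write_register intrinsics, which is the path Clang typically takes for GNU global named-register variables. The sketch below is hypothetical and assumes that usual lowering.

    // Hypothetical use of the GNU named-register extension; reads of current_sp
    // are assumed to reach getRegisterByName via llvm.read_register.
    register unsigned long current_sp __asm__("sp");
    unsigned long read_stack_pointer() { return current_sp; }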
-unsigned AArch64TargetLowering::getRegisterByName(const char* RegName, - EVT VT) const { - unsigned Reg = StringSwitch<unsigned>(RegName) - .Case("sp", AArch64::XSP) - .Default(0); - if (Reg) - return Reg; - report_fatal_error("Invalid register name global variable"); -} - -SDValue -AArch64TargetLowering::LowerGlobalAddressELFLarge(SDValue Op, - SelectionDAG &DAG) const { - assert(getTargetMachine().getCodeModel() == CodeModel::Large); - assert(getTargetMachine().getRelocationModel() == Reloc::Static); - - EVT PtrVT = getPointerTy(); - SDLoc dl(Op); - const GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op); - const GlobalValue *GV = GN->getGlobal(); - - SDValue GlobalAddr = DAG.getNode( - AArch64ISD::WrapperLarge, dl, PtrVT, - DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G3), - DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G2_NC), - DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G1_NC), - DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G0_NC)); - - if (GN->getOffset() != 0) - return DAG.getNode(ISD::ADD, dl, PtrVT, GlobalAddr, - DAG.getConstant(GN->getOffset(), PtrVT)); - - return GlobalAddr; -} - -SDValue -AArch64TargetLowering::LowerGlobalAddressELFSmall(SDValue Op, - SelectionDAG &DAG) const { - assert(getTargetMachine().getCodeModel() == CodeModel::Small); - - EVT PtrVT = getPointerTy(); - SDLoc dl(Op); - const GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op); - const GlobalValue *GV = GN->getGlobal(); - unsigned Alignment = GV->getAlignment(); - Reloc::Model RelocM = getTargetMachine().getRelocationModel(); - if (GV->isWeakForLinker() && GV->isDeclaration() && RelocM == Reloc::Static) { - // Weak undefined symbols can't use ADRP/ADD pair since they should evaluate - // to zero when they remain undefined. In PIC mode the GOT can take care of - // this, but in absolute mode we use a constant pool load. - SDValue PoolAddr; - PoolAddr = DAG.getNode(AArch64ISD::WrapperSmall, dl, PtrVT, - DAG.getTargetConstantPool(GV, PtrVT, 0, 0, - AArch64II::MO_NO_FLAG), - DAG.getTargetConstantPool(GV, PtrVT, 0, 0, - AArch64II::MO_LO12), - DAG.getConstant(8, MVT::i32)); - SDValue GlobalAddr = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), PoolAddr, - MachinePointerInfo::getConstantPool(), - /*isVolatile=*/ false, - /*isNonTemporal=*/ true, - /*isInvariant=*/ true, 8); - if (GN->getOffset() != 0) - return DAG.getNode(ISD::ADD, dl, PtrVT, GlobalAddr, - DAG.getConstant(GN->getOffset(), PtrVT)); - - return GlobalAddr; - } - - if (Alignment == 0) { - const PointerType *GVPtrTy = cast<PointerType>(GV->getType()); - if (GVPtrTy->getElementType()->isSized()) { - Alignment - = getDataLayout()->getABITypeAlignment(GVPtrTy->getElementType()); - } else { - // Be conservative if we can't guess, not that it really matters: - // functions and labels aren't valid for loads, and the methods used to - // actually calculate an address work with any alignment. - Alignment = 1; - } - } - - unsigned char HiFixup, LoFixup; - bool UseGOT = getSubtarget()->GVIsIndirectSymbol(GV, RelocM); - - if (UseGOT) { - HiFixup = AArch64II::MO_GOT; - LoFixup = AArch64II::MO_GOT_LO12; - Alignment = 8; - } else { - HiFixup = AArch64II::MO_NO_FLAG; - LoFixup = AArch64II::MO_LO12; - } - - // AArch64's small model demands the following sequence: - // ADRP x0, somewhere - // ADD x0, x0, #:lo12:somewhere ; (or LDR directly). 
- SDValue GlobalRef = DAG.getNode(AArch64ISD::WrapperSmall, dl, PtrVT, - DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, - HiFixup), - DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, - LoFixup), - DAG.getConstant(Alignment, MVT::i32)); - - if (UseGOT) { - GlobalRef = DAG.getNode(AArch64ISD::GOTLoad, dl, PtrVT, DAG.getEntryNode(), - GlobalRef); - } - - if (GN->getOffset() != 0) - return DAG.getNode(ISD::ADD, dl, PtrVT, GlobalRef, - DAG.getConstant(GN->getOffset(), PtrVT)); - - return GlobalRef; -} - -SDValue -AArch64TargetLowering::LowerGlobalAddressELF(SDValue Op, - SelectionDAG &DAG) const { - // TableGen doesn't have easy access to the CodeModel or RelocationModel, so - // we make those distinctions here. - - switch (getTargetMachine().getCodeModel()) { - case CodeModel::Small: - return LowerGlobalAddressELFSmall(Op, DAG); - case CodeModel::Large: - return LowerGlobalAddressELFLarge(Op, DAG); - default: - llvm_unreachable("Only small and large code models supported now"); - } -} - -SDValue -AArch64TargetLowering::LowerConstantPool(SDValue Op, - SelectionDAG &DAG) const { - SDLoc DL(Op); - EVT PtrVT = getPointerTy(); - ConstantPoolSDNode *CN = cast<ConstantPoolSDNode>(Op); - const Constant *C = CN->getConstVal(); - - switch(getTargetMachine().getCodeModel()) { - case CodeModel::Small: - // The most efficient code is PC-relative anyway for the small memory model, - // so we don't need to worry about relocation model. - return DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT, - DAG.getTargetConstantPool(C, PtrVT, 0, 0, - AArch64II::MO_NO_FLAG), - DAG.getTargetConstantPool(C, PtrVT, 0, 0, - AArch64II::MO_LO12), - DAG.getConstant(CN->getAlignment(), MVT::i32)); - case CodeModel::Large: - return DAG.getNode( - AArch64ISD::WrapperLarge, DL, PtrVT, - DAG.getTargetConstantPool(C, PtrVT, 0, 0, AArch64II::MO_ABS_G3), - DAG.getTargetConstantPool(C, PtrVT, 0, 0, AArch64II::MO_ABS_G2_NC), - DAG.getTargetConstantPool(C, PtrVT, 0, 0, AArch64II::MO_ABS_G1_NC), - DAG.getTargetConstantPool(C, PtrVT, 0, 0, AArch64II::MO_ABS_G0_NC)); - default: - llvm_unreachable("Only small and large code models supported now"); - } -} - -SDValue AArch64TargetLowering::LowerTLSDescCall(SDValue SymAddr, - SDValue DescAddr, - SDLoc DL, - SelectionDAG &DAG) const { - EVT PtrVT = getPointerTy(); - - // The function we need to call is simply the first entry in the GOT for this - // descriptor, load it in preparation. - SDValue Func, Chain; - Func = DAG.getNode(AArch64ISD::GOTLoad, DL, PtrVT, DAG.getEntryNode(), - DescAddr); - - // The function takes only one argument: the address of the descriptor itself - // in X0. - SDValue Glue; - Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, AArch64::X0, DescAddr, Glue); - Glue = Chain.getValue(1); - - // Finally, there's a special calling-convention which means that the lookup - // must preserve all registers (except X0, obviously). 
- const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); - const AArch64RegisterInfo *A64RI - = static_cast<const AArch64RegisterInfo *>(TRI); - const uint32_t *Mask = A64RI->getTLSDescCallPreservedMask(); - - // We're now ready to populate the argument list, as with a normal call: - std::vector<SDValue> Ops; - Ops.push_back(Chain); - Ops.push_back(Func); - Ops.push_back(SymAddr); - Ops.push_back(DAG.getRegister(AArch64::X0, PtrVT)); - Ops.push_back(DAG.getRegisterMask(Mask)); - Ops.push_back(Glue); - - SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); - Chain = DAG.getNode(AArch64ISD::TLSDESCCALL, DL, NodeTys, Ops); - Glue = Chain.getValue(1); - - // After the call, the offset from TPIDR_EL0 is in X0, copy it out and pass it - // back to the generic handling code. - return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue); -} - -SDValue -AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op, - SelectionDAG &DAG) const { - assert(getSubtarget()->isTargetELF() && - "TLS not implemented for non-ELF targets"); - assert(getTargetMachine().getCodeModel() == CodeModel::Small - && "TLS only supported in small memory model"); - const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); - - TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal()); - - SDValue TPOff; - EVT PtrVT = getPointerTy(); - SDLoc DL(Op); - const GlobalValue *GV = GA->getGlobal(); - - SDValue ThreadBase = DAG.getNode(AArch64ISD::THREAD_POINTER, DL, PtrVT); - - if (Model == TLSModel::InitialExec) { - TPOff = DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT, - DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, - AArch64II::MO_GOTTPREL), - DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, - AArch64II::MO_GOTTPREL_LO12), - DAG.getConstant(8, MVT::i32)); - TPOff = DAG.getNode(AArch64ISD::GOTLoad, DL, PtrVT, DAG.getEntryNode(), - TPOff); - } else if (Model == TLSModel::LocalExec) { - SDValue HiVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0, - AArch64II::MO_TPREL_G1); - SDValue LoVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0, - AArch64II::MO_TPREL_G0_NC); - - TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZxii, DL, PtrVT, HiVar, - DAG.getTargetConstant(1, MVT::i32)), 0); - TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKxii, DL, PtrVT, - TPOff, LoVar, - DAG.getTargetConstant(0, MVT::i32)), 0); - } else if (Model == TLSModel::GeneralDynamic) { - // Accesses used in this sequence go via the TLS descriptor which lives in - // the GOT. Prepare an address we can use to handle this. - SDValue HiDesc = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, - AArch64II::MO_TLSDESC); - SDValue LoDesc = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, - AArch64II::MO_TLSDESC_LO12); - SDValue DescAddr = DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT, - HiDesc, LoDesc, - DAG.getConstant(8, MVT::i32)); - SDValue SymAddr = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0); - - TPOff = LowerTLSDescCall(SymAddr, DescAddr, DL, DAG); - } else if (Model == TLSModel::LocalDynamic) { - // Local-dynamic accesses proceed in two phases. A general-dynamic TLS - // descriptor call against the special symbol _TLS_MODULE_BASE_ to calculate - // the beginning of the module's TLS region, followed by a DTPREL offset - // calculation. - - // These accesses will need deduplicating if there's more than one. 
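For orientation, a sketch of the kind of source these TLS paths service is below; it is illustrative only, and which of the four models is chosen depends on the relocation model and the variable's visibility. A plain executable can use the local-exec MOVZ/MOVK TPREL sequence built above, while position-independent code typically goes through the TLS-descriptor (general- or local-dynamic) paths.

    // Hypothetical TLS variable; the access model chosen for it decides which
    // of the branches above performs the lowering.
    thread_local int per_thread_counter;
    int next_id() { return ++per_thread_counter; }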
- AArch64MachineFunctionInfo* MFI = DAG.getMachineFunction() - .getInfo<AArch64MachineFunctionInfo>(); - MFI->incNumLocalDynamicTLSAccesses(); - - - // Get the location of _TLS_MODULE_BASE_: - SDValue HiDesc = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT, - AArch64II::MO_TLSDESC); - SDValue LoDesc = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT, - AArch64II::MO_TLSDESC_LO12); - SDValue DescAddr = DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT, - HiDesc, LoDesc, - DAG.getConstant(8, MVT::i32)); - SDValue SymAddr = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT); - - ThreadBase = LowerTLSDescCall(SymAddr, DescAddr, DL, DAG); - - // Get the variable's offset from _TLS_MODULE_BASE_ - SDValue HiVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0, - AArch64II::MO_DTPREL_G1); - SDValue LoVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0, - AArch64II::MO_DTPREL_G0_NC); - - TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZxii, DL, PtrVT, HiVar, - DAG.getTargetConstant(0, MVT::i32)), 0); - TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKxii, DL, PtrVT, - TPOff, LoVar, - DAG.getTargetConstant(0, MVT::i32)), 0); - } else - llvm_unreachable("Unsupported TLS access model"); - - - return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff); -} - -static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG, - bool IsSigned) { - SDLoc dl(Op); - EVT VT = Op.getValueType(); - SDValue Vec = Op.getOperand(0); - unsigned Opc = IsSigned ? ISD::SINT_TO_FP : ISD::UINT_TO_FP; - - if (VT.getVectorNumElements() == 1) { - assert(VT == MVT::v1f64 && "Unexpected vector type!"); - if (VT.getSizeInBits() == Vec.getValueSizeInBits()) - return Op; - return DAG.UnrollVectorOp(Op.getNode()); - } - - if (VT.getSizeInBits() < Vec.getValueSizeInBits()) { - assert(Vec.getValueType() == MVT::v2i64 && VT == MVT::v2f32 && - "Unexpected vector type!"); - Vec = DAG.getNode(Opc, dl, MVT::v2f64, Vec); - return DAG.getNode(ISD::FP_ROUND, dl, VT, Vec, DAG.getIntPtrConstant(0)); - } else if (VT.getSizeInBits() > Vec.getValueSizeInBits()) { - unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; - EVT CastVT = EVT::getIntegerVT(*DAG.getContext(), - VT.getVectorElementType().getSizeInBits()); - CastVT = - EVT::getVectorVT(*DAG.getContext(), CastVT, VT.getVectorNumElements()); - Vec = DAG.getNode(CastOpc, dl, CastVT, Vec); - } - - return DAG.getNode(Opc, dl, VT, Vec); -} - -SDValue -AArch64TargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, - bool IsSigned) const { - if (Op.getValueType().isVector()) - return LowerVectorINT_TO_FP(Op, DAG, IsSigned); - if (Op.getValueType() != MVT::f128) { - // Legal for everything except f128. - return Op; - } - - RTLIB::Libcall LC; - if (IsSigned) - LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType()); - else - LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType()); - - return LowerF128ToCall(Op, DAG, LC); -} - - -SDValue -AArch64TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { - JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); - SDLoc dl(JT); - EVT PtrVT = getPointerTy(); - - // When compiling PIC, jump tables get put in the code section so a static - // relocation-style is acceptable for both cases. 
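LowerVectorINT_TO_FP above applies two element-width fixups; a scalar model of the same rules (assuming per-lane behaviour, for illustration only):

#include <cstdint>

// v2i64 -> v2f32: convert through f64 first, then FP_ROUND down to f32.
float i64LaneToF32(int64_t X) {
  double Wide = static_cast<double>(X);
  return static_cast<float>(Wide);
}

// Integer lanes narrower than the FP element are sign/zero-extended first,
// then converted.
double i16LaneToF64(int16_t X, bool IsSigned) {
  int64_t Ext = IsSigned ? int64_t(X) : int64_t(uint16_t(X));
  return static_cast<double>(Ext);
}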
- switch (getTargetMachine().getCodeModel()) { - case CodeModel::Small: - return DAG.getNode(AArch64ISD::WrapperSmall, dl, PtrVT, - DAG.getTargetJumpTable(JT->getIndex(), PtrVT), - DAG.getTargetJumpTable(JT->getIndex(), PtrVT, - AArch64II::MO_LO12), - DAG.getConstant(1, MVT::i32)); - case CodeModel::Large: - return DAG.getNode( - AArch64ISD::WrapperLarge, dl, PtrVT, - DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G3), - DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G2_NC), - DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G1_NC), - DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G0_NC)); - default: - llvm_unreachable("Only small and large code models supported now"); - } -} - -// (SELECT testbit, iftrue, iffalse) -SDValue -AArch64TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { - SDLoc dl(Op); - SDValue TheBit = Op.getOperand(0); - SDValue IfTrue = Op.getOperand(1); - SDValue IfFalse = Op.getOperand(2); - - // AArch64 BooleanContents is the default UndefinedBooleanContent, which means - // that as the consumer we are responsible for ignoring rubbish in higher - // bits. - TheBit = DAG.getNode(ISD::AND, dl, MVT::i32, TheBit, - DAG.getConstant(1, MVT::i32)); - SDValue A64CMP = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, TheBit, - DAG.getConstant(0, TheBit.getValueType()), - DAG.getCondCode(ISD::SETNE)); - - return DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(), - A64CMP, IfTrue, IfFalse, - DAG.getConstant(A64CC::NE, MVT::i32)); -} - -static SDValue LowerVectorSETCC(SDValue Op, SelectionDAG &DAG) { - SDLoc DL(Op); - SDValue LHS = Op.getOperand(0); - SDValue RHS = Op.getOperand(1); - ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); - EVT VT = Op.getValueType(); - bool Invert = false; - SDValue Op0, Op1; - unsigned Opcode; - - if (LHS.getValueType().isInteger()) { - - // Attempt to use Vector Integer Compare Mask Test instruction. - // TST = icmp ne (and (op0, op1), zero). - if (CC == ISD::SETNE) { - if (((LHS.getOpcode() == ISD::AND) && - ISD::isBuildVectorAllZeros(RHS.getNode())) || - ((RHS.getOpcode() == ISD::AND) && - ISD::isBuildVectorAllZeros(LHS.getNode()))) { - - SDValue AndOp = (LHS.getOpcode() == ISD::AND) ? LHS : RHS; - SDValue NewLHS = DAG.getNode(ISD::BITCAST, DL, VT, AndOp.getOperand(0)); - SDValue NewRHS = DAG.getNode(ISD::BITCAST, DL, VT, AndOp.getOperand(1)); - return DAG.getNode(AArch64ISD::NEON_TST, DL, VT, NewLHS, NewRHS); - } - } - - // Attempt to use Vector Integer Compare Mask against Zero instr (Signed). - // Note: Compare against Zero does not support unsigned predicates. - if ((ISD::isBuildVectorAllZeros(RHS.getNode()) || - ISD::isBuildVectorAllZeros(LHS.getNode())) && - !isUnsignedIntSetCC(CC)) { - - // If LHS is the zero value, swap operands and CondCode. - if (ISD::isBuildVectorAllZeros(LHS.getNode())) { - CC = getSetCCSwappedOperands(CC); - Op0 = RHS; - } else - Op0 = LHS; - - // Ensure valid CondCode for Compare Mask against Zero instruction: - // EQ, GE, GT, LE, LT. - if (ISD::SETNE == CC) { - Invert = true; - CC = ISD::SETEQ; - } - - // Using constant type to differentiate integer and FP compares with zero. - Op1 = DAG.getConstant(0, MVT::i32); - Opcode = AArch64ISD::NEON_CMPZ; - - } else { - // Attempt to use Vector Integer Compare Mask instr (Signed/Unsigned). - // Ensure valid CondCode for Compare Mask instr: EQ, GE, GT, UGE, UGT. 
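The "compare mask test" pattern recognised above, as per-lane arithmetic (a sketch, not DAG code): icmp ne (and a, b), 0 yields an all-ones lane whenever the operands share a set bit.

#include <cstdint>

uint32_t cmtstLane(uint32_t A, uint32_t B) {
  return (A & B) != 0 ? 0xffffffffu : 0u; // CMTST sets every bit of a matching lane
}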
- bool Swap = false; - switch (CC) { - default: - llvm_unreachable("Illegal integer comparison."); - case ISD::SETEQ: - case ISD::SETGT: - case ISD::SETGE: - case ISD::SETUGT: - case ISD::SETUGE: - break; - case ISD::SETNE: - Invert = true; - CC = ISD::SETEQ; - break; - case ISD::SETULT: - case ISD::SETULE: - case ISD::SETLT: - case ISD::SETLE: - Swap = true; - CC = getSetCCSwappedOperands(CC); - } - - if (Swap) - std::swap(LHS, RHS); - - Opcode = AArch64ISD::NEON_CMP; - Op0 = LHS; - Op1 = RHS; - } - - // Generate Compare Mask instr or Compare Mask against Zero instr. - SDValue NeonCmp = - DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(CC)); - - if (Invert) - NeonCmp = DAG.getNOT(DL, NeonCmp, VT); - - return NeonCmp; - } - - // Now handle Floating Point cases. - // Attempt to use Vector Floating Point Compare Mask against Zero instruction. - if (ISD::isBuildVectorAllZeros(RHS.getNode()) || - ISD::isBuildVectorAllZeros(LHS.getNode())) { - - // If LHS is the zero value, swap operands and CondCode. - if (ISD::isBuildVectorAllZeros(LHS.getNode())) { - CC = getSetCCSwappedOperands(CC); - Op0 = RHS; - } else - Op0 = LHS; - - // Using constant type to differentiate integer and FP compares with zero. - Op1 = DAG.getConstantFP(0, MVT::f32); - Opcode = AArch64ISD::NEON_CMPZ; - } else { - // Attempt to use Vector Floating Point Compare Mask instruction. - Op0 = LHS; - Op1 = RHS; - Opcode = AArch64ISD::NEON_CMP; - } - - SDValue NeonCmpAlt; - // Some register compares have to be implemented with swapped CC and operands, - // e.g.: OLT implemented as OGT with swapped operands. - bool SwapIfRegArgs = false; - - // Ensure valid CondCode for FP Compare Mask against Zero instruction: - // EQ, GE, GT, LE, LT. - // And ensure valid CondCode for FP Compare Mask instruction: EQ, GE, GT. - switch (CC) { - default: - llvm_unreachable("Illegal FP comparison"); - case ISD::SETUNE: - case ISD::SETNE: - Invert = true; // Fallthrough - case ISD::SETOEQ: - case ISD::SETEQ: - CC = ISD::SETEQ; - break; - case ISD::SETOLT: - case ISD::SETLT: - CC = ISD::SETLT; - SwapIfRegArgs = true; - break; - case ISD::SETOGT: - case ISD::SETGT: - CC = ISD::SETGT; - break; - case ISD::SETOLE: - case ISD::SETLE: - CC = ISD::SETLE; - SwapIfRegArgs = true; - break; - case ISD::SETOGE: - case ISD::SETGE: - CC = ISD::SETGE; - break; - case ISD::SETUGE: - Invert = true; - CC = ISD::SETLT; - SwapIfRegArgs = true; - break; - case ISD::SETULE: - Invert = true; - CC = ISD::SETGT; - break; - case ISD::SETUGT: - Invert = true; - CC = ISD::SETLE; - SwapIfRegArgs = true; - break; - case ISD::SETULT: - Invert = true; - CC = ISD::SETGE; - break; - case ISD::SETUEQ: - Invert = true; // Fallthrough - case ISD::SETONE: - // Expand this to (OGT |OLT). - NeonCmpAlt = - DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(ISD::SETGT)); - CC = ISD::SETLT; - SwapIfRegArgs = true; - break; - case ISD::SETUO: - Invert = true; // Fallthrough - case ISD::SETO: - // Expand this to (OGE | OLT). 
- NeonCmpAlt = - DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(ISD::SETGE)); - CC = ISD::SETLT; - SwapIfRegArgs = true; - break; - } - - if (Opcode == AArch64ISD::NEON_CMP && SwapIfRegArgs) { - CC = getSetCCSwappedOperands(CC); - std::swap(Op0, Op1); - } - - // Generate FP Compare Mask instr or FP Compare Mask against Zero instr - SDValue NeonCmp = DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(CC)); - - if (NeonCmpAlt.getNode()) - NeonCmp = DAG.getNode(ISD::OR, DL, VT, NeonCmp, NeonCmpAlt); - - if (Invert) - NeonCmp = DAG.getNOT(DL, NeonCmp, VT); - - return NeonCmp; -} - -// (SETCC lhs, rhs, condcode) -SDValue -AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { - SDLoc dl(Op); - SDValue LHS = Op.getOperand(0); - SDValue RHS = Op.getOperand(1); - ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); - EVT VT = Op.getValueType(); - - if (VT.isVector()) - return LowerVectorSETCC(Op, DAG); - - if (LHS.getValueType() == MVT::f128) { - // f128 comparisons will be lowered to libcalls giving a valid LHS and RHS - // for the rest of the function (some i32 or i64 values). - softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl); - - // If softenSetCCOperands returned a scalar, use it. - if (!RHS.getNode()) { - assert(LHS.getValueType() == Op.getValueType() && - "Unexpected setcc expansion!"); - return LHS; - } - } - - if (LHS.getValueType().isInteger()) { - SDValue A64cc; - - // Integers are handled in a separate function because the combinations of - // immediates and tests can get hairy and we may want to fiddle things. - SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl); - - return DAG.getNode(AArch64ISD::SELECT_CC, dl, VT, - CmpOp, DAG.getConstant(1, VT), DAG.getConstant(0, VT), - A64cc); - } - - // Note that some LLVM floating-point CondCodes can't be lowered to a single - // conditional branch, hence FPCCToA64CC can set a second test, where either - // passing is sufficient. - A64CC::CondCodes CondCode, Alternative = A64CC::Invalid; - CondCode = FPCCToA64CC(CC, Alternative); - SDValue A64cc = DAG.getConstant(CondCode, MVT::i32); - SDValue CmpOp = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS, - DAG.getCondCode(CC)); - SDValue A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT, - CmpOp, DAG.getConstant(1, VT), - DAG.getConstant(0, VT), A64cc); - - if (Alternative != A64CC::Invalid) { - A64cc = DAG.getConstant(Alternative, MVT::i32); - A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT, CmpOp, - DAG.getConstant(1, VT), A64SELECT_CC, A64cc); - } - - return A64SELECT_CC; -} - -static SDValue LowerVectorSELECT_CC(SDValue Op, SelectionDAG &DAG) { - SDLoc dl(Op); - SDValue LHS = Op.getOperand(0); - SDValue RHS = Op.getOperand(1); - SDValue IfTrue = Op.getOperand(2); - SDValue IfFalse = Op.getOperand(3); - EVT IfTrueVT = IfTrue.getValueType(); - EVT CondVT = IfTrueVT.changeVectorElementTypeToInteger(); - ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); - - // If LHS & RHS are floating point and IfTrue & IfFalse are vectors, we will - // use NEON compare. - if ((LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64)) { - EVT EltVT = LHS.getValueType(); - unsigned EltNum = 128 / EltVT.getSizeInBits(); - EVT VT = EVT::getVectorVT(*DAG.getContext(), EltVT, EltNum); - unsigned SubConstant = - (LHS.getValueType() == MVT::f32) ? AArch64::sub_32 :AArch64::sub_64; - EVT CEltT = (LHS.getValueType() == MVT::f32) ? 
MVT::i32 : MVT::i64; - EVT CVT = EVT::getVectorVT(*DAG.getContext(), CEltT, EltNum); - - LHS - = SDValue(DAG.getMachineNode(TargetOpcode::SUBREG_TO_REG, dl, - VT, DAG.getTargetConstant(0, MVT::i32), LHS, - DAG.getTargetConstant(SubConstant, MVT::i32)), 0); - RHS - = SDValue(DAG.getMachineNode(TargetOpcode::SUBREG_TO_REG, dl, - VT, DAG.getTargetConstant(0, MVT::i32), RHS, - DAG.getTargetConstant(SubConstant, MVT::i32)), 0); - - SDValue VSetCC = DAG.getSetCC(dl, CVT, LHS, RHS, CC); - SDValue ResCC = LowerVectorSETCC(VSetCC, DAG); - if (CEltT.getSizeInBits() < IfTrueVT.getSizeInBits()) { - EVT DUPVT = - EVT::getVectorVT(*DAG.getContext(), CEltT, - IfTrueVT.getSizeInBits() / CEltT.getSizeInBits()); - ResCC = DAG.getNode(AArch64ISD::NEON_VDUPLANE, dl, DUPVT, ResCC, - DAG.getConstant(0, MVT::i64, false)); - - ResCC = DAG.getNode(ISD::BITCAST, dl, CondVT, ResCC); - } else { - // FIXME: If IfTrue & IfFalse hold v1i8, v1i16 or v1i32, this function - // can't handle them and will hit this assert. - assert(CEltT.getSizeInBits() == IfTrueVT.getSizeInBits() && - "Vector of IfTrue & IfFalse is too small."); - - unsigned ExEltNum = - EltNum * IfTrueVT.getSizeInBits() / ResCC.getValueSizeInBits(); - EVT ExVT = EVT::getVectorVT(*DAG.getContext(), CEltT, ExEltNum); - ResCC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ExVT, ResCC, - DAG.getConstant(0, MVT::i64, false)); - ResCC = DAG.getNode(ISD::BITCAST, dl, CondVT, ResCC); - } - SDValue VSelect = DAG.getNode(ISD::VSELECT, dl, IfTrue.getValueType(), - ResCC, IfTrue, IfFalse); - return VSelect; - } - - // Here we handle the case that LHS & RHS are integer and IfTrue & IfFalse are - // vectors. - A64CC::CondCodes CondCode, Alternative = A64CC::Invalid; - CondCode = FPCCToA64CC(CC, Alternative); - SDValue A64cc = DAG.getConstant(CondCode, MVT::i32); - SDValue SetCC = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS, - DAG.getCondCode(CC)); - EVT SEVT = MVT::i32; - if (IfTrue.getValueType().getVectorElementType().getSizeInBits() > 32) - SEVT = MVT::i64; - SDValue AllOne = DAG.getConstant(-1, SEVT); - SDValue AllZero = DAG.getConstant(0, SEVT); - SDValue A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, SEVT, SetCC, - AllOne, AllZero, A64cc); - - if (Alternative != A64CC::Invalid) { - A64cc = DAG.getConstant(Alternative, MVT::i32); - A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(), - SetCC, AllOne, A64SELECT_CC, A64cc); - } - SDValue VDup; - if (IfTrue.getValueType().getVectorNumElements() == 1) - VDup = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, CondVT, A64SELECT_CC); - else - VDup = DAG.getNode(AArch64ISD::NEON_VDUP, dl, CondVT, A64SELECT_CC); - SDValue VSelect = DAG.getNode(ISD::VSELECT, dl, IfTrue.getValueType(), - VDup, IfTrue, IfFalse); - return VSelect; -} - -// (SELECT_CC lhs, rhs, iftrue, iffalse, condcode) -SDValue -AArch64TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { - SDLoc dl(Op); - SDValue LHS = Op.getOperand(0); - SDValue RHS = Op.getOperand(1); - SDValue IfTrue = Op.getOperand(2); - SDValue IfFalse = Op.getOperand(3); - ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); - - if (IfTrue.getValueType().isVector()) - return LowerVectorSELECT_CC(Op, DAG); - - if (LHS.getValueType() == MVT::f128) { - // f128 comparisons are lowered to libcalls, but slot in nicely here - // afterwards. - softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl); - - // If softenSetCCOperands returned a scalar, we need to compare the result - // against zero to select between true and false values. 
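Some LLVM FP predicates need the "Alternative" condition above because no single AArch64 condition covers them; a scalar illustration of two common cases (IEEE semantics assumed):

// SETONE ("ordered and not equal") is realised as greater-than OR less-than;
// both orderings exclude NaN, so the pair covers exactly "one".
bool fcmpONE(double A, double B) { return (A > B) || (A < B); }

// SETULT is the inversion of the ordered greater-or-equal compare, so a NaN
// operand makes the unordered predicate come out true.
bool fcmpULT(double A, double B) { return !(A >= B); }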
- if (!RHS.getNode()) { - RHS = DAG.getConstant(0, LHS.getValueType()); - CC = ISD::SETNE; - } - } - - if (LHS.getValueType().isInteger()) { - SDValue A64cc; - - // Integers are handled in a separate function because the combinations of - // immediates and tests can get hairy and we may want to fiddle things. - SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl); - - return DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(), CmpOp, - IfTrue, IfFalse, A64cc); - } - - // Note that some LLVM floating-point CondCodes can't be lowered to a single - // conditional branch, hence FPCCToA64CC can set a second test, where either - // passing is sufficient. - A64CC::CondCodes CondCode, Alternative = A64CC::Invalid; - CondCode = FPCCToA64CC(CC, Alternative); - SDValue A64cc = DAG.getConstant(CondCode, MVT::i32); - SDValue SetCC = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS, - DAG.getCondCode(CC)); - SDValue A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, - Op.getValueType(), - SetCC, IfTrue, IfFalse, A64cc); - - if (Alternative != A64CC::Invalid) { - A64cc = DAG.getConstant(Alternative, MVT::i32); - A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(), - SetCC, IfTrue, A64SELECT_CC, A64cc); - - } - - return A64SELECT_CC; -} - -SDValue -AArch64TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const { - const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue(); - const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue(); - - // We have to make sure we copy the entire structure: 8+8+8+4+4 = 32 bytes - // rather than just 8. - return DAG.getMemcpy(Op.getOperand(0), SDLoc(Op), - Op.getOperand(1), Op.getOperand(2), - DAG.getConstant(32, MVT::i32), 8, false, false, - MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV)); -} - -SDValue -AArch64TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { - // The layout of the va_list struct is specified in the AArch64 Procedure Call - // Standard, section B.3. 
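For reference, the AAPCS64 va_list layout that the 32-byte va_copy above and the stores in LowerVASTART below assume (the struct is illustrative and sized for an LP64 target; the commented names follow the ABI document):

struct AAPCS64VaList {
  void *Stack;  // __stack,   offset 0:  next stacked argument
  void *GRTop;  // __gr_top,  offset 8:  end of the saved general-register area
  void *VRTop;  // __vr_top,  offset 16: end of the saved FP/SIMD-register area
  int GROffs;   // __gr_offs, offset 24: negative offset of the next GPR argument
  int VROffs;   // __vr_offs, offset 28: negative offset of the next FP/SIMD argument
};
static_assert(sizeof(AAPCS64VaList) == 32, "matches the 32-byte copy in LowerVACOPY");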
- MachineFunction &MF = DAG.getMachineFunction(); - AArch64MachineFunctionInfo *FuncInfo - = MF.getInfo<AArch64MachineFunctionInfo>(); - SDLoc DL(Op); - - SDValue Chain = Op.getOperand(0); - SDValue VAList = Op.getOperand(1); - const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); - SmallVector<SDValue, 4> MemOps; - - // void *__stack at offset 0 - SDValue Stack = DAG.getFrameIndex(FuncInfo->getVariadicStackIdx(), - getPointerTy()); - MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList, - MachinePointerInfo(SV), false, false, 0)); - - // void *__gr_top at offset 8 - int GPRSize = FuncInfo->getVariadicGPRSize(); - if (GPRSize > 0) { - SDValue GRTop, GRTopAddr; - - GRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, - DAG.getConstant(8, getPointerTy())); - - GRTop = DAG.getFrameIndex(FuncInfo->getVariadicGPRIdx(), getPointerTy()); - GRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), GRTop, - DAG.getConstant(GPRSize, getPointerTy())); - - MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr, - MachinePointerInfo(SV, 8), - false, false, 0)); - } - - // void *__vr_top at offset 16 - int FPRSize = FuncInfo->getVariadicFPRSize(); - if (FPRSize > 0) { - SDValue VRTop, VRTopAddr; - VRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, - DAG.getConstant(16, getPointerTy())); - - VRTop = DAG.getFrameIndex(FuncInfo->getVariadicFPRIdx(), getPointerTy()); - VRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), VRTop, - DAG.getConstant(FPRSize, getPointerTy())); - - MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr, - MachinePointerInfo(SV, 16), - false, false, 0)); - } - - // int __gr_offs at offset 24 - SDValue GROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, - DAG.getConstant(24, getPointerTy())); - MemOps.push_back(DAG.getStore(Chain, DL, DAG.getConstant(-GPRSize, MVT::i32), - GROffsAddr, MachinePointerInfo(SV, 24), - false, false, 0)); - - // int __vr_offs at offset 28 - SDValue VROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, - DAG.getConstant(28, getPointerTy())); - MemOps.push_back(DAG.getStore(Chain, DL, DAG.getConstant(-FPRSize, MVT::i32), - VROffsAddr, MachinePointerInfo(SV, 28), - false, false, 0)); - - return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps); -} - -SDValue -AArch64TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { - switch (Op.getOpcode()) { - default: llvm_unreachable("Don't know how to custom lower this!"); - case ISD::FADD: return LowerF128ToCall(Op, DAG, RTLIB::ADD_F128); - case ISD::FSUB: return LowerF128ToCall(Op, DAG, RTLIB::SUB_F128); - case ISD::FMUL: return LowerF128ToCall(Op, DAG, RTLIB::MUL_F128); - case ISD::FDIV: return LowerF128ToCall(Op, DAG, RTLIB::DIV_F128); - case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, true); - case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG, false); - case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG, true); - case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG, false); - case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG); - case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG); - case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); - case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); - - case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG); - case ISD::SRL_PARTS: - case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG); - - case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); - case ISD::BRCOND: return LowerBRCOND(Op, DAG); - case ISD::BR_CC: return LowerBR_CC(Op, DAG); - case ISD::GlobalAddress: return 
LowerGlobalAddressELF(Op, DAG); - case ISD::ConstantPool: return LowerConstantPool(Op, DAG); - case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); - case ISD::JumpTable: return LowerJumpTable(Op, DAG); - case ISD::SELECT: return LowerSELECT(Op, DAG); - case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); - case ISD::SETCC: return LowerSETCC(Op, DAG); - case ISD::VACOPY: return LowerVACOPY(Op, DAG); - case ISD::VASTART: return LowerVASTART(Op, DAG); - case ISD::BUILD_VECTOR: - return LowerBUILD_VECTOR(Op, DAG, getSubtarget()); - case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); - case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); - } - - return SDValue(); -} - -/// Check if the specified splat value corresponds to a valid vector constant -/// for a Neon instruction with a "modified immediate" operand (e.g., MOVI). If -/// so, return the encoded 8-bit immediate and the OpCmode instruction fields -/// values. -static bool isNeonModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, - unsigned SplatBitSize, SelectionDAG &DAG, - bool is128Bits, NeonModImmType type, EVT &VT, - unsigned &Imm, unsigned &OpCmode) { - switch (SplatBitSize) { - default: - llvm_unreachable("unexpected size for isNeonModifiedImm"); - case 8: { - if (type != Neon_Mov_Imm) - return false; - assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big"); - // Neon movi per byte: Op=0, Cmode=1110. - OpCmode = 0xe; - Imm = SplatBits; - VT = is128Bits ? MVT::v16i8 : MVT::v8i8; - break; - } - case 16: { - // Neon move inst per halfword - VT = is128Bits ? MVT::v8i16 : MVT::v4i16; - if ((SplatBits & ~0xff) == 0) { - // Value = 0x00nn is 0x00nn LSL 0 - // movi: Op=0, Cmode=1000; mvni: Op=1, Cmode=1000 - // bic: Op=1, Cmode=1001; orr: Op=0, Cmode=1001 - // Op=x, Cmode=100y - Imm = SplatBits; - OpCmode = 0x8; - break; - } - if ((SplatBits & ~0xff00) == 0) { - // Value = 0xnn00 is 0x00nn LSL 8 - // movi: Op=0, Cmode=1010; mvni: Op=1, Cmode=1010 - // bic: Op=1, Cmode=1011; orr: Op=0, Cmode=1011 - // Op=x, Cmode=101x - Imm = SplatBits >> 8; - OpCmode = 0xa; - break; - } - // can't handle any other - return false; - } - - case 32: { - // First the LSL variants (MSL is unusable by some interested instructions). - - // Neon move instr per word, shift zeros - VT = is128Bits ? MVT::v4i32 : MVT::v2i32; - if ((SplatBits & ~0xff) == 0) { - // Value = 0x000000nn is 0x000000nn LSL 0 - // movi: Op=0, Cmode= 0000; mvni: Op=1, Cmode= 0000 - // bic: Op=1, Cmode= 0001; orr: Op=0, Cmode= 0001 - // Op=x, Cmode=000x - Imm = SplatBits; - OpCmode = 0; - break; - } - if ((SplatBits & ~0xff00) == 0) { - // Value = 0x0000nn00 is 0x000000nn LSL 8 - // movi: Op=0, Cmode= 0010; mvni: Op=1, Cmode= 0010 - // bic: Op=1, Cmode= 0011; orr : Op=0, Cmode= 0011 - // Op=x, Cmode=001x - Imm = SplatBits >> 8; - OpCmode = 0x2; - break; - } - if ((SplatBits & ~0xff0000) == 0) { - // Value = 0x00nn0000 is 0x000000nn LSL 16 - // movi: Op=0, Cmode= 0100; mvni: Op=1, Cmode= 0100 - // bic: Op=1, Cmode= 0101; orr: Op=0, Cmode= 0101 - // Op=x, Cmode=010x - Imm = SplatBits >> 16; - OpCmode = 0x4; - break; - } - if ((SplatBits & ~0xff000000) == 0) { - // Value = 0xnn000000 is 0x000000nn LSL 24 - // movi: Op=0, Cmode= 0110; mvni: Op=1, Cmode= 0110 - // bic: Op=1, Cmode= 0111; orr: Op=0, Cmode= 0111 - // Op=x, Cmode=011x - Imm = SplatBits >> 24; - OpCmode = 0x6; - break; - } - - // Now the MSL immediates. 
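A condensed model of the 32-bit LSL cases handled above (illustrative; the helper and struct names are not from the backend): a splat is a MOVI-style modified immediate when only one byte of the word is non-zero, and the byte position picks the Cmode.

#include <cstdint>
#include <optional>

struct ModImm { uint8_t Imm; uint8_t OpCmode; };

std::optional<ModImm> neonMovi32LSL(uint32_t Splat) {
  for (unsigned Shift = 0; Shift < 32; Shift += 8)
    if ((Splat & ~(0xffu << Shift)) == 0)
      return ModImm{static_cast<uint8_t>(Splat >> Shift),
                    static_cast<uint8_t>((Shift / 8) * 2)}; // Cmode 000x/001x/010x/011x
  return std::nullopt; // needs the MSL form below, or is not a modified immediate
}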
- - // Neon move instr per word, shift ones - if ((SplatBits & ~0xffff) == 0 && - ((SplatBits | SplatUndef) & 0xff) == 0xff) { - // Value = 0x0000nnff is 0x000000nn MSL 8 - // movi: Op=0, Cmode= 1100; mvni: Op=1, Cmode= 1100 - // Op=x, Cmode=1100 - Imm = SplatBits >> 8; - OpCmode = 0xc; - break; - } - if ((SplatBits & ~0xffffff) == 0 && - ((SplatBits | SplatUndef) & 0xffff) == 0xffff) { - // Value = 0x00nnffff is 0x000000nn MSL 16 - // movi: Op=1, Cmode= 1101; mvni: Op=1, Cmode= 1101 - // Op=x, Cmode=1101 - Imm = SplatBits >> 16; - OpCmode = 0xd; - break; - } - // can't handle any other - return false; - } - - case 64: { - if (type != Neon_Mov_Imm) - return false; - // Neon move instr bytemask, where each byte is either 0x00 or 0xff. - // movi Op=1, Cmode=1110. - OpCmode = 0x1e; - uint64_t BitMask = 0xff; - uint64_t Val = 0; - unsigned ImmMask = 1; - Imm = 0; - for (int ByteNum = 0; ByteNum < 8; ++ByteNum) { - if (((SplatBits | SplatUndef) & BitMask) == BitMask) { - Val |= BitMask; - Imm |= ImmMask; - } else if ((SplatBits & BitMask) != 0) { - return false; - } - BitMask <<= 8; - ImmMask <<= 1; - } - SplatBits = Val; - VT = is128Bits ? MVT::v2i64 : MVT::v1i64; - break; - } - } - - return true; -} - -static SDValue PerformANDCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI) { - - SelectionDAG &DAG = DCI.DAG; - SDLoc DL(N); - EVT VT = N->getValueType(0); - - // We're looking for an SRA/SHL pair which form an SBFX. - - if (VT != MVT::i32 && VT != MVT::i64) - return SDValue(); - - if (!isa<ConstantSDNode>(N->getOperand(1))) - return SDValue(); - - uint64_t TruncMask = N->getConstantOperandVal(1); - if (!isMask_64(TruncMask)) - return SDValue(); - - uint64_t Width = CountPopulation_64(TruncMask); - SDValue Shift = N->getOperand(0); - - if (Shift.getOpcode() != ISD::SRL) - return SDValue(); - - if (!isa<ConstantSDNode>(Shift->getOperand(1))) - return SDValue(); - uint64_t LSB = Shift->getConstantOperandVal(1); - - if (LSB > VT.getSizeInBits() || Width > VT.getSizeInBits()) - return SDValue(); - - return DAG.getNode(AArch64ISD::UBFX, DL, VT, Shift.getOperand(0), - DAG.getConstant(LSB, MVT::i64), - DAG.getConstant(LSB + Width - 1, MVT::i64)); -} - -/// For a true bitfield insert, the bits getting into that contiguous mask -/// should come from the low part of an existing value: they must be formed from -/// a compatible SHL operation (unless they're already low). This function -/// checks that condition and returns the least-significant bit that's -/// intended. If the operation not a field preparation, -1 is returned. -static int32_t getLSBForBFI(SelectionDAG &DAG, SDLoc DL, EVT VT, - SDValue &MaskedVal, uint64_t Mask) { - if (!isShiftedMask_64(Mask)) - return -1; - - // Now we need to alter MaskedVal so that it is an appropriate input for a BFI - // instruction. BFI will do a left-shift by LSB before applying the mask we've - // spotted, so in general we should pre-emptively "undo" that by making sure - // the incoming bits have had a right-shift applied to them. - // - // This right shift, however, will combine with existing left/right shifts. In - // the simplest case of a completely straight bitfield operation, it will be - // expected to completely cancel out with an existing SHL. More complicated - // cases (e.g. bitfield to bitfield copy) may still need a real shift before - // the BFI. 
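What the AND-of-SRL combine above recognises, as plain integer arithmetic: a right shift followed by a contiguous low mask is a single UBFX of Width bits starting at bit LSB.

#include <cstdint>

uint64_t ubfx(uint64_t X, unsigned LSB, unsigned Width) {
  uint64_t Mask = (Width == 64) ? ~0ull : ((1ull << Width) - 1);
  return (X >> LSB) & Mask; // one UBFX instruction once the combine fires
}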
- - uint64_t LSB = countTrailingZeros(Mask); - int64_t ShiftRightRequired = LSB; - if (MaskedVal.getOpcode() == ISD::SHL && - isa<ConstantSDNode>(MaskedVal.getOperand(1))) { - ShiftRightRequired -= MaskedVal.getConstantOperandVal(1); - MaskedVal = MaskedVal.getOperand(0); - } else if (MaskedVal.getOpcode() == ISD::SRL && - isa<ConstantSDNode>(MaskedVal.getOperand(1))) { - ShiftRightRequired += MaskedVal.getConstantOperandVal(1); - MaskedVal = MaskedVal.getOperand(0); - } - - if (ShiftRightRequired > 0) - MaskedVal = DAG.getNode(ISD::SRL, DL, VT, MaskedVal, - DAG.getConstant(ShiftRightRequired, MVT::i64)); - else if (ShiftRightRequired < 0) { - // We could actually end up with a residual left shift, for example with - // "struc.bitfield = val << 1". - MaskedVal = DAG.getNode(ISD::SHL, DL, VT, MaskedVal, - DAG.getConstant(-ShiftRightRequired, MVT::i64)); - } - - return LSB; -} - -/// Searches from N for an existing AArch64ISD::BFI node, possibly surrounded by -/// a mask and an extension. Returns true if a BFI was found and provides -/// information on its surroundings. -static bool findMaskedBFI(SDValue N, SDValue &BFI, uint64_t &Mask, - bool &Extended) { - Extended = false; - if (N.getOpcode() == ISD::ZERO_EXTEND) { - Extended = true; - N = N.getOperand(0); - } - - if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) { - Mask = N->getConstantOperandVal(1); - N = N.getOperand(0); - } else { - // Mask is the whole width. - Mask = -1ULL >> (64 - N.getValueType().getSizeInBits()); - } - - if (N.getOpcode() == AArch64ISD::BFI) { - BFI = N; - return true; - } - - return false; -} - -/// Try to combine a subtree (rooted at an OR) into a "masked BFI" node, which -/// is roughly equivalent to (and (BFI ...), mask). This form is used because it -/// can often be further combined with a larger mask. Ultimately, we want mask -/// to be 2^32-1 or 2^64-1 so the AND can be skipped. -static SDValue tryCombineToBFI(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI, - const AArch64Subtarget *Subtarget) { - SelectionDAG &DAG = DCI.DAG; - SDLoc DL(N); - EVT VT = N->getValueType(0); - - assert(N->getOpcode() == ISD::OR && "Unexpected root"); - - // We need the LHS to be (and SOMETHING, MASK). Find out what that mask is or - // abandon the effort. - SDValue LHS = N->getOperand(0); - if (LHS.getOpcode() != ISD::AND) - return SDValue(); - - uint64_t LHSMask; - if (isa<ConstantSDNode>(LHS.getOperand(1))) - LHSMask = LHS->getConstantOperandVal(1); - else - return SDValue(); - - // We also need the RHS to be (and SOMETHING, MASK). Find out what that mask - // is or abandon the effort. - SDValue RHS = N->getOperand(1); - if (RHS.getOpcode() != ISD::AND) - return SDValue(); - - uint64_t RHSMask; - if (isa<ConstantSDNode>(RHS.getOperand(1))) - RHSMask = RHS->getConstantOperandVal(1); - else - return SDValue(); - - // Can't do anything if the masks are incompatible. - if (LHSMask & RHSMask) - return SDValue(); - - // Now we need one of the masks to be a contiguous field. Without loss of - // generality that should be the RHS one. - SDValue Bitfield = LHS.getOperand(0); - if (getLSBForBFI(DAG, DL, VT, Bitfield, LHSMask) != -1) { - // We know that LHS is a candidate new value, and RHS isn't already a better - // one. - std::swap(LHS, RHS); - std::swap(LHSMask, RHSMask); - } - - // We've done our best to put the right operands in the right places, all we - // can do now is check whether a BFI exists. 
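Reference semantics for the BFI node being formed in this combine (assumption: BFI inserts Width bits of Src at bit LSB of Dst and leaves every other bit untouched), matching the or-of-disjoint-masks pattern above:

#include <cstdint>

uint64_t bfi(uint64_t Dst, uint64_t Src, unsigned LSB, unsigned Width) {
  uint64_t FieldMask = ((Width == 64) ? ~0ull : ((1ull << Width) - 1)) << LSB;
  return (Dst & ~FieldMask) | ((Src << LSB) & FieldMask);
}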
- Bitfield = RHS.getOperand(0); - int32_t LSB = getLSBForBFI(DAG, DL, VT, Bitfield, RHSMask); - if (LSB == -1) - return SDValue(); - - uint32_t Width = CountPopulation_64(RHSMask); - assert(Width && "Expected non-zero bitfield width"); - - SDValue BFI = DAG.getNode(AArch64ISD::BFI, DL, VT, - LHS.getOperand(0), Bitfield, - DAG.getConstant(LSB, MVT::i64), - DAG.getConstant(Width, MVT::i64)); - - // Mask is trivial - if ((LHSMask | RHSMask) == (-1ULL >> (64 - VT.getSizeInBits()))) - return BFI; - - return DAG.getNode(ISD::AND, DL, VT, BFI, - DAG.getConstant(LHSMask | RHSMask, VT)); -} - -/// Search for the bitwise combining (with careful masks) of a MaskedBFI and its -/// original input. This is surprisingly common because SROA splits things up -/// into i8 chunks, so the originally detected MaskedBFI may actually only act -/// on the low (say) byte of a word. This is then orred into the rest of the -/// word afterwards. -/// -/// Basic input: (or (and OLDFIELD, MASK1), (MaskedBFI MASK2, OLDFIELD, ...)). -/// -/// If MASK1 and MASK2 are compatible, we can fold the whole thing into the -/// MaskedBFI. We can also deal with a certain amount of extend/truncate being -/// involved. -static SDValue tryCombineToLargerBFI(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI, - const AArch64Subtarget *Subtarget) { - SelectionDAG &DAG = DCI.DAG; - SDLoc DL(N); - EVT VT = N->getValueType(0); - - // First job is to hunt for a MaskedBFI on either the left or right. Swap - // operands if it's actually on the right. - SDValue BFI; - SDValue PossExtraMask; - uint64_t ExistingMask = 0; - bool Extended = false; - if (findMaskedBFI(N->getOperand(0), BFI, ExistingMask, Extended)) - PossExtraMask = N->getOperand(1); - else if (findMaskedBFI(N->getOperand(1), BFI, ExistingMask, Extended)) - PossExtraMask = N->getOperand(0); - else - return SDValue(); - - // We can only combine a BFI with another compatible mask. - if (PossExtraMask.getOpcode() != ISD::AND || - !isa<ConstantSDNode>(PossExtraMask.getOperand(1))) - return SDValue(); - - uint64_t ExtraMask = PossExtraMask->getConstantOperandVal(1); - - // Masks must be compatible. - if (ExtraMask & ExistingMask) - return SDValue(); - - SDValue OldBFIVal = BFI.getOperand(0); - SDValue NewBFIVal = BFI.getOperand(1); - if (Extended) { - // We skipped a ZERO_EXTEND above, so the input to the MaskedBFIs should be - // 32-bit and we'll be forming a 64-bit MaskedBFI. The MaskedBFI arguments - // need to be made compatible. - assert(VT == MVT::i64 && BFI.getValueType() == MVT::i32 - && "Invalid types for BFI"); - OldBFIVal = DAG.getNode(ISD::ANY_EXTEND, DL, VT, OldBFIVal); - NewBFIVal = DAG.getNode(ISD::ANY_EXTEND, DL, VT, NewBFIVal); - } - - // We need the MaskedBFI to be combined with a mask of the *same* value. - if (PossExtraMask.getOperand(0) != OldBFIVal) - return SDValue(); - - BFI = DAG.getNode(AArch64ISD::BFI, DL, VT, - OldBFIVal, NewBFIVal, - BFI.getOperand(2), BFI.getOperand(3)); - - // If the masking is trivial, we don't need to create it. - if ((ExtraMask | ExistingMask) == (-1ULL >> (64 - VT.getSizeInBits()))) - return BFI; - - return DAG.getNode(ISD::AND, DL, VT, BFI, - DAG.getConstant(ExtraMask | ExistingMask, VT)); -} - -/// An EXTR instruction is made up of two shifts, ORed together. This helper -/// searches for and classifies those shifts. 
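The pattern hunted by the helper below, in plain integer form: an EXTR extracts a contiguous 64-bit window from the Hi:Lo register pair, with the SHL contributing the high part and the SRL the low part.

#include <cstdint>

// Valid for LSB in [1, 63]; LSB == 0 is just Lo and is not an EXTR.
uint64_t extr64(uint64_t Hi, uint64_t Lo, unsigned LSB) {
  return (Hi << (64 - LSB)) | (Lo >> LSB);
}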
-static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount, - bool &FromHi) { - if (N.getOpcode() == ISD::SHL) - FromHi = false; - else if (N.getOpcode() == ISD::SRL) - FromHi = true; - else - return false; - - if (!isa<ConstantSDNode>(N.getOperand(1))) - return false; - - ShiftAmount = N->getConstantOperandVal(1); - Src = N->getOperand(0); - return true; -} - -/// EXTR instruction extracts a contiguous chunk of bits from two existing -/// registers viewed as a high/low pair. This function looks for the pattern: -/// (or (shl VAL1, #N), (srl VAL2, #RegWidth-N)) and replaces it with an -/// EXTR. Can't quite be done in TableGen because the two immediates aren't -/// independent. -static SDValue tryCombineToEXTR(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI) { - SelectionDAG &DAG = DCI.DAG; - SDLoc DL(N); - EVT VT = N->getValueType(0); - - assert(N->getOpcode() == ISD::OR && "Unexpected root"); - - if (VT != MVT::i32 && VT != MVT::i64) - return SDValue(); - - SDValue LHS; - uint32_t ShiftLHS = 0; - bool LHSFromHi = 0; - if (!findEXTRHalf(N->getOperand(0), LHS, ShiftLHS, LHSFromHi)) - return SDValue(); - - SDValue RHS; - uint32_t ShiftRHS = 0; - bool RHSFromHi = 0; - if (!findEXTRHalf(N->getOperand(1), RHS, ShiftRHS, RHSFromHi)) - return SDValue(); - - // If they're both trying to come from the high part of the register, they're - // not really an EXTR. - if (LHSFromHi == RHSFromHi) - return SDValue(); - - if (ShiftLHS + ShiftRHS != VT.getSizeInBits()) - return SDValue(); - - if (LHSFromHi) { - std::swap(LHS, RHS); - std::swap(ShiftLHS, ShiftRHS); - } - - return DAG.getNode(AArch64ISD::EXTR, DL, VT, - LHS, RHS, - DAG.getConstant(ShiftRHS, MVT::i64)); -} - -/// Target-specific dag combine xforms for ISD::OR -static SDValue PerformORCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI, - const AArch64Subtarget *Subtarget) { - - SelectionDAG &DAG = DCI.DAG; - SDLoc DL(N); - EVT VT = N->getValueType(0); - - if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) - return SDValue(); - - // Attempt to recognise bitfield-insert operations. - SDValue Res = tryCombineToBFI(N, DCI, Subtarget); - if (Res.getNode()) - return Res; - - // Attempt to combine an existing MaskedBFI operation into one with a larger - // mask. - Res = tryCombineToLargerBFI(N, DCI, Subtarget); - if (Res.getNode()) - return Res; - - Res = tryCombineToEXTR(N, DCI); - if (Res.getNode()) - return Res; - - if (!Subtarget->hasNEON()) - return SDValue(); - - // Attempt to use vector immediate-form BSL - // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant. 
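Per-lane semantics of the immediate-form bitwise select targeted below (a sketch): with a constant mask A, (or (and B, A), (and C, ~A)) collapses to one BSL/VSELECT.

#include <cstdint>

uint64_t bsl(uint64_t A, uint64_t B, uint64_t C) {
  return (B & A) | (C & ~A); // bits of B where A is set, bits of C elsewhere
}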
- - SDValue N0 = N->getOperand(0); - if (N0.getOpcode() != ISD::AND) - return SDValue(); - - SDValue N1 = N->getOperand(1); - if (N1.getOpcode() != ISD::AND) - return SDValue(); - - if (VT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT)) { - APInt SplatUndef; - unsigned SplatBitSize; - bool HasAnyUndefs; - BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1)); - APInt SplatBits0; - if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize, - HasAnyUndefs) && - !HasAnyUndefs) { - BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1)); - APInt SplatBits1; - if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize, - HasAnyUndefs) && !HasAnyUndefs && - SplatBits0.getBitWidth() == SplatBits1.getBitWidth() && - SplatBits0 == ~SplatBits1) { - - return DAG.getNode(ISD::VSELECT, DL, VT, N0->getOperand(1), - N0->getOperand(0), N1->getOperand(0)); - } - } - } - - return SDValue(); -} - -/// Target-specific dag combine xforms for ISD::SRA -static SDValue PerformSRACombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI) { - - SelectionDAG &DAG = DCI.DAG; - SDLoc DL(N); - EVT VT = N->getValueType(0); - - // We're looking for an SRA/SHL pair which form an SBFX. - - if (VT != MVT::i32 && VT != MVT::i64) - return SDValue(); - - if (!isa<ConstantSDNode>(N->getOperand(1))) - return SDValue(); - - uint64_t ExtraSignBits = N->getConstantOperandVal(1); - SDValue Shift = N->getOperand(0); - - if (Shift.getOpcode() != ISD::SHL) - return SDValue(); - - if (!isa<ConstantSDNode>(Shift->getOperand(1))) - return SDValue(); - - uint64_t BitsOnLeft = Shift->getConstantOperandVal(1); - uint64_t Width = VT.getSizeInBits() - ExtraSignBits; - uint64_t LSB = VT.getSizeInBits() - Width - BitsOnLeft; - - if (LSB > VT.getSizeInBits() || Width > VT.getSizeInBits()) - return SDValue(); - - return DAG.getNode(AArch64ISD::SBFX, DL, VT, Shift.getOperand(0), - DAG.getConstant(LSB, MVT::i64), - DAG.getConstant(LSB + Width - 1, MVT::i64)); -} - -/// Check if this is a valid build_vector for the immediate operand of -/// a vector shift operation, where all the elements of the build_vector -/// must have the same constant integer value. -static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) { - // Ignore bit_converts. - while (Op.getOpcode() == ISD::BITCAST) - Op = Op.getOperand(0); - BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode()); - APInt SplatBits, SplatUndef; - unsigned SplatBitSize; - bool HasAnyUndefs; - if (!BVN || !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, - HasAnyUndefs, ElementBits) || - SplatBitSize > ElementBits) - return false; - Cnt = SplatBits.getSExtValue(); - return true; -} - -/// Check if this is a valid build_vector for the immediate operand of -/// a vector shift left operation. That value must be in the range: -/// 0 <= Value < ElementBits -static bool isVShiftLImm(SDValue Op, EVT VT, int64_t &Cnt) { - assert(VT.isVector() && "vector shift count is not a vector type"); - unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); - if (!getVShiftImm(Op, ElementBits, Cnt)) - return false; - return (Cnt >= 0 && Cnt < ElementBits); -} - -/// Check if this is a valid build_vector for the immediate operand of a -/// vector shift right operation. 
The value must be in the range: -/// 1 <= Value <= ElementBits -static bool isVShiftRImm(SDValue Op, EVT VT, int64_t &Cnt) { - assert(VT.isVector() && "vector shift count is not a vector type"); - unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); - if (!getVShiftImm(Op, ElementBits, Cnt)) - return false; - return (Cnt >= 1 && Cnt <= ElementBits); -} - -static SDValue GenForSextInreg(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI, - EVT SrcVT, EVT DestVT, EVT SubRegVT, - const int *Mask, SDValue Src) { - SelectionDAG &DAG = DCI.DAG; - SDValue Bitcast - = DAG.getNode(ISD::BITCAST, SDLoc(N), SrcVT, Src); - SDValue Sext - = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), DestVT, Bitcast); - SDValue ShuffleVec - = DAG.getVectorShuffle(DestVT, SDLoc(N), Sext, DAG.getUNDEF(DestVT), Mask); - SDValue ExtractSubreg - = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, SDLoc(N), - SubRegVT, ShuffleVec, - DAG.getTargetConstant(AArch64::sub_64, MVT::i32)), 0); - return ExtractSubreg; -} - -/// Checks for vector shifts and lowers them. -static SDValue PerformShiftCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI, - const AArch64Subtarget *ST) { - SelectionDAG &DAG = DCI.DAG; - EVT VT = N->getValueType(0); - if (N->getOpcode() == ISD::SRA && (VT == MVT::i32 || VT == MVT::i64)) - return PerformSRACombine(N, DCI); - - // We're looking for an SRA/SHL pair to help generating instruction - // sshll v0.8h, v0.8b, #0 - // The instruction STXL is also the alias of this instruction. - // - // For example, for DAG like below, - // v2i32 = sra (v2i32 (shl v2i32, 16)), 16 - // we can transform it into - // v2i32 = EXTRACT_SUBREG - // (v4i32 (suffle_vector - // (v4i32 (sext (v4i16 (bitcast v2i32))), - // undef, (0, 2, u, u)), - // sub_64 - // - // With this transformation we expect to generate "SSHLL + UZIP1" - // Sometimes UZIP1 can be optimized away by combining with other context. - int64_t ShrCnt, ShlCnt; - if (N->getOpcode() == ISD::SRA - && (VT == MVT::v2i32 || VT == MVT::v4i16) - && isVShiftRImm(N->getOperand(1), VT, ShrCnt) - && N->getOperand(0).getOpcode() == ISD::SHL - && isVShiftRImm(N->getOperand(0).getOperand(1), VT, ShlCnt)) { - SDValue Src = N->getOperand(0).getOperand(0); - if (VT == MVT::v2i32 && ShrCnt == 16 && ShlCnt == 16) { - // sext_inreg(v2i32, v2i16) - // We essentially only care the Mask {0, 2, u, u} - int Mask[4] = {0, 2, 4, 6}; - return GenForSextInreg(N, DCI, MVT::v4i16, MVT::v4i32, MVT::v2i32, - Mask, Src); - } - else if (VT == MVT::v2i32 && ShrCnt == 24 && ShlCnt == 24) { - // sext_inreg(v2i16, v2i8) - // We essentially only care the Mask {0, u, 4, u, u, u, u, u, u, u, u, u} - int Mask[8] = {0, 2, 4, 6, 8, 10, 12, 14}; - return GenForSextInreg(N, DCI, MVT::v8i8, MVT::v8i16, MVT::v2i32, - Mask, Src); - } - else if (VT == MVT::v4i16 && ShrCnt == 8 && ShlCnt == 8) { - // sext_inreg(v4i16, v4i8) - // We essentially only care the Mask {0, 2, 4, 6, u, u, u, u, u, u, u, u} - int Mask[8] = {0, 2, 4, 6, 8, 10, 12, 14}; - return GenForSextInreg(N, DCI, MVT::v8i8, MVT::v8i16, MVT::v4i16, - Mask, Src); - } - } - - // Nothing to be done for scalar shifts. 
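The SHL/SRA pair matched above is a sign-extend-in-register; a scalar model for one 32-bit lane (assumes the usual arithmetic right shift of signed values and 0 < N < 32):

#include <cstdint>

int32_t signExtendInReg32(int32_t X, unsigned N) {
  // Shift left by N (as unsigned, to sidestep signed overflow), then
  // arithmetically back: N == 16 sign-extends an i16 carried in an i32 lane.
  return static_cast<int32_t>(static_cast<uint32_t>(X) << N) >> N;
}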
- const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (!VT.isVector() || !TLI.isTypeLegal(VT)) - return SDValue(); - - assert(ST->hasNEON() && "unexpected vector shift"); - int64_t Cnt; - - switch (N->getOpcode()) { - default: - llvm_unreachable("unexpected shift opcode"); - - case ISD::SHL: - if (isVShiftLImm(N->getOperand(1), VT, Cnt)) { - SDValue RHS = - DAG.getNode(AArch64ISD::NEON_VDUP, SDLoc(N->getOperand(1)), VT, - DAG.getConstant(Cnt, MVT::i32)); - return DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0), RHS); - } - break; - - case ISD::SRA: - case ISD::SRL: - if (isVShiftRImm(N->getOperand(1), VT, Cnt)) { - SDValue RHS = - DAG.getNode(AArch64ISD::NEON_VDUP, SDLoc(N->getOperand(1)), VT, - DAG.getConstant(Cnt, MVT::i32)); - return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N->getOperand(0), RHS); - } - break; - } - - return SDValue(); -} - -/// ARM-specific DAG combining for intrinsics. -static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) { - unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); - - switch (IntNo) { - default: - // Don't do anything for most intrinsics. - break; - - case Intrinsic::arm_neon_vqshifts: - case Intrinsic::arm_neon_vqshiftu: - EVT VT = N->getOperand(1).getValueType(); - int64_t Cnt; - if (!isVShiftLImm(N->getOperand(2), VT, Cnt)) - break; - unsigned VShiftOpc = (IntNo == Intrinsic::arm_neon_vqshifts) - ? AArch64ISD::NEON_QSHLs - : AArch64ISD::NEON_QSHLu; - return DAG.getNode(VShiftOpc, SDLoc(N), N->getValueType(0), - N->getOperand(1), DAG.getConstant(Cnt, MVT::i32)); - } - - return SDValue(); -} - -/// Target-specific DAG combine function for NEON load/store intrinsics -/// to merge base address updates. -static SDValue CombineBaseUpdate(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI) { - if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) - return SDValue(); - - SelectionDAG &DAG = DCI.DAG; - bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID || - N->getOpcode() == ISD::INTRINSIC_W_CHAIN); - unsigned AddrOpIdx = (isIntrinsic ? 2 : 1); - SDValue Addr = N->getOperand(AddrOpIdx); - - // Search for a use of the address operand that is an increment. - for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), - UE = Addr.getNode()->use_end(); UI != UE; ++UI) { - SDNode *User = *UI; - if (User->getOpcode() != ISD::ADD || - UI.getUse().getResNo() != Addr.getResNo()) - continue; - - // Check that the add is independent of the load/store. Otherwise, folding - // it would create a cycle. - if (User->isPredecessorOf(N) || N->isPredecessorOf(User)) - continue; - - // Find the new opcode for the updating load/store. 
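What the base-update combine below buys, sketched with a plain load (the helper is hypothetical, not backend code): the pointer ADD that follows a NEON load or store is folded into a post-indexed form that also yields the incremented address.

#include <cstdint>
#include <cstring>
#include <utility>

// Models "ld1 {v0.1d}, [x0], #8": one operation returns both the loaded data
// and the written-back address, instead of needing a separate ADD.
std::pair<uint64_t, const uint8_t *> ld1dPostIndex(const uint8_t *Addr) {
  uint64_t Lane;
  std::memcpy(&Lane, Addr, sizeof(Lane));
  return {Lane, Addr + sizeof(Lane)};
}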
- bool isLoad = true; - bool isLaneOp = false; - unsigned NewOpc = 0; - unsigned NumVecs = 0; - if (isIntrinsic) { - unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); - switch (IntNo) { - default: llvm_unreachable("unexpected intrinsic for Neon base update"); - case Intrinsic::arm_neon_vld1: NewOpc = AArch64ISD::NEON_LD1_UPD; - NumVecs = 1; break; - case Intrinsic::arm_neon_vld2: NewOpc = AArch64ISD::NEON_LD2_UPD; - NumVecs = 2; break; - case Intrinsic::arm_neon_vld3: NewOpc = AArch64ISD::NEON_LD3_UPD; - NumVecs = 3; break; - case Intrinsic::arm_neon_vld4: NewOpc = AArch64ISD::NEON_LD4_UPD; - NumVecs = 4; break; - case Intrinsic::arm_neon_vst1: NewOpc = AArch64ISD::NEON_ST1_UPD; - NumVecs = 1; isLoad = false; break; - case Intrinsic::arm_neon_vst2: NewOpc = AArch64ISD::NEON_ST2_UPD; - NumVecs = 2; isLoad = false; break; - case Intrinsic::arm_neon_vst3: NewOpc = AArch64ISD::NEON_ST3_UPD; - NumVecs = 3; isLoad = false; break; - case Intrinsic::arm_neon_vst4: NewOpc = AArch64ISD::NEON_ST4_UPD; - NumVecs = 4; isLoad = false; break; - case Intrinsic::aarch64_neon_vld1x2: NewOpc = AArch64ISD::NEON_LD1x2_UPD; - NumVecs = 2; break; - case Intrinsic::aarch64_neon_vld1x3: NewOpc = AArch64ISD::NEON_LD1x3_UPD; - NumVecs = 3; break; - case Intrinsic::aarch64_neon_vld1x4: NewOpc = AArch64ISD::NEON_LD1x4_UPD; - NumVecs = 4; break; - case Intrinsic::aarch64_neon_vst1x2: NewOpc = AArch64ISD::NEON_ST1x2_UPD; - NumVecs = 2; isLoad = false; break; - case Intrinsic::aarch64_neon_vst1x3: NewOpc = AArch64ISD::NEON_ST1x3_UPD; - NumVecs = 3; isLoad = false; break; - case Intrinsic::aarch64_neon_vst1x4: NewOpc = AArch64ISD::NEON_ST1x4_UPD; - NumVecs = 4; isLoad = false; break; - case Intrinsic::arm_neon_vld2lane: NewOpc = AArch64ISD::NEON_LD2LN_UPD; - NumVecs = 2; isLaneOp = true; break; - case Intrinsic::arm_neon_vld3lane: NewOpc = AArch64ISD::NEON_LD3LN_UPD; - NumVecs = 3; isLaneOp = true; break; - case Intrinsic::arm_neon_vld4lane: NewOpc = AArch64ISD::NEON_LD4LN_UPD; - NumVecs = 4; isLaneOp = true; break; - case Intrinsic::arm_neon_vst2lane: NewOpc = AArch64ISD::NEON_ST2LN_UPD; - NumVecs = 2; isLoad = false; isLaneOp = true; break; - case Intrinsic::arm_neon_vst3lane: NewOpc = AArch64ISD::NEON_ST3LN_UPD; - NumVecs = 3; isLoad = false; isLaneOp = true; break; - case Intrinsic::arm_neon_vst4lane: NewOpc = AArch64ISD::NEON_ST4LN_UPD; - NumVecs = 4; isLoad = false; isLaneOp = true; break; - } - } else { - isLaneOp = true; - switch (N->getOpcode()) { - default: llvm_unreachable("unexpected opcode for Neon base update"); - case AArch64ISD::NEON_LD2DUP: NewOpc = AArch64ISD::NEON_LD2DUP_UPD; - NumVecs = 2; break; - case AArch64ISD::NEON_LD3DUP: NewOpc = AArch64ISD::NEON_LD3DUP_UPD; - NumVecs = 3; break; - case AArch64ISD::NEON_LD4DUP: NewOpc = AArch64ISD::NEON_LD4DUP_UPD; - NumVecs = 4; break; - } - } - - // Find the size of memory referenced by the load/store. - EVT VecTy; - if (isLoad) - VecTy = N->getValueType(0); - else - VecTy = N->getOperand(AddrOpIdx + 1).getValueType(); - unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8; - if (isLaneOp) - NumBytes /= VecTy.getVectorNumElements(); - - // If the increment is a constant, it must match the memory ref size. - SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0); - if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) { - uint32_t IncVal = CInc->getZExtValue(); - if (IncVal != NumBytes) - continue; - Inc = DAG.getTargetConstant(IncVal, MVT::i32); - } - - // Create the new updating load/store node. 
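The constant-increment check above, reduced to plain arithmetic: the write-back immediate of a post-indexed form must equal the number of bytes the operation actually touches.

bool incrementMatchesAccess(unsigned NumVecs, unsigned VecBits,
                            unsigned NumLanes, bool IsLaneOp, unsigned IncVal) {
  unsigned NumBytes = NumVecs * VecBits / 8;
  if (IsLaneOp)
    NumBytes /= NumLanes; // a lane op reads/writes one element per vector
  return IncVal == NumBytes;
}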
- EVT Tys[6]; - unsigned NumResultVecs = (isLoad ? NumVecs : 0); - unsigned n; - for (n = 0; n < NumResultVecs; ++n) - Tys[n] = VecTy; - Tys[n++] = MVT::i64; - Tys[n] = MVT::Other; - SDVTList SDTys = DAG.getVTList(ArrayRef<EVT>(Tys, NumResultVecs + 2)); - SmallVector<SDValue, 8> Ops; - Ops.push_back(N->getOperand(0)); // incoming chain - Ops.push_back(N->getOperand(AddrOpIdx)); - Ops.push_back(Inc); - for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands(); ++i) { - Ops.push_back(N->getOperand(i)); - } - MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N); - SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys, - Ops, MemInt->getMemoryVT(), - MemInt->getMemOperand()); - - // Update the uses. - std::vector<SDValue> NewResults; - for (unsigned i = 0; i < NumResultVecs; ++i) { - NewResults.push_back(SDValue(UpdN.getNode(), i)); - } - NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1)); // chain - DCI.CombineTo(N, NewResults); - DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs)); - - break; - } - return SDValue(); -} - -/// For a VDUPLANE node N, check if its source operand is a vldN-lane (N > 1) -/// intrinsic, and if all the other uses of that intrinsic are also VDUPLANEs. -/// If so, combine them to a vldN-dup operation and return true. -static SDValue CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { - SelectionDAG &DAG = DCI.DAG; - EVT VT = N->getValueType(0); - - // Check if the VDUPLANE operand is a vldN-dup intrinsic. - SDNode *VLD = N->getOperand(0).getNode(); - if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN) - return SDValue(); - unsigned NumVecs = 0; - unsigned NewOpc = 0; - unsigned IntNo = cast<ConstantSDNode>(VLD->getOperand(1))->getZExtValue(); - if (IntNo == Intrinsic::arm_neon_vld2lane) { - NumVecs = 2; - NewOpc = AArch64ISD::NEON_LD2DUP; - } else if (IntNo == Intrinsic::arm_neon_vld3lane) { - NumVecs = 3; - NewOpc = AArch64ISD::NEON_LD3DUP; - } else if (IntNo == Intrinsic::arm_neon_vld4lane) { - NumVecs = 4; - NewOpc = AArch64ISD::NEON_LD4DUP; - } else { - return SDValue(); - } - - // First check that all the vldN-lane uses are VDUPLANEs and that the lane - // numbers match the load. - unsigned VLDLaneNo = - cast<ConstantSDNode>(VLD->getOperand(NumVecs + 3))->getZExtValue(); - for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end(); - UI != UE; ++UI) { - // Ignore uses of the chain result. - if (UI.getUse().getResNo() == NumVecs) - continue; - SDNode *User = *UI; - if (User->getOpcode() != AArch64ISD::NEON_VDUPLANE || - VLDLaneNo != cast<ConstantSDNode>(User->getOperand(1))->getZExtValue()) - return SDValue(); - } - - // Create the vldN-dup node. - EVT Tys[5]; - unsigned n; - for (n = 0; n < NumVecs; ++n) - Tys[n] = VT; - Tys[n] = MVT::Other; - SDVTList SDTys = DAG.getVTList(ArrayRef<EVT>(Tys, NumVecs + 1)); - SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) }; - MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD); - SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys, Ops, - VLDMemInt->getMemoryVT(), - VLDMemInt->getMemOperand()); - - // Update the uses. - for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end(); - UI != UE; ++UI) { - unsigned ResNo = UI.getUse().getResNo(); - // Ignore uses of the chain result. - if (ResNo == NumVecs) - continue; - SDNode *User = *UI; - DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo)); - } - - // Now the vldN-lane intrinsic is dead except for its chain result. - // Update uses of the chain. 
- std::vector<SDValue> VLDDupResults; - for (unsigned n = 0; n < NumVecs; ++n) - VLDDupResults.push_back(SDValue(VLDDup.getNode(), n)); - VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs)); - DCI.CombineTo(VLD, VLDDupResults); - - return SDValue(N, 0); -} - -// vselect (v1i1 setcc) -> -// vselect (v1iXX setcc) (XX is the size of the compared operand type) -// FIXME: Currently the type legalizer can't handle VSELECT having v1i1 as -// condition. If it can legalize "VSELECT v1i1" correctly, no need to combine -// such VSELECT. -static SDValue PerformVSelectCombine(SDNode *N, SelectionDAG &DAG) { - SDValue N0 = N->getOperand(0); - EVT CCVT = N0.getValueType(); - - if (N0.getOpcode() != ISD::SETCC || CCVT.getVectorNumElements() != 1 || - CCVT.getVectorElementType() != MVT::i1) - return SDValue(); - - EVT ResVT = N->getValueType(0); - EVT CmpVT = N0.getOperand(0).getValueType(); - // Only combine when the result type is of the same size as the compared - // operands. - if (ResVT.getSizeInBits() != CmpVT.getSizeInBits()) - return SDValue(); - - SDValue IfTrue = N->getOperand(1); - SDValue IfFalse = N->getOperand(2); - SDValue SetCC = - DAG.getSetCC(SDLoc(N), CmpVT.changeVectorElementTypeToInteger(), - N0.getOperand(0), N0.getOperand(1), - cast<CondCodeSDNode>(N0.getOperand(2))->get()); - return DAG.getNode(ISD::VSELECT, SDLoc(N), ResVT, SetCC, - IfTrue, IfFalse); -} - -// sign_extend (extract_vector_elt (v1i1 setcc)) -> -// extract_vector_elt (v1iXX setcc) -// (XX is the size of the compared operand type) -static SDValue PerformSignExtendCombine(SDNode *N, SelectionDAG &DAG) { - SDValue N0 = N->getOperand(0); - SDValue Vec = N0.getOperand(0); - - if (N0.getOpcode() != ISD::EXTRACT_VECTOR_ELT || - Vec.getOpcode() != ISD::SETCC) - return SDValue(); - - EVT ResVT = N->getValueType(0); - EVT CmpVT = Vec.getOperand(0).getValueType(); - // Only optimize when the result type is of the same size as the element - // type of the compared operand. 
- if (ResVT.getSizeInBits() != CmpVT.getVectorElementType().getSizeInBits()) - return SDValue(); - - SDValue Lane = N0.getOperand(1); - SDValue SetCC = - DAG.getSetCC(SDLoc(N), CmpVT.changeVectorElementTypeToInteger(), - Vec.getOperand(0), Vec.getOperand(1), - cast<CondCodeSDNode>(Vec.getOperand(2))->get()); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), ResVT, - SetCC, Lane); -} - -SDValue -AArch64TargetLowering::PerformDAGCombine(SDNode *N, - DAGCombinerInfo &DCI) const { - switch (N->getOpcode()) { - default: break; - case ISD::AND: return PerformANDCombine(N, DCI); - case ISD::OR: return PerformORCombine(N, DCI, getSubtarget()); - case ISD::SHL: - case ISD::SRA: - case ISD::SRL: - return PerformShiftCombine(N, DCI, getSubtarget()); - case ISD::VSELECT: return PerformVSelectCombine(N, DCI.DAG); - case ISD::SIGN_EXTEND: return PerformSignExtendCombine(N, DCI.DAG); - case ISD::INTRINSIC_WO_CHAIN: - return PerformIntrinsicCombine(N, DCI.DAG); - case AArch64ISD::NEON_VDUPLANE: - return CombineVLDDUP(N, DCI); - case AArch64ISD::NEON_LD2DUP: - case AArch64ISD::NEON_LD3DUP: - case AArch64ISD::NEON_LD4DUP: - return CombineBaseUpdate(N, DCI); - case ISD::INTRINSIC_VOID: - case ISD::INTRINSIC_W_CHAIN: - switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { - case Intrinsic::arm_neon_vld1: - case Intrinsic::arm_neon_vld2: - case Intrinsic::arm_neon_vld3: - case Intrinsic::arm_neon_vld4: - case Intrinsic::arm_neon_vst1: - case Intrinsic::arm_neon_vst2: - case Intrinsic::arm_neon_vst3: - case Intrinsic::arm_neon_vst4: - case Intrinsic::arm_neon_vld2lane: - case Intrinsic::arm_neon_vld3lane: - case Intrinsic::arm_neon_vld4lane: - case Intrinsic::aarch64_neon_vld1x2: - case Intrinsic::aarch64_neon_vld1x3: - case Intrinsic::aarch64_neon_vld1x4: - case Intrinsic::aarch64_neon_vst1x2: - case Intrinsic::aarch64_neon_vst1x3: - case Intrinsic::aarch64_neon_vst1x4: - case Intrinsic::arm_neon_vst2lane: - case Intrinsic::arm_neon_vst3lane: - case Intrinsic::arm_neon_vst4lane: - return CombineBaseUpdate(N, DCI); - default: - break; - } - } - return SDValue(); -} - -bool -AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { - VT = VT.getScalarType(); - - if (!VT.isSimple()) - return false; - - switch (VT.getSimpleVT().SimpleTy) { - case MVT::f16: - case MVT::f32: - case MVT::f64: - return true; - case MVT::f128: - return false; - default: - break; - } - - return false; -} - -bool AArch64TargetLowering::allowsUnalignedMemoryAccesses(EVT VT, - unsigned AddrSpace, - bool *Fast) const { - const AArch64Subtarget *Subtarget = getSubtarget(); - // The AllowsUnaliged flag models the SCTLR.A setting in ARM cpus - bool AllowsUnaligned = Subtarget->allowsUnalignedMem(); - - switch (VT.getSimpleVT().SimpleTy) { - default: - return false; - // Scalar types - case MVT::i8: case MVT::i16: - case MVT::i32: case MVT::i64: - case MVT::f32: case MVT::f64: { - // Unaligned access can use (for example) LRDB, LRDH, LDRW - if (AllowsUnaligned) { - if (Fast) - *Fast = true; - return true; - } - return false; - } - // 64-bit vector types - case MVT::v8i8: case MVT::v4i16: - case MVT::v2i32: case MVT::v1i64: - case MVT::v2f32: case MVT::v1f64: - // 128-bit vector types - case MVT::v16i8: case MVT::v8i16: - case MVT::v4i32: case MVT::v2i64: - case MVT::v4f32: case MVT::v2f64: { - // For any little-endian targets with neon, we can support unaligned - // load/store of V registers using ld1/st1. 
- // A big-endian target may also explicitly support unaligned accesses - if (Subtarget->hasNEON() && (AllowsUnaligned || isLittleEndian())) { - if (Fast) - *Fast = true; - return true; - } - return false; - } - } -} - -// Check whether a shuffle_vector could be presented as concat_vector. -bool AArch64TargetLowering::isConcatVector(SDValue Op, SelectionDAG &DAG, - SDValue V0, SDValue V1, - const int *Mask, - SDValue &Res) const { - SDLoc DL(Op); - EVT VT = Op.getValueType(); - if (VT.getSizeInBits() != 128) - return false; - if (VT.getVectorElementType() != V0.getValueType().getVectorElementType() || - VT.getVectorElementType() != V1.getValueType().getVectorElementType()) - return false; - - unsigned NumElts = VT.getVectorNumElements(); - bool isContactVector = true; - bool splitV0 = false; - if (V0.getValueType().getSizeInBits() == 128) - splitV0 = true; - - for (int I = 0, E = NumElts / 2; I != E; I++) { - if (Mask[I] != I) { - isContactVector = false; - break; - } - } - - if (isContactVector) { - int offset = NumElts / 2; - for (int I = NumElts / 2, E = NumElts; I != E; I++) { - if (Mask[I] != I + splitV0 * offset) { - isContactVector = false; - break; - } - } - } - - if (isContactVector) { - EVT CastVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), - NumElts / 2); - if (splitV0) { - V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V0, - DAG.getConstant(0, MVT::i64)); - } - if (V1.getValueType().getSizeInBits() == 128) { - V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V1, - DAG.getConstant(0, MVT::i64)); - } - Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, V0, V1); - return true; - } - return false; -} - -// Check whether a Build Vector could be presented as Shuffle Vector. -// This Shuffle Vector maybe not legalized, so the length of its operand and -// the length of result may not equal. -bool AArch64TargetLowering::isKnownShuffleVector(SDValue Op, SelectionDAG &DAG, - SDValue &V0, SDValue &V1, - int *Mask) const { - SDLoc DL(Op); - EVT VT = Op.getValueType(); - unsigned NumElts = VT.getVectorNumElements(); - unsigned V0NumElts = 0; - - // Check if all elements are extracted from less than 3 vectors. - for (unsigned i = 0; i < NumElts; ++i) { - SDValue Elt = Op.getOperand(i); - if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT || - Elt.getOperand(0).getValueType().getVectorElementType() != - VT.getVectorElementType()) - return false; - - if (!V0.getNode()) { - V0 = Elt.getOperand(0); - V0NumElts = V0.getValueType().getVectorNumElements(); - } - if (Elt.getOperand(0) == V0) { - Mask[i] = (cast<ConstantSDNode>(Elt->getOperand(1))->getZExtValue()); - continue; - } else if (!V1.getNode()) { - V1 = Elt.getOperand(0); - } - if (Elt.getOperand(0) == V1) { - unsigned Lane = cast<ConstantSDNode>(Elt->getOperand(1))->getZExtValue(); - Mask[i] = (Lane + V0NumElts); - continue; - } else { - return false; - } - } - return true; -} - -// LowerShiftRightParts - Lower SRL_PARTS and SRA_PARTS, which returns two -/// i64 values and take a 2 x i64 value to shift plus a shift amount. -SDValue AArch64TargetLowering::LowerShiftRightParts(SDValue Op, - SelectionDAG &DAG) const { - assert(Op.getNumOperands() == 3 && "Not a quad-shift!"); - EVT VT = Op.getValueType(); - unsigned VTBits = VT.getSizeInBits(); - SDLoc dl(Op); - SDValue ShOpLo = Op.getOperand(0); - SDValue ShOpHi = Op.getOperand(1); - SDValue ShAmt = Op.getOperand(2); - unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? 
ISD::SRA : ISD::SRL; - - assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS); - SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, - DAG.getConstant(VTBits, MVT::i64), ShAmt); - SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt); - SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt, - DAG.getConstant(VTBits, MVT::i64)); - SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt); - SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); - SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt); - SDValue Tmp3 = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); - - SDValue A64cc; - SDValue CmpOp = getSelectableIntSetCC(ExtraShAmt, - DAG.getConstant(0, MVT::i64), - ISD::SETGE, A64cc, - DAG, dl); - - SDValue Hi = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT, CmpOp, - DAG.getConstant(0, Tmp3.getValueType()), Tmp3, - A64cc); - SDValue Lo = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT, CmpOp, - TrueVal, FalseVal, A64cc); - - SDValue Ops[2] = { Lo, Hi }; - return DAG.getMergeValues(Ops, dl); -} - -/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two -/// i64 values and take a 2 x i64 value to shift plus a shift amount. -SDValue AArch64TargetLowering::LowerShiftLeftParts(SDValue Op, - SelectionDAG &DAG) const { - assert(Op.getNumOperands() == 3 && "Not a quad-shift!"); - EVT VT = Op.getValueType(); - unsigned VTBits = VT.getSizeInBits(); - SDLoc dl(Op); - SDValue ShOpLo = Op.getOperand(0); - SDValue ShOpHi = Op.getOperand(1); - SDValue ShAmt = Op.getOperand(2); - - assert(Op.getOpcode() == ISD::SHL_PARTS); - SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, - DAG.getConstant(VTBits, MVT::i64), ShAmt); - SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt); - SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt, - DAG.getConstant(VTBits, MVT::i64)); - SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt); - SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt); - SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); - SDValue Tmp4 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); - - SDValue A64cc; - SDValue CmpOp = getSelectableIntSetCC(ExtraShAmt, - DAG.getConstant(0, MVT::i64), - ISD::SETGE, A64cc, - DAG, dl); - - SDValue Lo = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT, CmpOp, - DAG.getConstant(0, Tmp4.getValueType()), Tmp4, - A64cc); - SDValue Hi = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT, CmpOp, - Tmp3, FalseVal, A64cc); - - SDValue Ops[2] = { Lo, Hi }; - return DAG.getMergeValues(Ops, dl); -} - -// If this is a case we can't handle, return null and let the default -// expansion code take care of it. -SDValue -AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, - const AArch64Subtarget *ST) const { - - BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode()); - SDLoc DL(Op); - EVT VT = Op.getValueType(); - - APInt SplatBits, SplatUndef; - unsigned SplatBitSize; - bool HasAnyUndefs; - - unsigned UseNeonMov = VT.getSizeInBits() >= 64; - - // Note we favor lowering MOVI over MVNI. - // This has implications on the definition of patterns in TableGen to select - // BIC immediate instructions but not ORR immediate instructions. - // If this lowering order is changed, TableGen patterns for BIC immediate and - // ORR immediate instructions have to be updated. 
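The splat path that follows first tries the NEON "modified immediate" encodings. As a rough standalone sketch of the simplest of those forms, the 32-bit shifted-immediate MOVI/MVNI check can be written as below (plain C++; names are invented, and the real isNeonModifiedImm also covers the 8/16/64-bit splat widths, the MSL variants and the per-byte mask form):

#include <cstdint>
#include <optional>

struct NeonImm { uint8_t Imm8; unsigned Shift; bool Inverted; };

// A 32-bit splat is encodable if (possibly after inversion, i.e. MVNI) only a
// single byte of it is populated; Shift is the LSL amount of that byte.
std::optional<NeonImm> encodeMovImm32(uint32_t Splat) {
  for (bool Inverted : {false, true}) {
    uint32_t V = Inverted ? ~Splat : Splat;
    for (unsigned Shift = 0; Shift < 32; Shift += 8)
      if ((V & ~(0xFFu << Shift)) == 0)
        return NeonImm{uint8_t(V >> Shift), Shift, Inverted};
  }
  return std::nullopt; // fall back to FMOV or a constant-pool load
}

For example 0x00FF0000 encodes as MOVI with Imm8=0xFF, LSL #16, while 0xFFFF00FF encodes as MVNI with Imm8=0xFF, LSL #8. Trying the non-inverted form first matches the MOVI-over-MVNI preference noted in the comment above.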
- if (UseNeonMov && - BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { - if (SplatBitSize <= 64) { - // First attempt to use vector immediate-form MOVI - EVT NeonMovVT; - unsigned Imm = 0; - unsigned OpCmode = 0; - - if (isNeonModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(), - SplatBitSize, DAG, VT.is128BitVector(), - Neon_Mov_Imm, NeonMovVT, Imm, OpCmode)) { - SDValue ImmVal = DAG.getTargetConstant(Imm, MVT::i32); - SDValue OpCmodeVal = DAG.getConstant(OpCmode, MVT::i32); - - if (ImmVal.getNode() && OpCmodeVal.getNode()) { - SDValue NeonMov = DAG.getNode(AArch64ISD::NEON_MOVIMM, DL, NeonMovVT, - ImmVal, OpCmodeVal); - return DAG.getNode(ISD::BITCAST, DL, VT, NeonMov); - } - } - - // Then attempt to use vector immediate-form MVNI - uint64_t NegatedImm = (~SplatBits).getZExtValue(); - if (isNeonModifiedImm(NegatedImm, SplatUndef.getZExtValue(), SplatBitSize, - DAG, VT.is128BitVector(), Neon_Mvn_Imm, NeonMovVT, - Imm, OpCmode)) { - SDValue ImmVal = DAG.getTargetConstant(Imm, MVT::i32); - SDValue OpCmodeVal = DAG.getConstant(OpCmode, MVT::i32); - if (ImmVal.getNode() && OpCmodeVal.getNode()) { - SDValue NeonMov = DAG.getNode(AArch64ISD::NEON_MVNIMM, DL, NeonMovVT, - ImmVal, OpCmodeVal); - return DAG.getNode(ISD::BITCAST, DL, VT, NeonMov); - } - } - - // Attempt to use vector immediate-form FMOV - if (((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) || - (VT == MVT::v2f64 && SplatBitSize == 64)) { - APFloat RealVal( - SplatBitSize == 32 ? APFloat::IEEEsingle : APFloat::IEEEdouble, - SplatBits); - uint32_t ImmVal; - if (A64Imms::isFPImm(RealVal, ImmVal)) { - SDValue Val = DAG.getTargetConstant(ImmVal, MVT::i32); - return DAG.getNode(AArch64ISD::NEON_FMOVIMM, DL, VT, Val); - } - } - } - } - - unsigned NumElts = VT.getVectorNumElements(); - bool isOnlyLowElement = true; - bool usesOnlyOneValue = true; - bool hasDominantValue = false; - bool isConstant = true; - - // Map of the number of times a particular SDValue appears in the - // element list. - DenseMap<SDValue, unsigned> ValueCounts; - SDValue Value; - for (unsigned i = 0; i < NumElts; ++i) { - SDValue V = Op.getOperand(i); - if (V.getOpcode() == ISD::UNDEF) - continue; - if (i > 0) - isOnlyLowElement = false; - if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V)) - isConstant = false; - - ValueCounts.insert(std::make_pair(V, 0)); - unsigned &Count = ValueCounts[V]; - - // Is this value dominant? (takes up more than half of the lanes) - if (++Count > (NumElts / 2)) { - hasDominantValue = true; - Value = V; - } - } - if (ValueCounts.size() != 1) - usesOnlyOneValue = false; - if (!Value.getNode() && ValueCounts.size() > 0) - Value = ValueCounts.begin()->first; - - if (ValueCounts.size() == 0) - return DAG.getUNDEF(VT); - - if (isOnlyLowElement) - return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value); - - unsigned EltSize = VT.getVectorElementType().getSizeInBits(); - if (hasDominantValue && EltSize <= 64) { - // Use VDUP for non-constant splats. - if (!isConstant) { - SDValue N; - - // If we are DUPing a value that comes directly from a vector, we could - // just use DUPLANE. We can only do this if the lane being extracted - // is at a constant index, as the DUP from lane instructions only have - // constant-index forms. - // - // If there is a TRUNCATE between EXTRACT_VECTOR_ELT and DUP, we can - // remove TRUNCATE for DUPLANE by apdating the source vector to - // appropriate vector type and lane index. 
- // - // FIXME: for now we have v1i8, v1i16, v1i32 legal vector types, if they - // are not legal any more, no need to check the type size in bits should - // be large than 64. - SDValue V = Value; - if (Value->getOpcode() == ISD::TRUNCATE) - V = Value->getOperand(0); - if (V->getOpcode() == ISD::EXTRACT_VECTOR_ELT && - isa<ConstantSDNode>(V->getOperand(1)) && - V->getOperand(0).getValueType().getSizeInBits() >= 64) { - - // If the element size of source vector is larger than DUPLANE - // element size, we can do transformation by, - // 1) bitcasting source register to smaller element vector - // 2) mutiplying the lane index by SrcEltSize/ResEltSize - // For example, we can lower - // "v8i16 vdup_lane(v4i32, 1)" - // to be - // "v8i16 vdup_lane(v8i16 bitcast(v4i32), 2)". - SDValue SrcVec = V->getOperand(0); - unsigned SrcEltSize = - SrcVec.getValueType().getVectorElementType().getSizeInBits(); - unsigned ResEltSize = VT.getVectorElementType().getSizeInBits(); - if (SrcEltSize > ResEltSize) { - assert((SrcEltSize % ResEltSize == 0) && "Invalid element size"); - SDValue BitCast; - unsigned SrcSize = SrcVec.getValueType().getSizeInBits(); - unsigned ResSize = VT.getSizeInBits(); - - if (SrcSize > ResSize) { - assert((SrcSize % ResSize == 0) && "Invalid vector size"); - EVT CastVT = - EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), - SrcSize / ResEltSize); - BitCast = DAG.getNode(ISD::BITCAST, DL, CastVT, SrcVec); - } else { - assert((SrcSize == ResSize) && "Invalid vector size of source vec"); - BitCast = DAG.getNode(ISD::BITCAST, DL, VT, SrcVec); - } - - unsigned LaneIdx = V->getConstantOperandVal(1); - SDValue Lane = - DAG.getConstant((SrcEltSize / ResEltSize) * LaneIdx, MVT::i64); - N = DAG.getNode(AArch64ISD::NEON_VDUPLANE, DL, VT, BitCast, Lane); - } else { - assert((SrcEltSize == ResEltSize) && - "Invalid element size of source vec"); - N = DAG.getNode(AArch64ISD::NEON_VDUPLANE, DL, VT, V->getOperand(0), - V->getOperand(1)); - } - } else - N = DAG.getNode(AArch64ISD::NEON_VDUP, DL, VT, Value); - - if (!usesOnlyOneValue) { - // The dominant value was splatted as 'N', but we now have to insert - // all differing elements. - for (unsigned I = 0; I < NumElts; ++I) { - if (Op.getOperand(I) == Value) - continue; - SmallVector<SDValue, 3> Ops; - Ops.push_back(N); - Ops.push_back(Op.getOperand(I)); - Ops.push_back(DAG.getConstant(I, MVT::i64)); - N = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Ops); - } - } - return N; - } - if (usesOnlyOneValue && isConstant) { - return DAG.getNode(AArch64ISD::NEON_VDUP, DL, VT, Value); - } - } - // If all elements are constants and the case above didn't get hit, fall back - // to the default expansion, which will generate a load from the constant - // pool. - if (isConstant) - return SDValue(); - - // Try to lower this in lowering ShuffleVector way. 
- SDValue V0, V1; - int Mask[16]; - if (isKnownShuffleVector(Op, DAG, V0, V1, Mask)) { - unsigned V0NumElts = V0.getValueType().getVectorNumElements(); - if (!V1.getNode() && V0NumElts == NumElts * 2) { - V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V0, - DAG.getConstant(NumElts, MVT::i64)); - V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V0, - DAG.getConstant(0, MVT::i64)); - V0NumElts = V0.getValueType().getVectorNumElements(); - } - - if (V1.getNode() && NumElts == V0NumElts && - V0NumElts == V1.getValueType().getVectorNumElements()) { - SDValue Shuffle = DAG.getVectorShuffle(VT, DL, V0, V1, Mask); - if (Shuffle.getOpcode() != ISD::VECTOR_SHUFFLE) - return Shuffle; - else - return LowerVECTOR_SHUFFLE(Shuffle, DAG); - } else { - SDValue Res; - if (isConcatVector(Op, DAG, V0, V1, Mask, Res)) - return Res; - } - } - - // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we - // know the default expansion would otherwise fall back on something even - // worse. For a vector with one or two non-undef values, that's - // scalar_to_vector for the elements followed by a shuffle (provided the - // shuffle is valid for the target) and materialization element by element - // on the stack followed by a load for everything else. - if (!isConstant && !usesOnlyOneValue) { - SDValue Vec = DAG.getUNDEF(VT); - for (unsigned i = 0 ; i < NumElts; ++i) { - SDValue V = Op.getOperand(i); - if (V.getOpcode() == ISD::UNDEF) - continue; - SDValue LaneIdx = DAG.getConstant(i, MVT::i64); - Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V, LaneIdx); - } - return Vec; - } - return SDValue(); -} - -/// isREVMask - Check if a vector shuffle corresponds to a REV -/// instruction with the specified blocksize. (The order of the elements -/// within each block of the vector is reversed.) -static bool isREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) { - assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) && - "Only possible block sizes for REV are: 16, 32, 64"); - - unsigned EltSz = VT.getVectorElementType().getSizeInBits(); - if (EltSz == 64) - return false; - - unsigned NumElts = VT.getVectorNumElements(); - unsigned BlockElts = M[0] + 1; - // If the first shuffle index is UNDEF, be optimistic. - if (M[0] < 0) - BlockElts = BlockSize / EltSz; - - if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz) - return false; - - for (unsigned i = 0; i < NumElts; ++i) { - if (M[i] < 0) - continue; // ignore UNDEF indices - if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts)) - return false; - } - - return true; -} - -// isPermuteMask - Check whether the vector shuffle matches to UZP, ZIP and -// TRN instruction. 
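The isPermuteMask helper defined next recognises six fixed index patterns. As a quick illustration of what it accepts (a sketch with invented names, not part of the backend), the patterns for a 4-element shuffle of {V1,V2}, where indices 0-3 select from V1 and 4-7 from V2, can be printed directly from the same formulas:

#include <cstdio>

int main() {
  const unsigned N = 4;
  // Columns: UZP1 UZP2 ZIP1 ZIP2 TRN1 TRN2 (one row per result lane).
  for (unsigned i = 0; i < N; ++i)
    std::printf("%u %u %u %u %u %u\n",
                i * 2, i * 2 + 1,
                i / 2 + N * (i % 2), (N + i) / 2 + N * (i % 2),
                i + (N - 1) * (i % 2), 1 + i + (N - 1) * (i % 2));
}

This prints the masks <0,2,4,6>, <1,3,5,7>, <0,4,1,5>, <2,6,3,7>, <0,4,2,6> and <1,5,3,7> respectively; with isV2undef set, indices >= 4 are folded back into V1.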
-static unsigned isPermuteMask(ArrayRef<int> M, EVT VT, bool isV2undef) { - unsigned NumElts = VT.getVectorNumElements(); - if (NumElts < 4) - return 0; - - bool ismatch = true; - - // Check UZP1 - for (unsigned i = 0; i < NumElts; ++i) { - unsigned answer = i * 2; - if (isV2undef && answer >= NumElts) - answer -= NumElts; - if (M[i] != -1 && (unsigned)M[i] != answer) { - ismatch = false; - break; - } - } - if (ismatch) - return AArch64ISD::NEON_UZP1; - - // Check UZP2 - ismatch = true; - for (unsigned i = 0; i < NumElts; ++i) { - unsigned answer = i * 2 + 1; - if (isV2undef && answer >= NumElts) - answer -= NumElts; - if (M[i] != -1 && (unsigned)M[i] != answer) { - ismatch = false; - break; - } - } - if (ismatch) - return AArch64ISD::NEON_UZP2; - - // Check ZIP1 - ismatch = true; - for (unsigned i = 0; i < NumElts; ++i) { - unsigned answer = i / 2 + NumElts * (i % 2); - if (isV2undef && answer >= NumElts) - answer -= NumElts; - if (M[i] != -1 && (unsigned)M[i] != answer) { - ismatch = false; - break; - } - } - if (ismatch) - return AArch64ISD::NEON_ZIP1; - - // Check ZIP2 - ismatch = true; - for (unsigned i = 0; i < NumElts; ++i) { - unsigned answer = (NumElts + i) / 2 + NumElts * (i % 2); - if (isV2undef && answer >= NumElts) - answer -= NumElts; - if (M[i] != -1 && (unsigned)M[i] != answer) { - ismatch = false; - break; - } - } - if (ismatch) - return AArch64ISD::NEON_ZIP2; - - // Check TRN1 - ismatch = true; - for (unsigned i = 0; i < NumElts; ++i) { - unsigned answer = i + (NumElts - 1) * (i % 2); - if (isV2undef && answer >= NumElts) - answer -= NumElts; - if (M[i] != -1 && (unsigned)M[i] != answer) { - ismatch = false; - break; - } - } - if (ismatch) - return AArch64ISD::NEON_TRN1; - - // Check TRN2 - ismatch = true; - for (unsigned i = 0; i < NumElts; ++i) { - unsigned answer = 1 + i + (NumElts - 1) * (i % 2); - if (isV2undef && answer >= NumElts) - answer -= NumElts; - if (M[i] != -1 && (unsigned)M[i] != answer) { - ismatch = false; - break; - } - } - if (ismatch) - return AArch64ISD::NEON_TRN2; - - return 0; -} - -SDValue -AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, - SelectionDAG &DAG) const { - SDValue V1 = Op.getOperand(0); - SDValue V2 = Op.getOperand(1); - SDLoc dl(Op); - EVT VT = Op.getValueType(); - ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode()); - - // Convert shuffles that are directly supported on NEON to target-specific - // DAG nodes, instead of keeping them as shuffles and matching them again - // during code selection. This is more efficient and avoids the possibility - // of inconsistencies between legalization and selection. 
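For the REV case handled first below, the shuffle masks isREVMask (defined above) accepts simply reverse the element order inside each block. A minimal sketch that reproduces such a mask (invented names, plain C++):

#include <cstdio>
#include <vector>

// Build the expected REV mask: element indices reversed within each block.
std::vector<int> revMask(unsigned NumElts, unsigned EltBits, unsigned BlockBits) {
  unsigned BlockElts = BlockBits / EltBits;
  std::vector<int> M(NumElts);
  for (unsigned i = 0; i < NumElts; ++i)
    M[i] = (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts);
  return M;
}

int main() {
  for (int Idx : revMask(8, 8, 32)) // v8i8 with a 32-bit block, i.e. REV32
    std::printf("%d ", Idx);        // prints: 3 2 1 0 7 6 5 4
}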
- ArrayRef<int> ShuffleMask = SVN->getMask(); - - unsigned EltSize = VT.getVectorElementType().getSizeInBits(); - if (EltSize > 64) - return SDValue(); - - if (isREVMask(ShuffleMask, VT, 64)) - return DAG.getNode(AArch64ISD::NEON_REV64, dl, VT, V1); - if (isREVMask(ShuffleMask, VT, 32)) - return DAG.getNode(AArch64ISD::NEON_REV32, dl, VT, V1); - if (isREVMask(ShuffleMask, VT, 16)) - return DAG.getNode(AArch64ISD::NEON_REV16, dl, VT, V1); - - unsigned ISDNo; - if (V2.getOpcode() == ISD::UNDEF) - ISDNo = isPermuteMask(ShuffleMask, VT, true); - else - ISDNo = isPermuteMask(ShuffleMask, VT, false); - - if (ISDNo) { - if (V2.getOpcode() == ISD::UNDEF) - return DAG.getNode(ISDNo, dl, VT, V1, V1); - else - return DAG.getNode(ISDNo, dl, VT, V1, V2); - } - - SDValue Res; - if (isConcatVector(Op, DAG, V1, V2, &ShuffleMask[0], Res)) - return Res; - - // If the element of shuffle mask are all the same constant, we can - // transform it into either NEON_VDUP or NEON_VDUPLANE - if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) { - int Lane = SVN->getSplatIndex(); - // If this is undef splat, generate it via "just" vdup, if possible. - if (Lane == -1) Lane = 0; - - // Test if V1 is a SCALAR_TO_VECTOR. - if (V1.getOpcode() == ISD::SCALAR_TO_VECTOR) { - return DAG.getNode(AArch64ISD::NEON_VDUP, dl, VT, V1.getOperand(0)); - } - // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR. - if (V1.getOpcode() == ISD::BUILD_VECTOR) { - bool IsScalarToVector = true; - for (unsigned i = 0, e = V1.getNumOperands(); i != e; ++i) - if (V1.getOperand(i).getOpcode() != ISD::UNDEF && - i != (unsigned)Lane) { - IsScalarToVector = false; - break; - } - if (IsScalarToVector) - return DAG.getNode(AArch64ISD::NEON_VDUP, dl, VT, - V1.getOperand(Lane)); - } - - // Test if V1 is a EXTRACT_SUBVECTOR. - if (V1.getOpcode() == ISD::EXTRACT_SUBVECTOR) { - int ExtLane = cast<ConstantSDNode>(V1.getOperand(1))->getZExtValue(); - return DAG.getNode(AArch64ISD::NEON_VDUPLANE, dl, VT, V1.getOperand(0), - DAG.getConstant(Lane + ExtLane, MVT::i64)); - } - // Test if V1 is a CONCAT_VECTORS. - if (V1.getOpcode() == ISD::CONCAT_VECTORS && - V1.getOperand(1).getOpcode() == ISD::UNDEF) { - SDValue Op0 = V1.getOperand(0); - assert((unsigned)Lane < Op0.getValueType().getVectorNumElements() && - "Invalid vector lane access"); - return DAG.getNode(AArch64ISD::NEON_VDUPLANE, dl, VT, Op0, - DAG.getConstant(Lane, MVT::i64)); - } - - return DAG.getNode(AArch64ISD::NEON_VDUPLANE, dl, VT, V1, - DAG.getConstant(Lane, MVT::i64)); - } - - int Length = ShuffleMask.size(); - int V1EltNum = V1.getValueType().getVectorNumElements(); - - // If the number of v1 elements is the same as the number of shuffle mask - // element and the shuffle masks are sequential values, we can transform - // it into NEON_VEXTRACT. - if (V1EltNum == Length) { - // Check if the shuffle mask is sequential. 
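The loop that follows looks for a sequential mask, which corresponds to a single EXT (byte-wise extract) of the concatenated operands. A condensed standalone version of that check (invented names; -1 entries are undef lanes):

#include <vector>

// Returns the EXT byte offset for a sequential mask, or -1 if it is not one.
// E.g. mask <1,2,3,4,5,6,7,8> on two v8i8 operands gives byte offset 1.
int extByteOffset(const std::vector<int> &Mask, unsigned EltBytes) {
  unsigned I = 0;
  while (I < Mask.size() && Mask[I] == -1) // skip leading undef lanes
    ++I;
  if (I == Mask.size() || Mask[I] < (int)I)
    return -1;
  int Expected = Mask[I];
  for (unsigned J = I; J < Mask.size(); ++J, ++Expected)
    if (Mask[J] != -1 && Mask[J] != Expected)
      return -1;
  return (int)EltBytes * (Mask[I] - (int)I); // byte index of the first element
}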
- int SkipUndef = 0; - while (ShuffleMask[SkipUndef] == -1) { - SkipUndef++; - } - int CurMask = ShuffleMask[SkipUndef]; - if (CurMask >= SkipUndef) { - bool IsSequential = true; - for (int I = SkipUndef; I < Length; ++I) { - if (ShuffleMask[I] != -1 && ShuffleMask[I] != CurMask) { - IsSequential = false; - break; - } - CurMask++; - } - if (IsSequential) { - assert((EltSize % 8 == 0) && "Bitsize of vector element is incorrect"); - unsigned VecSize = EltSize * V1EltNum; - unsigned Index = (EltSize / 8) * (ShuffleMask[SkipUndef] - SkipUndef); - if (VecSize == 64 || VecSize == 128) - return DAG.getNode(AArch64ISD::NEON_VEXTRACT, dl, VT, V1, V2, - DAG.getConstant(Index, MVT::i64)); - } - } - } - - // For shuffle mask like "0, 1, 2, 3, 4, 5, 13, 7", try to generate insert - // by element from V2 to V1 . - // If shuffle mask is like "0, 1, 10, 11, 12, 13, 14, 15", V2 would be a - // better choice to be inserted than V1 as less insert needed, so we count - // element to be inserted for both V1 and V2, and select less one as insert - // target. - - // Collect elements need to be inserted and their index. - SmallVector<int, 8> NV1Elt; - SmallVector<int, 8> N1Index; - SmallVector<int, 8> NV2Elt; - SmallVector<int, 8> N2Index; - for (int I = 0; I != Length; ++I) { - if (ShuffleMask[I] != I) { - NV1Elt.push_back(ShuffleMask[I]); - N1Index.push_back(I); - } - } - for (int I = 0; I != Length; ++I) { - if (ShuffleMask[I] != (I + V1EltNum)) { - NV2Elt.push_back(ShuffleMask[I]); - N2Index.push_back(I); - } - } - - // Decide which to be inserted. If all lanes mismatch, neither V1 nor V2 - // will be inserted. - SDValue InsV = V1; - SmallVector<int, 8> InsMasks = NV1Elt; - SmallVector<int, 8> InsIndex = N1Index; - if ((int)NV1Elt.size() != Length || (int)NV2Elt.size() != Length) { - if (NV1Elt.size() > NV2Elt.size()) { - InsV = V2; - InsMasks = NV2Elt; - InsIndex = N2Index; - } - } else { - InsV = DAG.getNode(ISD::UNDEF, dl, VT); - } - - for (int I = 0, E = InsMasks.size(); I != E; ++I) { - SDValue ExtV = V1; - int Mask = InsMasks[I]; - if (Mask >= V1EltNum) { - ExtV = V2; - Mask -= V1EltNum; - } - // Any value type smaller than i32 is illegal in AArch64, and this lower - // function is called after legalize pass, so we need to legalize - // the result here. - EVT EltVT; - if (VT.getVectorElementType().isFloatingPoint()) - EltVT = (EltSize == 64) ? MVT::f64 : MVT::f32; - else - EltVT = (EltSize == 64) ? 
MVT::i64 : MVT::i32; - - if (Mask >= 0) { - ExtV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, ExtV, - DAG.getConstant(Mask, MVT::i64)); - InsV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, InsV, ExtV, - DAG.getConstant(InsIndex[I], MVT::i64)); - } - } - return InsV; -} - -AArch64TargetLowering::ConstraintType -AArch64TargetLowering::getConstraintType(const std::string &Constraint) const { - if (Constraint.size() == 1) { - switch (Constraint[0]) { - default: break; - case 'w': // An FP/SIMD vector register - return C_RegisterClass; - case 'I': // Constant that can be used with an ADD instruction - case 'J': // Constant that can be used with a SUB instruction - case 'K': // Constant that can be used with a 32-bit logical instruction - case 'L': // Constant that can be used with a 64-bit logical instruction - case 'M': // Constant that can be used as a 32-bit MOV immediate - case 'N': // Constant that can be used as a 64-bit MOV immediate - case 'Y': // Floating point constant zero - case 'Z': // Integer constant zero - return C_Other; - case 'Q': // A memory reference with base register and no offset - return C_Memory; - case 'S': // A symbolic address - return C_Other; - } - } - - // FIXME: Ump, Utf, Usa, Ush - // Ump: A memory address suitable for ldp/stp in SI, DI, SF and DF modes, - // whatever they may be - // Utf: A memory address suitable for ldp/stp in TF mode, whatever it may be - // Usa: An absolute symbolic address - // Ush: The high part (bits 32:12) of a pc-relative symbolic address - assert(Constraint != "Ump" && Constraint != "Utf" && Constraint != "Usa" - && Constraint != "Ush" && "Unimplemented constraints"); - - return TargetLowering::getConstraintType(Constraint); -} - -TargetLowering::ConstraintWeight -AArch64TargetLowering::getSingleConstraintMatchWeight(AsmOperandInfo &Info, - const char *Constraint) const { - - llvm_unreachable("Constraint weight unimplemented"); -} - -void -AArch64TargetLowering::LowerAsmOperandForConstraint(SDValue Op, - std::string &Constraint, - std::vector<SDValue> &Ops, - SelectionDAG &DAG) const { - SDValue Result; - - // Only length 1 constraints are C_Other. - if (Constraint.size() != 1) return; - - // Only C_Other constraints get lowered like this. That means constants for us - // so return early if there's no hope the constraint can be lowered. - - switch(Constraint[0]) { - default: break; - case 'I': case 'J': case 'K': case 'L': - case 'M': case 'N': case 'Z': { - ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); - if (!C) - return; - - uint64_t CVal = C->getZExtValue(); - uint32_t Bits; - - switch (Constraint[0]) { - default: - // FIXME: 'M' and 'N' are MOV pseudo-insts -- unsupported in assembly. 'J' - // is a peculiarly useless SUB constraint. - llvm_unreachable("Unimplemented C_Other constraint"); - case 'I': - if (CVal <= 0xfff) - break; - return; - case 'K': - if (A64Imms::isLogicalImm(32, CVal, Bits)) - break; - return; - case 'L': - if (A64Imms::isLogicalImm(64, CVal, Bits)) - break; - return; - case 'Z': - if (CVal == 0) - break; - return; - } - - Result = DAG.getTargetConstant(CVal, Op.getValueType()); - break; - } - case 'S': { - // An absolute symbolic address or label reference. 
- if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) { - Result = DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op), - GA->getValueType(0)); - } else if (const BlockAddressSDNode *BA - = dyn_cast<BlockAddressSDNode>(Op)) { - Result = DAG.getTargetBlockAddress(BA->getBlockAddress(), - BA->getValueType(0)); - } else if (const ExternalSymbolSDNode *ES - = dyn_cast<ExternalSymbolSDNode>(Op)) { - Result = DAG.getTargetExternalSymbol(ES->getSymbol(), - ES->getValueType(0)); - } else - return; - break; - } - case 'Y': - if (const ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) { - if (CFP->isExactlyValue(0.0)) { - Result = DAG.getTargetConstantFP(0.0, CFP->getValueType(0)); - break; - } - } - return; - } - - if (Result.getNode()) { - Ops.push_back(Result); - return; - } - - // It's an unknown constraint for us. Let generic code have a go. - TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); -} - -std::pair<unsigned, const TargetRegisterClass*> -AArch64TargetLowering::getRegForInlineAsmConstraint( - const std::string &Constraint, - MVT VT) const { - if (Constraint.size() == 1) { - switch (Constraint[0]) { - case 'r': - if (VT.getSizeInBits() <= 32) - return std::make_pair(0U, &AArch64::GPR32RegClass); - else if (VT == MVT::i64) - return std::make_pair(0U, &AArch64::GPR64RegClass); - break; - case 'w': - if (VT == MVT::f16) - return std::make_pair(0U, &AArch64::FPR16RegClass); - else if (VT == MVT::f32) - return std::make_pair(0U, &AArch64::FPR32RegClass); - else if (VT.getSizeInBits() == 64) - return std::make_pair(0U, &AArch64::FPR64RegClass); - else if (VT.getSizeInBits() == 128) - return std::make_pair(0U, &AArch64::FPR128RegClass); - break; - } - } - - // Use the default implementation in TargetLowering to convert the register - // constraint into a member of a register class. - return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); -} - -/// Represent NEON load and store intrinsics as MemIntrinsicNodes. -/// The associated MachineMemOperands record the alignment specified -/// in the intrinsic calls. -bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, - const CallInst &I, - unsigned Intrinsic) const { - switch (Intrinsic) { - case Intrinsic::arm_neon_vld1: - case Intrinsic::arm_neon_vld2: - case Intrinsic::arm_neon_vld3: - case Intrinsic::arm_neon_vld4: - case Intrinsic::aarch64_neon_vld1x2: - case Intrinsic::aarch64_neon_vld1x3: - case Intrinsic::aarch64_neon_vld1x4: - case Intrinsic::arm_neon_vld2lane: - case Intrinsic::arm_neon_vld3lane: - case Intrinsic::arm_neon_vld4lane: { - Info.opc = ISD::INTRINSIC_W_CHAIN; - // Conservatively set memVT to the entire set of vectors loaded. 
- uint64_t NumElts = getDataLayout()->getTypeAllocSize(I.getType()) / 8; - Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); - Info.ptrVal = I.getArgOperand(0); - Info.offset = 0; - Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1); - Info.align = cast<ConstantInt>(AlignArg)->getZExtValue(); - Info.vol = false; // volatile loads with NEON intrinsics not supported - Info.readMem = true; - Info.writeMem = false; - return true; - } - case Intrinsic::arm_neon_vst1: - case Intrinsic::arm_neon_vst2: - case Intrinsic::arm_neon_vst3: - case Intrinsic::arm_neon_vst4: - case Intrinsic::aarch64_neon_vst1x2: - case Intrinsic::aarch64_neon_vst1x3: - case Intrinsic::aarch64_neon_vst1x4: - case Intrinsic::arm_neon_vst2lane: - case Intrinsic::arm_neon_vst3lane: - case Intrinsic::arm_neon_vst4lane: { - Info.opc = ISD::INTRINSIC_VOID; - // Conservatively set memVT to the entire set of vectors stored. - unsigned NumElts = 0; - for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) { - Type *ArgTy = I.getArgOperand(ArgI)->getType(); - if (!ArgTy->isVectorTy()) - break; - NumElts += getDataLayout()->getTypeAllocSize(ArgTy) / 8; - } - Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); - Info.ptrVal = I.getArgOperand(0); - Info.offset = 0; - Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1); - Info.align = cast<ConstantInt>(AlignArg)->getZExtValue(); - Info.vol = false; // volatile stores with NEON intrinsics not supported - Info.readMem = false; - Info.writeMem = true; - return true; - } - default: - break; - } - - return false; -} - -// Truncations from 64-bit GPR to 32-bit GPR is free. -bool AArch64TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const { - if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) - return false; - unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); - unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); - if (NumBits1 <= NumBits2) - return false; - return true; -} - -bool AArch64TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { - if (!VT1.isInteger() || !VT2.isInteger()) - return false; - unsigned NumBits1 = VT1.getSizeInBits(); - unsigned NumBits2 = VT2.getSizeInBits(); - if (NumBits1 <= NumBits2) - return false; - return true; -} - -// All 32-bit GPR operations implicitly zero the high-half of the corresponding -// 64-bit GPR. -bool AArch64TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const { - if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) - return false; - unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); - unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); - if (NumBits1 == 32 && NumBits2 == 64) - return true; - return false; -} - -bool AArch64TargetLowering::isZExtFree(EVT VT1, EVT VT2) const { - if (!VT1.isInteger() || !VT2.isInteger()) - return false; - unsigned NumBits1 = VT1.getSizeInBits(); - unsigned NumBits2 = VT2.getSizeInBits(); - if (NumBits1 == 32 && NumBits2 == 64) - return true; - return false; -} - -bool AArch64TargetLowering::isZExtFree(SDValue Val, EVT VT2) const { - EVT VT1 = Val.getValueType(); - if (isZExtFree(VT1, VT2)) { - return true; - } - - if (Val.getOpcode() != ISD::LOAD) - return false; - - // 8-, 16-, and 32-bit integer loads all implicitly zero-extend. - return (VT1.isSimple() && VT1.isInteger() && VT2.isSimple() && - VT2.isInteger() && VT1.getSizeInBits() <= 32); -} - -// isLegalAddressingMode - Return true if the addressing mode represented -/// by AM is legal for this target, for a load/store of the specified type. 
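The function that follows encodes the AArch64 load/store addressing forms. As a rough standalone sketch of the two immediate forms it accepts for an access of NumBytes (assumed to be a power of two; names invented), the architectural rules are a signed 9-bit unscaled byte offset (LDUR/STUR) or an unsigned 12-bit offset scaled by the access size (LDR/STR):

#include <cstdint>

bool isLegalImmOffset(int64_t Offset, uint64_t NumBytes) {
  if (Offset >= -256 && Offset <= 255)        // unscaled signed 9-bit form
    return true;
  return NumBytes != 0 && Offset > 0 &&
         Offset % (int64_t)NumBytes == 0 &&   // must be a multiple of the size
         Offset / (int64_t)NumBytes <= 4095;  // scaled unsigned 12-bit form
}

Note the actual check below is written with shifts rather than division and accepts a slightly wider range for the signed case.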
-bool AArch64TargetLowering::isLegalAddressingMode(const AddrMode &AM, - Type *Ty) const { - // AArch64 has five basic addressing modes: - // reg - // reg + 9-bit signed offset - // reg + SIZE_IN_BYTES * 12-bit unsigned offset - // reg1 + reg2 - // reg + SIZE_IN_BYTES * reg - - // No global is ever allowed as a base. - if (AM.BaseGV) - return false; - - // No reg+reg+imm addressing. - if (AM.HasBaseReg && AM.BaseOffs && AM.Scale) - return false; - - // check reg + imm case: - // i.e., reg + 0, reg + imm9, reg + SIZE_IN_BYTES * uimm12 - uint64_t NumBytes = 0; - if (Ty->isSized()) { - uint64_t NumBits = getDataLayout()->getTypeSizeInBits(Ty); - NumBytes = NumBits / 8; - if (!isPowerOf2_64(NumBits)) - NumBytes = 0; - } - - if (!AM.Scale) { - int64_t Offset = AM.BaseOffs; - - // 9-bit signed offset - if (Offset >= -(1LL << 9) && Offset <= (1LL << 9) - 1) - return true; - - // 12-bit unsigned offset - unsigned shift = Log2_64(NumBytes); - if (NumBytes && Offset > 0 && (Offset / NumBytes) <= (1LL << 12) - 1 && - // Must be a multiple of NumBytes (NumBytes is a power of 2) - (Offset >> shift) << shift == Offset) - return true; - return false; - } - if (!AM.Scale || AM.Scale == 1 || - (AM.Scale > 0 && (uint64_t)AM.Scale == NumBytes)) - return true; - return false; -} - -int AArch64TargetLowering::getScalingFactorCost(const AddrMode &AM, - Type *Ty) const { - // Scaling factors are not free at all. - // Operands | Rt Latency - // ------------------------------------------- - // Rt, [Xn, Xm] | 4 - // ------------------------------------------- - // Rt, [Xn, Xm, lsl #imm] | Rn: 4 Rm: 5 - // Rt, [Xn, Wm, <extend> #imm] | - if (isLegalAddressingMode(AM, Ty)) - // Scale represents reg2 * scale, thus account for 1 if - // it is not equal to 0 or 1. - return AM.Scale != 0 && AM.Scale != 1; - return -1; -} - -/// getMaximalGlobalOffset - Returns the maximal possible offset which can -/// be used for loads / stores from the global. -unsigned AArch64TargetLowering::getMaximalGlobalOffset() const { - return 4095; -} - diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h deleted file mode 100644 index 070db94808f..00000000000 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ /dev/null @@ -1,410 +0,0 @@ -//==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the interfaces that AArch64 uses to lower LLVM code into a -// selection DAG. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TARGET_AARCH64_ISELLOWERING_H -#define LLVM_TARGET_AARCH64_ISELLOWERING_H - -#include "Utils/AArch64BaseInfo.h" -#include "llvm/CodeGen/CallingConvLower.h" -#include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/IR/Intrinsics.h" -#include "llvm/Target/TargetLowering.h" - -namespace llvm { -namespace AArch64ISD { - enum NodeType { - // Start the numbering from where ISD NodeType finishes. - FIRST_NUMBER = ISD::BUILTIN_OP_END, - - // This is a conditional branch which also notes the flag needed - // (eq/sgt/...). A64 puts this information on the branches rather than - // compares as LLVM does. - BR_CC, - - // A node to be selected to an actual call operation: either BL or BLR in - // the absence of tail calls. 
- Call, - - // Indicates a floating-point immediate which fits into the format required - // by the FMOV instructions. First (and only) operand is the 8-bit encoded - // value of that immediate. - FPMOV, - - // Corresponds directly to an EXTR instruction. Operands are an LHS an RHS - // and an LSB. - EXTR, - - // Wraps a load from the GOT, which should always be performed with a 64-bit - // load instruction. This prevents the DAG combiner folding a truncate to - // form a smaller memory access. - GOTLoad, - - // Performs a bitfield insert. Arguments are: the value being inserted into; - // the value being inserted; least significant bit changed; width of the - // field. - BFI, - - // Simply a convenient node inserted during ISelLowering to represent - // procedure return. Will almost certainly be selected to "RET". - Ret, - - /// Extracts a field of contiguous bits from the source and sign extends - /// them into a single register. Arguments are: source; immr; imms. Note - /// these are pre-encoded since DAG matching can't cope with combining LSB - /// and Width into these values itself. - SBFX, - - /// This is an A64-ification of the standard LLVM SELECT_CC operation. The - /// main difference is that it only has the values and an A64 condition, - /// which will be produced by a setcc instruction. - SELECT_CC, - - /// This serves most of the functions of the LLVM SETCC instruction, for two - /// purposes. First, it prevents optimisations from fiddling with the - /// compare after we've moved the CondCode information onto the SELECT_CC or - /// BR_CC instructions. Second, it gives a legal instruction for the actual - /// comparison. - /// - /// It keeps a record of the condition flags asked for because certain - /// instructions are only valid for a subset of condition codes. - SETCC, - - // Designates a node which is a tail call: both a call and a return - // instruction as far as selction is concerned. It should be selected to an - // unconditional branch. Has the usual plethora of call operands, but: 1st - // is callee, 2nd is stack adjustment required immediately before branch. - TC_RETURN, - - // Designates a call used to support the TLS descriptor ABI. The call itself - // will be indirect ("BLR xN") but a relocation-specifier (".tlsdesccall - // var") must be attached somehow during code generation. It takes two - // operands: the callee and the symbol to be relocated against. - TLSDESCCALL, - - // Leaf node which will be lowered to an appropriate MRS to obtain the - // thread pointer: TPIDR_EL0. - THREAD_POINTER, - - /// Extracts a field of contiguous bits from the source and zero extends - /// them into a single register. Arguments are: source; immr; imms. Note - /// these are pre-encoded since DAG matching can't cope with combining LSB - /// and Width into these values itself. - UBFX, - - // Wraps an address which the ISelLowering phase has decided should be - // created using the large memory model style: i.e. a sequence of four - // movz/movk instructions. - WrapperLarge, - - // Wraps an address which the ISelLowering phase has decided should be - // created using the small memory model style: i.e. adrp/add or - // adrp/mem-op. This exists to prevent bare TargetAddresses which may never - // get selected. 
- WrapperSmall, - - // Vector move immediate - NEON_MOVIMM, - - // Vector Move Inverted Immediate - NEON_MVNIMM, - - // Vector FP move immediate - NEON_FMOVIMM, - - // Vector permute - NEON_UZP1, - NEON_UZP2, - NEON_ZIP1, - NEON_ZIP2, - NEON_TRN1, - NEON_TRN2, - - // Vector Element reverse - NEON_REV64, - NEON_REV32, - NEON_REV16, - - // Vector compare - NEON_CMP, - - // Vector compare zero - NEON_CMPZ, - - // Vector compare bitwise test - NEON_TST, - - // Vector saturating shift - NEON_QSHLs, - NEON_QSHLu, - - // Vector dup - NEON_VDUP, - - // Vector dup by lane - NEON_VDUPLANE, - - // Vector extract - NEON_VEXTRACT, - - // NEON duplicate lane loads - NEON_LD2DUP = ISD::FIRST_TARGET_MEMORY_OPCODE, - NEON_LD3DUP, - NEON_LD4DUP, - - // NEON loads with post-increment base updates: - NEON_LD1_UPD, - NEON_LD2_UPD, - NEON_LD3_UPD, - NEON_LD4_UPD, - NEON_LD1x2_UPD, - NEON_LD1x3_UPD, - NEON_LD1x4_UPD, - - // NEON stores with post-increment base updates: - NEON_ST1_UPD, - NEON_ST2_UPD, - NEON_ST3_UPD, - NEON_ST4_UPD, - NEON_ST1x2_UPD, - NEON_ST1x3_UPD, - NEON_ST1x4_UPD, - - // NEON duplicate lane loads with post-increment base updates: - NEON_LD2DUP_UPD, - NEON_LD3DUP_UPD, - NEON_LD4DUP_UPD, - - // NEON lane loads with post-increment base updates: - NEON_LD2LN_UPD, - NEON_LD3LN_UPD, - NEON_LD4LN_UPD, - - // NEON lane store with post-increment base updates: - NEON_ST2LN_UPD, - NEON_ST3LN_UPD, - NEON_ST4LN_UPD - }; -} - - -class AArch64Subtarget; -class AArch64TargetMachine; - -class AArch64TargetLowering : public TargetLowering { -public: - explicit AArch64TargetLowering(AArch64TargetMachine &TM); - - const char *getTargetNodeName(unsigned Opcode) const override; - - CCAssignFn *CCAssignFnForNode(CallingConv::ID CC) const; - - SDValue LowerFormalArguments(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl<ISD::InputArg> &Ins, - SDLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) const override; - - SDValue LowerReturn(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl<ISD::OutputArg> &Outs, - const SmallVectorImpl<SDValue> &OutVals, - SDLoc dl, SelectionDAG &DAG) const override; - - unsigned getByValTypeAlignment(Type *Ty) const override; - - SDValue LowerCall(CallLoweringInfo &CLI, - SmallVectorImpl<SDValue> &InVals) const override; - - SDValue LowerCallResult(SDValue Chain, SDValue InFlag, - CallingConv::ID CallConv, bool IsVarArg, - const SmallVectorImpl<ISD::InputArg> &Ins, - SDLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) const; - - SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const; - - bool isConcatVector(SDValue Op, SelectionDAG &DAG, SDValue V0, SDValue V1, - const int *Mask, SDValue &Res) const; - - bool isKnownShuffleVector(SDValue Op, SelectionDAG &DAG, SDValue &V0, - SDValue &V1, int *Mask) const; - - SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, - const AArch64Subtarget *ST) const; - - SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; - - void SaveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, SDLoc DL, - SDValue &Chain) const; - - /// IsEligibleForTailCallOptimization - Check whether the call is eligible - /// for tail call optimization. Targets which want to do tail call - /// optimization should implement this function. 
- bool IsEligibleForTailCallOptimization(SDValue Callee, - CallingConv::ID CalleeCC, - bool IsVarArg, - bool IsCalleeStructRet, - bool IsCallerStructRet, - const SmallVectorImpl<ISD::OutputArg> &Outs, - const SmallVectorImpl<SDValue> &OutVals, - const SmallVectorImpl<ISD::InputArg> &Ins, - SelectionDAG& DAG) const; - - /// Finds the incoming stack arguments which overlap the given fixed stack - /// object and incorporates their load into the current chain. This prevents - /// an upcoming store from clobbering the stack argument before it's used. - SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG, - MachineFrameInfo *MFI, int ClobberedFI) const; - - EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override; - - bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const; - - bool IsTailCallConvention(CallingConv::ID CallCC) const; - - SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; - - bool isLegalICmpImmediate(int64_t Val) const override; - - /// \brief Return true if the addressing mode represented by AM is legal for - /// this target, for a load/store of the specified type. - bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override; - - /// \brief Return the cost of the scaling factor used in the addressing - /// mode represented by AM for this target, for a load/store - /// of the specified type. - /// If the AM is supported, the return value must be >= 0. - /// If the AM is not supported, it returns a negative value. - int getScalingFactorCost(const AddrMode &AM, Type *Ty) const override; - - bool isTruncateFree(Type *Ty1, Type *Ty2) const override; - bool isTruncateFree(EVT VT1, EVT VT2) const override; - - bool isZExtFree(Type *Ty1, Type *Ty2) const override; - bool isZExtFree(EVT VT1, EVT VT2) const override; - bool isZExtFree(SDValue Val, EVT VT2) const override; - - SDValue getSelectableIntSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, - SDValue &A64cc, SelectionDAG &DAG, SDLoc &dl) const; - - MachineBasicBlock * - EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *MBB) const override; - - MachineBasicBlock * - emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *MBB, - unsigned Size, unsigned Opcode) const; - - MachineBasicBlock * - emitAtomicBinaryMinMax(MachineInstr *MI, MachineBasicBlock *BB, - unsigned Size, unsigned CmpOp, - A64CC::CondCodes Cond) const; - MachineBasicBlock * - emitAtomicCmpSwap(MachineInstr *MI, MachineBasicBlock *BB, - unsigned Size) const; - - MachineBasicBlock * - EmitF128CSEL(MachineInstr *MI, MachineBasicBlock *MBB) const; - - SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerF128ToCall(SDValue Op, SelectionDAG &DAG, - RTLIB::Libcall Call) const; - SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, bool IsSigned) const; - SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; - - SDValue LowerGlobalAddressELFSmall(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerGlobalAddressELFLarge(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) 
const; - - SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; - - SDValue LowerTLSDescCall(SDValue SymAddr, SDValue DescAddr, SDLoc DL, - SelectionDAG &DAG) const; - SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, bool IsSigned) const; - SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; - - SDValue PerformDAGCombine(SDNode *N,DAGCombinerInfo &DCI) const override; - - unsigned getRegisterByName(const char* RegName, EVT VT) const override; - - /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster - /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be - /// expanded to FMAs when this method returns true, otherwise fmuladd is - /// expanded to fmul + fadd. - bool isFMAFasterThanFMulAndFAdd(EVT VT) const override; - - /// allowsUnalignedMemoryAccesses - Returns true if the target allows - /// unaligned memory accesses of the specified type. Returns whether it - /// is "fast" by reference in the second argument. - bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AddrSpace, - bool *Fast) const override; - - ConstraintType - getConstraintType(const std::string &Constraint) const override; - - ConstraintWeight - getSingleConstraintMatchWeight(AsmOperandInfo &Info, - const char *Constraint) const override; - void LowerAsmOperandForConstraint(SDValue Op, - std::string &Constraint, - std::vector<SDValue> &Ops, - SelectionDAG &DAG) const override; - - std::pair<unsigned, const TargetRegisterClass*> - getRegForInlineAsmConstraint(const std::string &Constraint, - MVT VT) const override; - - bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, - unsigned Intrinsic) const override; - - /// getMaximalGlobalOffset - Returns the maximal possible offset which can - /// be used for loads / stores from the global. - unsigned getMaximalGlobalOffset() const override; - -protected: - std::pair<const TargetRegisterClass*, uint8_t> - findRepresentativeClass(MVT VT) const override; - -private: - const InstrItineraryData *Itins; - - const AArch64Subtarget *getSubtarget() const { - return &getTargetMachine().getSubtarget<AArch64Subtarget>(); - } -}; -enum NeonModImmType { - Neon_Mov_Imm, - Neon_Mvn_Imm -}; - -extern SDValue ScanBUILD_VECTOR(SDValue Op, bool &isOnlyLowElement, - bool &usesOnlyOneValue, bool &hasDominantValue, - bool &isConstant, bool &isUNDEF); -} // namespace llvm - -#endif // LLVM_TARGET_AARCH64_ISELLOWERING_H diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td deleted file mode 100644 index 4cc3813203c..00000000000 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ /dev/null @@ -1,1487 +0,0 @@ -//===- AArch64InstrFormats.td - AArch64 Instruction Formats --*- tablegen -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// This file describes AArch64 instruction formats, down to the level of the -// instruction's overall class. 
-//===----------------------------------------------------------------------===// - - -//===----------------------------------------------------------------------===// -// A64 Instruction Format Definitions. -//===----------------------------------------------------------------------===// - -// A64 is currently the only instruction set supported by the AArch64 -// architecture. -class A64Inst<dag outs, dag ins, string asmstr, list<dag> patterns, - InstrItinClass itin> - : Instruction { - // All A64 instructions are 32-bit. This field will be filled in - // gradually going down the hierarchy. - field bits<32> Inst; - - field bits<32> Unpredictable = 0; - // SoftFail is the generic name for this field, but we alias it so - // as to make it more obvious what it means in ARM-land. - field bits<32> SoftFail = Unpredictable; - - // LLVM-level model of the AArch64/A64 distinction. - let Namespace = "AArch64"; - let DecoderNamespace = "A64"; - let Size = 4; - - // Set the templated fields - let OutOperandList = outs; - let InOperandList = ins; - let AsmString = asmstr; - let Pattern = patterns; - let Itinerary = itin; -} - -class PseudoInst<dag outs, dag ins, list<dag> patterns> : Instruction { - let Namespace = "AArch64"; - - let OutOperandList = outs; - let InOperandList= ins; - let Pattern = patterns; - let isCodeGenOnly = 1; - let isPseudo = 1; -} - -// Represents a pseudo-instruction that represents a single A64 instruction for -// whatever reason, the eventual result will be a 32-bit real instruction. -class A64PseudoInst<dag outs, dag ins, list<dag> patterns> - : PseudoInst<outs, ins, patterns> { - let Size = 4; -} - -// As above, this will be a single A64 instruction, but we can actually give the -// expansion in TableGen. -class A64PseudoExpand<dag outs, dag ins, list<dag> patterns, dag Result> - : A64PseudoInst<outs, ins, patterns>, - PseudoInstExpansion<Result>; - - -// First, some common cross-hierarchy register formats. - -class A64InstRd<dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64Inst<outs, ins, asmstr, patterns, itin> { - bits<5> Rd; - - let Inst{4-0} = Rd; -} - -class A64InstRt<dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64Inst<outs, ins, asmstr, patterns, itin> { - bits<5> Rt; - - let Inst{4-0} = Rt; -} - - -class A64InstRdn<dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64InstRd<outs, ins, asmstr, patterns, itin> { - // Inherit rdt - bits<5> Rn; - - let Inst{9-5} = Rn; -} - -class A64InstRtn<dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64InstRt<outs, ins, asmstr, patterns, itin> { - // Inherit rdt - bits<5> Rn; - - let Inst{9-5} = Rn; -} - -// Instructions taking Rt,Rt2,Rn -class A64InstRtt2n<dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64InstRtn<outs, ins, asmstr, patterns, itin> { - bits<5> Rt2; - - let Inst{14-10} = Rt2; -} - -class A64InstRdnm<dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64InstRdn<outs, ins, asmstr, patterns, itin> { - bits<5> Rm; - - let Inst{20-16} = Rm; -} - -class A64InstRtnm<dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64InstRtn<outs, ins, asmstr, patterns, itin> { - bits<5> Rm; - - let Inst{20-16} = Rm; -} - -//===----------------------------------------------------------------------===// -// -// Actual A64 Instruction Formats -// - -// Format for Add-subtract (extended register) instructions. 
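The A64InstRd/A64InstRdn/A64InstRdnm layering above fixes the common register fields at Rd = bits [4:0], Rn = [9:5] and Rm = [20:16]; the format classes that follow only fill in the remaining opcode bits. A small sketch of that composition in plain C++ (invented names), using the 64-bit add-subtract (shifted register) encoding as an example:

#include <cstdint>
#include <cstdio>

// Pack the shared register fields into a 32-bit instruction word.
uint32_t packRdnm(uint32_t OpcodeBits, unsigned Rd, unsigned Rn, unsigned Rm) {
  return OpcodeBits | (Rd & 0x1F) | ((Rn & 0x1F) << 5) | ((Rm & 0x1F) << 16);
}

int main() {
  // 64-bit ADD (shifted register, LSL #0): sf=1, op=0, S=0, bits 28-24 = 0b01011.
  const uint32_t AddXBase = 0x8B000000;
  std::printf("%08x\n", (unsigned)packRdnm(AddXBase, 0, 1, 2)); // add x0, x1, x2 -> 8b020020
}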
-class A64I_addsubext<bit sf, bit op, bit S, bits<2> opt, bits<3> option, - dag outs, dag ins, string asmstr, list<dag> patterns, - InstrItinClass itin> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> { - bits<3> Imm3; - - let Inst{31} = sf; - let Inst{30} = op; - let Inst{29} = S; - let Inst{28-24} = 0b01011; - let Inst{23-22} = opt; - let Inst{21} = 0b1; - // Rm inherited in 20-16 - let Inst{15-13} = option; - let Inst{12-10} = Imm3; - // Rn inherited in 9-5 - // Rd inherited in 4-0 -} - -// Format for Add-subtract (immediate) instructions. -class A64I_addsubimm<bit sf, bit op, bit S, bits<2> shift, - dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64InstRdn<outs, ins, asmstr, patterns, itin> { - bits<12> Imm12; - - let Inst{31} = sf; - let Inst{30} = op; - let Inst{29} = S; - let Inst{28-24} = 0b10001; - let Inst{23-22} = shift; - let Inst{21-10} = Imm12; -} - -// Format for Add-subtract (shifted register) instructions. -class A64I_addsubshift<bit sf, bit op, bit S, bits<2> shift, - dag outs, dag ins, string asmstr, list<dag> patterns, - InstrItinClass itin> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> { - bits<6> Imm6; - - let Inst{31} = sf; - let Inst{30} = op; - let Inst{29} = S; - let Inst{28-24} = 0b01011; - let Inst{23-22} = shift; - let Inst{21} = 0b0; - // Rm inherited in 20-16 - let Inst{15-10} = Imm6; - // Rn inherited in 9-5 - // Rd inherited in 4-0 -} - -// Format for Add-subtract (with carry) instructions. -class A64I_addsubcarry<bit sf, bit op, bit S, bits<6> opcode2, - dag outs, dag ins, string asmstr, list<dag> patterns, - InstrItinClass itin> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> { - let Inst{31} = sf; - let Inst{30} = op; - let Inst{29} = S; - let Inst{28-21} = 0b11010000; - // Rm inherited in 20-16 - let Inst{15-10} = opcode2; - // Rn inherited in 9-5 - // Rd inherited in 4-0 -} - - -// Format for Bitfield instructions -class A64I_bitfield<bit sf, bits<2> opc, bit n, - dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64InstRdn<outs, ins, asmstr, patterns, itin> { - bits<6> ImmR; - bits<6> ImmS; - - let Inst{31} = sf; - let Inst{30-29} = opc; - let Inst{28-23} = 0b100110; - let Inst{22} = n; - let Inst{21-16} = ImmR; - let Inst{15-10} = ImmS; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} - -// Format for compare and branch (immediate) instructions. -class A64I_cmpbr<bit sf, bit op, - dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64InstRt<outs, ins, asmstr, patterns, itin> { - bits<19> Label; - - let Inst{31} = sf; - let Inst{30-25} = 0b011010; - let Inst{24} = op; - let Inst{23-5} = Label; - // Inherit Rt in 4-0 -} - -// Format for conditional branch (immediate) instructions. -class A64I_condbr<bit o1, bit o0, - dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64Inst<outs, ins, asmstr, patterns, itin> { - bits<19> Label; - bits<4> Cond; - - let Inst{31-25} = 0b0101010; - let Inst{24} = o1; - let Inst{23-5} = Label; - let Inst{4} = o0; - let Inst{3-0} = Cond; -} - -// Format for conditional compare (immediate) instructions. 
-class A64I_condcmpimm<bit sf, bit op, bit o2, bit o3, bit s, - dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64Inst<outs, ins, asmstr, patterns, itin> { - bits<5> Rn; - bits<5> UImm5; - bits<4> NZCVImm; - bits<4> Cond; - - let Inst{31} = sf; - let Inst{30} = op; - let Inst{29} = s; - let Inst{28-21} = 0b11010010; - let Inst{20-16} = UImm5; - let Inst{15-12} = Cond; - let Inst{11} = 0b1; - let Inst{10} = o2; - let Inst{9-5} = Rn; - let Inst{4} = o3; - let Inst{3-0} = NZCVImm; -} - -// Format for conditional compare (register) instructions. -class A64I_condcmpreg<bit sf, bit op, bit o2, bit o3, bit s, - dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64Inst<outs, ins, asmstr, patterns, itin> { - bits<5> Rn; - bits<5> Rm; - bits<4> NZCVImm; - bits<4> Cond; - - - let Inst{31} = sf; - let Inst{30} = op; - let Inst{29} = s; - let Inst{28-21} = 0b11010010; - let Inst{20-16} = Rm; - let Inst{15-12} = Cond; - let Inst{11} = 0b0; - let Inst{10} = o2; - let Inst{9-5} = Rn; - let Inst{4} = o3; - let Inst{3-0} = NZCVImm; -} - -// Format for conditional select instructions. -class A64I_condsel<bit sf, bit op, bit s, bits<2> op2, - dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> { - bits<4> Cond; - - let Inst{31} = sf; - let Inst{30} = op; - let Inst{29} = s; - let Inst{28-21} = 0b11010100; - // Inherit Rm in 20-16 - let Inst{15-12} = Cond; - let Inst{11-10} = op2; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} - -// Format for data processing (1 source) instructions -class A64I_dp_1src<bit sf, bit S, bits<5> opcode2, bits<6> opcode, - string asmstr, dag outs, dag ins, - list<dag> patterns, InstrItinClass itin> - : A64InstRdn<outs, ins, asmstr, patterns, itin> { - let Inst{31} = sf; - let Inst{30} = 0b1; - let Inst{29} = S; - let Inst{28-21} = 0b11010110; - let Inst{20-16} = opcode2; - let Inst{15-10} = opcode; -} - -// Format for data processing (2 source) instructions -class A64I_dp_2src<bit sf, bits<6> opcode, bit S, - string asmstr, dag outs, dag ins, - list<dag> patterns, InstrItinClass itin> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> { - let Inst{31} = sf; - let Inst{30} = 0b0; - let Inst{29} = S; - let Inst{28-21} = 0b11010110; - let Inst{15-10} = opcode; -} - -// Format for data-processing (3 source) instructions - -class A64I_dp3<bit sf, bits<6> opcode, - dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> { - let Inst{31} = sf; - let Inst{30-29} = opcode{5-4}; - let Inst{28-24} = 0b11011; - let Inst{23-21} = opcode{3-1}; - // Inherits Rm in 20-16 - let Inst{15} = opcode{0}; - // {14-10} mostly Ra, but unspecified for SMULH/UMULH - // Inherits Rn in 9-5 - // Inherits Rd in 4-0 -} - -// Format for exception generation instructions -class A64I_exception<bits<3> opc, bits<3> op2, bits<2> ll, - dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64Inst<outs, ins, asmstr, patterns, itin> { - bits<16> UImm16; - - let Inst{31-24} = 0b11010100; - let Inst{23-21} = opc; - let Inst{20-5} = UImm16; - let Inst{4-2} = op2; - let Inst{1-0} = ll; -} - -// Format for extract (immediate) instructions -class A64I_extract<bit sf, bits<3> op, bit n, - dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> { - bits<6> LSB; - - let Inst{31} = sf; - let Inst{30-29} = op{2-1}; - let 
Inst{28-23} = 0b100111; - let Inst{22} = n; - let Inst{21} = op{0}; - // Inherits Rm in bits 20-16 - let Inst{15-10} = LSB; - // Inherits Rn in 9-5 - // Inherits Rd in 4-0 -} - -let Predicates = [HasFPARMv8] in { - -// Format for floating-point compare instructions. -class A64I_fpcmp<bit m, bit s, bits<2> type, bits<2> op, bits<5> opcode2, - dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64Inst<outs, ins, asmstr, patterns, itin> { - bits<5> Rn; - bits<5> Rm; - - let Inst{31} = m; - let Inst{30} = 0b0; - let Inst{29} = s; - let Inst{28-24} = 0b11110; - let Inst{23-22} = type; - let Inst{21} = 0b1; - let Inst{20-16} = Rm; - let Inst{15-14} = op; - let Inst{13-10} = 0b1000; - let Inst{9-5} = Rn; - let Inst{4-0} = opcode2; -} - -// Format for floating-point conditional compare instructions. -class A64I_fpccmp<bit m, bit s, bits<2> type, bit op, - dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64InstRdn<outs, ins, asmstr, patterns, itin> { - bits<5> Rn; - bits<5> Rm; - bits<4> NZCVImm; - bits<4> Cond; - - let Inst{31} = m; - let Inst{30} = 0b0; - let Inst{29} = s; - let Inst{28-24} = 0b11110; - let Inst{23-22} = type; - let Inst{21} = 0b1; - let Inst{20-16} = Rm; - let Inst{15-12} = Cond; - let Inst{11-10} = 0b01; - let Inst{9-5} = Rn; - let Inst{4} = op; - let Inst{3-0} = NZCVImm; -} - -// Format for floating-point conditional select instructions. -class A64I_fpcondsel<bit m, bit s, bits<2> type, - dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> { - bits<4> Cond; - - let Inst{31} = m; - let Inst{30} = 0b0; - let Inst{29} = s; - let Inst{28-24} = 0b11110; - let Inst{23-22} = type; - let Inst{21} = 0b1; - // Inherit Rm in 20-16 - let Inst{15-12} = Cond; - let Inst{11-10} = 0b11; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} - - -// Format for floating-point data-processing (1 source) instructions. -class A64I_fpdp1<bit m, bit s, bits<2> type, bits<6> opcode, - dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64InstRdn<outs, ins, asmstr, patterns, itin> { - let Inst{31} = m; - let Inst{30} = 0b0; - let Inst{29} = s; - let Inst{28-24} = 0b11110; - let Inst{23-22} = type; - let Inst{21} = 0b1; - let Inst{20-15} = opcode; - let Inst{14-10} = 0b10000; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} - -// Format for floating-point data-processing (2 sources) instructions. -class A64I_fpdp2<bit m, bit s, bits<2> type, bits<4> opcode, - dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> { - let Inst{31} = m; - let Inst{30} = 0b0; - let Inst{29} = s; - let Inst{28-24} = 0b11110; - let Inst{23-22} = type; - let Inst{21} = 0b1; - // Inherit Rm in 20-16 - let Inst{15-12} = opcode; - let Inst{11-10} = 0b10; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} - -// Format for floating-point data-processing (3 sources) instructions. 
-class A64I_fpdp3<bit m, bit s, bits<2> type, bit o1, bit o0, - dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> { - bits<5> Ra; - - let Inst{31} = m; - let Inst{30} = 0b0; - let Inst{29} = s; - let Inst{28-24} = 0b11111; - let Inst{23-22} = type; - let Inst{21} = o1; - // Inherit Rm in 20-16 - let Inst{15} = o0; - let Inst{14-10} = Ra; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} - -// Format for floating-point <-> fixed-point conversion instructions. -class A64I_fpfixed<bit sf, bit s, bits<2> type, bits<2> mode, bits<3> opcode, - dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64InstRdn<outs, ins, asmstr, patterns, itin> { - bits<6> Scale; - - let Inst{31} = sf; - let Inst{30} = 0b0; - let Inst{29} = s; - let Inst{28-24} = 0b11110; - let Inst{23-22} = type; - let Inst{21} = 0b0; - let Inst{20-19} = mode; - let Inst{18-16} = opcode; - let Inst{15-10} = Scale; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} - -// Format for floating-point <-> integer conversion instructions. -class A64I_fpint<bit sf, bit s, bits<2> type, bits<2> rmode, bits<3> opcode, - dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64InstRdn<outs, ins, asmstr, patterns, itin> { - let Inst{31} = sf; - let Inst{30} = 0b0; - let Inst{29} = s; - let Inst{28-24} = 0b11110; - let Inst{23-22} = type; - let Inst{21} = 0b1; - let Inst{20-19} = rmode; - let Inst{18-16} = opcode; - let Inst{15-10} = 0b000000; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} - - -// Format for floating-point immediate instructions. -class A64I_fpimm<bit m, bit s, bits<2> type, bits<5> imm5, - dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64InstRd<outs, ins, asmstr, patterns, itin> { - bits<8> Imm8; - - let Inst{31} = m; - let Inst{30} = 0b0; - let Inst{29} = s; - let Inst{28-24} = 0b11110; - let Inst{23-22} = type; - let Inst{21} = 0b1; - let Inst{20-13} = Imm8; - let Inst{12-10} = 0b100; - let Inst{9-5} = imm5; - // Inherit Rd in 4-0 -} - -} - -// Format for load-register (literal) instructions. -class A64I_LDRlit<bits<2> opc, bit v, - dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64InstRt<outs, ins, asmstr, patterns, itin> { - bits<19> Imm19; - - let Inst{31-30} = opc; - let Inst{29-27} = 0b011; - let Inst{26} = v; - let Inst{25-24} = 0b00; - let Inst{23-5} = Imm19; - // Inherit Rt in 4-0 -} - -// Format for load-store exclusive instructions. 
-class A64I_LDSTex_tn<bits<2> size, bit o2, bit L, bit o1, bit o0, - dag outs, dag ins, string asmstr, - list <dag> patterns, InstrItinClass itin> - : A64InstRtn<outs, ins, asmstr, patterns, itin> { - let Inst{31-30} = size; - let Inst{29-24} = 0b001000; - let Inst{23} = o2; - let Inst{22} = L; - let Inst{21} = o1; - let Inst{15} = o0; -} - -class A64I_LDSTex_tt2n<bits<2> size, bit o2, bit L, bit o1, bit o0, - dag outs, dag ins, string asmstr, - list <dag> patterns, InstrItinClass itin>: - A64I_LDSTex_tn<size, o2, L, o1, o0, outs, ins, asmstr, patterns, itin>{ - bits<5> Rt2; - let Inst{14-10} = Rt2; -} - -class A64I_LDSTex_stn<bits<2> size, bit o2, bit L, bit o1, bit o0, - dag outs, dag ins, string asmstr, - list <dag> patterns, InstrItinClass itin>: - A64I_LDSTex_tn<size, o2, L, o1, o0, outs, ins, asmstr, patterns, itin>{ - bits<5> Rs; - let Inst{20-16} = Rs; -} - -class A64I_LDSTex_stt2n<bits<2> size, bit o2, bit L, bit o1, bit o0, - dag outs, dag ins, string asmstr, - list <dag> patterns, InstrItinClass itin>: - A64I_LDSTex_stn<size, o2, L, o1, o0, outs, ins, asmstr, patterns, itin>{ - bits<5> Rt2; - let Inst{14-10} = Rt2; -} - -// Format for load-store register (immediate post-indexed) instructions -class A64I_LSpostind<bits<2> size, bit v, bits<2> opc, - dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64InstRtn<outs, ins, asmstr, patterns, itin> { - bits<9> SImm9; - - let Inst{31-30} = size; - let Inst{29-27} = 0b111; - let Inst{26} = v; - let Inst{25-24} = 0b00; - let Inst{23-22} = opc; - let Inst{21} = 0b0; - let Inst{20-12} = SImm9; - let Inst{11-10} = 0b01; - // Inherit Rn in 9-5 - // Inherit Rt in 4-0 -} - -// Format for load-store register (immediate pre-indexed) instructions -class A64I_LSpreind<bits<2> size, bit v, bits<2> opc, - dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64InstRtn<outs, ins, asmstr, patterns, itin> { - bits<9> SImm9; - - - let Inst{31-30} = size; - let Inst{29-27} = 0b111; - let Inst{26} = v; - let Inst{25-24} = 0b00; - let Inst{23-22} = opc; - let Inst{21} = 0b0; - let Inst{20-12} = SImm9; - let Inst{11-10} = 0b11; - // Inherit Rn in 9-5 - // Inherit Rt in 4-0 -} - -// Format for load-store register (unprivileged) instructions -class A64I_LSunpriv<bits<2> size, bit v, bits<2> opc, - dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64InstRtn<outs, ins, asmstr, patterns, itin> { - bits<9> SImm9; - - - let Inst{31-30} = size; - let Inst{29-27} = 0b111; - let Inst{26} = v; - let Inst{25-24} = 0b00; - let Inst{23-22} = opc; - let Inst{21} = 0b0; - let Inst{20-12} = SImm9; - let Inst{11-10} = 0b10; - // Inherit Rn in 9-5 - // Inherit Rt in 4-0 -} - -// Format for load-store (unscaled immediate) instructions. -class A64I_LSunalimm<bits<2> size, bit v, bits<2> opc, - dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64InstRtn<outs, ins, asmstr, patterns, itin> { - bits<9> SImm9; - - let Inst{31-30} = size; - let Inst{29-27} = 0b111; - let Inst{26} = v; - let Inst{25-24} = 0b00; - let Inst{23-22} = opc; - let Inst{21} = 0b0; - let Inst{20-12} = SImm9; - let Inst{11-10} = 0b00; - // Inherit Rn in 9-5 - // Inherit Rt in 4-0 -} - - -// Format for load-store (unsigned immediate) instructions. 
-class A64I_LSunsigimm<bits<2> size, bit v, bits<2> opc, - dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64InstRtn<outs, ins, asmstr, patterns, itin> { - bits<12> UImm12; - - let Inst{31-30} = size; - let Inst{29-27} = 0b111; - let Inst{26} = v; - let Inst{25-24} = 0b01; - let Inst{23-22} = opc; - let Inst{21-10} = UImm12; -} - -// Format for load-store register (register offset) instructions. -class A64I_LSregoff<bits<2> size, bit v, bits<2> opc, bit optionlo, - dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64InstRtn<outs, ins, asmstr, patterns, itin> { - bits<5> Rm; - - // Complex operand selection needed for these instructions, so they - // need an "addr" field for encoding/decoding to be generated. - bits<3> Ext; - // OptionHi = Ext{2-1} - // S = Ext{0} - - let Inst{31-30} = size; - let Inst{29-27} = 0b111; - let Inst{26} = v; - let Inst{25-24} = 0b00; - let Inst{23-22} = opc; - let Inst{21} = 0b1; - let Inst{20-16} = Rm; - let Inst{15-14} = Ext{2-1}; - let Inst{13} = optionlo; - let Inst{12} = Ext{0}; - let Inst{11-10} = 0b10; - // Inherits Rn in 9-5 - // Inherits Rt in 4-0 - - let AddedComplexity = 50; -} - -// Format for Load-store register pair (offset) instructions -class A64I_LSPoffset<bits<2> opc, bit v, bit l, - dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64InstRtt2n<outs, ins, asmstr, patterns, itin> { - bits<7> SImm7; - - let Inst{31-30} = opc; - let Inst{29-27} = 0b101; - let Inst{26} = v; - let Inst{25-23} = 0b010; - let Inst{22} = l; - let Inst{21-15} = SImm7; - // Inherit Rt2 in 14-10 - // Inherit Rn in 9-5 - // Inherit Rt in 4-0 -} - -// Format for Load-store register pair (post-indexed) instructions -class A64I_LSPpostind<bits<2> opc, bit v, bit l, - dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64InstRtt2n<outs, ins, asmstr, patterns, itin> { - bits<7> SImm7; - - let Inst{31-30} = opc; - let Inst{29-27} = 0b101; - let Inst{26} = v; - let Inst{25-23} = 0b001; - let Inst{22} = l; - let Inst{21-15} = SImm7; - // Inherit Rt2 in 14-10 - // Inherit Rn in 9-5 - // Inherit Rt in 4-0 -} - -// Format for Load-store register pair (pre-indexed) instructions -class A64I_LSPpreind<bits<2> opc, bit v, bit l, - dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64InstRtt2n<outs, ins, asmstr, patterns, itin> { - bits<7> SImm7; - - let Inst{31-30} = opc; - let Inst{29-27} = 0b101; - let Inst{26} = v; - let Inst{25-23} = 0b011; - let Inst{22} = l; - let Inst{21-15} = SImm7; - // Inherit Rt2 in 14-10 - // Inherit Rn in 9-5 - // Inherit Rt in 4-0 -} - -// Format for Load-store non-temporal register pair (offset) instructions -class A64I_LSPnontemp<bits<2> opc, bit v, bit l, - dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64InstRtt2n<outs, ins, asmstr, patterns, itin> { - bits<7> SImm7; - - let Inst{31-30} = opc; - let Inst{29-27} = 0b101; - let Inst{26} = v; - let Inst{25-23} = 0b000; - let Inst{22} = l; - let Inst{21-15} = SImm7; - // Inherit Rt2 in 14-10 - // Inherit Rn in 9-5 - // Inherit Rt in 4-0 -} - -// Format for Logical (immediate) instructions -class A64I_logicalimm<bit sf, bits<2> opc, - dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64InstRdn<outs, ins, asmstr, patterns, itin> { - bit N; - bits<6> ImmR; - bits<6> ImmS; - - // N, ImmR and ImmS have no separate existence in any assembly syntax (or for - // selection), so we'll combine 
them into a single field here. - bits<13> Imm; - // N = Imm{12}; - // ImmR = Imm{11-6}; - // ImmS = Imm{5-0}; - - let Inst{31} = sf; - let Inst{30-29} = opc; - let Inst{28-23} = 0b100100; - let Inst{22} = Imm{12}; - let Inst{21-16} = Imm{11-6}; - let Inst{15-10} = Imm{5-0}; - // Rn inherited in 9-5 - // Rd inherited in 4-0 -} - -// Format for Logical (shifted register) instructions -class A64I_logicalshift<bit sf, bits<2> opc, bits<2> shift, bit N, - dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> { - bits<6> Imm6; - - let Inst{31} = sf; - let Inst{30-29} = opc; - let Inst{28-24} = 0b01010; - let Inst{23-22} = shift; - let Inst{21} = N; - // Rm inherited - let Inst{15-10} = Imm6; - // Rn inherited - // Rd inherited -} - -// Format for Move wide (immediate) -class A64I_movw<bit sf, bits<2> opc, - dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64InstRd<outs, ins, asmstr, patterns, itin> { - bits<16> UImm16; - bits<2> Shift; // Called "hw" officially - - let Inst{31} = sf; - let Inst{30-29} = opc; - let Inst{28-23} = 0b100101; - let Inst{22-21} = Shift; - let Inst{20-5} = UImm16; - // Inherits Rd in 4-0 -} - -// Format for PC-relative addressing instructions, ADR and ADRP. -class A64I_PCADR<bit op, - dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64InstRd<outs, ins, asmstr, patterns, itin> { - bits<21> Label; - - let Inst{31} = op; - let Inst{30-29} = Label{1-0}; - let Inst{28-24} = 0b10000; - let Inst{23-5} = Label{20-2}; -} - -// Format for system instructions -class A64I_system<bit l, - dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64Inst<outs, ins, asmstr, patterns, itin> { - bits<2> Op0; - bits<3> Op1; - bits<4> CRn; - bits<4> CRm; - bits<3> Op2; - bits<5> Rt; - - let Inst{31-22} = 0b1101010100; - let Inst{21} = l; - let Inst{20-19} = Op0; - let Inst{18-16} = Op1; - let Inst{15-12} = CRn; - let Inst{11-8} = CRm; - let Inst{7-5} = Op2; - let Inst{4-0} = Rt; - - // These instructions can do horrible things. - let hasSideEffects = 1; -} - -// Format for unconditional branch (immediate) instructions -class A64I_Bimm<bit op, - dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64Inst<outs, ins, asmstr, patterns, itin> { - // Doubly special in not even sharing register fields with other - // instructions, so we create our own Rn here. - bits<26> Label; - - let Inst{31} = op; - let Inst{30-26} = 0b00101; - let Inst{25-0} = Label; -} - -// Format for Test & branch (immediate) instructions -class A64I_TBimm<bit op, - dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64InstRt<outs, ins, asmstr, patterns, itin> { - // Doubly special in not even sharing register fields with other - // instructions, so we create our own Rn here. - bits<6> Imm; - bits<14> Label; - - let Inst{31} = Imm{5}; - let Inst{30-25} = 0b011011; - let Inst{24} = op; - let Inst{23-19} = Imm{4-0}; - let Inst{18-5} = Label; - // Inherit Rt in 4-0 -} - -// Format for Unconditional branch (register) instructions, including -// RET. Shares no fields with instructions further up the hierarchy -// so top-level. 
-class A64I_Breg<bits<4> opc, bits<5> op2, bits<6> op3, bits<5> op4, - dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64Inst<outs, ins, asmstr, patterns, itin> { - // Doubly special in not even sharing register fields with other - // instructions, so we create our own Rn here. - bits<5> Rn; - - let Inst{31-25} = 0b1101011; - let Inst{24-21} = opc; - let Inst{20-16} = op2; - let Inst{15-10} = op3; - let Inst{9-5} = Rn; - let Inst{4-0} = op4; -} - - -//===----------------------------------------------------------------------===// -// -// Neon Instruction Format Definitions. -// - -let Predicates = [HasNEON] in { - -class NeonInstAlias<string Asm, dag Result, bit Emit = 0b1> - : InstAlias<Asm, Result, Emit> { -} - -// Format AdvSIMD bitwise extract -class NeonI_BitExtract<bit q, bits<2> op2, - dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> { - let Inst{31} = 0b0; - let Inst{30} = q; - let Inst{29-24} = 0b101110; - let Inst{23-22} = op2; - let Inst{21} = 0b0; - // Inherit Rm in 20-16 - let Inst{15} = 0b0; - // imm4 in 14-11 - let Inst{10} = 0b0; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} - -// Format AdvSIMD perm -class NeonI_Perm<bit q, bits<2> size, bits<3> opcode, - dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> { - let Inst{31} = 0b0; - let Inst{30} = q; - let Inst{29-24} = 0b001110; - let Inst{23-22} = size; - let Inst{21} = 0b0; - // Inherit Rm in 20-16 - let Inst{15} = 0b0; - let Inst{14-12} = opcode; - let Inst{11-10} = 0b10; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} - -// Format AdvSIMD table lookup -class NeonI_TBL<bit q, bits<2> op2, bits<2> len, bit op, - dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> { - let Inst{31} = 0b0; - let Inst{30} = q; - let Inst{29-24} = 0b001110; - let Inst{23-22} = op2; - let Inst{21} = 0b0; - // Inherit Rm in 20-16 - let Inst{15} = 0b0; - let Inst{14-13} = len; - let Inst{12} = op; - let Inst{11-10} = 0b00; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} - -// Format AdvSIMD 3 vector registers with same vector type -class NeonI_3VSame<bit q, bit u, bits<2> size, bits<5> opcode, - dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> { - let Inst{31} = 0b0; - let Inst{30} = q; - let Inst{29} = u; - let Inst{28-24} = 0b01110; - let Inst{23-22} = size; - let Inst{21} = 0b1; - // Inherit Rm in 20-16 - let Inst{15-11} = opcode; - let Inst{10} = 0b1; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} - -// Format AdvSIMD 3 vector registers with different vector type -class NeonI_3VDiff<bit q, bit u, bits<2> size, bits<4> opcode, - dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> { - let Inst{31} = 0b0; - let Inst{30} = q; - let Inst{29} = u; - let Inst{28-24} = 0b01110; - let Inst{23-22} = size; - let Inst{21} = 0b1; - // Inherit Rm in 20-16 - let Inst{15-12} = opcode; - let Inst{11} = 0b0; - let Inst{10} = 0b0; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} - -// Format AdvSIMD two registers and an element -class NeonI_2VElem<bit q, bit u, bits<2> size, bits<4> opcode, - dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> { - let Inst{31} = 0b0; 
- let Inst{30} = q; - let Inst{29} = u; - let Inst{28-24} = 0b01111; - let Inst{23-22} = size; - // l in Inst{21} - // m in Inst{20} - // Inherit Rm in 19-16 - let Inst{15-12} = opcode; - // h in Inst{11} - let Inst{10} = 0b0; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} - -// Format AdvSIMD 1 vector register with modified immediate -class NeonI_1VModImm<bit q, bit op, - dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64InstRd<outs,ins, asmstr, patterns, itin> { - bits<8> Imm; - bits<4> cmode; - let Inst{31} = 0b0; - let Inst{30} = q; - let Inst{29} = op; - let Inst{28-19} = 0b0111100000; - let Inst{15-12} = cmode; - let Inst{11} = 0b0; // o2 - let Inst{10} = 1; - // Inherit Rd in 4-0 - let Inst{18-16} = Imm{7-5}; // imm a:b:c - let Inst{9-5} = Imm{4-0}; // imm d:e:f:g:h -} - -// Format AdvSIMD 3 scalar registers with same type - -class NeonI_Scalar3Same<bit u, bits<2> size, bits<5> opcode, - dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> { - let Inst{31} = 0b0; - let Inst{30} = 0b1; - let Inst{29} = u; - let Inst{28-24} = 0b11110; - let Inst{23-22} = size; - let Inst{21} = 0b1; - // Inherit Rm in 20-16 - let Inst{15-11} = opcode; - let Inst{10} = 0b1; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} - - -// Format AdvSIMD 2 vector registers miscellaneous -class NeonI_2VMisc<bit q, bit u, bits<2> size, bits<5> opcode, - dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64InstRdn<outs, ins, asmstr, patterns, itin> { - let Inst{31} = 0b0; - let Inst{30} = q; - let Inst{29} = u; - let Inst{28-24} = 0b01110; - let Inst{23-22} = size; - let Inst{21-17} = 0b10000; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; - - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} - -// Format AdvSIMD 2 vector 1 immediate shift -class NeonI_2VShiftImm<bit q, bit u, bits<5> opcode, - dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64InstRdn<outs, ins, asmstr, patterns, itin> { - bits<7> Imm; - let Inst{31} = 0b0; - let Inst{30} = q; - let Inst{29} = u; - let Inst{28-23} = 0b011110; - let Inst{22-16} = Imm; - let Inst{15-11} = opcode; - let Inst{10} = 0b1; - - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} - -// Format AdvSIMD duplicate and insert -class NeonI_copy<bit q, bit op, bits<4> imm4, - dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64InstRdn<outs, ins, asmstr, patterns, itin> { - bits<5> Imm5; - let Inst{31} = 0b0; - let Inst{30} = q; - let Inst{29} = op; - let Inst{28-21} = 0b01110000; - let Inst{20-16} = Imm5; - let Inst{15} = 0b0; - let Inst{14-11} = imm4; - let Inst{10} = 0b1; - - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} -// Format AdvSIMD insert from element to vector -class NeonI_insert<bit q, bit op, - dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64InstRdn<outs, ins, asmstr, patterns, itin> { - bits<5> Imm5; - bits<4> Imm4; - let Inst{31} = 0b0; - let Inst{30} = q; - let Inst{29} = op; - let Inst{28-21} = 0b01110000; - let Inst{20-16} = Imm5; - let Inst{15} = 0b0; - let Inst{14-11} = Imm4; - let Inst{10} = 0b1; - - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} - -// Format AdvSIMD scalar pairwise -class NeonI_ScalarPair<bit u, bits<2> size, bits<5> opcode, - dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64InstRdn<outs, ins, asmstr, patterns, itin> { - let Inst{31} = 0b0; - let Inst{30} = 0b1; - let 
Inst{29} = u; - let Inst{28-24} = 0b11110; - let Inst{23-22} = size; - let Inst{21-17} = 0b11000; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; - - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} - -// Format AdvSIMD 2 vector across lanes -class NeonI_2VAcross<bit q, bit u, bits<2> size, bits<5> opcode, - dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64InstRdn<outs, ins, asmstr, patterns, itin> -{ - let Inst{31} = 0b0; - let Inst{30} = q; - let Inst{29} = u; - let Inst{28-24} = 0b01110; - let Inst{23-22} = size; - let Inst{21-17} = 0b11000; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; - - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} - -// Format AdvSIMD scalar two registers miscellaneous -class NeonI_Scalar2SameMisc<bit u, bits<2> size, bits<5> opcode, dag outs, dag ins, - string asmstr, list<dag> patterns, InstrItinClass itin> - : A64InstRdn<outs, ins, asmstr, patterns, itin> { - let Inst{31} = 0b0; - let Inst{30} = 0b1; - let Inst{29} = u; - let Inst{28-24} = 0b11110; - let Inst{23-22} = size; - let Inst{21-17} = 0b10000; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} - -// Format AdvSIMD vector load/store multiple N-element structure -class NeonI_LdStMult<bit q, bit l, bits<4> opcode, bits<2> size, - dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64InstRtn<outs, ins, asmstr, patterns, itin> -{ - let Inst{31} = 0b0; - let Inst{30} = q; - let Inst{29-23} = 0b0011000; - let Inst{22} = l; - let Inst{21-16} = 0b000000; - let Inst{15-12} = opcode; - let Inst{11-10} = size; - - // Inherit Rn in 9-5 - // Inherit Rt in 4-0 -} - -// Format AdvSIMD vector load/store multiple N-element structure (post-index) -class NeonI_LdStMult_Post<bit q, bit l, bits<4> opcode, bits<2> size, - dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64InstRtnm<outs, ins, asmstr, patterns, itin> -{ - let Inst{31} = 0b0; - let Inst{30} = q; - let Inst{29-23} = 0b0011001; - let Inst{22} = l; - let Inst{21} = 0b0; - // Inherit Rm in 20-16 - let Inst{15-12} = opcode; - let Inst{11-10} = size; - // Inherit Rn in 9-5 - // Inherit Rt in 4-0 -} - -// Format AdvSIMD vector load Single N-element structure to all lanes -class NeonI_LdOne_Dup<bit q, bit r, bits<3> opcode, bits<2> size, dag outs, - dag ins, string asmstr, list<dag> patterns, - InstrItinClass itin> - : A64InstRtn<outs, ins, asmstr, patterns, itin> -{ - let Inst{31} = 0b0; - let Inst{30} = q; - let Inst{29-23} = 0b0011010; - let Inst{22} = 0b1; - let Inst{21} = r; - let Inst{20-16} = 0b00000; - let Inst{15-13} = opcode; - let Inst{12} = 0b0; - let Inst{11-10} = size; - - // Inherit Rn in 9-5 - // Inherit Rt in 4-0 -} - -// Format AdvSIMD vector load/store Single N-element structure to/from one lane -class NeonI_LdStOne_Lane<bit l, bit r, bits<2> op2_1, bit op0, dag outs, - dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64InstRtn<outs, ins, asmstr, patterns, itin> -{ - bits<4> lane; - let Inst{31} = 0b0; - let Inst{29-23} = 0b0011010; - let Inst{22} = l; - let Inst{21} = r; - let Inst{20-16} = 0b00000; - let Inst{15-14} = op2_1; - let Inst{13} = op0; - - // Inherit Rn in 9-5 - // Inherit Rt in 4-0 -} - -// Format AdvSIMD post-index vector load Single N-element structure to all lanes -class NeonI_LdOne_Dup_Post<bit q, bit r, bits<3> opcode, bits<2> size, dag outs, - dag ins, string asmstr, list<dag> patterns, - InstrItinClass itin> - : A64InstRtnm<outs, ins, asmstr, 
patterns, itin> -{ - let Inst{31} = 0b0; - let Inst{30} = q; - let Inst{29-23} = 0b0011011; - let Inst{22} = 0b1; - let Inst{21} = r; - // Inherit Rm in 20-16 - let Inst{15-13} = opcode; - let Inst{12} = 0b0; - let Inst{11-10} = size; - - // Inherit Rn in 9-5 - // Inherit Rt in 4-0 -} - -// Format AdvSIMD post-index vector load/store Single N-element structure -// to/from one lane -class NeonI_LdStOne_Lane_Post<bit l, bit r, bits<2> op2_1, bit op0, dag outs, - dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64InstRtnm<outs, ins, asmstr, patterns, itin> -{ - bits<4> lane; - let Inst{31} = 0b0; - let Inst{29-23} = 0b0011011; - let Inst{22} = l; - let Inst{21} = r; - // Inherit Rm in 20-16 - let Inst{15-14} = op2_1; - let Inst{13} = op0; - - // Inherit Rn in 9-5 - // Inherit Rt in 4-0 -} - -// Format AdvSIMD 3 scalar registers with different type - -class NeonI_Scalar3Diff<bit u, bits<2> size, bits<4> opcode, - dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> { - let Inst{31-30} = 0b01; - let Inst{29} = u; - let Inst{28-24} = 0b11110; - let Inst{23-22} = size; - let Inst{21} = 0b1; - // Inherit Rm in 20-16 - let Inst{15-12} = opcode; - let Inst{11-10} = 0b00; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} - -// Format AdvSIMD scalar shift by immediate - -class NeonI_ScalarShiftImm<bit u, bits<5> opcode, - dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64InstRdn<outs, ins, asmstr, patterns, itin> { - bits<4> Imm4; - bits<3> Imm3; - let Inst{31-30} = 0b01; - let Inst{29} = u; - let Inst{28-23} = 0b111110; - let Inst{22-19} = Imm4; - let Inst{18-16} = Imm3; - let Inst{15-11} = opcode; - let Inst{10} = 0b1; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} - -// Format AdvSIMD crypto AES -class NeonI_Crypto_AES<bits<2> size, bits<5> opcode, - dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64InstRdn<outs, ins, asmstr, patterns, itin> { - let Inst{31-24} = 0b01001110; - let Inst{23-22} = size; - let Inst{21-17} = 0b10100; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} - -// Format AdvSIMD crypto SHA -class NeonI_Crypto_SHA<bits<2> size, bits<5> opcode, - dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64InstRdn<outs, ins, asmstr, patterns, itin> { - let Inst{31-24} = 0b01011110; - let Inst{23-22} = size; - let Inst{21-17} = 0b10100; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} - -// Format AdvSIMD crypto 3V SHA -class NeonI_Crypto_3VSHA<bits<2> size, bits<3> opcode, - dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> { - let Inst{31-24} = 0b01011110; - let Inst{23-22} = size; - let Inst{21} = 0b0; - // Inherit Rm in 20-16 - let Inst{15} = 0b0; - let Inst{14-12} = opcode; - let Inst{11-10} = 0b00; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} - -// Format AdvSIMD scalar x indexed element -class NeonI_ScalarXIndexedElem<bit u, bit szhi, bit szlo, - bits<4> opcode, dag outs, dag ins, - string asmstr, list<dag> patterns, - InstrItinClass itin> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> -{ - let Inst{31} = 0b0; - let Inst{30} = 0b1; - let Inst{29} = u; - let Inst{28-24} = 0b11111; - let Inst{23} = szhi; - let Inst{22} = szlo; - // l in Inst{21} - // m in Instr{20} - // Inherit Rm in 19-16 - let 
Inst{15-12} = opcode; - // h in Inst{11} - let Inst{10} = 0b0; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} -// Format AdvSIMD scalar copy - insert from element to scalar -class NeonI_ScalarCopy<dag outs, dag ins, string asmstr, - list<dag> patterns, InstrItinClass itin> - : NeonI_copy<0b1, 0b0, 0b0000, outs, ins, asmstr, patterns, itin> { - let Inst{28} = 0b1; -} -} - diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp deleted file mode 100644 index e2612abffa5..00000000000 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ /dev/null @@ -1,979 +0,0 @@ -//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the AArch64 implementation of the TargetInstrInfo class. -// -//===----------------------------------------------------------------------===// - -#include "AArch64.h" -#include "AArch64InstrInfo.h" -#include "AArch64MachineFunctionInfo.h" -#include "AArch64TargetMachine.h" -#include "MCTargetDesc/AArch64MCTargetDesc.h" -#include "Utils/AArch64BaseInfo.h" -#include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/IR/Function.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/TargetRegistry.h" -#include <algorithm> - -using namespace llvm; - -#define GET_INSTRINFO_CTOR_DTOR -#include "AArch64GenInstrInfo.inc" - -AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI) - : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP), - Subtarget(STI) {} - -void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, - bool KillSrc) const { - unsigned Opc = 0; - unsigned ZeroReg = 0; - if (DestReg == AArch64::XSP || SrcReg == AArch64::XSP) { - // E.g. ADD xDst, xsp, #0 (, lsl #0) - BuildMI(MBB, I, DL, get(AArch64::ADDxxi_lsl0_s), DestReg) - .addReg(SrcReg) - .addImm(0); - return; - } else if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) { - // E.g. ADD wDST, wsp, #0 (, lsl #0) - BuildMI(MBB, I, DL, get(AArch64::ADDwwi_lsl0_s), DestReg) - .addReg(SrcReg) - .addImm(0); - return; - } else if (DestReg == AArch64::NZCV) { - assert(AArch64::GPR64RegClass.contains(SrcReg)); - // E.g. MSR NZCV, xDST - BuildMI(MBB, I, DL, get(AArch64::MSRix)) - .addImm(A64SysReg::NZCV) - .addReg(SrcReg); - } else if (SrcReg == AArch64::NZCV) { - assert(AArch64::GPR64RegClass.contains(DestReg)); - // E.g. 
MRS xDST, NZCV - BuildMI(MBB, I, DL, get(AArch64::MRSxi), DestReg) - .addImm(A64SysReg::NZCV); - } else if (AArch64::GPR64RegClass.contains(DestReg)) { - if(AArch64::GPR64RegClass.contains(SrcReg)){ - Opc = AArch64::ORRxxx_lsl; - ZeroReg = AArch64::XZR; - } else{ - assert(AArch64::FPR64RegClass.contains(SrcReg)); - BuildMI(MBB, I, DL, get(AArch64::FMOVxd), DestReg) - .addReg(SrcReg); - return; - } - } else if (AArch64::GPR32RegClass.contains(DestReg)) { - if(AArch64::GPR32RegClass.contains(SrcReg)){ - Opc = AArch64::ORRwww_lsl; - ZeroReg = AArch64::WZR; - } else{ - assert(AArch64::FPR32RegClass.contains(SrcReg)); - BuildMI(MBB, I, DL, get(AArch64::FMOVws), DestReg) - .addReg(SrcReg); - return; - } - } else if (AArch64::FPR32RegClass.contains(DestReg)) { - if(AArch64::FPR32RegClass.contains(SrcReg)){ - BuildMI(MBB, I, DL, get(AArch64::FMOVss), DestReg) - .addReg(SrcReg); - return; - } - else { - assert(AArch64::GPR32RegClass.contains(SrcReg)); - BuildMI(MBB, I, DL, get(AArch64::FMOVsw), DestReg) - .addReg(SrcReg); - return; - } - } else if (AArch64::FPR64RegClass.contains(DestReg)) { - if(AArch64::FPR64RegClass.contains(SrcReg)){ - BuildMI(MBB, I, DL, get(AArch64::FMOVdd), DestReg) - .addReg(SrcReg); - return; - } - else { - assert(AArch64::GPR64RegClass.contains(SrcReg)); - BuildMI(MBB, I, DL, get(AArch64::FMOVdx), DestReg) - .addReg(SrcReg); - return; - } - } else if (AArch64::FPR128RegClass.contains(DestReg)) { - assert(AArch64::FPR128RegClass.contains(SrcReg)); - - // If NEON is enable, we use ORR to implement this copy. - // If NEON isn't available, emit STR and LDR to handle this. - if(getSubTarget().hasNEON()) { - BuildMI(MBB, I, DL, get(AArch64::ORRvvv_16B), DestReg) - .addReg(SrcReg) - .addReg(SrcReg); - return; - } else { - BuildMI(MBB, I, DL, get(AArch64::LSFP128_PreInd_STR), AArch64::XSP) - .addReg(SrcReg) - .addReg(AArch64::XSP) - .addImm(0x1ff & -16); - - BuildMI(MBB, I, DL, get(AArch64::LSFP128_PostInd_LDR), DestReg) - .addReg(AArch64::XSP, RegState::Define) - .addReg(AArch64::XSP) - .addImm(16); - return; - } - } else if (AArch64::FPR8RegClass.contains(DestReg, SrcReg)) { - // The copy of two FPR8 registers is implemented by the copy of two FPR32 - const TargetRegisterInfo *TRI = &getRegisterInfo(); - unsigned Dst = TRI->getMatchingSuperReg(DestReg, AArch64::sub_8, - &AArch64::FPR32RegClass); - unsigned Src = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_8, - &AArch64::FPR32RegClass); - BuildMI(MBB, I, DL, get(AArch64::FMOVss), Dst) - .addReg(Src); - return; - } else if (AArch64::FPR16RegClass.contains(DestReg, SrcReg)) { - // The copy of two FPR16 registers is implemented by the copy of two FPR32 - const TargetRegisterInfo *TRI = &getRegisterInfo(); - unsigned Dst = TRI->getMatchingSuperReg(DestReg, AArch64::sub_16, - &AArch64::FPR32RegClass); - unsigned Src = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_16, - &AArch64::FPR32RegClass); - BuildMI(MBB, I, DL, get(AArch64::FMOVss), Dst) - .addReg(Src); - return; - } else { - CopyPhysRegTuple(MBB, I, DL, DestReg, SrcReg); - return; - } - - // E.g. 
ORR xDst, xzr, xSrc, lsl #0 - BuildMI(MBB, I, DL, get(Opc), DestReg) - .addReg(ZeroReg) - .addReg(SrcReg) - .addImm(0); -} - -void AArch64InstrInfo::CopyPhysRegTuple(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - DebugLoc DL, unsigned DestReg, - unsigned SrcReg) const { - unsigned SubRegs; - bool IsQRegs; - if (AArch64::DPairRegClass.contains(DestReg, SrcReg)) { - SubRegs = 2; - IsQRegs = false; - } else if (AArch64::DTripleRegClass.contains(DestReg, SrcReg)) { - SubRegs = 3; - IsQRegs = false; - } else if (AArch64::DQuadRegClass.contains(DestReg, SrcReg)) { - SubRegs = 4; - IsQRegs = false; - } else if (AArch64::QPairRegClass.contains(DestReg, SrcReg)) { - SubRegs = 2; - IsQRegs = true; - } else if (AArch64::QTripleRegClass.contains(DestReg, SrcReg)) { - SubRegs = 3; - IsQRegs = true; - } else if (AArch64::QQuadRegClass.contains(DestReg, SrcReg)) { - SubRegs = 4; - IsQRegs = true; - } else - llvm_unreachable("Unknown register class"); - - unsigned BeginIdx = IsQRegs ? AArch64::qsub_0 : AArch64::dsub_0; - int Spacing = 1; - const TargetRegisterInfo *TRI = &getRegisterInfo(); - // Copy register tuples backward when the first Dest reg overlaps - // with SrcReg. - if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) { - BeginIdx = BeginIdx + (SubRegs - 1); - Spacing = -1; - } - - unsigned Opc = IsQRegs ? AArch64::ORRvvv_16B : AArch64::ORRvvv_8B; - for (unsigned i = 0; i != SubRegs; ++i) { - unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing); - unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing); - assert(Dst && Src && "Bad sub-register"); - BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst) - .addReg(Src) - .addReg(Src); - } - return; -} - -/// Does the Opcode represent a conditional branch that we can remove and re-add -/// at the end of a basic block? -static bool isCondBranch(unsigned Opc) { - return Opc == AArch64::Bcc || Opc == AArch64::CBZw || Opc == AArch64::CBZx || - Opc == AArch64::CBNZw || Opc == AArch64::CBNZx || - Opc == AArch64::TBZwii || Opc == AArch64::TBZxii || - Opc == AArch64::TBNZwii || Opc == AArch64::TBNZxii; -} - -/// Takes apart a given conditional branch MachineInstr (see isCondBranch), -/// setting TBB to the destination basic block and populating the Cond vector -/// with data necessary to recreate the conditional branch at a later -/// date. First element will be the opcode, and subsequent ones define the -/// conditions being branched on in an instruction-specific manner. -static void classifyCondBranch(MachineInstr *I, MachineBasicBlock *&TBB, - SmallVectorImpl<MachineOperand> &Cond) { - switch(I->getOpcode()) { - case AArch64::Bcc: - case AArch64::CBZw: - case AArch64::CBZx: - case AArch64::CBNZw: - case AArch64::CBNZx: - // These instructions just have one predicate operand in position 0 (either - // a condition code or a register being compared). - Cond.push_back(MachineOperand::CreateImm(I->getOpcode())); - Cond.push_back(I->getOperand(0)); - TBB = I->getOperand(1).getMBB(); - return; - case AArch64::TBZwii: - case AArch64::TBZxii: - case AArch64::TBNZwii: - case AArch64::TBNZxii: - // These have two predicate operands: a register and a bit position. 
- Cond.push_back(MachineOperand::CreateImm(I->getOpcode())); - Cond.push_back(I->getOperand(0)); - Cond.push_back(I->getOperand(1)); - TBB = I->getOperand(2).getMBB(); - return; - default: - llvm_unreachable("Unknown conditional branch to classify"); - } -} - - -bool -AArch64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, - MachineBasicBlock *&FBB, - SmallVectorImpl<MachineOperand> &Cond, - bool AllowModify) const { - // If the block has no terminators, it just falls into the block after it. - MachineBasicBlock::iterator I = MBB.end(); - if (I == MBB.begin()) - return false; - --I; - while (I->isDebugValue()) { - if (I == MBB.begin()) - return false; - --I; - } - if (!isUnpredicatedTerminator(I)) - return false; - - // Get the last instruction in the block. - MachineInstr *LastInst = I; - - // If there is only one terminator instruction, process it. - unsigned LastOpc = LastInst->getOpcode(); - if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { - if (LastOpc == AArch64::Bimm) { - TBB = LastInst->getOperand(0).getMBB(); - return false; - } - if (isCondBranch(LastOpc)) { - classifyCondBranch(LastInst, TBB, Cond); - return false; - } - return true; // Can't handle indirect branch. - } - - // Get the instruction before it if it is a terminator. - MachineInstr *SecondLastInst = I; - unsigned SecondLastOpc = SecondLastInst->getOpcode(); - - // If AllowModify is true and the block ends with two or more unconditional - // branches, delete all but the first unconditional branch. - if (AllowModify && LastOpc == AArch64::Bimm) { - while (SecondLastOpc == AArch64::Bimm) { - LastInst->eraseFromParent(); - LastInst = SecondLastInst; - LastOpc = LastInst->getOpcode(); - if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { - // Return now the only terminator is an unconditional branch. - TBB = LastInst->getOperand(0).getMBB(); - return false; - } else { - SecondLastInst = I; - SecondLastOpc = SecondLastInst->getOpcode(); - } - } - } - - // If there are three terminators, we don't know what sort of block this is. - if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I)) - return true; - - // If the block ends with a B and a Bcc, handle it. - if (LastOpc == AArch64::Bimm) { - if (SecondLastOpc == AArch64::Bcc) { - TBB = SecondLastInst->getOperand(1).getMBB(); - Cond.push_back(MachineOperand::CreateImm(AArch64::Bcc)); - Cond.push_back(SecondLastInst->getOperand(0)); - FBB = LastInst->getOperand(0).getMBB(); - return false; - } else if (isCondBranch(SecondLastOpc)) { - classifyCondBranch(SecondLastInst, TBB, Cond); - FBB = LastInst->getOperand(0).getMBB(); - return false; - } - } - - // If the block ends with two unconditional branches, handle it. The second - // one is not executed, so remove it. - if (SecondLastOpc == AArch64::Bimm && LastOpc == AArch64::Bimm) { - TBB = SecondLastInst->getOperand(0).getMBB(); - I = LastInst; - if (AllowModify) - I->eraseFromParent(); - return false; - } - - // Otherwise, can't handle this. 
- return true; -} - -bool AArch64InstrInfo::ReverseBranchCondition( - SmallVectorImpl<MachineOperand> &Cond) const { - switch (Cond[0].getImm()) { - case AArch64::Bcc: { - A64CC::CondCodes CC = static_cast<A64CC::CondCodes>(Cond[1].getImm()); - CC = A64InvertCondCode(CC); - Cond[1].setImm(CC); - return false; - } - case AArch64::CBZw: - Cond[0].setImm(AArch64::CBNZw); - return false; - case AArch64::CBZx: - Cond[0].setImm(AArch64::CBNZx); - return false; - case AArch64::CBNZw: - Cond[0].setImm(AArch64::CBZw); - return false; - case AArch64::CBNZx: - Cond[0].setImm(AArch64::CBZx); - return false; - case AArch64::TBZwii: - Cond[0].setImm(AArch64::TBNZwii); - return false; - case AArch64::TBZxii: - Cond[0].setImm(AArch64::TBNZxii); - return false; - case AArch64::TBNZwii: - Cond[0].setImm(AArch64::TBZwii); - return false; - case AArch64::TBNZxii: - Cond[0].setImm(AArch64::TBZxii); - return false; - default: - llvm_unreachable("Unknown branch type"); - } -} - - -unsigned -AArch64InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond, - DebugLoc DL) const { - if (!FBB && Cond.empty()) { - BuildMI(&MBB, DL, get(AArch64::Bimm)).addMBB(TBB); - return 1; - } else if (!FBB) { - MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(Cond[0].getImm())); - for (int i = 1, e = Cond.size(); i != e; ++i) - MIB.addOperand(Cond[i]); - MIB.addMBB(TBB); - return 1; - } - - MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(Cond[0].getImm())); - for (int i = 1, e = Cond.size(); i != e; ++i) - MIB.addOperand(Cond[i]); - MIB.addMBB(TBB); - - BuildMI(&MBB, DL, get(AArch64::Bimm)).addMBB(FBB); - return 2; -} - -unsigned AArch64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { - MachineBasicBlock::iterator I = MBB.end(); - if (I == MBB.begin()) return 0; - --I; - while (I->isDebugValue()) { - if (I == MBB.begin()) - return 0; - --I; - } - if (I->getOpcode() != AArch64::Bimm && !isCondBranch(I->getOpcode())) - return 0; - - // Remove the branch. - I->eraseFromParent(); - - I = MBB.end(); - - if (I == MBB.begin()) return 1; - --I; - if (!isCondBranch(I->getOpcode())) - return 1; - - // Remove the branch. 
- I->eraseFromParent(); - return 2; -} - -bool -AArch64InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MBBI) const { - MachineInstr &MI = *MBBI; - MachineBasicBlock &MBB = *MI.getParent(); - - unsigned Opcode = MI.getOpcode(); - switch (Opcode) { - case AArch64::TLSDESC_BLRx: { - MachineInstr *NewMI = - BuildMI(MBB, MBBI, MI.getDebugLoc(), get(AArch64::TLSDESCCALL)) - .addOperand(MI.getOperand(1)); - MI.setDesc(get(AArch64::BLRx)); - - llvm::finalizeBundle(MBB, NewMI, *++MBBI); - return true; - } - default: - return false; - } - - return false; -} - -void -AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned SrcReg, bool isKill, - int FrameIdx, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { - DebugLoc DL = MBB.findDebugLoc(MBBI); - MachineFunction &MF = *MBB.getParent(); - MachineFrameInfo &MFI = *MF.getFrameInfo(); - unsigned Align = MFI.getObjectAlignment(FrameIdx); - - MachineMemOperand *MMO - = MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx), - MachineMemOperand::MOStore, - MFI.getObjectSize(FrameIdx), - Align); - - unsigned StoreOp = 0; - if (RC->hasType(MVT::i64) || RC->hasType(MVT::i32)) { - switch(RC->getSize()) { - case 4: StoreOp = AArch64::LS32_STR; break; - case 8: StoreOp = AArch64::LS64_STR; break; - default: - llvm_unreachable("Unknown size for regclass"); - } - } else if (AArch64::FPR8RegClass.hasSubClassEq(RC)) { - StoreOp = AArch64::LSFP8_STR; - } else if (AArch64::FPR16RegClass.hasSubClassEq(RC)) { - StoreOp = AArch64::LSFP16_STR; - } else if (RC->hasType(MVT::f32) || RC->hasType(MVT::f64) || - RC->hasType(MVT::f128)) { - switch (RC->getSize()) { - case 4: StoreOp = AArch64::LSFP32_STR; break; - case 8: StoreOp = AArch64::LSFP64_STR; break; - case 16: StoreOp = AArch64::LSFP128_STR; break; - default: - llvm_unreachable("Unknown size for regclass"); - } - } else { // For a super register class has more than one sub registers - if (AArch64::DPairRegClass.hasSubClassEq(RC)) - StoreOp = AArch64::ST1x2_8B; - else if (AArch64::DTripleRegClass.hasSubClassEq(RC)) - StoreOp = AArch64::ST1x3_8B; - else if (AArch64::DQuadRegClass.hasSubClassEq(RC)) - StoreOp = AArch64::ST1x4_8B; - else if (AArch64::QPairRegClass.hasSubClassEq(RC)) - StoreOp = AArch64::ST1x2_16B; - else if (AArch64::QTripleRegClass.hasSubClassEq(RC)) - StoreOp = AArch64::ST1x3_16B; - else if (AArch64::QQuadRegClass.hasSubClassEq(RC)) - StoreOp = AArch64::ST1x4_16B; - else - llvm_unreachable("Unknown reg class"); - - MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(StoreOp)); - // Vector store has different operands from other store instructions. 
- NewMI.addFrameIndex(FrameIdx) - .addReg(SrcReg, getKillRegState(isKill)) - .addMemOperand(MMO); - return; - } - - MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(StoreOp)); - NewMI.addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FrameIdx) - .addImm(0) - .addMemOperand(MMO); - -} - -void -AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned DestReg, int FrameIdx, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { - DebugLoc DL = MBB.findDebugLoc(MBBI); - MachineFunction &MF = *MBB.getParent(); - MachineFrameInfo &MFI = *MF.getFrameInfo(); - unsigned Align = MFI.getObjectAlignment(FrameIdx); - - MachineMemOperand *MMO - = MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx), - MachineMemOperand::MOLoad, - MFI.getObjectSize(FrameIdx), - Align); - - unsigned LoadOp = 0; - if (RC->hasType(MVT::i64) || RC->hasType(MVT::i32)) { - switch(RC->getSize()) { - case 4: LoadOp = AArch64::LS32_LDR; break; - case 8: LoadOp = AArch64::LS64_LDR; break; - default: - llvm_unreachable("Unknown size for regclass"); - } - } else if (AArch64::FPR8RegClass.hasSubClassEq(RC)) { - LoadOp = AArch64::LSFP8_LDR; - } else if (AArch64::FPR16RegClass.hasSubClassEq(RC)) { - LoadOp = AArch64::LSFP16_LDR; - } else if (RC->hasType(MVT::f32) || RC->hasType(MVT::f64) || - RC->hasType(MVT::f128)) { - switch (RC->getSize()) { - case 4: LoadOp = AArch64::LSFP32_LDR; break; - case 8: LoadOp = AArch64::LSFP64_LDR; break; - case 16: LoadOp = AArch64::LSFP128_LDR; break; - default: - llvm_unreachable("Unknown size for regclass"); - } - } else { // For a super register class has more than one sub registers - if (AArch64::DPairRegClass.hasSubClassEq(RC)) - LoadOp = AArch64::LD1x2_8B; - else if (AArch64::DTripleRegClass.hasSubClassEq(RC)) - LoadOp = AArch64::LD1x3_8B; - else if (AArch64::DQuadRegClass.hasSubClassEq(RC)) - LoadOp = AArch64::LD1x4_8B; - else if (AArch64::QPairRegClass.hasSubClassEq(RC)) - LoadOp = AArch64::LD1x2_16B; - else if (AArch64::QTripleRegClass.hasSubClassEq(RC)) - LoadOp = AArch64::LD1x3_16B; - else if (AArch64::QQuadRegClass.hasSubClassEq(RC)) - LoadOp = AArch64::LD1x4_16B; - else - llvm_unreachable("Unknown reg class"); - - MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(LoadOp), DestReg); - // Vector load has different operands from other load instructions. - NewMI.addFrameIndex(FrameIdx) - .addMemOperand(MMO); - return; - } - - MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(LoadOp), DestReg); - NewMI.addFrameIndex(FrameIdx) - .addImm(0) - .addMemOperand(MMO); -} - -unsigned AArch64InstrInfo::estimateRSStackLimit(MachineFunction &MF) const { - unsigned Limit = (1 << 16) - 1; - for (MachineFunction::iterator BB = MF.begin(),E = MF.end(); BB != E; ++BB) { - for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); - I != E; ++I) { - for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { - if (!I->getOperand(i).isFI()) continue; - - // When using ADDxxi_lsl0_s to get the address of a stack object, 0xfff - // is the largest offset guaranteed to fit in the immediate offset. 
- if (I->getOpcode() == AArch64::ADDxxi_lsl0_s) { - Limit = std::min(Limit, 0xfffu); - break; - } - - int AccessScale, MinOffset, MaxOffset; - getAddressConstraints(*I, AccessScale, MinOffset, MaxOffset); - Limit = std::min(Limit, static_cast<unsigned>(MaxOffset)); - - break; // At most one FI per instruction - } - } - } - - return Limit; -} -void AArch64InstrInfo::getAddressConstraints(const MachineInstr &MI, - int &AccessScale, int &MinOffset, - int &MaxOffset) const { - switch (MI.getOpcode()) { - default: - llvm_unreachable("Unknown load/store kind"); - case TargetOpcode::DBG_VALUE: - AccessScale = 1; - MinOffset = INT_MIN; - MaxOffset = INT_MAX; - return; - case AArch64::LS8_LDR: case AArch64::LS8_STR: - case AArch64::LSFP8_LDR: case AArch64::LSFP8_STR: - case AArch64::LDRSBw: - case AArch64::LDRSBx: - AccessScale = 1; - MinOffset = 0; - MaxOffset = 0xfff; - return; - case AArch64::LS16_LDR: case AArch64::LS16_STR: - case AArch64::LSFP16_LDR: case AArch64::LSFP16_STR: - case AArch64::LDRSHw: - case AArch64::LDRSHx: - AccessScale = 2; - MinOffset = 0; - MaxOffset = 0xfff * AccessScale; - return; - case AArch64::LS32_LDR: case AArch64::LS32_STR: - case AArch64::LSFP32_LDR: case AArch64::LSFP32_STR: - case AArch64::LDRSWx: - case AArch64::LDPSWx: - AccessScale = 4; - MinOffset = 0; - MaxOffset = 0xfff * AccessScale; - return; - case AArch64::LS64_LDR: case AArch64::LS64_STR: - case AArch64::LSFP64_LDR: case AArch64::LSFP64_STR: - case AArch64::PRFM: - AccessScale = 8; - MinOffset = 0; - MaxOffset = 0xfff * AccessScale; - return; - case AArch64::LSFP128_LDR: case AArch64::LSFP128_STR: - AccessScale = 16; - MinOffset = 0; - MaxOffset = 0xfff * AccessScale; - return; - case AArch64::LSPair32_LDR: case AArch64::LSPair32_STR: - case AArch64::LSFPPair32_LDR: case AArch64::LSFPPair32_STR: - AccessScale = 4; - MinOffset = -0x40 * AccessScale; - MaxOffset = 0x3f * AccessScale; - return; - case AArch64::LSPair64_LDR: case AArch64::LSPair64_STR: - case AArch64::LSFPPair64_LDR: case AArch64::LSFPPair64_STR: - AccessScale = 8; - MinOffset = -0x40 * AccessScale; - MaxOffset = 0x3f * AccessScale; - return; - case AArch64::LSFPPair128_LDR: case AArch64::LSFPPair128_STR: - AccessScale = 16; - MinOffset = -0x40 * AccessScale; - MaxOffset = 0x3f * AccessScale; - return; - case AArch64::LD1x2_8B: case AArch64::ST1x2_8B: - AccessScale = 16; - MinOffset = 0; - MaxOffset = 0xfff * AccessScale; - return; - case AArch64::LD1x3_8B: case AArch64::ST1x3_8B: - AccessScale = 24; - MinOffset = 0; - MaxOffset = 0xfff * AccessScale; - return; - case AArch64::LD1x4_8B: case AArch64::ST1x4_8B: - case AArch64::LD1x2_16B: case AArch64::ST1x2_16B: - AccessScale = 32; - MinOffset = 0; - MaxOffset = 0xfff * AccessScale; - return; - case AArch64::LD1x3_16B: case AArch64::ST1x3_16B: - AccessScale = 48; - MinOffset = 0; - MaxOffset = 0xfff * AccessScale; - return; - case AArch64::LD1x4_16B: case AArch64::ST1x4_16B: - AccessScale = 64; - MinOffset = 0; - MaxOffset = 0xfff * AccessScale; - return; - } -} - -unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { - const MCInstrDesc &MCID = MI.getDesc(); - const MachineBasicBlock &MBB = *MI.getParent(); - const MachineFunction &MF = *MBB.getParent(); - const MCAsmInfo &MAI = *MF.getTarget().getMCAsmInfo(); - - if (MCID.getSize()) - return MCID.getSize(); - - if (MI.getOpcode() == AArch64::INLINEASM) - return getInlineAsmLength(MI.getOperand(0).getSymbolName(), MAI); - - switch (MI.getOpcode()) { - case TargetOpcode::BUNDLE: - return getInstBundleLength(MI); - 
case TargetOpcode::IMPLICIT_DEF: - case TargetOpcode::KILL: - case TargetOpcode::CFI_INSTRUCTION: - case TargetOpcode::EH_LABEL: - case TargetOpcode::GC_LABEL: - case TargetOpcode::DBG_VALUE: - case AArch64::TLSDESCCALL: - return 0; - default: - llvm_unreachable("Unknown instruction class"); - } -} - -unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const { - unsigned Size = 0; - MachineBasicBlock::const_instr_iterator I = MI; - MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end(); - while (++I != E && I->isInsideBundle()) { - assert(!I->isBundle() && "No nested bundle!"); - Size += getInstSizeInBytes(*I); - } - return Size; -} - -bool llvm::rewriteA64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, - unsigned FrameReg, int &Offset, - const AArch64InstrInfo &TII) { - MachineBasicBlock &MBB = *MI.getParent(); - MachineFunction &MF = *MBB.getParent(); - MachineFrameInfo &MFI = *MF.getFrameInfo(); - - MFI.getObjectOffset(FrameRegIdx); - llvm_unreachable("Unimplemented rewriteFrameIndex"); -} - -void llvm::emitRegUpdate(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - DebugLoc dl, const TargetInstrInfo &TII, - unsigned DstReg, unsigned SrcReg, unsigned ScratchReg, - int64_t NumBytes, MachineInstr::MIFlag MIFlags) { - if (NumBytes == 0 && DstReg == SrcReg) - return; - else if (abs64(NumBytes) & ~0xffffff) { - // Generically, we have to materialize the offset into a temporary register - // and subtract it. There are a couple of ways this could be done, for now - // we'll use a movz/movk or movn/movk sequence. - uint64_t Bits = static_cast<uint64_t>(abs64(NumBytes)); - BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVZxii), ScratchReg) - .addImm(0xffff & Bits).addImm(0) - .setMIFlags(MIFlags); - - Bits >>= 16; - if (Bits & 0xffff) { - BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVKxii), ScratchReg) - .addReg(ScratchReg) - .addImm(0xffff & Bits).addImm(1) - .setMIFlags(MIFlags); - } - - Bits >>= 16; - if (Bits & 0xffff) { - BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVKxii), ScratchReg) - .addReg(ScratchReg) - .addImm(0xffff & Bits).addImm(2) - .setMIFlags(MIFlags); - } - - Bits >>= 16; - if (Bits & 0xffff) { - BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVKxii), ScratchReg) - .addReg(ScratchReg) - .addImm(0xffff & Bits).addImm(3) - .setMIFlags(MIFlags); - } - - // ADD DST, SRC, xTMP (, lsl #0) - unsigned AddOp = NumBytes > 0 ? AArch64::ADDxxx_uxtx : AArch64::SUBxxx_uxtx; - BuildMI(MBB, MBBI, dl, TII.get(AddOp), DstReg) - .addReg(SrcReg, RegState::Kill) - .addReg(ScratchReg, RegState::Kill) - .addImm(0) - .setMIFlag(MIFlags); - return; - } - - // Now we know that the adjustment can be done in at most two add/sub - // (immediate) instructions, which is always more efficient than a - // literal-pool load, or even a hypothetical movz/movk/add sequence - - // Decide whether we're doing addition or subtraction - unsigned LowOp, HighOp; - if (NumBytes >= 0) { - LowOp = AArch64::ADDxxi_lsl0_s; - HighOp = AArch64::ADDxxi_lsl12_s; - } else { - LowOp = AArch64::SUBxxi_lsl0_s; - HighOp = AArch64::SUBxxi_lsl12_s; - NumBytes = abs64(NumBytes); - } - - // If we're here, at the very least a move needs to be produced, which just - // happens to be materializable by an ADD. - if ((NumBytes & 0xfff) || NumBytes == 0) { - BuildMI(MBB, MBBI, dl, TII.get(LowOp), DstReg) - .addReg(SrcReg, RegState::Kill) - .addImm(NumBytes & 0xfff) - .setMIFlag(MIFlags); - - // Next update should use the register we've just defined. 
- SrcReg = DstReg; - } - - if (NumBytes & 0xfff000) { - BuildMI(MBB, MBBI, dl, TII.get(HighOp), DstReg) - .addReg(SrcReg, RegState::Kill) - .addImm(NumBytes >> 12) - .setMIFlag(MIFlags); - } -} - -void llvm::emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - DebugLoc dl, const TargetInstrInfo &TII, - unsigned ScratchReg, int64_t NumBytes, - MachineInstr::MIFlag MIFlags) { - emitRegUpdate(MBB, MI, dl, TII, AArch64::XSP, AArch64::XSP, AArch64::X16, - NumBytes, MIFlags); -} - - -namespace { - struct LDTLSCleanup : public MachineFunctionPass { - static char ID; - LDTLSCleanup() : MachineFunctionPass(ID) {} - - bool runOnMachineFunction(MachineFunction &MF) override { - AArch64MachineFunctionInfo* MFI - = MF.getInfo<AArch64MachineFunctionInfo>(); - if (MFI->getNumLocalDynamicTLSAccesses() < 2) { - // No point folding accesses if there aren't at least two. - return false; - } - - MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>(); - return VisitNode(DT->getRootNode(), 0); - } - - // Visit the dominator subtree rooted at Node in pre-order. - // If TLSBaseAddrReg is non-null, then use that to replace any - // TLS_base_addr instructions. Otherwise, create the register - // when the first such instruction is seen, and then use it - // as we encounter more instructions. - bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg) { - MachineBasicBlock *BB = Node->getBlock(); - bool Changed = false; - - // Traverse the current block. - for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; - ++I) { - switch (I->getOpcode()) { - case AArch64::TLSDESC_BLRx: - // Make sure it's a local dynamic access. - if (!I->getOperand(1).isSymbol() || - strcmp(I->getOperand(1).getSymbolName(), "_TLS_MODULE_BASE_")) - break; - - if (TLSBaseAddrReg) - I = ReplaceTLSBaseAddrCall(I, TLSBaseAddrReg); - else - I = SetRegister(I, &TLSBaseAddrReg); - Changed = true; - break; - default: - break; - } - } - - // Visit the children of this block in the dominator tree. - for (MachineDomTreeNode::iterator I = Node->begin(), E = Node->end(); - I != E; ++I) { - Changed |= VisitNode(*I, TLSBaseAddrReg); - } - - return Changed; - } - - // Replace the TLS_base_addr instruction I with a copy from - // TLSBaseAddrReg, returning the new instruction. - MachineInstr *ReplaceTLSBaseAddrCall(MachineInstr *I, - unsigned TLSBaseAddrReg) { - MachineFunction *MF = I->getParent()->getParent(); - const AArch64TargetMachine *TM = - static_cast<const AArch64TargetMachine *>(&MF->getTarget()); - const AArch64InstrInfo *TII = TM->getInstrInfo(); - - // Insert a Copy from TLSBaseAddrReg to x0, which is where the rest of the - // code sequence assumes the address will be. - MachineInstr *Copy = BuildMI(*I->getParent(), I, I->getDebugLoc(), - TII->get(TargetOpcode::COPY), - AArch64::X0) - .addReg(TLSBaseAddrReg); - - // Erase the TLS_base_addr instruction. - I->eraseFromParent(); - - return Copy; - } - - // Create a virtual register in *TLSBaseAddrReg, and populate it by - // inserting a copy instruction after I. Returns the new instruction. - MachineInstr *SetRegister(MachineInstr *I, unsigned *TLSBaseAddrReg) { - MachineFunction *MF = I->getParent()->getParent(); - const AArch64TargetMachine *TM = - static_cast<const AArch64TargetMachine *>(&MF->getTarget()); - const AArch64InstrInfo *TII = TM->getInstrInfo(); - - // Create a virtual register for the TLS base address.
- MachineRegisterInfo &RegInfo = MF->getRegInfo(); - *TLSBaseAddrReg = RegInfo.createVirtualRegister(&AArch64::GPR64RegClass); - - // Insert a copy from X0 to TLSBaseAddrReg for later. - MachineInstr *Next = I->getNextNode(); - MachineInstr *Copy = BuildMI(*I->getParent(), Next, I->getDebugLoc(), - TII->get(TargetOpcode::COPY), - *TLSBaseAddrReg) - .addReg(AArch64::X0); - - return Copy; - } - - const char *getPassName() const override { - return "Local Dynamic TLS Access Clean-up"; - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); - AU.addRequired<MachineDominatorTree>(); - MachineFunctionPass::getAnalysisUsage(AU); - } - }; -} - -char LDTLSCleanup::ID = 0; -FunctionPass* -llvm::createAArch64CleanupLocalDynamicTLSPass() { return new LDTLSCleanup(); } diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h deleted file mode 100644 index 10d5185ab63..00000000000 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ /dev/null @@ -1,112 +0,0 @@ -//===- AArch64InstrInfo.h - AArch64 Instruction Information -----*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the AArch64 implementation of the TargetInstrInfo class. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TARGET_AARCH64INSTRINFO_H -#define LLVM_TARGET_AARCH64INSTRINFO_H - -#include "AArch64RegisterInfo.h" -#include "llvm/Target/TargetInstrInfo.h" - -#define GET_INSTRINFO_HEADER -#include "AArch64GenInstrInfo.inc" - -namespace llvm { - -class AArch64Subtarget; - -class AArch64InstrInfo : public AArch64GenInstrInfo { - const AArch64RegisterInfo RI; - const AArch64Subtarget &Subtarget; -public: - explicit AArch64InstrInfo(const AArch64Subtarget &TM); - - /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As - /// such, whenever a client has an instance of instruction info, it should - /// always be able to get register info as well (through this method). 
- /// - const TargetRegisterInfo &getRegisterInfo() const { return RI; } - - const AArch64Subtarget &getSubTarget() const { return Subtarget; } - - void copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, - bool KillSrc) const override; - void CopyPhysRegTuple(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, DebugLoc DL, - unsigned DestReg, unsigned SrcReg) const; - - void storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const override; - void loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned DestReg, int FrameIdx, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const override; - - bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, - MachineBasicBlock *&FBB, - SmallVectorImpl<MachineOperand> &Cond, - bool AllowModify = false) const override; - unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond, - DebugLoc DL) const override; - unsigned RemoveBranch(MachineBasicBlock &MBB) const override; - bool - ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override; - - bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const override; - - /// Look through the instructions in this function and work out the largest - /// the stack frame can be while maintaining the ability to address local - /// slots with no complexities. - unsigned estimateRSStackLimit(MachineFunction &MF) const; - - /// getAddressConstraints - For loads and stores (and PRFMs) taking an - /// immediate offset, this function determines the constraints required for - /// the immediate. It must satisfy: - /// + MinOffset <= imm <= MaxOffset - /// + imm % OffsetScale == 0 - void getAddressConstraints(const MachineInstr &MI, int &AccessScale, - int &MinOffset, int &MaxOffset) const; - - - unsigned getInstSizeInBytes(const MachineInstr &MI) const; - - unsigned getInstBundleLength(const MachineInstr &MI) const; - -}; - -bool rewriteA64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, - unsigned FrameReg, int &Offset, - const AArch64InstrInfo &TII); - - -void emitRegUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - DebugLoc dl, const TargetInstrInfo &TII, - unsigned DstReg, unsigned SrcReg, unsigned ScratchReg, - int64_t NumBytes, - MachineInstr::MIFlag MIFlags = MachineInstr::NoFlags); - -void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - DebugLoc dl, const TargetInstrInfo &TII, - unsigned ScratchReg, int64_t NumBytes, - MachineInstr::MIFlag MIFlags = MachineInstr::NoFlags); - -} - -#endif diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td deleted file mode 100644 index 4d3c80152c3..00000000000 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ /dev/null @@ -1,5388 +0,0 @@ -//===----- AArch64InstrInfo.td - AArch64 Instruction Info ----*- tablegen -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file describes the AArch64 scalar instructions in TableGen format. 
-// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// ARM Instruction Predicate Definitions. -// -def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">, - AssemblerPredicate<"FeatureFPARMv8", "fp-armv8">; -def HasNEON : Predicate<"Subtarget->hasNEON()">, - AssemblerPredicate<"FeatureNEON", "neon">; -def HasCrypto : Predicate<"Subtarget->hasCrypto()">, - AssemblerPredicate<"FeatureCrypto","crypto">; - -// Use fused MAC if more precision in FP computation is allowed. -def UseFusedMAC : Predicate<"(TM.Options.AllowFPOpFusion ==" - " FPOpFusion::Fast)">; -include "AArch64InstrFormats.td" - -//===----------------------------------------------------------------------===// -// AArch64 specific pattern fragments. -// -// An 'fmul' node with a single use. -def fmul_su : PatFrag<(ops node:$lhs, node:$rhs), (fmul node:$lhs, node:$rhs),[{ - return N->hasOneUse(); -}]>; - - -//===----------------------------------------------------------------------===// -// Target-specific ISD nodes and profiles -//===----------------------------------------------------------------------===// - -def SDT_A64ret : SDTypeProfile<0, 0, []>; -def A64ret : SDNode<"AArch64ISD::Ret", SDT_A64ret, [SDNPHasChain, - SDNPOptInGlue, - SDNPVariadic]>; - -// (ins NZCV, Condition, Dest) -def SDT_A64br_cc : SDTypeProfile<0, 3, [SDTCisVT<0, i32>]>; -def A64br_cc : SDNode<"AArch64ISD::BR_CC", SDT_A64br_cc, [SDNPHasChain]>; - -// (outs Result), (ins NZCV, IfTrue, IfFalse, Condition) -def SDT_A64select_cc : SDTypeProfile<1, 4, [SDTCisVT<1, i32>, - SDTCisSameAs<0, 2>, - SDTCisSameAs<2, 3>]>; -def A64select_cc : SDNode<"AArch64ISD::SELECT_CC", SDT_A64select_cc>; - -// (outs NZCV), (ins LHS, RHS, Condition) -def SDT_A64setcc : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, - SDTCisSameAs<1, 2>]>; -def A64setcc : SDNode<"AArch64ISD::SETCC", SDT_A64setcc>; - - -// (outs GPR64), (ins) -def A64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>; - -// A64 compares don't care about the cond really (they set all flags) so a -// simple binary operator is useful. -def A64cmp : PatFrag<(ops node:$lhs, node:$rhs), - (A64setcc node:$lhs, node:$rhs, cond)>; - - -// When matching a notional (CMP op1, (sub 0, op2)), we'd like to use a CMN -// instruction on the grounds that "op1 - (-op2) == op1 + op2". However, the C -// and V flags can be set differently by this operation. It comes down to -// whether "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are -// then everything is fine. If not then the optimization is wrong. Thus general -// comparisons are only valid if op2 != 0. - -// So, finally, the only LLVM-native comparisons that don't mention C and V are -// SETEQ and SETNE. They're the only ones we can safely use CMN for in the -// absence of information about op2. -def equality_cond : PatLeaf<(cond), [{ - return N->get() == ISD::SETEQ || N->get() == ISD::SETNE; -}]>; - -def A64cmn : PatFrag<(ops node:$lhs, node:$rhs), - (A64setcc node:$lhs, (sub 0, node:$rhs), equality_cond)>; - -// There are two layers of indirection here, driven by the following -// considerations. -// + TableGen does not know CodeModel or Reloc so that decision should be -// made for a variable/address at ISelLowering. 
-// + The output of ISelLowering should be selectable (hence the Wrapper, -// rather than a bare target opcode) -def SDTAArch64WrapperLarge : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, - SDTCisSameAs<0, 2>, - SDTCisSameAs<0, 3>, - SDTCisSameAs<0, 4>, - SDTCisPtrTy<0>]>; - -def A64WrapperLarge :SDNode<"AArch64ISD::WrapperLarge", SDTAArch64WrapperLarge>; - -def SDTAArch64WrapperSmall : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, - SDTCisSameAs<1, 2>, - SDTCisVT<3, i32>, - SDTCisPtrTy<0>]>; - -def A64WrapperSmall :SDNode<"AArch64ISD::WrapperSmall", SDTAArch64WrapperSmall>; - - -def SDTAArch64GOTLoad : SDTypeProfile<1, 1, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>; -def A64GOTLoad : SDNode<"AArch64ISD::GOTLoad", SDTAArch64GOTLoad, - [SDNPHasChain]>; - - -// (A64BFI LHS, RHS, LSB, Width) -def SDTA64BFI : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, - SDTCisSameAs<1, 2>, - SDTCisVT<3, i64>, - SDTCisVT<4, i64>]>; - -def A64Bfi : SDNode<"AArch64ISD::BFI", SDTA64BFI>; - -// (A64EXTR HiReg, LoReg, LSB) -def SDTA64EXTR : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, - SDTCisVT<3, i64>]>; -def A64Extr : SDNode<"AArch64ISD::EXTR", SDTA64EXTR>; - -// (A64[SU]BFX Field, ImmR, ImmS). -// -// Note that ImmR and ImmS are already encoded for the actual instructions. The -// more natural LSB and Width mix together to form ImmR and ImmS, something -// which TableGen can't handle. -def SDTA64BFX : SDTypeProfile<1, 3, [SDTCisVT<2, i64>, SDTCisVT<3, i64>]>; -def A64Sbfx : SDNode<"AArch64ISD::SBFX", SDTA64BFX>; - -def A64Ubfx : SDNode<"AArch64ISD::UBFX", SDTA64BFX>; - -class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>; - -//===----------------------------------------------------------------------===// -// Call sequence pseudo-instructions -//===----------------------------------------------------------------------===// - - -def SDT_AArch64Call : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>; -def AArch64Call : SDNode<"AArch64ISD::Call", SDT_AArch64Call, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; - -def AArch64tcret : SDNode<"AArch64ISD::TC_RETURN", SDT_AArch64Call, - [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; - -// The TLSDESCCALL node is a variant call which goes to an indirectly calculated -// destination but needs a relocation against a fixed symbol. As such it has two -// certain operands: the callee and the relocated variable. -// -// The TLS ABI only allows it to be selected to a BLR instructin (with -// appropriate relocation). -def SDTTLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>; - -def A64tlsdesc_blr : SDNode<"AArch64ISD::TLSDESCCALL", SDTTLSDescCall, - [SDNPInGlue, SDNPOutGlue, SDNPHasChain, - SDNPVariadic]>; - - -def SDT_AArch64CallSeqStart : SDCallSeqStart<[ SDTCisPtrTy<0> ]>; -def AArch64callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_AArch64CallSeqStart, - [SDNPHasChain, SDNPOutGlue]>; - -def SDT_AArch64CallSeqEnd : SDCallSeqEnd<[ SDTCisPtrTy<0>, SDTCisPtrTy<1> ]>; -def AArch64callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_AArch64CallSeqEnd, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; - - - -// These pseudo-instructions have special semantics by virtue of being passed to -// the InstrInfo constructor. CALLSEQ_START/CALLSEQ_END are produced by -// LowerCall to (in our case) tell the back-end about stack adjustments for -// arguments passed on the stack. Here we select those markers to -// pseudo-instructions which explicitly set the stack, and finally in the -// RegisterInfo we convert them to a true stack adjustment. 
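As a concrete illustration of that last step, the usual shape of the hook that turns these markers into a real stack adjustment is sketched below. This is only a sketch, not the removed implementation: it assumes the standard LLVM CodeGen headers, reuses the emitSPUpdate helper declared in AArch64InstrInfo.h, and the function name and scratch-register choice are illustrative.

    // Sketch only: ADJCALLSTACKDOWN/UP carry their byte counts as immediate
    // operand 0, as in the pseudo-instruction definitions that follow.
    static void eliminateCallFramePseudo(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator MI,
                                         const AArch64InstrInfo &TII) {
      int64_t Amount = MI->getOperand(0).getImm();
      if (MI->getOpcode() == AArch64::ADJCALLSTACKDOWN)
        Amount = -Amount;                // growing the outgoing-argument area
      if (Amount != 0)                   // XSP is adjusted through the helper
        emitSPUpdate(MBB, MI, MI->getDebugLoc(), TII, AArch64::X16, Amount);
      MBB.erase(MI);                     // the marker itself produces no code
    }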
-let Defs = [XSP], Uses = [XSP] in { - def ADJCALLSTACKDOWN : PseudoInst<(outs), (ins i64imm:$amt), - [(AArch64callseq_start timm:$amt)]>; - - def ADJCALLSTACKUP : PseudoInst<(outs), (ins i64imm:$amt1, i64imm:$amt2), - [(AArch64callseq_end timm:$amt1, timm:$amt2)]>; -} - -//===----------------------------------------------------------------------===// -// Atomic operation pseudo-instructions -//===----------------------------------------------------------------------===// - -// These get selected from C++ code as a pretty much direct translation from the -// generic DAG nodes. The one exception is the AtomicOrdering is added as an -// operand so that the eventual lowering can make use of it and choose -// acquire/release operations when required. - -let usesCustomInserter = 1, hasCtrlDep = 1, mayLoad = 1, mayStore = 1 in { -multiclass AtomicSizes { - def _I8 : PseudoInst<(outs GPR32:$dst), - (ins GPR64xsp:$ptr, GPR32:$incr, i32imm:$ordering), []>; - def _I16 : PseudoInst<(outs GPR32:$dst), - (ins GPR64xsp:$ptr, GPR32:$incr, i32imm:$ordering), []>; - def _I32 : PseudoInst<(outs GPR32:$dst), - (ins GPR64xsp:$ptr, GPR32:$incr, i32imm:$ordering), []>; - def _I64 : PseudoInst<(outs GPR64:$dst), - (ins GPR64xsp:$ptr, GPR64:$incr, i32imm:$ordering), []>; -} -} - -defm ATOMIC_LOAD_ADD : AtomicSizes; -defm ATOMIC_LOAD_SUB : AtomicSizes; -defm ATOMIC_LOAD_AND : AtomicSizes; -defm ATOMIC_LOAD_OR : AtomicSizes; -defm ATOMIC_LOAD_XOR : AtomicSizes; -defm ATOMIC_LOAD_NAND : AtomicSizes; -defm ATOMIC_SWAP : AtomicSizes; -let Defs = [NZCV] in { - // These operations need a CMP to calculate the correct value - defm ATOMIC_LOAD_MIN : AtomicSizes; - defm ATOMIC_LOAD_MAX : AtomicSizes; - defm ATOMIC_LOAD_UMIN : AtomicSizes; - defm ATOMIC_LOAD_UMAX : AtomicSizes; -} - -class AtomicCmpSwap<RegisterClass GPRData> - : PseudoInst<(outs GPRData:$dst), - (ins GPR64xsp:$ptr, GPRData:$old, GPRData:$new, - i32imm:$ordering), []> { - let usesCustomInserter = 1; - let hasCtrlDep = 1; - let mayLoad = 1; - let mayStore = 1; - let Defs = [NZCV]; -} - -def ATOMIC_CMP_SWAP_I8 : AtomicCmpSwap<GPR32>; -def ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap<GPR32>; -def ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap<GPR32>; -def ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap<GPR64>; - -//===----------------------------------------------------------------------===// -// Add-subtract (extended register) instructions -//===----------------------------------------------------------------------===// -// Contains: ADD, ADDS, SUB, SUBS + aliases CMN, CMP - -// The RHS of these operations is conceptually a sign/zero-extended -// register, optionally shifted left by 1-4. The extension can be a -// NOP (e.g. "sxtx" sign-extending a 64-bit register to 64-bits) but -// must be specified with one exception: - -// If one of the registers is sp/wsp then LSL is an alias for UXTW in -// 32-bit instructions and UXTX in 64-bit versions, the shift amount -// is not optional in that case (but can explicitly be 0), and the -// entire suffix can be skipped (e.g. "add sp, x3, x2"). 
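Before the operand definitions, a small self-contained model of what the extended-register forms compute may help: the second source register is extended, then shifted left by the optional amount, and only then added or subtracted. The enum and function names below are invented for the example; the option-field values in the comments correspond to the instruction encodings defined later in this section.

    #include <cstdint>

    // Plain-C++ model of the "extended register" right-hand side: extend Rm,
    // then shift left by 0-4 before the add/sub/cmp.
    enum class Ext { UXTB, UXTH, UXTW, UXTX, SXTB, SXTH, SXTW, SXTX };

    uint64_t extendedOperand(uint64_t rm, Ext kind, unsigned shift /* 0..4 */) {
      uint64_t v = 0;
      switch (kind) {
      case Ext::UXTB: v = rm & 0xff;                      break; // option 0b000
      case Ext::UXTH: v = rm & 0xffff;                    break; // option 0b001
      case Ext::UXTW: v = rm & 0xffffffffULL;             break; // option 0b010
      case Ext::UXTX: v = rm;                             break; // option 0b011
      case Ext::SXTB: v = (uint64_t)(int64_t)(int8_t)rm;  break; // option 0b100
      case Ext::SXTH: v = (uint64_t)(int64_t)(int16_t)rm; break; // option 0b101
      case Ext::SXTW: v = (uint64_t)(int64_t)(int32_t)rm; break; // option 0b110
      case Ext::SXTX: v = rm;                             break; // option 0b111
      }
      return v << shift; // e.g. "add x0, sp, w1, uxtw #2" adds (zext(w1) << 2)
    }

The 32-bit-destination forms behave the same way except that the result is truncated to 32 bits.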
- -multiclass extend_operands<string PREFIX, string Diag> { - def _asmoperand : AsmOperandClass { - let Name = PREFIX; - let RenderMethod = "addRegExtendOperands"; - let PredicateMethod = "isRegExtend<A64SE::" # PREFIX # ">"; - let DiagnosticType = "AddSubRegExtend" # Diag; - } - - def _operand : Operand<i64>, - ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 4; }]> { - let PrintMethod = "printRegExtendOperand<A64SE::" # PREFIX # ">"; - let DecoderMethod = "DecodeRegExtendOperand"; - let ParserMatchClass = !cast<AsmOperandClass>(PREFIX # "_asmoperand"); - } -} - -defm UXTB : extend_operands<"UXTB", "Small">; -defm UXTH : extend_operands<"UXTH", "Small">; -defm UXTW : extend_operands<"UXTW", "Small">; -defm UXTX : extend_operands<"UXTX", "Large">; -defm SXTB : extend_operands<"SXTB", "Small">; -defm SXTH : extend_operands<"SXTH", "Small">; -defm SXTW : extend_operands<"SXTW", "Small">; -defm SXTX : extend_operands<"SXTX", "Large">; - -def LSL_extasmoperand : AsmOperandClass { - let Name = "RegExtendLSL"; - let RenderMethod = "addRegExtendOperands"; - let DiagnosticType = "AddSubRegExtendLarge"; -} - -def LSL_extoperand : Operand<i64> { - let ParserMatchClass = LSL_extasmoperand; -} - - -// The patterns for various sign-extensions are a little ugly and -// non-uniform because everything has already been promoted to the -// legal i64 and i32 types. We'll wrap the various variants up in a -// class for use later. -class extend_types { - dag uxtb; dag uxth; dag uxtw; dag uxtx; - dag sxtb; dag sxth; dag sxtw; dag sxtx; - ValueType ty; - RegisterClass GPR; -} - -def extends_to_i64 : extend_types { - let uxtb = (and (anyext i32:$Rm), 255); - let uxth = (and (anyext i32:$Rm), 65535); - let uxtw = (zext i32:$Rm); - let uxtx = (i64 $Rm); - - let sxtb = (sext_inreg (anyext i32:$Rm), i8); - let sxth = (sext_inreg (anyext i32:$Rm), i16); - let sxtw = (sext i32:$Rm); - let sxtx = (i64 $Rm); - - let ty = i64; - let GPR = GPR64xsp; -} - - -def extends_to_i32 : extend_types { - let uxtb = (and i32:$Rm, 255); - let uxth = (and i32:$Rm, 65535); - let uxtw = (i32 i32:$Rm); - let uxtx = (i32 i32:$Rm); - - let sxtb = (sext_inreg i32:$Rm, i8); - let sxth = (sext_inreg i32:$Rm, i16); - let sxtw = (i32 i32:$Rm); - let sxtx = (i32 i32:$Rm); - - let ty = i32; - let GPR = GPR32wsp; -} - -// Now, six of the extensions supported are easy and uniform: if the source size -// is 32-bits or less, then Rm is always a 32-bit register. We'll instantiate -// those instructions in one block. - -// The uxtx/sxtx could potentially be merged in, but three facts dissuaded me: -// + It would break the naming scheme: either ADDxx_uxtx or ADDww_uxtx would -// be impossible. -// + Patterns are very different as well. -// + Passing different registers would be ugly (more fields in extend_types -// would probably be the best option). 
-multiclass addsub_exts<bit sf, bit op, bit S, string asmop, - SDPatternOperator opfrag, - dag outs, extend_types exts> { - def w_uxtb : A64I_addsubext<sf, op, S, 0b00, 0b000, - outs, (ins exts.GPR:$Rn, GPR32:$Rm, UXTB_operand:$Imm3), - !strconcat(asmop, "$Rn, $Rm, $Imm3"), - [(opfrag exts.ty:$Rn, (shl exts.uxtb, UXTB_operand:$Imm3))], - NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; - def w_uxth : A64I_addsubext<sf, op, S, 0b00, 0b001, - outs, (ins exts.GPR:$Rn, GPR32:$Rm, UXTH_operand:$Imm3), - !strconcat(asmop, "$Rn, $Rm, $Imm3"), - [(opfrag exts.ty:$Rn, (shl exts.uxth, UXTH_operand:$Imm3))], - NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; - def w_uxtw : A64I_addsubext<sf, op, S, 0b00, 0b010, - outs, (ins exts.GPR:$Rn, GPR32:$Rm, UXTW_operand:$Imm3), - !strconcat(asmop, "$Rn, $Rm, $Imm3"), - [(opfrag exts.ty:$Rn, (shl exts.uxtw, UXTW_operand:$Imm3))], - NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; - - def w_sxtb : A64I_addsubext<sf, op, S, 0b00, 0b100, - outs, (ins exts.GPR:$Rn, GPR32:$Rm, SXTB_operand:$Imm3), - !strconcat(asmop, "$Rn, $Rm, $Imm3"), - [(opfrag exts.ty:$Rn, (shl exts.sxtb, SXTB_operand:$Imm3))], - NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; - def w_sxth : A64I_addsubext<sf, op, S, 0b00, 0b101, - outs, (ins exts.GPR:$Rn, GPR32:$Rm, SXTH_operand:$Imm3), - !strconcat(asmop, "$Rn, $Rm, $Imm3"), - [(opfrag exts.ty:$Rn, (shl exts.sxth, SXTH_operand:$Imm3))], - NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; - def w_sxtw : A64I_addsubext<sf, op, S, 0b00, 0b110, - outs, (ins exts.GPR:$Rn, GPR32:$Rm, SXTW_operand:$Imm3), - !strconcat(asmop, "$Rn, $Rm, $Imm3"), - [(opfrag exts.ty:$Rn, (shl exts.sxtw, SXTW_operand:$Imm3))], - NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; -} - -// These two could be merge in with the above, but their patterns aren't really -// necessary and the naming-scheme would necessarily break: -multiclass addsub_xxtx<bit op, bit S, string asmop, SDPatternOperator opfrag, - dag outs> { - def x_uxtx : A64I_addsubext<0b1, op, S, 0b00, 0b011, - outs, - (ins GPR64xsp:$Rn, GPR64:$Rm, UXTX_operand:$Imm3), - !strconcat(asmop, "$Rn, $Rm, $Imm3"), - [(opfrag i64:$Rn, (shl i64:$Rm, UXTX_operand:$Imm3))], - NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; - - def x_sxtx : A64I_addsubext<0b1, op, S, 0b00, 0b111, - outs, - (ins GPR64xsp:$Rn, GPR64:$Rm, SXTX_operand:$Imm3), - !strconcat(asmop, "$Rn, $Rm, $Imm3"), - [/* No Pattern: same as uxtx */], - NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; -} - -multiclass addsub_wxtx<bit op, bit S, string asmop, dag outs> { - def w_uxtx : A64I_addsubext<0b0, op, S, 0b00, 0b011, - outs, (ins GPR32wsp:$Rn, GPR32:$Rm, UXTX_operand:$Imm3), - !strconcat(asmop, "$Rn, $Rm, $Imm3"), - [/* No pattern: probably same as uxtw */], - NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; - - def w_sxtx : A64I_addsubext<0b0, op, S, 0b00, 0b111, - outs, (ins GPR32wsp:$Rn, GPR32:$Rm, SXTX_operand:$Imm3), - !strconcat(asmop, "$Rn, $Rm, $Imm3"), - [/* No Pattern: probably same as uxtw */], - NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; -} - -class SetRD<RegisterClass RC, SDPatternOperator op> - : PatFrag<(ops node:$lhs, node:$rhs), (set RC:$Rd, (op node:$lhs, node:$rhs))>; -class SetNZCV<SDPatternOperator op> - : PatFrag<(ops node:$lhs, node:$rhs), (set NZCV, (op node:$lhs, node:$rhs))>; - -defm ADDxx :addsub_exts<0b1, 0b0, 0b0, "add\t$Rd, ", SetRD<GPR64xsp, add>, - (outs GPR64xsp:$Rd), extends_to_i64>, - addsub_xxtx< 0b0, 0b0, "add\t$Rd, ", SetRD<GPR64xsp, add>, - (outs GPR64xsp:$Rd)>; -defm 
ADDww :addsub_exts<0b0, 0b0, 0b0, "add\t$Rd, ", SetRD<GPR32wsp, add>, - (outs GPR32wsp:$Rd), extends_to_i32>, - addsub_wxtx< 0b0, 0b0, "add\t$Rd, ", - (outs GPR32wsp:$Rd)>; -defm SUBxx :addsub_exts<0b1, 0b1, 0b0, "sub\t$Rd, ", SetRD<GPR64xsp, sub>, - (outs GPR64xsp:$Rd), extends_to_i64>, - addsub_xxtx< 0b1, 0b0, "sub\t$Rd, ", SetRD<GPR64xsp, sub>, - (outs GPR64xsp:$Rd)>; -defm SUBww :addsub_exts<0b0, 0b1, 0b0, "sub\t$Rd, ", SetRD<GPR32wsp, sub>, - (outs GPR32wsp:$Rd), extends_to_i32>, - addsub_wxtx< 0b1, 0b0, "sub\t$Rd, ", - (outs GPR32wsp:$Rd)>; - -let Defs = [NZCV] in { -defm ADDSxx :addsub_exts<0b1, 0b0, 0b1, "adds\t$Rd, ", SetRD<GPR64, addc>, - (outs GPR64:$Rd), extends_to_i64>, - addsub_xxtx< 0b0, 0b1, "adds\t$Rd, ", SetRD<GPR64, addc>, - (outs GPR64:$Rd)>; -defm ADDSww :addsub_exts<0b0, 0b0, 0b1, "adds\t$Rd, ", SetRD<GPR32, addc>, - (outs GPR32:$Rd), extends_to_i32>, - addsub_wxtx< 0b0, 0b1, "adds\t$Rd, ", - (outs GPR32:$Rd)>; -defm SUBSxx :addsub_exts<0b1, 0b1, 0b1, "subs\t$Rd, ", SetRD<GPR64, subc>, - (outs GPR64:$Rd), extends_to_i64>, - addsub_xxtx< 0b1, 0b1, "subs\t$Rd, ", SetRD<GPR64, subc>, - (outs GPR64:$Rd)>; -defm SUBSww :addsub_exts<0b0, 0b1, 0b1, "subs\t$Rd, ", SetRD<GPR32, subc>, - (outs GPR32:$Rd), extends_to_i32>, - addsub_wxtx< 0b1, 0b1, "subs\t$Rd, ", - (outs GPR32:$Rd)>; - - -let SchedRW = [WriteCMP, ReadCMP, ReadCMP], Rd = 0b11111, isCompare = 1 in { -defm CMNx : addsub_exts<0b1, 0b0, 0b1, "cmn\t", SetNZCV<A64cmn>, - (outs), extends_to_i64>, - addsub_xxtx< 0b0, 0b1, "cmn\t", SetNZCV<A64cmn>, (outs)>; -defm CMNw : addsub_exts<0b0, 0b0, 0b1, "cmn\t", SetNZCV<A64cmn>, - (outs), extends_to_i32>, - addsub_wxtx< 0b0, 0b1, "cmn\t", (outs)>; -defm CMPx : addsub_exts<0b1, 0b1, 0b1, "cmp\t", SetNZCV<A64cmp>, - (outs), extends_to_i64>, - addsub_xxtx< 0b1, 0b1, "cmp\t", SetNZCV<A64cmp>, (outs)>; -defm CMPw : addsub_exts<0b0, 0b1, 0b1, "cmp\t", SetNZCV<A64cmp>, - (outs), extends_to_i32>, - addsub_wxtx< 0b1, 0b1, "cmp\t", (outs)>; -} -} - -// Now patterns for the operation without a shift being needed. No patterns are -// created for uxtx/sxtx since they're non-uniform and it's expected that -// add/sub (shifted register) will handle those cases anyway. -multiclass addsubext_noshift_patterns<string prefix, SDPatternOperator nodeop, - extend_types exts> { - def : Pat<(nodeop exts.ty:$Rn, exts.uxtb), - (!cast<Instruction>(prefix # "w_uxtb") $Rn, $Rm, 0)>; - def : Pat<(nodeop exts.ty:$Rn, exts.uxth), - (!cast<Instruction>(prefix # "w_uxth") $Rn, $Rm, 0)>; - def : Pat<(nodeop exts.ty:$Rn, exts.uxtw), - (!cast<Instruction>(prefix # "w_uxtw") $Rn, $Rm, 0)>; - - def : Pat<(nodeop exts.ty:$Rn, exts.sxtb), - (!cast<Instruction>(prefix # "w_sxtb") $Rn, $Rm, 0)>; - def : Pat<(nodeop exts.ty:$Rn, exts.sxth), - (!cast<Instruction>(prefix # "w_sxth") $Rn, $Rm, 0)>; - def : Pat<(nodeop exts.ty:$Rn, exts.sxtw), - (!cast<Instruction>(prefix # "w_sxtw") $Rn, $Rm, 0)>; -} - -defm : addsubext_noshift_patterns<"ADDxx", add, extends_to_i64>; -defm : addsubext_noshift_patterns<"ADDww", add, extends_to_i32>; -defm : addsubext_noshift_patterns<"SUBxx", sub, extends_to_i64>; -defm : addsubext_noshift_patterns<"SUBww", sub, extends_to_i32>; - -defm : addsubext_noshift_patterns<"CMNx", A64cmn, extends_to_i64>; -defm : addsubext_noshift_patterns<"CMNw", A64cmn, extends_to_i32>; -defm : addsubext_noshift_patterns<"CMPx", A64cmp, extends_to_i64>; -defm : addsubext_noshift_patterns<"CMPw", A64cmp, extends_to_i32>; - -// An extend of "lsl #imm" is valid if and only if one of Rn and Rd is -// sp/wsp. 
It is synonymous with uxtx/uxtw depending on the size of the -// operation. Also permitted in this case is complete omission of the argument, -// which implies "lsl #0". -multiclass lsl_aliases<string asmop, Instruction inst, RegisterClass GPR_Rd, - RegisterClass GPR_Rn, RegisterClass GPR_Rm> { - def : InstAlias<!strconcat(asmop, " $Rd, $Rn, $Rm"), - (inst GPR_Rd:$Rd, GPR_Rn:$Rn, GPR_Rm:$Rm, 0)>; - - def : InstAlias<!strconcat(asmop, " $Rd, $Rn, $Rm, $LSL"), - (inst GPR_Rd:$Rd, GPR_Rn:$Rn, GPR_Rm:$Rm, LSL_extoperand:$LSL), - 0>; - -} - -defm : lsl_aliases<"add", ADDxxx_uxtx, Rxsp, GPR64xsp, GPR64>; -defm : lsl_aliases<"add", ADDxxx_uxtx, GPR64xsp, Rxsp, GPR64>; -defm : lsl_aliases<"add", ADDwww_uxtw, Rwsp, GPR32wsp, GPR32>; -defm : lsl_aliases<"add", ADDwww_uxtw, GPR32wsp, Rwsp, GPR32>; -defm : lsl_aliases<"sub", SUBxxx_uxtx, Rxsp, GPR64xsp, GPR64>; -defm : lsl_aliases<"sub", SUBxxx_uxtx, GPR64xsp, Rxsp, GPR64>; -defm : lsl_aliases<"sub", SUBwww_uxtw, Rwsp, GPR32wsp, GPR32>; -defm : lsl_aliases<"sub", SUBwww_uxtw, GPR32wsp, Rwsp, GPR32>; - -// Rd cannot be sp for flag-setting variants so only half of the aliases are -// needed. -defm : lsl_aliases<"adds", ADDSxxx_uxtx, GPR64, Rxsp, GPR64>; -defm : lsl_aliases<"adds", ADDSwww_uxtw, GPR32, Rwsp, GPR32>; -defm : lsl_aliases<"subs", SUBSxxx_uxtx, GPR64, Rxsp, GPR64>; -defm : lsl_aliases<"subs", SUBSwww_uxtw, GPR32, Rwsp, GPR32>; - -// CMP unfortunately has to be different because the instruction doesn't have a -// dest register. -multiclass cmp_lsl_aliases<string asmop, Instruction inst, - RegisterClass GPR_Rn, RegisterClass GPR_Rm> { - def : InstAlias<!strconcat(asmop, " $Rn, $Rm"), - (inst GPR_Rn:$Rn, GPR_Rm:$Rm, 0)>; - - def : InstAlias<!strconcat(asmop, " $Rn, $Rm, $LSL"), - (inst GPR_Rn:$Rn, GPR_Rm:$Rm, LSL_extoperand:$LSL)>; -} - -defm : cmp_lsl_aliases<"cmp", CMPxx_uxtx, Rxsp, GPR64>; -defm : cmp_lsl_aliases<"cmp", CMPww_uxtw, Rwsp, GPR32>; -defm : cmp_lsl_aliases<"cmn", CMNxx_uxtx, Rxsp, GPR64>; -defm : cmp_lsl_aliases<"cmn", CMNww_uxtw, Rwsp, GPR32>; - -//===----------------------------------------------------------------------===// -// Add-subtract (immediate) instructions -//===----------------------------------------------------------------------===// -// Contains: ADD, ADDS, SUB, SUBS + aliases CMN, CMP, MOV - -// These instructions accept a 12-bit unsigned immediate, optionally shifted -// left by 12 bits. Official assembly format specifies a 12-bit immediate with -// one of "", "LSL #0", "LSL #12" supplementary operands. - -// There are surprisingly few ways to make this work with TableGen, so this -// implementation has separate instructions for the "LSL #0" and "LSL #12" -// variants. - -// If the MCInst retained a single combined immediate (which could be 0x123000, -// for example) then both components (imm & shift) would have to be delegated to -// a single assembly operand. This would entail a separate operand parser -// (because the LSL would have to live in the same AArch64Operand as the -// immediate to be accessible); assembly parsing is rather complex and -// error-prone C++ code. -// -// By splitting the immediate, we can delegate handling this optional operand to -// an InstAlias. Supporting functions to generate the correct MCInst are still -// required, but these are essentially trivial and parsing can remain generic. -// -// Rejected plans with rationale: -// ------------------------------ -// -// In an ideal world you'd have two first class immediate operands (in -// InOperandList, specifying imm12 and shift).
Unfortunately this is not -// selectable by any means I could discover. -// -// An Instruction with two MCOperands hidden behind a single entry in -// InOperandList (expanded by ComplexPatterns and MIOperandInfo) was functional, -// but required more C++ code to handle encoding/decoding. Parsing (the intended -// main beneficiary) ended up equally complex because of the optional nature of -// "LSL #0". -// -// Attempting to circumvent the need for a custom OperandParser above by giving -// InstAliases without the "lsl #0" failed. add/sub could be accommodated but -// the cmp/cmn aliases didn't use the MIOperandInfo to determine how operands -// should be parsed: there was no way to accommodate an "lsl #12". - -let ParserMethod = "ParseImmWithLSLOperand", - RenderMethod = "addImmWithLSLOperands" in { - // Derived PredicateMethod fields are different for each - def addsubimm_lsl0_asmoperand : AsmOperandClass { - let Name = "AddSubImmLSL0"; - // If an error is reported against this operand, instruction could also be a - // register variant. - let DiagnosticType = "AddSubSecondSource"; - } - - def addsubimm_lsl12_asmoperand : AsmOperandClass { - let Name = "AddSubImmLSL12"; - let DiagnosticType = "AddSubSecondSource"; - } -} - -def shr_12_XFORM : SDNodeXForm<imm, [{ - return CurDAG->getTargetConstant(N->getSExtValue() >> 12, MVT::i32); -}]>; - -def shr_12_neg_XFORM : SDNodeXForm<imm, [{ - return CurDAG->getTargetConstant((-N->getSExtValue()) >> 12, MVT::i32); -}]>; - -def neg_XFORM : SDNodeXForm<imm, [{ - return CurDAG->getTargetConstant(-N->getSExtValue(), MVT::i32); -}]>; - - -multiclass addsub_imm_operands<ValueType ty> { - let PrintMethod = "printAddSubImmLSL0Operand", - EncoderMethod = "getAddSubImmOpValue", - ParserMatchClass = addsubimm_lsl0_asmoperand in { - def _posimm_lsl0 : Operand<ty>, - ImmLeaf<ty, [{ return Imm >= 0 && (Imm & ~0xfff) == 0; }]>; - def _negimm_lsl0 : Operand<ty>, - ImmLeaf<ty, [{ return Imm < 0 && (-Imm & ~0xfff) == 0; }], - neg_XFORM>; - } - - let PrintMethod = "printAddSubImmLSL12Operand", - EncoderMethod = "getAddSubImmOpValue", - ParserMatchClass = addsubimm_lsl12_asmoperand in { - def _posimm_lsl12 : Operand<ty>, - ImmLeaf<ty, [{ return Imm >= 0 && (Imm & ~0xfff000) == 0; }], - shr_12_XFORM>; - - def _negimm_lsl12 : Operand<ty>, - ImmLeaf<ty, [{ return Imm < 0 && (-Imm & ~0xfff000) == 0; }], - shr_12_neg_XFORM>; - } -} - -// The add operands don't need any transformation -defm addsubimm_operand_i32 : addsub_imm_operands<i32>; -defm addsubimm_operand_i64 : addsub_imm_operands<i64>; - -multiclass addsubimm_varieties<string prefix, bit sf, bit op, bits<2> shift, - string asmop, string cmpasmop, - Operand imm_operand, Operand cmp_imm_operand, - RegisterClass GPR, RegisterClass GPRsp, - AArch64Reg ZR, ValueType Ty> { - // All registers for non-S variants allow SP - def _s : A64I_addsubimm<sf, op, 0b0, shift, - (outs GPRsp:$Rd), - (ins GPRsp:$Rn, imm_operand:$Imm12), - !strconcat(asmop, "\t$Rd, $Rn, $Imm12"), - [(set Ty:$Rd, (add Ty:$Rn, imm_operand:$Imm12))], - NoItinerary>, - Sched<[WriteALU, ReadALU]>; - - - // S variants can read SP but would write to ZR - def _S : A64I_addsubimm<sf, op, 0b1, shift, - (outs GPR:$Rd), - (ins GPRsp:$Rn, imm_operand:$Imm12), - !strconcat(asmop, "s\t$Rd, $Rn, $Imm12"), - [(set Ty:$Rd, (addc Ty:$Rn, imm_operand:$Imm12))], - NoItinerary>, - Sched<[WriteALU, ReadALU]> { - let Defs = [NZCV]; - } - - // Note that the pattern here for ADDS is subtle. Canonically CMP - // a, b becomes SUBS a, b. 
If b < 0 then this is equivalent to - // ADDS a, (-b). This is not true in general. - def _cmp : A64I_addsubimm<sf, op, 0b1, shift, - (outs), (ins GPRsp:$Rn, imm_operand:$Imm12), - !strconcat(cmpasmop, " $Rn, $Imm12"), - [(set NZCV, - (A64cmp Ty:$Rn, cmp_imm_operand:$Imm12))], - NoItinerary>, - Sched<[WriteCMP, ReadCMP]> { - let Rd = 0b11111; - let Defs = [NZCV]; - let isCompare = 1; - } -} - - -multiclass addsubimm_shifts<string prefix, bit sf, bit op, - string asmop, string cmpasmop, string operand, string cmpoperand, - RegisterClass GPR, RegisterClass GPRsp, AArch64Reg ZR, - ValueType Ty> { - defm _lsl0 : addsubimm_varieties<prefix # "_lsl0", sf, op, 0b00, - asmop, cmpasmop, - !cast<Operand>(operand # "_lsl0"), - !cast<Operand>(cmpoperand # "_lsl0"), - GPR, GPRsp, ZR, Ty>; - - defm _lsl12 : addsubimm_varieties<prefix # "_lsl12", sf, op, 0b01, - asmop, cmpasmop, - !cast<Operand>(operand # "_lsl12"), - !cast<Operand>(cmpoperand # "_lsl12"), - GPR, GPRsp, ZR, Ty>; -} - -defm ADDwwi : addsubimm_shifts<"ADDwi", 0b0, 0b0, "add", "cmn", - "addsubimm_operand_i32_posimm", - "addsubimm_operand_i32_negimm", - GPR32, GPR32wsp, WZR, i32>; -defm ADDxxi : addsubimm_shifts<"ADDxi", 0b1, 0b0, "add", "cmn", - "addsubimm_operand_i64_posimm", - "addsubimm_operand_i64_negimm", - GPR64, GPR64xsp, XZR, i64>; -defm SUBwwi : addsubimm_shifts<"SUBwi", 0b0, 0b1, "sub", "cmp", - "addsubimm_operand_i32_negimm", - "addsubimm_operand_i32_posimm", - GPR32, GPR32wsp, WZR, i32>; -defm SUBxxi : addsubimm_shifts<"SUBxi", 0b1, 0b1, "sub", "cmp", - "addsubimm_operand_i64_negimm", - "addsubimm_operand_i64_posimm", - GPR64, GPR64xsp, XZR, i64>; - -multiclass MOVsp<RegisterClass GPRsp, RegisterClass SP, Instruction addop> { - def _fromsp : InstAlias<"mov $Rd, $Rn", - (addop GPRsp:$Rd, SP:$Rn, 0), - 0b1>; - - def _tosp : InstAlias<"mov $Rd, $Rn", - (addop SP:$Rd, GPRsp:$Rn, 0), - 0b1>; -} - -// Recall Rxsp is a RegisterClass containing *just* xsp. -defm MOVxx : MOVsp<GPR64xsp, Rxsp, ADDxxi_lsl0_s>; -defm MOVww : MOVsp<GPR32wsp, Rwsp, ADDwwi_lsl0_s>; - -//===----------------------------------------------------------------------===// -// Add-subtract (shifted register) instructions -//===----------------------------------------------------------------------===// -// Contains: ADD, ADDS, SUB, SUBS + aliases CMN, CMP, NEG, NEGS - -//===------------------------------- -// 1. The "shifted register" operands. Shared with logical insts. -//===------------------------------- - -multiclass shift_operands<string prefix, string form> { - def _asmoperand_i32 : AsmOperandClass { - let Name = "Shift" # form # "i32"; - let RenderMethod = "addShiftOperands"; - let PredicateMethod = "isShift<A64SE::" # form # ", false>"; - let DiagnosticType = "AddSubRegShift32"; - } - - // Note that the operand type is intentionally i64 because the DAGCombiner - // puts these into a canonical form. 
- def _i32 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 31; }]> { - let ParserMatchClass - = !cast<AsmOperandClass>(prefix # "_asmoperand_i32"); - let PrintMethod = "printShiftOperand<A64SE::" # form # ">"; - let DecoderMethod = "Decode32BitShiftOperand"; - } - - def _asmoperand_i64 : AsmOperandClass { - let Name = "Shift" # form # "i64"; - let RenderMethod = "addShiftOperands"; - let PredicateMethod = "isShift<A64SE::" # form # ", true>"; - let DiagnosticType = "AddSubRegShift64"; - } - - def _i64 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 63; }]> { - let ParserMatchClass - = !cast<AsmOperandClass>(prefix # "_asmoperand_i64"); - let PrintMethod = "printShiftOperand<A64SE::" # form # ">"; - } -} - -defm lsl_operand : shift_operands<"lsl_operand", "LSL">; -defm lsr_operand : shift_operands<"lsr_operand", "LSR">; -defm asr_operand : shift_operands<"asr_operand", "ASR">; - -// Not used for add/sub, but defined here for completeness. The "logical -// (shifted register)" instructions *do* have an ROR variant. -defm ror_operand : shift_operands<"ror_operand", "ROR">; - -//===------------------------------- -// 2. The basic 3.5-operand ADD/SUB/ADDS/SUBS instructions. -//===------------------------------- - -// N.b. the commutable parameter is just !N. It will be first against the wall -// when the revolution comes. -multiclass addsub_shifts<string prefix, bit sf, bit op, bit s, bit commutable, - string asmop, SDPatternOperator opfrag, ValueType ty, - RegisterClass GPR, list<Register> defs> { - let isCommutable = commutable, Defs = defs in { - def _lsl : A64I_addsubshift<sf, op, s, 0b00, - (outs GPR:$Rd), - (ins GPR:$Rn, GPR:$Rm, - !cast<Operand>("lsl_operand_" # ty):$Imm6), - !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"), - [(set GPR:$Rd, (opfrag ty:$Rn, (shl ty:$Rm, - !cast<Operand>("lsl_operand_" # ty):$Imm6)) - )], - NoItinerary>, - Sched<[WriteALU, ReadALU]>; - - def _lsr : A64I_addsubshift<sf, op, s, 0b01, - (outs GPR:$Rd), - (ins GPR:$Rn, GPR:$Rm, - !cast<Operand>("lsr_operand_" # ty):$Imm6), - !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"), - [(set ty:$Rd, (opfrag ty:$Rn, (srl ty:$Rm, - !cast<Operand>("lsr_operand_" # ty):$Imm6)) - )], - NoItinerary>, - Sched<[WriteALU, ReadALU]>; - - def _asr : A64I_addsubshift<sf, op, s, 0b10, - (outs GPR:$Rd), - (ins GPR:$Rn, GPR:$Rm, - !cast<Operand>("asr_operand_" # ty):$Imm6), - !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"), - [(set ty:$Rd, (opfrag ty:$Rn, (sra ty:$Rm, - !cast<Operand>("asr_operand_" # ty):$Imm6)) - )], - NoItinerary>, - Sched<[WriteALU, ReadALU]>; - } - - def _noshift - : InstAlias<!strconcat(asmop, " $Rd, $Rn, $Rm"), - (!cast<Instruction>(prefix # "_lsl") GPR:$Rd, GPR:$Rn, - GPR:$Rm, 0)>; - - def : Pat<(opfrag ty:$Rn, ty:$Rm), - (!cast<Instruction>(prefix # "_lsl") $Rn, $Rm, 0)>; -} - -multiclass addsub_sizes<string prefix, bit op, bit s, bit commutable, - string asmop, SDPatternOperator opfrag, - list<Register> defs> { - defm xxx : addsub_shifts<prefix # "xxx", 0b1, op, s, - commutable, asmop, opfrag, i64, GPR64, defs>; - defm www : addsub_shifts<prefix # "www", 0b0, op, s, - commutable, asmop, opfrag, i32, GPR32, defs>; -} - - -defm ADD : addsub_sizes<"ADD", 0b0, 0b0, 0b1, "add", add, []>; -defm SUB : addsub_sizes<"SUB", 0b1, 0b0, 0b0, "sub", sub, []>; - -defm ADDS : addsub_sizes<"ADDS", 0b0, 0b1, 0b1, "adds", addc, [NZCV]>; -defm SUBS : addsub_sizes<"SUBS", 0b1, 0b1, 0b0, "subs", subc, [NZCV]>; - -//===------------------------------- -// 1. 
The NEG/NEGS aliases -//===------------------------------- - -multiclass neg_alias<Instruction INST, RegisterClass GPR, Register ZR, - ValueType ty, Operand shift_operand, SDNode shiftop> { - def : InstAlias<"neg $Rd, $Rm, $Imm6", - (INST GPR:$Rd, ZR, GPR:$Rm, shift_operand:$Imm6)>; - - def : Pat<(sub 0, (shiftop ty:$Rm, shift_operand:$Imm6)), - (INST ZR, $Rm, shift_operand:$Imm6)>; -} - -defm : neg_alias<SUBwww_lsl, GPR32, WZR, i32, lsl_operand_i32, shl>; -defm : neg_alias<SUBwww_lsr, GPR32, WZR, i32, lsr_operand_i32, srl>; -defm : neg_alias<SUBwww_asr, GPR32, WZR, i32, asr_operand_i32, sra>; -def : InstAlias<"neg $Rd, $Rm", (SUBwww_lsl GPR32:$Rd, WZR, GPR32:$Rm, 0)>; -def : Pat<(sub 0, i32:$Rm), (SUBwww_lsl WZR, $Rm, 0)>; - -defm : neg_alias<SUBxxx_lsl, GPR64, XZR, i64, lsl_operand_i64, shl>; -defm : neg_alias<SUBxxx_lsr, GPR64, XZR, i64, lsr_operand_i64, srl>; -defm : neg_alias<SUBxxx_asr, GPR64, XZR, i64, asr_operand_i64, sra>; -def : InstAlias<"neg $Rd, $Rm", (SUBxxx_lsl GPR64:$Rd, XZR, GPR64:$Rm, 0)>; -def : Pat<(sub 0, i64:$Rm), (SUBxxx_lsl XZR, $Rm, 0)>; - -// NEGS doesn't get any patterns yet: defining multiple outputs means C++ has to -// be involved. -class negs_alias<Instruction INST, RegisterClass GPR, - Register ZR, Operand shift_operand, SDNode shiftop> - : InstAlias<"negs $Rd, $Rm, $Imm6", - (INST GPR:$Rd, ZR, GPR:$Rm, shift_operand:$Imm6)>; - -def : negs_alias<SUBSwww_lsl, GPR32, WZR, lsl_operand_i32, shl>; -def : negs_alias<SUBSwww_lsr, GPR32, WZR, lsr_operand_i32, srl>; -def : negs_alias<SUBSwww_asr, GPR32, WZR, asr_operand_i32, sra>; -def : InstAlias<"negs $Rd, $Rm", (SUBSwww_lsl GPR32:$Rd, WZR, GPR32:$Rm, 0)>; - -def : negs_alias<SUBSxxx_lsl, GPR64, XZR, lsl_operand_i64, shl>; -def : negs_alias<SUBSxxx_lsr, GPR64, XZR, lsr_operand_i64, srl>; -def : negs_alias<SUBSxxx_asr, GPR64, XZR, asr_operand_i64, sra>; -def : InstAlias<"negs $Rd, $Rm", (SUBSxxx_lsl GPR64:$Rd, XZR, GPR64:$Rm, 0)>; - -//===------------------------------- -// 1. 
The CMP/CMN aliases -//===------------------------------- - -multiclass cmp_shifts<string prefix, bit sf, bit op, bit commutable, - string asmop, SDPatternOperator opfrag, ValueType ty, - RegisterClass GPR> { - let isCommutable = commutable, Rd = 0b11111, Defs = [NZCV] in { - def _lsl : A64I_addsubshift<sf, op, 0b1, 0b00, - (outs), - (ins GPR:$Rn, GPR:$Rm, - !cast<Operand>("lsl_operand_" # ty):$Imm6), - !strconcat(asmop, "\t$Rn, $Rm, $Imm6"), - [(set NZCV, (opfrag ty:$Rn, (shl ty:$Rm, - !cast<Operand>("lsl_operand_" # ty):$Imm6)) - )], - NoItinerary>, - Sched<[WriteCMP, ReadCMP, ReadCMP]>; - - def _lsr : A64I_addsubshift<sf, op, 0b1, 0b01, - (outs), - (ins GPR:$Rn, GPR:$Rm, - !cast<Operand>("lsr_operand_" # ty):$Imm6), - !strconcat(asmop, "\t$Rn, $Rm, $Imm6"), - [(set NZCV, (opfrag ty:$Rn, (srl ty:$Rm, - !cast<Operand>("lsr_operand_" # ty):$Imm6)) - )], - NoItinerary>, - Sched<[WriteCMP, ReadCMP, ReadCMP]>; - - def _asr : A64I_addsubshift<sf, op, 0b1, 0b10, - (outs), - (ins GPR:$Rn, GPR:$Rm, - !cast<Operand>("asr_operand_" # ty):$Imm6), - !strconcat(asmop, "\t$Rn, $Rm, $Imm6"), - [(set NZCV, (opfrag ty:$Rn, (sra ty:$Rm, - !cast<Operand>("asr_operand_" # ty):$Imm6)) - )], - NoItinerary>, - Sched<[WriteCMP, ReadCMP, ReadCMP]>; - } - - def _noshift - : InstAlias<!strconcat(asmop, " $Rn, $Rm"), - (!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>; - - def : Pat<(opfrag ty:$Rn, ty:$Rm), - (!cast<Instruction>(prefix # "_lsl") $Rn, $Rm, 0)>; -} - -defm CMPww : cmp_shifts<"CMPww", 0b0, 0b1, 0b0, "cmp", A64cmp, i32, GPR32>; -defm CMPxx : cmp_shifts<"CMPxx", 0b1, 0b1, 0b0, "cmp", A64cmp, i64, GPR64>; - -defm CMNww : cmp_shifts<"CMNww", 0b0, 0b0, 0b1, "cmn", A64cmn, i32, GPR32>; -defm CMNxx : cmp_shifts<"CMNxx", 0b1, 0b0, 0b1, "cmn", A64cmn, i64, GPR64>; - -//===----------------------------------------------------------------------===// -// Add-subtract (with carry) instructions -//===----------------------------------------------------------------------===// -// Contains: ADC, ADCS, SBC, SBCS + aliases NGC, NGCS - -multiclass A64I_addsubcarrySizes<bit op, bit s, string asmop> { - let Uses = [NZCV] in { - def www : A64I_addsubcarry<0b0, op, s, 0b000000, - (outs GPR32:$Rd), (ins GPR32:$Rn, GPR32:$Rm), - !strconcat(asmop, "\t$Rd, $Rn, $Rm"), - [], NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; - - def xxx : A64I_addsubcarry<0b1, op, s, 0b000000, - (outs GPR64:$Rd), (ins GPR64:$Rn, GPR64:$Rm), - !strconcat(asmop, "\t$Rd, $Rn, $Rm"), - [], NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; - } -} - -let isCommutable = 1 in { - defm ADC : A64I_addsubcarrySizes<0b0, 0b0, "adc">; -} - -defm SBC : A64I_addsubcarrySizes<0b1, 0b0, "sbc">; - -let Defs = [NZCV] in { - let isCommutable = 1 in { - defm ADCS : A64I_addsubcarrySizes<0b0, 0b1, "adcs">; - } - - defm SBCS : A64I_addsubcarrySizes<0b1, 0b1, "sbcs">; -} - -def : InstAlias<"ngc $Rd, $Rm", (SBCwww GPR32:$Rd, WZR, GPR32:$Rm)>; -def : InstAlias<"ngc $Rd, $Rm", (SBCxxx GPR64:$Rd, XZR, GPR64:$Rm)>; -def : InstAlias<"ngcs $Rd, $Rm", (SBCSwww GPR32:$Rd, WZR, GPR32:$Rm)>; -def : InstAlias<"ngcs $Rd, $Rm", (SBCSxxx GPR64:$Rd, XZR, GPR64:$Rm)>; - -// Note that adde and sube can form a chain longer than two (e.g. for 256-bit -// addition). So the flag-setting instructions are appropriate. 
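Since the comment above appeals to carry chains, a short self-contained example of the lowering it enables may be useful: a wide add becomes one flag-setting add followed by carry-consuming adds, which is the adde/sube to ADCS/SBCS mapping in the patterns below. Type and function names are invented for the illustration.

    #include <cstdint>

    // 128-bit addition as an ADDS/ADCS pair; a 256-bit add would simply chain
    // further ADCS steps, with the carry travelling through NZCV.
    struct U128 { uint64_t lo, hi; };

    U128 add128(U128 a, U128 b) {
      U128 r;
      r.lo = a.lo + b.lo;                       // ADDS: produces the carry
      uint64_t carry = (r.lo < a.lo) ? 1u : 0u; // carry out of the low word
      r.hi = a.hi + b.hi + carry;               // ADCS: consumes the carry
      return r;
    }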
-def : Pat<(adde i32:$Rn, i32:$Rm), (ADCSwww $Rn, $Rm)>; -def : Pat<(adde i64:$Rn, i64:$Rm), (ADCSxxx $Rn, $Rm)>; -def : Pat<(sube i32:$Rn, i32:$Rm), (SBCSwww $Rn, $Rm)>; -def : Pat<(sube i64:$Rn, i64:$Rm), (SBCSxxx $Rn, $Rm)>; - -//===----------------------------------------------------------------------===// -// Bitfield -//===----------------------------------------------------------------------===// -// Contains: SBFM, BFM, UBFM, [SU]XT[BHW], ASR, LSR, LSL, SBFI[ZX], BFI, BFXIL, -// UBFIZ, UBFX - -// Because of the rather complicated nearly-overlapping aliases, the decoding of -// this range of instructions is handled manually. The architectural -// instructions are BFM, SBFM and UBFM but a disassembler should never produce -// these. -// -// In the end, the best option was to use BFM instructions for decoding under -// almost all circumstances, but to create aliasing *Instructions* for each of -// the canonical forms and specify a completely custom decoder which would -// substitute the correct MCInst as needed. -// -// This also simplifies instruction selection, parsing etc because the MCInsts -// have a shape that's closer to their use in code. - -//===------------------------------- -// 1. The architectural BFM instructions -//===------------------------------- - -def uimm5_asmoperand : AsmOperandClass { - let Name = "UImm5"; - let PredicateMethod = "isUImm<5>"; - let RenderMethod = "addImmOperands"; - let DiagnosticType = "UImm5"; -} - -def uimm6_asmoperand : AsmOperandClass { - let Name = "UImm6"; - let PredicateMethod = "isUImm<6>"; - let RenderMethod = "addImmOperands"; - let DiagnosticType = "UImm6"; -} - -def bitfield32_imm : Operand<i64>, - ImmLeaf<i64, [{ return Imm >= 0 && Imm < 32; }]> { - let ParserMatchClass = uimm5_asmoperand; - - let DecoderMethod = "DecodeBitfield32ImmOperand"; -} - - -def bitfield64_imm : Operand<i64>, - ImmLeaf<i64, [{ return Imm >= 0 && Imm < 64; }]> { - let ParserMatchClass = uimm6_asmoperand; - - // Default decoder works in 64-bit case: the 6-bit field can take any value. -} - -multiclass A64I_bitfieldSizes<bits<2> opc, string asmop> { - def wwii : A64I_bitfield<0b0, opc, 0b0, (outs GPR32:$Rd), - (ins GPR32:$Rn, bitfield32_imm:$ImmR, bitfield32_imm:$ImmS), - !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"), - [], NoItinerary>, - Sched<[WriteALU, ReadALU]> { - let DecoderMethod = "DecodeBitfieldInstruction"; - } - - def xxii : A64I_bitfield<0b1, opc, 0b1, (outs GPR64:$Rd), - (ins GPR64:$Rn, bitfield64_imm:$ImmR, bitfield64_imm:$ImmS), - !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"), - [], NoItinerary>, - Sched<[WriteALU, ReadALU]> { - let DecoderMethod = "DecodeBitfieldInstruction"; - } -} - -defm SBFM : A64I_bitfieldSizes<0b00, "sbfm">; -defm UBFM : A64I_bitfieldSizes<0b10, "ubfm">; - -// BFM instructions modify the destination register rather than defining it -// completely. 
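To make "modify rather than define" concrete, a plain-C++ model of the BFI-style use of BFM follows; the helper name is invented. The read-modify-write behaviour it shows is the reason the definitions below tie the $src operand to $Rd.

    #include <cstdint>

    // Bitfield insert: copy the low `width` bits of src into dst starting at
    // bit `lsb`, leaving every other bit of dst unchanged.
    uint64_t bitfieldInsert(uint64_t dst, uint64_t src, unsigned lsb, unsigned width) {
      uint64_t mask = (width >= 64) ? ~0ULL : ((1ULL << width) - 1);
      return (dst & ~(mask << lsb)) | ((src & mask) << lsb);
    }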
-def BFMwwii : - A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd), - (ins GPR32:$src, GPR32:$Rn, bitfield32_imm:$ImmR, bitfield32_imm:$ImmS), - "bfm\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]> { - let DecoderMethod = "DecodeBitfieldInstruction"; - let Constraints = "$src = $Rd"; -} - -def BFMxxii : - A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd), - (ins GPR64:$src, GPR64:$Rn, bitfield64_imm:$ImmR, bitfield64_imm:$ImmS), - "bfm\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]> { - let DecoderMethod = "DecodeBitfieldInstruction"; - let Constraints = "$src = $Rd"; -} - - -//===------------------------------- -// 2. Extend aliases to 64-bit dest -//===------------------------------- - -// Unfortunately the extensions that end up as 64-bits cannot be handled by an -// instruction alias: their syntax is (for example) "SXTB x0, w0", which needs -// to be mapped to "SBFM x0, x0, #0, 7" (changing the class of Rn). InstAlias is -// not capable of such a map as far as I'm aware - -// Note that these instructions are strictly more specific than the -// BFM ones (in ImmR) so they can handle their own decoding. -class A64I_bf_ext<bit sf, bits<2> opc, RegisterClass GPRDest, ValueType dty, - string asmop, bits<6> imms, dag pattern> - : A64I_bitfield<sf, opc, sf, - (outs GPRDest:$Rd), (ins GPR32:$Rn), - !strconcat(asmop, "\t$Rd, $Rn"), - [(set dty:$Rd, pattern)], NoItinerary>, - Sched<[WriteALU, ReadALU]> { - let ImmR = 0b000000; - let ImmS = imms; -} - -// Signed extensions -def SXTBxw : A64I_bf_ext<0b1, 0b00, GPR64, i64, "sxtb", 7, - (sext_inreg (anyext i32:$Rn), i8)>; -def SXTBww : A64I_bf_ext<0b0, 0b00, GPR32, i32, "sxtb", 7, - (sext_inreg i32:$Rn, i8)>; -def SXTHxw : A64I_bf_ext<0b1, 0b00, GPR64, i64, "sxth", 15, - (sext_inreg (anyext i32:$Rn), i16)>; -def SXTHww : A64I_bf_ext<0b0, 0b00, GPR32, i32, "sxth", 15, - (sext_inreg i32:$Rn, i16)>; -def SXTWxw : A64I_bf_ext<0b1, 0b00, GPR64, i64, "sxtw", 31, (sext i32:$Rn)>; - -// Unsigned extensions -def UXTBww : A64I_bf_ext<0b0, 0b10, GPR32, i32, "uxtb", 7, - (and i32:$Rn, 255)>; -def UXTHww : A64I_bf_ext<0b0, 0b10, GPR32, i32, "uxth", 15, - (and i32:$Rn, 65535)>; - -// The 64-bit unsigned variants are not strictly architectural but recommended -// for consistency. -let isAsmParserOnly = 1 in { - def UXTBxw : A64I_bf_ext<0b0, 0b10, GPR64, i64, "uxtb", 7, - (and (anyext i32:$Rn), 255)>; - def UXTHxw : A64I_bf_ext<0b0, 0b10, GPR64, i64, "uxth", 15, - (and (anyext i32:$Rn), 65535)>; -} - -// Extra patterns for when the source register is actually 64-bits -// too. There's no architectural difference here, it's just LLVM -// shinanigans. There's no need for equivalent zero-extension patterns -// because they'll already be caught by logical (immediate) matching. -def : Pat<(sext_inreg i64:$Rn, i8), - (SXTBxw (EXTRACT_SUBREG $Rn, sub_32))>; -def : Pat<(sext_inreg i64:$Rn, i16), - (SXTHxw (EXTRACT_SUBREG $Rn, sub_32))>; -def : Pat<(sext_inreg i64:$Rn, i32), - (SXTWxw (EXTRACT_SUBREG $Rn, sub_32))>; - - -//===------------------------------- -// 3. Aliases for ASR and LSR (the simple shifts) -//===------------------------------- - -// These also handle their own decoding because ImmS being set makes -// them take precedence over BFM. 
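For reference while reading these aliases, the standard A64 mapping from the simple shifts onto [SU]BFM immediates can be written down in a few lines; the struct and helper names are invented, and the 32-bit forms use 31 where 63 appears here.

    // ImmR/ImmS pairs for the 64-bit shift aliases (architectural aliases,
    // independent of this backend): LSR/ASR pin ImmS to the top bit, which is
    // what lets them take decoding precedence over plain BFM.
    struct BFMImms { unsigned immR, immS; };

    BFMImms lsrAsUbfm(unsigned shift) { return {shift, 63}; }  // LSR -> UBFM Rd, Rn, #s, #63
    BFMImms asrAsSbfm(unsigned shift) { return {shift, 63}; }  // ASR -> SBFM Rd, Rn, #s, #63
    BFMImms lslAsUbfm(unsigned shift) {                        // LSL -> UBFM Rd, Rn, #((64-s)%64), #(63-s)
      return {(64u - shift) % 64u, 63u - shift};
    }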
-multiclass A64I_shift<bits<2> opc, string asmop, SDNode opnode> { - def wwi : A64I_bitfield<0b0, opc, 0b0, - (outs GPR32:$Rd), (ins GPR32:$Rn, bitfield32_imm:$ImmR), - !strconcat(asmop, "\t$Rd, $Rn, $ImmR"), - [(set i32:$Rd, (opnode i32:$Rn, bitfield32_imm:$ImmR))], - NoItinerary>, - Sched<[WriteALU, ReadALU]> { - let ImmS = 31; - } - - def xxi : A64I_bitfield<0b1, opc, 0b1, - (outs GPR64:$Rd), (ins GPR64:$Rn, bitfield64_imm:$ImmR), - !strconcat(asmop, "\t$Rd, $Rn, $ImmR"), - [(set i64:$Rd, (opnode i64:$Rn, bitfield64_imm:$ImmR))], - NoItinerary>, - Sched<[WriteALU, ReadALU]> { - let ImmS = 63; - } - -} - -defm ASR : A64I_shift<0b00, "asr", sra>; -defm LSR : A64I_shift<0b10, "lsr", srl>; - -//===------------------------------- -// 4. Aliases for LSL -//===------------------------------- - -// Unfortunately LSL and subsequent aliases are much more complicated. We need -// to be able to say certain output instruction fields depend in a complex -// manner on combinations of input assembly fields). -// -// MIOperandInfo *might* have been able to do it, but at the cost of -// significantly more C++ code. - -// N.b. contrary to usual practice these operands store the shift rather than -// the machine bits in an MCInst. The complexity overhead of consistency -// outweighed the benefits in this case (custom asmparser, printer and selection -// vs custom encoder). -def bitfield32_lsl_imm : Operand<i64>, - ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 31; }]> { - let ParserMatchClass = uimm5_asmoperand; - let EncoderMethod = "getBitfield32LSLOpValue"; -} - -def bitfield64_lsl_imm : Operand<i64>, - ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 63; }]> { - let ParserMatchClass = uimm6_asmoperand; - let EncoderMethod = "getBitfield64LSLOpValue"; -} - -class A64I_bitfield_lsl<bit sf, RegisterClass GPR, ValueType ty, - Operand operand> - : A64I_bitfield<sf, 0b10, sf, (outs GPR:$Rd), (ins GPR:$Rn, operand:$FullImm), - "lsl\t$Rd, $Rn, $FullImm", - [(set ty:$Rd, (shl ty:$Rn, operand:$FullImm))], - NoItinerary>, - Sched<[WriteALU, ReadALU]> { - bits<12> FullImm; - let ImmR = FullImm{5-0}; - let ImmS = FullImm{11-6}; - - // No disassembler allowed because it would overlap with BFM which does the - // actual work. - let isAsmParserOnly = 1; -} - -def LSLwwi : A64I_bitfield_lsl<0b0, GPR32, i32, bitfield32_lsl_imm>; -def LSLxxi : A64I_bitfield_lsl<0b1, GPR64, i64, bitfield64_lsl_imm>; - -//===------------------------------- -// 5. 
Aliases for bitfield extract instructions -//===------------------------------- - -def bfx32_width_asmoperand : AsmOperandClass { - let Name = "BFX32Width"; - let PredicateMethod = "isBitfieldWidth<32>"; - let RenderMethod = "addBFXWidthOperands"; - let DiagnosticType = "Width32"; -} - -def bfx32_width : Operand<i64>, ImmLeaf<i64, [{ return true; }]> { - let PrintMethod = "printBFXWidthOperand"; - let ParserMatchClass = bfx32_width_asmoperand; -} - -def bfx64_width_asmoperand : AsmOperandClass { - let Name = "BFX64Width"; - let PredicateMethod = "isBitfieldWidth<64>"; - let RenderMethod = "addBFXWidthOperands"; - let DiagnosticType = "Width64"; -} - -def bfx64_width : Operand<i64> { - let PrintMethod = "printBFXWidthOperand"; - let ParserMatchClass = bfx64_width_asmoperand; -} - - -multiclass A64I_bitfield_extract<bits<2> opc, string asmop, SDNode op> { - def wwii : A64I_bitfield<0b0, opc, 0b0, (outs GPR32:$Rd), - (ins GPR32:$Rn, bitfield32_imm:$ImmR, bfx32_width:$ImmS), - !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"), - [(set i32:$Rd, (op i32:$Rn, imm:$ImmR, imm:$ImmS))], - NoItinerary>, - Sched<[WriteALU, ReadALU]> { - // As above, no disassembler allowed. - let isAsmParserOnly = 1; - } - - def xxii : A64I_bitfield<0b1, opc, 0b1, (outs GPR64:$Rd), - (ins GPR64:$Rn, bitfield64_imm:$ImmR, bfx64_width:$ImmS), - !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"), - [(set i64:$Rd, (op i64:$Rn, imm:$ImmR, imm:$ImmS))], - NoItinerary>, - Sched<[WriteALU, ReadALU]> { - // As above, no disassembler allowed. - let isAsmParserOnly = 1; - } -} - -defm SBFX : A64I_bitfield_extract<0b00, "sbfx", A64Sbfx>; -defm UBFX : A64I_bitfield_extract<0b10, "ubfx", A64Ubfx>; - -// Again, variants based on BFM modify Rd so need it as an input too. -def BFXILwwii : A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd), - (ins GPR32:$src, GPR32:$Rn, bitfield32_imm:$ImmR, bfx32_width:$ImmS), - "bfxil\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]> { - // As above, no disassembler allowed. - let isAsmParserOnly = 1; - let Constraints = "$src = $Rd"; -} - -def BFXILxxii : A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd), - (ins GPR64:$src, GPR64:$Rn, bitfield64_imm:$ImmR, bfx64_width:$ImmS), - "bfxil\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]> { - // As above, no disassembler allowed. - let isAsmParserOnly = 1; - let Constraints = "$src = $Rd"; -} - -// SBFX instructions can do a 1-instruction sign-extension of boolean values. -def : Pat<(sext_inreg i64:$Rn, i1), (SBFXxxii $Rn, 0, 0)>; -def : Pat<(sext_inreg i32:$Rn, i1), (SBFXwwii $Rn, 0, 0)>; -def : Pat<(i64 (sext_inreg (anyext i32:$Rn), i1)), - (SBFXxxii (SUBREG_TO_REG (i64 0), $Rn, sub_32), 0, 0)>; - -// UBFX makes sense as an implementation of a 64-bit zero-extension too. Could -// use either 64-bit or 32-bit variant, but 32-bit might be more efficient. -def : Pat<(i64 (zext i32:$Rn)), (SUBREG_TO_REG (i64 0), (UBFXwwii $Rn, 0, 31), - sub_32)>; - -//===------------------------------- -// 6. 
Aliases for bitfield insert instructions -//===------------------------------- - -def bfi32_lsb_asmoperand : AsmOperandClass { - let Name = "BFI32LSB"; - let PredicateMethod = "isUImm<5>"; - let RenderMethod = "addBFILSBOperands<32>"; - let DiagnosticType = "UImm5"; -} - -def bfi32_lsb : Operand<i64>, - ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 31; }]> { - let PrintMethod = "printBFILSBOperand<32>"; - let ParserMatchClass = bfi32_lsb_asmoperand; -} - -def bfi64_lsb_asmoperand : AsmOperandClass { - let Name = "BFI64LSB"; - let PredicateMethod = "isUImm<6>"; - let RenderMethod = "addBFILSBOperands<64>"; - let DiagnosticType = "UImm6"; -} - -def bfi64_lsb : Operand<i64>, - ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 63; }]> { - let PrintMethod = "printBFILSBOperand<64>"; - let ParserMatchClass = bfi64_lsb_asmoperand; -} - -// Width verification is performed during conversion so width operand can be -// shared between 32/64-bit cases. Still needed for the print method though -// because ImmR encodes "width - 1". -def bfi32_width_asmoperand : AsmOperandClass { - let Name = "BFI32Width"; - let PredicateMethod = "isBitfieldWidth<32>"; - let RenderMethod = "addBFIWidthOperands"; - let DiagnosticType = "Width32"; -} - -def bfi32_width : Operand<i64>, - ImmLeaf<i64, [{ return Imm >= 1 && Imm <= 32; }]> { - let PrintMethod = "printBFIWidthOperand"; - let ParserMatchClass = bfi32_width_asmoperand; -} - -def bfi64_width_asmoperand : AsmOperandClass { - let Name = "BFI64Width"; - let PredicateMethod = "isBitfieldWidth<64>"; - let RenderMethod = "addBFIWidthOperands"; - let DiagnosticType = "Width64"; -} - -def bfi64_width : Operand<i64>, - ImmLeaf<i64, [{ return Imm >= 1 && Imm <= 64; }]> { - let PrintMethod = "printBFIWidthOperand"; - let ParserMatchClass = bfi64_width_asmoperand; -} - -multiclass A64I_bitfield_insert<bits<2> opc, string asmop> { - def wwii : A64I_bitfield<0b0, opc, 0b0, (outs GPR32:$Rd), - (ins GPR32:$Rn, bfi32_lsb:$ImmR, bfi32_width:$ImmS), - !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"), - [], NoItinerary>, - Sched<[WriteALU, ReadALU]> { - // As above, no disassembler allowed. - let isAsmParserOnly = 1; - } - - def xxii : A64I_bitfield<0b1, opc, 0b1, (outs GPR64:$Rd), - (ins GPR64:$Rn, bfi64_lsb:$ImmR, bfi64_width:$ImmS), - !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"), - [], NoItinerary>, - Sched<[WriteALU, ReadALU]> { - // As above, no disassembler allowed. - let isAsmParserOnly = 1; - } -} - -defm SBFIZ : A64I_bitfield_insert<0b00, "sbfiz">; -defm UBFIZ : A64I_bitfield_insert<0b10, "ubfiz">; - - -def BFIwwii : A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd), - (ins GPR32:$src, GPR32:$Rn, bfi32_lsb:$ImmR, bfi32_width:$ImmS), - "bfi\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]> { - // As above, no disassembler allowed. - let isAsmParserOnly = 1; - let Constraints = "$src = $Rd"; -} - -def BFIxxii : A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd), - (ins GPR64:$src, GPR64:$Rn, bfi64_lsb:$ImmR, bfi64_width:$ImmS), - "bfi\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]> { - // As above, no disassembler allowed. 
- let isAsmParserOnly = 1; - let Constraints = "$src = $Rd"; -} - -//===----------------------------------------------------------------------===// -// Compare and branch (immediate) -//===----------------------------------------------------------------------===// -// Contains: CBZ, CBNZ - -class label_asmoperand<int width, int scale> : AsmOperandClass { - let Name = "Label" # width # "_" # scale; - let PredicateMethod = "isLabel<" # width # "," # scale # ">"; - let RenderMethod = "addLabelOperands<" # width # ", " # scale # ">"; - let DiagnosticType = "Label"; -} - -def label_wid19_scal4_asmoperand : label_asmoperand<19, 4>; - -// All conditional immediate branches are the same really: 19 signed bits scaled -// by the instruction-size (4). -def bcc_target : Operand<OtherVT> { - // This label is a 19-bit offset from PC, scaled by the instruction-width: 4. - let ParserMatchClass = label_wid19_scal4_asmoperand; - let PrintMethod = "printLabelOperand<19, 4>"; - let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_condbr>"; - let OperandType = "OPERAND_PCREL"; -} - -multiclass cmpbr_sizes<bit op, string asmop, ImmLeaf SETOP> { - let isBranch = 1, isTerminator = 1 in { - def x : A64I_cmpbr<0b1, op, - (outs), - (ins GPR64:$Rt, bcc_target:$Label), - !strconcat(asmop,"\t$Rt, $Label"), - [(A64br_cc (A64cmp i64:$Rt, 0), SETOP, bb:$Label)], - NoItinerary>, - Sched<[WriteBr, ReadBr]>; - - def w : A64I_cmpbr<0b0, op, - (outs), - (ins GPR32:$Rt, bcc_target:$Label), - !strconcat(asmop,"\t$Rt, $Label"), - [(A64br_cc (A64cmp i32:$Rt, 0), SETOP, bb:$Label)], - NoItinerary>, - Sched<[WriteBr, ReadBr]>; - } -} - -defm CBZ : cmpbr_sizes<0b0, "cbz", ImmLeaf<i32, [{ - return Imm == A64CC::EQ; -}]> >; -defm CBNZ : cmpbr_sizes<0b1, "cbnz", ImmLeaf<i32, [{ - return Imm == A64CC::NE; -}]> >; - -//===----------------------------------------------------------------------===// -// Conditional branch (immediate) instructions -//===----------------------------------------------------------------------===// -// Contains: B.cc - -def cond_code_asmoperand : AsmOperandClass { - let Name = "CondCode"; - let DiagnosticType = "CondCode"; -} - -def cond_code : Operand<i32>, ImmLeaf<i32, [{ - return Imm >= 0 && Imm <= 15; -}]> { - let PrintMethod = "printCondCodeOperand"; - let ParserMatchClass = cond_code_asmoperand; -} - -def Bcc : A64I_condbr<0b0, 0b0, (outs), - (ins cond_code:$Cond, bcc_target:$Label), - "b.$Cond $Label", [(A64br_cc NZCV, (i32 imm:$Cond), bb:$Label)], - NoItinerary>, - Sched<[WriteBr]> { - let Uses = [NZCV]; - let isBranch = 1; - let isTerminator = 1; -} - -//===----------------------------------------------------------------------===// -// Conditional compare (immediate) instructions -//===----------------------------------------------------------------------===// -// Contains: CCMN, CCMP - -def uimm4_asmoperand : AsmOperandClass { - let Name = "UImm4"; - let PredicateMethod = "isUImm<4>"; - let RenderMethod = "addImmOperands"; - let DiagnosticType = "UImm4"; -} - -def uimm4 : Operand<i32> { - let ParserMatchClass = uimm4_asmoperand; -} - -def uimm5 : Operand<i32> { - let ParserMatchClass = uimm5_asmoperand; -} - -// The only difference between this operand and the one for instructions like -// B.cc is that it's parsed manually. The other get parsed implicitly as part of -// the mnemonic handling. 
-def cond_code_op_asmoperand : AsmOperandClass { - let Name = "CondCodeOp"; - let RenderMethod = "addCondCodeOperands"; - let PredicateMethod = "isCondCode"; - let ParserMethod = "ParseCondCodeOperand"; - let DiagnosticType = "CondCode"; -} - -def cond_code_op : Operand<i32> { - let PrintMethod = "printCondCodeOperand"; - let ParserMatchClass = cond_code_op_asmoperand; -} - -class A64I_condcmpimmImpl<bit sf, bit op, RegisterClass GPR, string asmop> - : A64I_condcmpimm<sf, op, 0b0, 0b0, 0b1, (outs), - (ins GPR:$Rn, uimm5:$UImm5, uimm4:$NZCVImm, cond_code_op:$Cond), - !strconcat(asmop, "\t$Rn, $UImm5, $NZCVImm, $Cond"), - [], NoItinerary>, - Sched<[WriteCMP, ReadCMP]> { - let Defs = [NZCV]; -} - -def CCMNwi : A64I_condcmpimmImpl<0b0, 0b0, GPR32, "ccmn">; -def CCMNxi : A64I_condcmpimmImpl<0b1, 0b0, GPR64, "ccmn">; -def CCMPwi : A64I_condcmpimmImpl<0b0, 0b1, GPR32, "ccmp">; -def CCMPxi : A64I_condcmpimmImpl<0b1, 0b1, GPR64, "ccmp">; - -//===----------------------------------------------------------------------===// -// Conditional compare (register) instructions -//===----------------------------------------------------------------------===// -// Contains: CCMN, CCMP - -class A64I_condcmpregImpl<bit sf, bit op, RegisterClass GPR, string asmop> - : A64I_condcmpreg<sf, op, 0b0, 0b0, 0b1, - (outs), - (ins GPR:$Rn, GPR:$Rm, uimm4:$NZCVImm, cond_code_op:$Cond), - !strconcat(asmop, "\t$Rn, $Rm, $NZCVImm, $Cond"), - [], NoItinerary>, - Sched<[WriteCMP, ReadCMP, ReadCMP]> { - let Defs = [NZCV]; -} - -def CCMNww : A64I_condcmpregImpl<0b0, 0b0, GPR32, "ccmn">; -def CCMNxx : A64I_condcmpregImpl<0b1, 0b0, GPR64, "ccmn">; -def CCMPww : A64I_condcmpregImpl<0b0, 0b1, GPR32, "ccmp">; -def CCMPxx : A64I_condcmpregImpl<0b1, 0b1, GPR64, "ccmp">; - -//===----------------------------------------------------------------------===// -// Conditional select instructions -//===----------------------------------------------------------------------===// -// Contains: CSEL, CSINC, CSINV, CSNEG + aliases CSET, CSETM, CINC, CINV, CNEG - -// Condition code which is encoded as the inversion (semantically rather than -// bitwise) in the instruction. -def inv_cond_code_op_asmoperand : AsmOperandClass { - let Name = "InvCondCodeOp"; - let RenderMethod = "addInvCondCodeOperands"; - let PredicateMethod = "isCondCode"; - let ParserMethod = "ParseCondCodeOperand"; - let DiagnosticType = "CondCode"; -} - -def inv_cond_code_op : Operand<i32> { - let ParserMatchClass = inv_cond_code_op_asmoperand; - let PrintMethod = "printInverseCondCodeOperand"; -} - -// Having a separate operand for the selectable use-case is debatable, but gives -// consistency with cond_code. 
-def inv_cond_XFORM : SDNodeXForm<imm, [{ - A64CC::CondCodes CC = static_cast<A64CC::CondCodes>(N->getZExtValue()); - return CurDAG->getTargetConstant(A64InvertCondCode(CC), MVT::i32); -}]>; - -def inv_cond_code - : ImmLeaf<i32, [{ return Imm >= 0 && Imm <= 15; }], inv_cond_XFORM>; - - -multiclass A64I_condselSizes<bit op, bits<2> op2, string asmop, - SDPatternOperator select> { - let Uses = [NZCV] in { - def wwwc : A64I_condsel<0b0, op, 0b0, op2, - (outs GPR32:$Rd), - (ins GPR32:$Rn, GPR32:$Rm, cond_code_op:$Cond), - !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Cond"), - [(set i32:$Rd, (select i32:$Rn, i32:$Rm))], - NoItinerary>, - Sched<[WriteCMP, ReadCMP, ReadCMP]>; - - - def xxxc : A64I_condsel<0b1, op, 0b0, op2, - (outs GPR64:$Rd), - (ins GPR64:$Rn, GPR64:$Rm, cond_code_op:$Cond), - !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Cond"), - [(set i64:$Rd, (select i64:$Rn, i64:$Rm))], - NoItinerary>, - Sched<[WriteCMP, ReadCMP, ReadCMP]>; - } -} - -def simple_select - : PatFrag<(ops node:$lhs, node:$rhs), - (A64select_cc NZCV, node:$lhs, node:$rhs, (i32 imm:$Cond))>; - -class complex_select<SDPatternOperator opnode> - : PatFrag<(ops node:$lhs, node:$rhs), - (A64select_cc NZCV, node:$lhs, (opnode node:$rhs), (i32 imm:$Cond))>; - - -defm CSEL : A64I_condselSizes<0b0, 0b00, "csel", simple_select>; -defm CSINC : A64I_condselSizes<0b0, 0b01, "csinc", - complex_select<PatFrag<(ops node:$val), - (add node:$val, 1)>>>; -defm CSINV : A64I_condselSizes<0b1, 0b00, "csinv", complex_select<not>>; -defm CSNEG : A64I_condselSizes<0b1, 0b01, "csneg", complex_select<ineg>>; - -// Now the instruction aliases, which fit nicely into LLVM's model: - -def : InstAlias<"cset $Rd, $Cond", - (CSINCwwwc GPR32:$Rd, WZR, WZR, inv_cond_code_op:$Cond)>; -def : InstAlias<"cset $Rd, $Cond", - (CSINCxxxc GPR64:$Rd, XZR, XZR, inv_cond_code_op:$Cond)>; -def : InstAlias<"csetm $Rd, $Cond", - (CSINVwwwc GPR32:$Rd, WZR, WZR, inv_cond_code_op:$Cond)>; -def : InstAlias<"csetm $Rd, $Cond", - (CSINVxxxc GPR64:$Rd, XZR, XZR, inv_cond_code_op:$Cond)>; -def : InstAlias<"cinc $Rd, $Rn, $Cond", - (CSINCwwwc GPR32:$Rd, GPR32:$Rn, GPR32:$Rn, inv_cond_code_op:$Cond)>; -def : InstAlias<"cinc $Rd, $Rn, $Cond", - (CSINCxxxc GPR64:$Rd, GPR64:$Rn, GPR64:$Rn, inv_cond_code_op:$Cond)>; -def : InstAlias<"cinv $Rd, $Rn, $Cond", - (CSINVwwwc GPR32:$Rd, GPR32:$Rn, GPR32:$Rn, inv_cond_code_op:$Cond)>; -def : InstAlias<"cinv $Rd, $Rn, $Cond", - (CSINVxxxc GPR64:$Rd, GPR64:$Rn, GPR64:$Rn, inv_cond_code_op:$Cond)>; -def : InstAlias<"cneg $Rd, $Rn, $Cond", - (CSNEGwwwc GPR32:$Rd, GPR32:$Rn, GPR32:$Rn, inv_cond_code_op:$Cond)>; -def : InstAlias<"cneg $Rd, $Rn, $Cond", - (CSNEGxxxc GPR64:$Rd, GPR64:$Rn, GPR64:$Rn, inv_cond_code_op:$Cond)>; - -// Finally some helper patterns. - -// For CSET (a.k.a. zero-extension of icmp) -def : Pat<(A64select_cc NZCV, 0, 1, cond_code:$Cond), - (CSINCwwwc WZR, WZR, cond_code:$Cond)>; -def : Pat<(A64select_cc NZCV, 1, 0, inv_cond_code:$Cond), - (CSINCwwwc WZR, WZR, inv_cond_code:$Cond)>; - -def : Pat<(A64select_cc NZCV, 0, 1, cond_code:$Cond), - (CSINCxxxc XZR, XZR, cond_code:$Cond)>; -def : Pat<(A64select_cc NZCV, 1, 0, inv_cond_code:$Cond), - (CSINCxxxc XZR, XZR, inv_cond_code:$Cond)>; - -// For CSETM (a.k.a. 
sign-extension of icmp)
-def : Pat<(A64select_cc NZCV, 0, -1, cond_code:$Cond),
- (CSINVwwwc WZR, WZR, cond_code:$Cond)>;
-def : Pat<(A64select_cc NZCV, -1, 0, inv_cond_code:$Cond),
- (CSINVwwwc WZR, WZR, inv_cond_code:$Cond)>;
-
-def : Pat<(A64select_cc NZCV, 0, -1, cond_code:$Cond),
- (CSINVxxxc XZR, XZR, cond_code:$Cond)>;
-def : Pat<(A64select_cc NZCV, -1, 0, inv_cond_code:$Cond),
- (CSINVxxxc XZR, XZR, inv_cond_code:$Cond)>;
-
-// CINC, CINV and CNEG get dealt with automatically, which leaves the issue of
-// commutativity. The instructions are too complex for isCommutable to be used,
-// so we have to create the patterns manually:
-
-// No commutable pattern for CSEL since the commuted version is isomorphic.
-
-// CSINC
-def :Pat<(A64select_cc NZCV, (add i32:$Rm, 1), i32:$Rn, inv_cond_code:$Cond),
- (CSINCwwwc $Rn, $Rm, inv_cond_code:$Cond)>;
-def :Pat<(A64select_cc NZCV, (add i64:$Rm, 1), i64:$Rn, inv_cond_code:$Cond),
- (CSINCxxxc $Rn, $Rm, inv_cond_code:$Cond)>;
-
-// CSINV
-def :Pat<(A64select_cc NZCV, (not i32:$Rm), i32:$Rn, inv_cond_code:$Cond),
- (CSINVwwwc $Rn, $Rm, inv_cond_code:$Cond)>;
-def :Pat<(A64select_cc NZCV, (not i64:$Rm), i64:$Rn, inv_cond_code:$Cond),
- (CSINVxxxc $Rn, $Rm, inv_cond_code:$Cond)>;
-
-// CSNEG
-def :Pat<(A64select_cc NZCV, (ineg i32:$Rm), i32:$Rn, inv_cond_code:$Cond),
- (CSNEGwwwc $Rn, $Rm, inv_cond_code:$Cond)>;
-def :Pat<(A64select_cc NZCV, (ineg i64:$Rm), i64:$Rn, inv_cond_code:$Cond),
- (CSNEGxxxc $Rn, $Rm, inv_cond_code:$Cond)>;
-
-//===----------------------------------------------------------------------===//
-// Data Processing (1 source) instructions
-//===----------------------------------------------------------------------===//
-// Contains: RBIT, REV16, REV, REV32, CLZ, CLS.
-
-// We define a unary operator which always fails. We will use this to
-// define unary operators that cannot be matched.
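One more editorial aside on the conditional-select block above, before the single-source definitions below. The CSET/CSETM aliases and the hand-written commuted patterns all follow from the assumed architectural semantics sketched here (the helper names are invented): each instruction returns Rn when the condition holds and a transformed Rm otherwise.

#include <cassert>
#include <cstdint>

// Assumed semantics of the conditional-select family (32-bit forms).
static uint32_t csinc(bool cond, uint32_t rn, uint32_t rm) { return cond ? rn : rm + 1; }
static uint32_t csinv(bool cond, uint32_t rn, uint32_t rm) { return cond ? rn : ~rm; }
static uint32_t csneg(bool cond, uint32_t rn, uint32_t rm) { return cond ? rn : -rm; }

int main() {
  for (bool c : {false, true}) {
    // cset  Rd, cond  ==  csinc Rd, wzr, wzr, invert(cond)
    assert(csinc(!c, 0, 0) == (c ? 1u : 0u));
    // csetm Rd, cond  ==  csinv Rd, wzr, wzr, invert(cond)
    assert(csinv(!c, 0, 0) == (c ? 0xffffffffu : 0u));
    // cneg  Rd, Rn, cond  ==  csneg Rd, Rn, Rn, invert(cond)
    assert(csneg(!c, 5, 5) == (c ? (uint32_t)-5 : 5u));
  }
  return 0;
}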
- -class A64I_dp_1src_impl<bit sf, bits<6> opcode, string asmop, - list<dag> patterns, RegisterClass GPRrc, - InstrItinClass itin>: - A64I_dp_1src<sf, - 0, - 0b00000, - opcode, - !strconcat(asmop, "\t$Rd, $Rn"), - (outs GPRrc:$Rd), - (ins GPRrc:$Rn), - patterns, - itin>, - Sched<[WriteALU, ReadALU]>; - -multiclass A64I_dp_1src <bits<6> opcode, string asmop> { - let hasSideEffects = 0 in { - def ww : A64I_dp_1src_impl<0b0, opcode, asmop, [], GPR32, NoItinerary>; - def xx : A64I_dp_1src_impl<0b1, opcode, asmop, [], GPR64, NoItinerary>; - } -} - -defm RBIT : A64I_dp_1src<0b000000, "rbit">; -defm CLS : A64I_dp_1src<0b000101, "cls">; -defm CLZ : A64I_dp_1src<0b000100, "clz">; - -def : Pat<(ctlz i32:$Rn), (CLZww $Rn)>; -def : Pat<(ctlz i64:$Rn), (CLZxx $Rn)>; -def : Pat<(ctlz_zero_undef i32:$Rn), (CLZww $Rn)>; -def : Pat<(ctlz_zero_undef i64:$Rn), (CLZxx $Rn)>; - -def : Pat<(cttz i32:$Rn), (CLZww (RBITww $Rn))>; -def : Pat<(cttz i64:$Rn), (CLZxx (RBITxx $Rn))>; -def : Pat<(cttz_zero_undef i32:$Rn), (CLZww (RBITww $Rn))>; -def : Pat<(cttz_zero_undef i64:$Rn), (CLZxx (RBITxx $Rn))>; - - -def REVww : A64I_dp_1src_impl<0b0, 0b000010, "rev", - [(set i32:$Rd, (bswap i32:$Rn))], - GPR32, NoItinerary>; -def REVxx : A64I_dp_1src_impl<0b1, 0b000011, "rev", - [(set i64:$Rd, (bswap i64:$Rn))], - GPR64, NoItinerary>; -def REV32xx : A64I_dp_1src_impl<0b1, 0b000010, "rev32", - [(set i64:$Rd, (bswap (rotr i64:$Rn, (i64 32))))], - GPR64, NoItinerary>; -def REV16ww : A64I_dp_1src_impl<0b0, 0b000001, "rev16", - [(set i32:$Rd, (bswap (rotr i32:$Rn, (i64 16))))], - GPR32, - NoItinerary>; -def REV16xx : A64I_dp_1src_impl<0b1, 0b000001, "rev16", [], GPR64, NoItinerary>; - -//===----------------------------------------------------------------------===// -// Data Processing (2 sources) instructions -//===----------------------------------------------------------------------===// -// Contains: CRC32C?[BHWX], UDIV, SDIV, LSLV, LSRV, ASRV, RORV + aliases LSL, -// LSR, ASR, ROR - - -class dp_2src_impl<bit sf, bits<6> opcode, string asmop, list<dag> patterns, - RegisterClass GPRsp, - InstrItinClass itin>: - A64I_dp_2src<sf, - opcode, - 0, - !strconcat(asmop, "\t$Rd, $Rn, $Rm"), - (outs GPRsp:$Rd), - (ins GPRsp:$Rn, GPRsp:$Rm), - patterns, - itin>, - Sched<[WriteALU, ReadALU, ReadALU]>; - -multiclass dp_2src_crc<bit c, string asmop> { - def B_www : dp_2src_impl<0b0, {0, 1, 0, c, 0, 0}, - !strconcat(asmop, "b"), [], GPR32, NoItinerary>; - def H_www : dp_2src_impl<0b0, {0, 1, 0, c, 0, 1}, - !strconcat(asmop, "h"), [], GPR32, NoItinerary>; - def W_www : dp_2src_impl<0b0, {0, 1, 0, c, 1, 0}, - !strconcat(asmop, "w"), [], GPR32, NoItinerary>; - def X_wwx : A64I_dp_2src<0b1, {0, 1, 0, c, 1, 1}, 0b0, - !strconcat(asmop, "x\t$Rd, $Rn, $Rm"), - (outs GPR32:$Rd), (ins GPR32:$Rn, GPR64:$Rm), [], - NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; -} - -multiclass dp_2src_zext <bits<6> opcode, string asmop, SDPatternOperator op> { - def www : dp_2src_impl<0b0, - opcode, - asmop, - [(set i32:$Rd, - (op i32:$Rn, (i64 (zext i32:$Rm))))], - GPR32, - NoItinerary>; - def xxx : dp_2src_impl<0b1, - opcode, - asmop, - [(set i64:$Rd, (op i64:$Rn, i64:$Rm))], - GPR64, - NoItinerary>; -} - - -multiclass dp_2src <bits<6> opcode, string asmop, SDPatternOperator op> { - def www : dp_2src_impl<0b0, - opcode, - asmop, - [(set i32:$Rd, (op i32:$Rn, i32:$Rm))], - GPR32, - NoItinerary>; - def xxx : dp_2src_impl<0b1, - opcode, - asmop, - [(set i64:$Rd, (op i64:$Rn, i64:$Rm))], - GPR64, - NoItinerary>; -} - -// Here we define the data processing 2 source 
instructions. -defm CRC32 : dp_2src_crc<0b0, "crc32">; -defm CRC32C : dp_2src_crc<0b1, "crc32c">; - -let SchedRW = [WriteDiv, ReadDiv, ReadDiv] in { - defm UDIV : dp_2src<0b000010, "udiv", udiv>; - defm SDIV : dp_2src<0b000011, "sdiv", sdiv>; -} - -let SchedRW = [WriteALUs, ReadALU, ReadALU] in { - defm LSLV : dp_2src_zext<0b001000, "lsl", shl>; - defm LSRV : dp_2src_zext<0b001001, "lsr", srl>; - defm ASRV : dp_2src_zext<0b001010, "asr", sra>; - defm RORV : dp_2src_zext<0b001011, "ror", rotr>; -} - -// Extra patterns for an incoming 64-bit value for a 32-bit -// operation. Since the LLVM operations are undefined (as in C) if the -// RHS is out of range, it's perfectly permissible to discard the high -// bits of the GPR64. -def : Pat<(shl i32:$Rn, i64:$Rm), - (LSLVwww $Rn, (EXTRACT_SUBREG $Rm, sub_32))>; -def : Pat<(srl i32:$Rn, i64:$Rm), - (LSRVwww $Rn, (EXTRACT_SUBREG $Rm, sub_32))>; -def : Pat<(sra i32:$Rn, i64:$Rm), - (ASRVwww $Rn, (EXTRACT_SUBREG $Rm, sub_32))>; -def : Pat<(rotr i32:$Rn, i64:$Rm), - (RORVwww $Rn, (EXTRACT_SUBREG $Rm, sub_32))>; - -// Here we define the aliases for the data processing 2 source instructions. -def LSL_mnemonic : MnemonicAlias<"lslv", "lsl">; -def LSR_mnemonic : MnemonicAlias<"lsrv", "lsr">; -def ASR_menmonic : MnemonicAlias<"asrv", "asr">; -def ROR_menmonic : MnemonicAlias<"rorv", "ror">; - -//===----------------------------------------------------------------------===// -// Data Processing (3 sources) instructions -//===----------------------------------------------------------------------===// -// Contains: MADD, MSUB, SMADDL, SMSUBL, SMULH, UMADDL, UMSUBL, UMULH -// + aliases MUL, MNEG, SMULL, SMNEGL, UMULL, UMNEGL - -class A64I_dp3_4operand<bit sf, bits<6> opcode, RegisterClass AccReg, - ValueType AccTy, RegisterClass SrcReg, - string asmop, dag pattern> - : A64I_dp3<sf, opcode, - (outs AccReg:$Rd), (ins SrcReg:$Rn, SrcReg:$Rm, AccReg:$Ra), - !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Ra"), - [(set AccTy:$Rd, pattern)], NoItinerary>, - Sched<[WriteMAC, ReadMAC, ReadMAC, ReadMAC]> { - bits<5> Ra; - let Inst{14-10} = Ra; - - RegisterClass AccGPR = AccReg; - RegisterClass SrcGPR = SrcReg; -} - -def MADDwwww : A64I_dp3_4operand<0b0, 0b000000, GPR32, i32, GPR32, "madd", - (add i32:$Ra, (mul i32:$Rn, i32:$Rm))>; -def MADDxxxx : A64I_dp3_4operand<0b1, 0b000000, GPR64, i64, GPR64, "madd", - (add i64:$Ra, (mul i64:$Rn, i64:$Rm))>; - -def MSUBwwww : A64I_dp3_4operand<0b0, 0b000001, GPR32, i32, GPR32, "msub", - (sub i32:$Ra, (mul i32:$Rn, i32:$Rm))>; -def MSUBxxxx : A64I_dp3_4operand<0b1, 0b000001, GPR64, i64, GPR64, "msub", - (sub i64:$Ra, (mul i64:$Rn, i64:$Rm))>; - -def SMADDLxwwx : A64I_dp3_4operand<0b1, 0b000010, GPR64, i64, GPR32, "smaddl", - (add i64:$Ra, (mul (i64 (sext i32:$Rn)), (sext i32:$Rm)))>; -def SMSUBLxwwx : A64I_dp3_4operand<0b1, 0b000011, GPR64, i64, GPR32, "smsubl", - (sub i64:$Ra, (mul (i64 (sext i32:$Rn)), (sext i32:$Rm)))>; - -def UMADDLxwwx : A64I_dp3_4operand<0b1, 0b001010, GPR64, i64, GPR32, "umaddl", - (add i64:$Ra, (mul (i64 (zext i32:$Rn)), (zext i32:$Rm)))>; -def UMSUBLxwwx : A64I_dp3_4operand<0b1, 0b001011, GPR64, i64, GPR32, "umsubl", - (sub i64:$Ra, (mul (i64 (zext i32:$Rn)), (zext i32:$Rm)))>; - -let isCommutable = 1, PostEncoderMethod = "fixMulHigh" in { - def UMULHxxx : A64I_dp3<0b1, 0b001100, (outs GPR64:$Rd), - (ins GPR64:$Rn, GPR64:$Rm), - "umulh\t$Rd, $Rn, $Rm", - [(set i64:$Rd, (mulhu i64:$Rn, i64:$Rm))], - NoItinerary>, - Sched<[WriteMAC, ReadMAC, ReadMAC]>; - - def SMULHxxx : A64I_dp3<0b1, 0b000100, (outs GPR64:$Rd), - (ins 
GPR64:$Rn, GPR64:$Rm), - "smulh\t$Rd, $Rn, $Rm", - [(set i64:$Rd, (mulhs i64:$Rn, i64:$Rm))], - NoItinerary>, - Sched<[WriteMAC, ReadMAC, ReadMAC]>; -} - -multiclass A64I_dp3_3operand<string asmop, A64I_dp3_4operand INST, - Register ZR, dag pattern> { - def : InstAlias<asmop # " $Rd, $Rn, $Rm", - (INST INST.AccGPR:$Rd, INST.SrcGPR:$Rn, INST.SrcGPR:$Rm, ZR)>; - - def : Pat<pattern, (INST $Rn, $Rm, ZR)>; -} - -defm : A64I_dp3_3operand<"mul", MADDwwww, WZR, (mul i32:$Rn, i32:$Rm)>; -defm : A64I_dp3_3operand<"mul", MADDxxxx, XZR, (mul i64:$Rn, i64:$Rm)>; - -defm : A64I_dp3_3operand<"mneg", MSUBwwww, WZR, - (sub 0, (mul i32:$Rn, i32:$Rm))>; -defm : A64I_dp3_3operand<"mneg", MSUBxxxx, XZR, - (sub 0, (mul i64:$Rn, i64:$Rm))>; - -defm : A64I_dp3_3operand<"smull", SMADDLxwwx, XZR, - (mul (i64 (sext i32:$Rn)), (sext i32:$Rm))>; -defm : A64I_dp3_3operand<"smnegl", SMSUBLxwwx, XZR, - (sub 0, (mul (i64 (sext i32:$Rn)), (sext i32:$Rm)))>; - -defm : A64I_dp3_3operand<"umull", UMADDLxwwx, XZR, - (mul (i64 (zext i32:$Rn)), (zext i32:$Rm))>; -defm : A64I_dp3_3operand<"umnegl", UMSUBLxwwx, XZR, - (sub 0, (mul (i64 (zext i32:$Rn)), (zext i32:$Rm)))>; - - -//===----------------------------------------------------------------------===// -// Exception generation -//===----------------------------------------------------------------------===// -// Contains: SVC, HVC, SMC, BRK, HLT, DCPS1, DCPS2, DCPS3 - -def uimm16_asmoperand : AsmOperandClass { - let Name = "UImm16"; - let PredicateMethod = "isUImm<16>"; - let RenderMethod = "addImmOperands"; - let DiagnosticType = "UImm16"; -} - -def uimm16 : Operand<i32> { - let ParserMatchClass = uimm16_asmoperand; -} - -class A64I_exceptImpl<bits<3> opc, bits<2> ll, string asmop> - : A64I_exception<opc, 0b000, ll, (outs), (ins uimm16:$UImm16), - !strconcat(asmop, "\t$UImm16"), [], NoItinerary>, - Sched<[WriteBr]> { - let isBranch = 1; - let isTerminator = 1; -} - -def SVCi : A64I_exceptImpl<0b000, 0b01, "svc">; -def HVCi : A64I_exceptImpl<0b000, 0b10, "hvc">; -def SMCi : A64I_exceptImpl<0b000, 0b11, "smc">; -def BRKi : A64I_exceptImpl<0b001, 0b00, "brk">; -def HLTi : A64I_exceptImpl<0b010, 0b00, "hlt">; - -def DCPS1i : A64I_exceptImpl<0b101, 0b01, "dcps1">; -def DCPS2i : A64I_exceptImpl<0b101, 0b10, "dcps2">; -def DCPS3i : A64I_exceptImpl<0b101, 0b11, "dcps3">; - -// The immediate is optional for the DCPS instructions, defaulting to 0. 
-def : InstAlias<"dcps1", (DCPS1i 0)>; -def : InstAlias<"dcps2", (DCPS2i 0)>; -def : InstAlias<"dcps3", (DCPS3i 0)>; - -//===----------------------------------------------------------------------===// -// Extract (immediate) -//===----------------------------------------------------------------------===// -// Contains: EXTR + alias ROR - -def EXTRwwwi : A64I_extract<0b0, 0b000, 0b0, - (outs GPR32:$Rd), - (ins GPR32:$Rn, GPR32:$Rm, bitfield32_imm:$LSB), - "extr\t$Rd, $Rn, $Rm, $LSB", - [(set i32:$Rd, - (A64Extr i32:$Rn, i32:$Rm, imm:$LSB))], - NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; -def EXTRxxxi : A64I_extract<0b1, 0b000, 0b1, - (outs GPR64:$Rd), - (ins GPR64:$Rn, GPR64:$Rm, bitfield64_imm:$LSB), - "extr\t$Rd, $Rn, $Rm, $LSB", - [(set i64:$Rd, - (A64Extr i64:$Rn, i64:$Rm, imm:$LSB))], - NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; - -def : InstAlias<"ror $Rd, $Rs, $LSB", - (EXTRwwwi GPR32:$Rd, GPR32:$Rs, GPR32:$Rs, bitfield32_imm:$LSB)>; -def : InstAlias<"ror $Rd, $Rs, $LSB", - (EXTRxxxi GPR64:$Rd, GPR64:$Rs, GPR64:$Rs, bitfield64_imm:$LSB)>; - -def : Pat<(rotr i32:$Rn, bitfield32_imm:$LSB), - (EXTRwwwi $Rn, $Rn, bitfield32_imm:$LSB)>; -def : Pat<(rotr i64:$Rn, bitfield64_imm:$LSB), - (EXTRxxxi $Rn, $Rn, bitfield64_imm:$LSB)>; - -//===----------------------------------------------------------------------===// -// Floating-point compare instructions -//===----------------------------------------------------------------------===// -// Contains: FCMP, FCMPE - -def fpzero_asmoperand : AsmOperandClass { - let Name = "FPZero"; - let ParserMethod = "ParseFPImmOperand"; - let DiagnosticType = "FPZero"; -} - -def fpz32 : Operand<f32>, - ComplexPattern<f32, 1, "SelectFPZeroOperand", [fpimm]> { - let ParserMatchClass = fpzero_asmoperand; - let PrintMethod = "printFPZeroOperand"; - let DecoderMethod = "DecodeFPZeroOperand"; -} - -def fpz64 : Operand<f64>, - ComplexPattern<f64, 1, "SelectFPZeroOperand", [fpimm]> { - let ParserMatchClass = fpzero_asmoperand; - let PrintMethod = "printFPZeroOperand"; - let DecoderMethod = "DecodeFPZeroOperand"; -} - -def fpz64movi : Operand<i64>, - ComplexPattern<f64, 1, "SelectFPZeroOperand", [fpimm]> { - let ParserMatchClass = fpzero_asmoperand; - let PrintMethod = "printFPZeroOperand"; - let DecoderMethod = "DecodeFPZeroOperand"; -} - -multiclass A64I_fpcmpSignal<bits<2> type, bit imm, dag ins, dag pattern> { - def _quiet : A64I_fpcmp<0b0, 0b0, type, 0b00, {0b0, imm, 0b0, 0b0, 0b0}, - (outs), ins, "fcmp\t$Rn, $Rm", [pattern], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { - let Defs = [NZCV]; - } - - def _sig : A64I_fpcmp<0b0, 0b0, type, 0b00, {0b1, imm, 0b0, 0b0, 0b0}, - (outs), ins, "fcmpe\t$Rn, $Rm", [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { - let Defs = [NZCV]; - } -} - -defm FCMPss : A64I_fpcmpSignal<0b00, 0b0, (ins FPR32:$Rn, FPR32:$Rm), - (set NZCV, (A64cmp f32:$Rn, f32:$Rm))>; -defm FCMPdd : A64I_fpcmpSignal<0b01, 0b0, (ins FPR64:$Rn, FPR64:$Rm), - (set NZCV, (A64cmp f64:$Rn, f64:$Rm))>; - -// What would be Rm should be written as 0; note that even though it's called -// "$Rm" here to fit in with the InstrFormats, it's actually an immediate. 
-defm FCMPsi : A64I_fpcmpSignal<0b00, 0b1, (ins FPR32:$Rn, fpz32:$Rm), - (set NZCV, (A64cmp f32:$Rn, fpz32:$Rm))>; - -defm FCMPdi : A64I_fpcmpSignal<0b01, 0b1, (ins FPR64:$Rn, fpz64:$Rm), - (set NZCV, (A64cmp f64:$Rn, fpz64:$Rm))>; - - -//===----------------------------------------------------------------------===// -// Floating-point conditional compare instructions -//===----------------------------------------------------------------------===// -// Contains: FCCMP, FCCMPE - -class A64I_fpccmpImpl<bits<2> type, bit op, RegisterClass FPR, string asmop> - : A64I_fpccmp<0b0, 0b0, type, op, - (outs), - (ins FPR:$Rn, FPR:$Rm, uimm4:$NZCVImm, cond_code_op:$Cond), - !strconcat(asmop, "\t$Rn, $Rm, $NZCVImm, $Cond"), - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { - let Defs = [NZCV]; -} - -def FCCMPss : A64I_fpccmpImpl<0b00, 0b0, FPR32, "fccmp">; -def FCCMPEss : A64I_fpccmpImpl<0b00, 0b1, FPR32, "fccmpe">; -def FCCMPdd : A64I_fpccmpImpl<0b01, 0b0, FPR64, "fccmp">; -def FCCMPEdd : A64I_fpccmpImpl<0b01, 0b1, FPR64, "fccmpe">; - -//===----------------------------------------------------------------------===// -// Floating-point conditional select instructions -//===----------------------------------------------------------------------===// -// Contains: FCSEL - -let Uses = [NZCV] in { - def FCSELsssc : A64I_fpcondsel<0b0, 0b0, 0b00, (outs FPR32:$Rd), - (ins FPR32:$Rn, FPR32:$Rm, cond_code_op:$Cond), - "fcsel\t$Rd, $Rn, $Rm, $Cond", - [(set f32:$Rd, - (simple_select f32:$Rn, f32:$Rm))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - - def FCSELdddc : A64I_fpcondsel<0b0, 0b0, 0b01, (outs FPR64:$Rd), - (ins FPR64:$Rn, FPR64:$Rm, cond_code_op:$Cond), - "fcsel\t$Rd, $Rn, $Rm, $Cond", - [(set f64:$Rd, - (simple_select f64:$Rn, f64:$Rm))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; -} - -//===----------------------------------------------------------------------===// -// Floating-point data-processing (1 source) -//===----------------------------------------------------------------------===// -// Contains: FMOV, FABS, FNEG, FSQRT, FCVT, FRINT[NPMZAXI]. - -def FPNoUnop : PatFrag<(ops node:$val), (fneg node:$val), - [{ (void)N; return false; }]>; - -// First we do the fairly trivial bunch with uniform "OP s, s" and "OP d, d" -// syntax. Default to no pattern because most are odd enough not to have one. 
-multiclass A64I_fpdp1sizes<bits<6> opcode, string asmstr, - SDPatternOperator opnode = FPNoUnop> { - def ss : A64I_fpdp1<0b0, 0b0, 0b00, opcode, (outs FPR32:$Rd), (ins FPR32:$Rn), - !strconcat(asmstr, "\t$Rd, $Rn"), - [(set f32:$Rd, (opnode f32:$Rn))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def dd : A64I_fpdp1<0b0, 0b0, 0b01, opcode, (outs FPR64:$Rd), (ins FPR64:$Rn), - !strconcat(asmstr, "\t$Rd, $Rn"), - [(set f64:$Rd, (opnode f64:$Rn))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; -} - -defm FMOV : A64I_fpdp1sizes<0b000000, "fmov">; -defm FABS : A64I_fpdp1sizes<0b000001, "fabs", fabs>; -defm FNEG : A64I_fpdp1sizes<0b000010, "fneg", fneg>; -let SchedRW = [WriteFPSqrt, ReadFPSqrt] in { - defm FSQRT : A64I_fpdp1sizes<0b000011, "fsqrt", fsqrt>; -} - -defm FRINTN : A64I_fpdp1sizes<0b001000, "frintn">; -defm FRINTP : A64I_fpdp1sizes<0b001001, "frintp", fceil>; -defm FRINTM : A64I_fpdp1sizes<0b001010, "frintm", ffloor>; -defm FRINTZ : A64I_fpdp1sizes<0b001011, "frintz", ftrunc>; -defm FRINTA : A64I_fpdp1sizes<0b001100, "frinta">; -defm FRINTX : A64I_fpdp1sizes<0b001110, "frintx", frint>; -defm FRINTI : A64I_fpdp1sizes<0b001111, "frinti", fnearbyint>; - -// The FCVT instrucitons have different source and destination register-types, -// but the fields are uniform everywhere a D-register (say) crops up. Package -// this information in a Record. -class FCVTRegType<RegisterClass rc, bits<2> fld, ValueType vt> { - RegisterClass Class = rc; - ValueType VT = vt; - bit t1 = fld{1}; - bit t0 = fld{0}; -} - -def FCVT16 : FCVTRegType<FPR16, 0b11, f16>; -def FCVT32 : FCVTRegType<FPR32, 0b00, f32>; -def FCVT64 : FCVTRegType<FPR64, 0b01, f64>; - -class A64I_fpdp1_fcvt<FCVTRegType DestReg, FCVTRegType SrcReg, SDNode opnode> - : A64I_fpdp1<0b0, 0b0, {SrcReg.t1, SrcReg.t0}, - {0,0,0,1, DestReg.t1, DestReg.t0}, - (outs DestReg.Class:$Rd), (ins SrcReg.Class:$Rn), - "fcvt\t$Rd, $Rn", - [(set DestReg.VT:$Rd, (opnode SrcReg.VT:$Rn))], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - -def FCVTds : A64I_fpdp1_fcvt<FCVT64, FCVT32, fextend>; -def FCVThs : A64I_fpdp1_fcvt<FCVT16, FCVT32, fround>; -def FCVTsd : A64I_fpdp1_fcvt<FCVT32, FCVT64, fround>; -def FCVThd : A64I_fpdp1_fcvt<FCVT16, FCVT64, fround>; -def FCVTsh : A64I_fpdp1_fcvt<FCVT32, FCVT16, fextend>; -def FCVTdh : A64I_fpdp1_fcvt<FCVT64, FCVT16, fextend>; - - -//===----------------------------------------------------------------------===// -// Floating-point data-processing (2 sources) instructions -//===----------------------------------------------------------------------===// -// Contains: FMUL, FDIV, FADD, FSUB, FMAX, FMIN, FMAXNM, FMINNM, FNMUL - -def FPNoBinop : PatFrag<(ops node:$lhs, node:$rhs), (fadd node:$lhs, node:$rhs), - [{ (void)N; return false; }]>; - -multiclass A64I_fpdp2sizes<bits<4> opcode, string asmstr, - SDPatternOperator opnode> { - def sss : A64I_fpdp2<0b0, 0b0, 0b00, opcode, - (outs FPR32:$Rd), - (ins FPR32:$Rn, FPR32:$Rm), - !strconcat(asmstr, "\t$Rd, $Rn, $Rm"), - [(set f32:$Rd, (opnode f32:$Rn, f32:$Rm))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def ddd : A64I_fpdp2<0b0, 0b0, 0b01, opcode, - (outs FPR64:$Rd), - (ins FPR64:$Rn, FPR64:$Rm), - !strconcat(asmstr, "\t$Rd, $Rn, $Rm"), - [(set f64:$Rd, (opnode f64:$Rn, f64:$Rm))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; -} - -let isCommutable = 1 in { - let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in { - defm FMUL : A64I_fpdp2sizes<0b0000, "fmul", fmul>; - } - defm FADD : A64I_fpdp2sizes<0b0010, "fadd", fadd>; - - // No 
patterns for these. - defm FMAX : A64I_fpdp2sizes<0b0100, "fmax", FPNoBinop>; - defm FMIN : A64I_fpdp2sizes<0b0101, "fmin", FPNoBinop>; - defm FMAXNM : A64I_fpdp2sizes<0b0110, "fmaxnm", FPNoBinop>; - defm FMINNM : A64I_fpdp2sizes<0b0111, "fminnm", FPNoBinop>; - - let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in { - defm FNMUL : A64I_fpdp2sizes<0b1000, "fnmul", - PatFrag<(ops node:$lhs, node:$rhs), - (fneg (fmul node:$lhs, node:$rhs))> >; - } -} - -let SchedRW = [WriteFPDiv, ReadFPDiv, ReadFPDiv] in { - defm FDIV : A64I_fpdp2sizes<0b0001, "fdiv", fdiv>; -} -defm FSUB : A64I_fpdp2sizes<0b0011, "fsub", fsub>; - -//===----------------------------------------------------------------------===// -// Floating-point data-processing (3 sources) instructions -//===----------------------------------------------------------------------===// -// Contains: FMADD, FMSUB, FNMADD, FNMSUB - -def fmsub : PatFrag<(ops node:$Rn, node:$Rm, node:$Ra), - (fma (fneg node:$Rn), node:$Rm, node:$Ra)>; -def fnmsub : PatFrag<(ops node:$Rn, node:$Rm, node:$Ra), - (fma node:$Rn, node:$Rm, (fneg node:$Ra))>; -def fnmadd : PatFrag<(ops node:$Rn, node:$Rm, node:$Ra), - (fma (fneg node:$Rn), node:$Rm, (fneg node:$Ra))>; - -class A64I_fpdp3Impl<string asmop, RegisterClass FPR, ValueType VT, - bits<2> type, bit o1, bit o0, SDPatternOperator fmakind> - : A64I_fpdp3<0b0, 0b0, type, o1, o0, (outs FPR:$Rd), - (ins FPR:$Rn, FPR:$Rm, FPR:$Ra), - !strconcat(asmop,"\t$Rd, $Rn, $Rm, $Ra"), - [(set VT:$Rd, (fmakind VT:$Rn, VT:$Rm, VT:$Ra))], - NoItinerary>, - Sched<[WriteFPMAC, ReadFPMAC, ReadFPMAC, ReadFPMAC]>; - -def FMADDssss : A64I_fpdp3Impl<"fmadd", FPR32, f32, 0b00, 0b0, 0b0, fma>; -def FMSUBssss : A64I_fpdp3Impl<"fmsub", FPR32, f32, 0b00, 0b0, 0b1, fmsub>; -def FNMADDssss : A64I_fpdp3Impl<"fnmadd", FPR32, f32, 0b00, 0b1, 0b0, fnmadd>; -def FNMSUBssss : A64I_fpdp3Impl<"fnmsub", FPR32, f32, 0b00, 0b1, 0b1, fnmsub>; - -def FMADDdddd : A64I_fpdp3Impl<"fmadd", FPR64, f64, 0b01, 0b0, 0b0, fma>; -def FMSUBdddd : A64I_fpdp3Impl<"fmsub", FPR64, f64, 0b01, 0b0, 0b1, fmsub>; -def FNMADDdddd : A64I_fpdp3Impl<"fnmadd", FPR64, f64, 0b01, 0b1, 0b0, fnmadd>; -def FNMSUBdddd : A64I_fpdp3Impl<"fnmsub", FPR64, f64, 0b01, 0b1, 0b1, fnmsub>; - -// Extra patterns for when we're allowed to optimise separate multiplication and -// addition. 
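Spelled out as an editorial note (plain arithmetic, not taken from the file): the PatFrags above mean fmsub computes Ra - Rn*Rm, fnmsub computes Rn*Rm - Ra, and fnmadd computes -(Rn*Rm) - Ra, so the UseFusedMAC patterns that follow only need to recognise those shapes built from separate fmul/fadd/fsub nodes.

#include <cassert>
#include <cmath>

int main() {
  double rn = 3.0, rm = 4.0, ra = 5.0; // small values keep every identity exact
  assert(std::fma(-rn, rm, ra) == ra - rn * rm);     // fmsub
  assert(std::fma(rn, rm, -ra) == rn * rm - ra);     // fnmsub
  assert(std::fma(-rn, rm, -ra) == -(rn * rm) - ra); // fnmadd
  return 0;
}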
-let Predicates = [HasFPARMv8, UseFusedMAC] in {
-def : Pat<(f32 (fadd FPR32:$Ra, (f32 (fmul_su FPR32:$Rn, FPR32:$Rm)))),
- (FMADDssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
-def : Pat<(f32 (fsub FPR32:$Ra, (f32 (fmul_su FPR32:$Rn, FPR32:$Rm)))),
- (FMSUBssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
-def : Pat<(f32 (fsub (f32 (fneg FPR32:$Ra)), (f32 (fmul_su FPR32:$Rn, FPR32:$Rm)))),
- (FNMADDssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
-def : Pat<(f32 (fsub (f32 (fmul_su FPR32:$Rn, FPR32:$Rm)), FPR32:$Ra)),
- (FNMSUBssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
-
-def : Pat<(f64 (fadd FPR64:$Ra, (f64 (fmul_su FPR64:$Rn, FPR64:$Rm)))),
- (FMADDdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
-def : Pat<(f64 (fsub FPR64:$Ra, (f64 (fmul_su FPR64:$Rn, FPR64:$Rm)))),
- (FMSUBdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
-def : Pat<(f64 (fsub (f64 (fneg FPR64:$Ra)), (f64 (fmul_su FPR64:$Rn, FPR64:$Rm)))),
- (FNMADDdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
-def : Pat<(f64 (fsub (f64 (fmul_su FPR64:$Rn, FPR64:$Rm)), FPR64:$Ra)),
- (FNMSUBdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
-}
-
-
-//===----------------------------------------------------------------------===//
-// Floating-point <-> fixed-point conversion instructions
-//===----------------------------------------------------------------------===//
-// Contains: FCVTZS, FCVTZU, SCVTF, UCVTF
-
-// #1-#32 allowed, encoded as "64 - <specified imm>"
-def fixedpos_asmoperand_i32 : AsmOperandClass {
- let Name = "CVTFixedPos32";
- let RenderMethod = "addCVTFixedPosOperands";
- let PredicateMethod = "isCVTFixedPos<32>";
- let DiagnosticType = "CVTFixedPos32";
-}
-
-// Also encoded as "64 - <specified imm>" but #1-#64 allowed.
-def fixedpos_asmoperand_i64 : AsmOperandClass {
- let Name = "CVTFixedPos64";
- let RenderMethod = "addCVTFixedPosOperands";
- let PredicateMethod = "isCVTFixedPos<64>";
- let DiagnosticType = "CVTFixedPos64";
-}
-
-// We need the cartesian product of f32/f64 i32/i64 operands for
-// conversions:
-// + Selection needs to use operands of correct floating type
-// + Assembly parsing and decoding depend on integer width
-class cvtfix_i32_op<ValueType FloatVT>
- : Operand<FloatVT>,
- ComplexPattern<FloatVT, 1, "SelectCVTFixedPosOperand<32>", [fpimm]> {
- let ParserMatchClass = fixedpos_asmoperand_i32;
- let DecoderMethod = "DecodeCVT32FixedPosOperand";
- let PrintMethod = "printCVTFixedPosOperand";
-}
-
-class cvtfix_i64_op<ValueType FloatVT>
- : Operand<FloatVT>,
- ComplexPattern<FloatVT, 1, "SelectCVTFixedPosOperand<64>", [fpimm]> {
- let ParserMatchClass = fixedpos_asmoperand_i64;
- let PrintMethod = "printCVTFixedPosOperand";
-}
-
-// Because of the proliferation of weird operands, it's not really
-// worth going for a multiclass here. Oh well.
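A worked example of the scale operand described above, as an editorial sketch (the helper name fcvtzs_w_s is invented; the 64-minus-immediate encoding comes from the comments above): a conversion with #fbits behaves like the (fp_to_sint (fmul $Rn, $Scale)) pattern used by the class below, with the scale equal to 2^fbits.

#include <cassert>
#include <cstdint>

// Assumed behaviour of "fcvtzs Wd, Sn, #fbits" for small fbits: scale by
// 2^fbits, then truncate toward zero.
static int32_t fcvtzs_w_s(float x, unsigned fbits) {
  return (int32_t)(x * (float)(1u << fbits));
}

int main() {
  assert(fcvtzs_w_s(1.5f, 8) == 384);   // 1.5 * 256
  assert(fcvtzs_w_s(-2.25f, 4) == -36); // -2.25 * 16
  assert(64 - 8 == 56);                 // #8 is stored in the scale field as 56
  return 0;
}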
- -class A64I_fptofix<bit sf, bits<2> type, bits<3> opcode, - RegisterClass GPR, RegisterClass FPR, - ValueType DstTy, ValueType SrcTy, - Operand scale_op, string asmop, SDNode cvtop> - : A64I_fpfixed<sf, 0b0, type, 0b11, opcode, - (outs GPR:$Rd), (ins FPR:$Rn, scale_op:$Scale), - !strconcat(asmop, "\t$Rd, $Rn, $Scale"), - [(set DstTy:$Rd, (cvtop (fmul SrcTy:$Rn, scale_op:$Scale)))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - -def FCVTZSwsi : A64I_fptofix<0b0, 0b00, 0b000, GPR32, FPR32, i32, f32, - cvtfix_i32_op<f32>, "fcvtzs", fp_to_sint>; -def FCVTZSxsi : A64I_fptofix<0b1, 0b00, 0b000, GPR64, FPR32, i64, f32, - cvtfix_i64_op<f32>, "fcvtzs", fp_to_sint>; -def FCVTZUwsi : A64I_fptofix<0b0, 0b00, 0b001, GPR32, FPR32, i32, f32, - cvtfix_i32_op<f32>, "fcvtzu", fp_to_uint>; -def FCVTZUxsi : A64I_fptofix<0b1, 0b00, 0b001, GPR64, FPR32, i64, f32, - cvtfix_i64_op<f32>, "fcvtzu", fp_to_uint>; - -def FCVTZSwdi : A64I_fptofix<0b0, 0b01, 0b000, GPR32, FPR64, i32, f64, - cvtfix_i32_op<f64>, "fcvtzs", fp_to_sint>; -def FCVTZSxdi : A64I_fptofix<0b1, 0b01, 0b000, GPR64, FPR64, i64, f64, - cvtfix_i64_op<f64>, "fcvtzs", fp_to_sint>; -def FCVTZUwdi : A64I_fptofix<0b0, 0b01, 0b001, GPR32, FPR64, i32, f64, - cvtfix_i32_op<f64>, "fcvtzu", fp_to_uint>; -def FCVTZUxdi : A64I_fptofix<0b1, 0b01, 0b001, GPR64, FPR64, i64, f64, - cvtfix_i64_op<f64>, "fcvtzu", fp_to_uint>; - - -class A64I_fixtofp<bit sf, bits<2> type, bits<3> opcode, - RegisterClass FPR, RegisterClass GPR, - ValueType DstTy, ValueType SrcTy, - Operand scale_op, string asmop, SDNode cvtop> - : A64I_fpfixed<sf, 0b0, type, 0b00, opcode, - (outs FPR:$Rd), (ins GPR:$Rn, scale_op:$Scale), - !strconcat(asmop, "\t$Rd, $Rn, $Scale"), - [(set DstTy:$Rd, (fdiv (cvtop SrcTy:$Rn), scale_op:$Scale))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - -def SCVTFswi : A64I_fixtofp<0b0, 0b00, 0b010, FPR32, GPR32, f32, i32, - cvtfix_i32_op<f32>, "scvtf", sint_to_fp>; -def SCVTFsxi : A64I_fixtofp<0b1, 0b00, 0b010, FPR32, GPR64, f32, i64, - cvtfix_i64_op<f32>, "scvtf", sint_to_fp>; -def UCVTFswi : A64I_fixtofp<0b0, 0b00, 0b011, FPR32, GPR32, f32, i32, - cvtfix_i32_op<f32>, "ucvtf", uint_to_fp>; -def UCVTFsxi : A64I_fixtofp<0b1, 0b00, 0b011, FPR32, GPR64, f32, i64, - cvtfix_i64_op<f32>, "ucvtf", uint_to_fp>; -def SCVTFdwi : A64I_fixtofp<0b0, 0b01, 0b010, FPR64, GPR32, f64, i32, - cvtfix_i32_op<f64>, "scvtf", sint_to_fp>; -def SCVTFdxi : A64I_fixtofp<0b1, 0b01, 0b010, FPR64, GPR64, f64, i64, - cvtfix_i64_op<f64>, "scvtf", sint_to_fp>; -def UCVTFdwi : A64I_fixtofp<0b0, 0b01, 0b011, FPR64, GPR32, f64, i32, - cvtfix_i32_op<f64>, "ucvtf", uint_to_fp>; -def UCVTFdxi : A64I_fixtofp<0b1, 0b01, 0b011, FPR64, GPR64, f64, i64, - cvtfix_i64_op<f64>, "ucvtf", uint_to_fp>; - -//===----------------------------------------------------------------------===// -// Floating-point <-> integer conversion instructions -//===----------------------------------------------------------------------===// -// Contains: FCVTZS, FCVTZU, SCVTF, UCVTF - -class A64I_fpintI<bit sf, bits<2> type, bits<2> rmode, bits<3> opcode, - RegisterClass DestPR, RegisterClass SrcPR, string asmop> - : A64I_fpint<sf, 0b0, type, rmode, opcode, (outs DestPR:$Rd), (ins SrcPR:$Rn), - !strconcat(asmop, "\t$Rd, $Rn"), [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - -multiclass A64I_fptointRM<bits<2> rmode, bit o2, string asmop> { - def Sws : A64I_fpintI<0b0, 0b00, rmode, {o2, 0, 0}, - GPR32, FPR32, asmop # "s">; - def Sxs : A64I_fpintI<0b1, 0b00, rmode, {o2, 0, 0}, - GPR64, FPR32, asmop # "s">; - def Uws : 
A64I_fpintI<0b0, 0b00, rmode, {o2, 0, 1}, - GPR32, FPR32, asmop # "u">; - def Uxs : A64I_fpintI<0b1, 0b00, rmode, {o2, 0, 1}, - GPR64, FPR32, asmop # "u">; - - def Swd : A64I_fpintI<0b0, 0b01, rmode, {o2, 0, 0}, - GPR32, FPR64, asmop # "s">; - def Sxd : A64I_fpintI<0b1, 0b01, rmode, {o2, 0, 0}, - GPR64, FPR64, asmop # "s">; - def Uwd : A64I_fpintI<0b0, 0b01, rmode, {o2, 0, 1}, - GPR32, FPR64, asmop # "u">; - def Uxd : A64I_fpintI<0b1, 0b01, rmode, {o2, 0, 1}, - GPR64, FPR64, asmop # "u">; -} - -defm FCVTN : A64I_fptointRM<0b00, 0b0, "fcvtn">; -defm FCVTP : A64I_fptointRM<0b01, 0b0, "fcvtp">; -defm FCVTM : A64I_fptointRM<0b10, 0b0, "fcvtm">; -defm FCVTZ : A64I_fptointRM<0b11, 0b0, "fcvtz">; -defm FCVTA : A64I_fptointRM<0b00, 0b1, "fcvta">; - -let Predicates = [HasFPARMv8] in { -def : Pat<(i32 (fp_to_sint f32:$Rn)), (FCVTZSws $Rn)>; -def : Pat<(i64 (fp_to_sint f32:$Rn)), (FCVTZSxs $Rn)>; -def : Pat<(i32 (fp_to_uint f32:$Rn)), (FCVTZUws $Rn)>; -def : Pat<(i64 (fp_to_uint f32:$Rn)), (FCVTZUxs $Rn)>; -def : Pat<(i32 (fp_to_sint f64:$Rn)), (FCVTZSwd $Rn)>; -def : Pat<(i64 (fp_to_sint f64:$Rn)), (FCVTZSxd $Rn)>; -def : Pat<(i32 (fp_to_uint f64:$Rn)), (FCVTZUwd $Rn)>; -def : Pat<(i64 (fp_to_uint f64:$Rn)), (FCVTZUxd $Rn)>; -} - -multiclass A64I_inttofp<bit o0, string asmop> { - def CVTFsw : A64I_fpintI<0b0, 0b00, 0b00, {0, 1, o0}, FPR32, GPR32, asmop>; - def CVTFsx : A64I_fpintI<0b1, 0b00, 0b00, {0, 1, o0}, FPR32, GPR64, asmop>; - def CVTFdw : A64I_fpintI<0b0, 0b01, 0b00, {0, 1, o0}, FPR64, GPR32, asmop>; - def CVTFdx : A64I_fpintI<0b1, 0b01, 0b00, {0, 1, o0}, FPR64, GPR64, asmop>; -} - -defm S : A64I_inttofp<0b0, "scvtf">; -defm U : A64I_inttofp<0b1, "ucvtf">; - -let Predicates = [HasFPARMv8] in { -def : Pat<(f32 (sint_to_fp i32:$Rn)), (SCVTFsw $Rn)>; -def : Pat<(f32 (sint_to_fp i64:$Rn)), (SCVTFsx $Rn)>; -def : Pat<(f64 (sint_to_fp i32:$Rn)), (SCVTFdw $Rn)>; -def : Pat<(f64 (sint_to_fp i64:$Rn)), (SCVTFdx $Rn)>; -def : Pat<(f32 (uint_to_fp i32:$Rn)), (UCVTFsw $Rn)>; -def : Pat<(f32 (uint_to_fp i64:$Rn)), (UCVTFsx $Rn)>; -def : Pat<(f64 (uint_to_fp i32:$Rn)), (UCVTFdw $Rn)>; -def : Pat<(f64 (uint_to_fp i64:$Rn)), (UCVTFdx $Rn)>; -} - -def FMOVws : A64I_fpintI<0b0, 0b00, 0b00, 0b110, GPR32, FPR32, "fmov">; -def FMOVsw : A64I_fpintI<0b0, 0b00, 0b00, 0b111, FPR32, GPR32, "fmov">; -def FMOVxd : A64I_fpintI<0b1, 0b01, 0b00, 0b110, GPR64, FPR64, "fmov">; -def FMOVdx : A64I_fpintI<0b1, 0b01, 0b00, 0b111, FPR64, GPR64, "fmov">; - -let Predicates = [HasFPARMv8] in { -def : Pat<(i32 (bitconvert f32:$Rn)), (FMOVws $Rn)>; -def : Pat<(f32 (bitconvert i32:$Rn)), (FMOVsw $Rn)>; -def : Pat<(i64 (bitconvert f64:$Rn)), (FMOVxd $Rn)>; -def : Pat<(f64 (bitconvert i64:$Rn)), (FMOVdx $Rn)>; -} - -def lane1_asmoperand : AsmOperandClass { - let Name = "Lane1"; - let RenderMethod = "addImmOperands"; - let DiagnosticType = "Lane1"; -} - -def lane1 : Operand<i32> { - let ParserMatchClass = lane1_asmoperand; - let PrintMethod = "printBareImmOperand"; -} - -let DecoderMethod = "DecodeFMOVLaneInstruction" in { - def FMOVxv : A64I_fpint<0b1, 0b0, 0b10, 0b01, 0b110, - (outs GPR64:$Rd), (ins VPR128:$Rn, lane1:$Lane), - "fmov\t$Rd, $Rn.d[$Lane]", [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def FMOVvx : A64I_fpint<0b1, 0b0, 0b10, 0b01, 0b111, - (outs VPR128:$Rd), (ins GPR64:$Rn, lane1:$Lane), - "fmov\t$Rd.d[$Lane], $Rn", [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; -} - -let Predicates = [HasFPARMv8] in { -def : InstAlias<"fmov $Rd, $Rn.2d[$Lane]", - (FMOVxv GPR64:$Rd, VPR128:$Rn, lane1:$Lane), 0b0>; - -def : 
InstAlias<"fmov $Rd.2d[$Lane], $Rn", - (FMOVvx VPR128:$Rd, GPR64:$Rn, lane1:$Lane), 0b0>; -} - -//===----------------------------------------------------------------------===// -// Floating-point immediate instructions -//===----------------------------------------------------------------------===// -// Contains: FMOV - -def fpimm_asmoperand : AsmOperandClass { - let Name = "FMOVImm"; - let ParserMethod = "ParseFPImmOperand"; - let DiagnosticType = "FPImm"; -} - -// The MCOperand for these instructions are the encoded 8-bit values. -def SDXF_fpimm : SDNodeXForm<fpimm, [{ - uint32_t Imm8; - A64Imms::isFPImm(N->getValueAPF(), Imm8); - return CurDAG->getTargetConstant(Imm8, MVT::i32); -}]>; - -class fmov_operand<ValueType FT> - : Operand<i32>, - PatLeaf<(FT fpimm), [{ return A64Imms::isFPImm(N->getValueAPF()); }], - SDXF_fpimm> { - let PrintMethod = "printFPImmOperand"; - let ParserMatchClass = fpimm_asmoperand; -} - -def fmov32_operand : fmov_operand<f32>; -def fmov64_operand : fmov_operand<f64>; - -class A64I_fpimm_impl<bits<2> type, RegisterClass Reg, ValueType VT, - Operand fmov_operand> - : A64I_fpimm<0b0, 0b0, type, 0b00000, - (outs Reg:$Rd), - (ins fmov_operand:$Imm8), - "fmov\t$Rd, $Imm8", - [(set VT:$Rd, fmov_operand:$Imm8)], - NoItinerary>, - Sched<[WriteFPALU]>; - -def FMOVsi : A64I_fpimm_impl<0b00, FPR32, f32, fmov32_operand>; -def FMOVdi : A64I_fpimm_impl<0b01, FPR64, f64, fmov64_operand>; - -//===----------------------------------------------------------------------===// -// Load-register (literal) instructions -//===----------------------------------------------------------------------===// -// Contains: LDR, LDRSW, PRFM - -def ldrlit_label_asmoperand : AsmOperandClass { - let Name = "LoadLitLabel"; - let RenderMethod = "addLabelOperands<19, 4>"; - let DiagnosticType = "Label"; -} - -def ldrlit_label : Operand<i64> { - let EncoderMethod = "getLoadLitLabelOpValue"; - - // This label is a 19-bit offset from PC, scaled by the instruction-width: 4. - let PrintMethod = "printLabelOperand<19, 4>"; - let ParserMatchClass = ldrlit_label_asmoperand; - let OperandType = "OPERAND_PCREL"; -} - -// Various instructions take an immediate value (which can always be used), -// where some numbers have a symbolic name to make things easier. These operands -// and the associated functions abstract away the differences. 
-multiclass namedimm<string prefix, string mapper> { - def _asmoperand : AsmOperandClass { - let Name = "NamedImm" # prefix; - let PredicateMethod = "isUImm"; - let RenderMethod = "addImmOperands"; - let ParserMethod = "ParseNamedImmOperand<" # mapper # ">"; - let DiagnosticType = "NamedImm_" # prefix; - } - - def _op : Operand<i32> { - let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_asmoperand"); - let PrintMethod = "printNamedImmOperand<" # mapper # ">"; - let DecoderMethod = "DecodeNamedImmOperand<" # mapper # ">"; - } -} - -defm prefetch : namedimm<"prefetch", "A64PRFM::PRFMMapper">; - -class A64I_LDRlitSimple<bits<2> opc, bit v, RegisterClass OutReg, - list<dag> patterns = []> - : A64I_LDRlit<opc, v, (outs OutReg:$Rt), (ins ldrlit_label:$Imm19), - "ldr\t$Rt, $Imm19", patterns, NoItinerary>, - Sched<[WriteLd]>; - -let mayLoad = 1 in { - def LDRw_lit : A64I_LDRlitSimple<0b00, 0b0, GPR32>; - def LDRx_lit : A64I_LDRlitSimple<0b01, 0b0, GPR64>; -} - -let Predicates = [HasFPARMv8] in { -def LDRs_lit : A64I_LDRlitSimple<0b00, 0b1, FPR32>; -def LDRd_lit : A64I_LDRlitSimple<0b01, 0b1, FPR64>; -} - -let mayLoad = 1 in { - let Predicates = [HasFPARMv8] in { - def LDRq_lit : A64I_LDRlitSimple<0b10, 0b1, FPR128>; - } - - def LDRSWx_lit : A64I_LDRlit<0b10, 0b0, - (outs GPR64:$Rt), - (ins ldrlit_label:$Imm19), - "ldrsw\t$Rt, $Imm19", - [], NoItinerary>, - Sched<[WriteLd]>; - - def PRFM_lit : A64I_LDRlit<0b11, 0b0, - (outs), (ins prefetch_op:$Rt, ldrlit_label:$Imm19), - "prfm\t$Rt, $Imm19", - [], NoItinerary>, - Sched<[WriteLd, ReadLd]>; -} - -//===----------------------------------------------------------------------===// -// Load-store exclusive instructions -//===----------------------------------------------------------------------===// -// Contains: STXRB, STXRH, STXR, LDXRB, LDXRH, LDXR. STXP, LDXP, STLXRB, -// STLXRH, STLXR, LDAXRB, LDAXRH, LDAXR, STLXP, LDAXP, STLRB, -// STLRH, STLR, LDARB, LDARH, LDAR - -// Since these instructions have the undefined register bits set to 1 in -// their canonical form, we need a post encoder method to set those bits -// to 1 when encoding these instructions. We do this using the -// fixLoadStoreExclusive function. This function has template parameters: -// -// fixLoadStoreExclusive<int hasRs, int hasRt2> -// -// hasRs indicates that the instruction uses the Rs field, so we won't set -// it to 1 (and the same for Rt2). We don't need template parameters for -// the other register fiels since Rt and Rn are always used. - -// This operand parses a GPR64xsp register, followed by an optional immediate -// #0. 
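A hedged sketch of what a fixLoadStoreExclusive-style post-encoder presumably does, before the operand and instruction definitions that follow. The Rs and Rt2 field positions (bits [20:16] and [14:10]) are my assumption about the A64 load/store-exclusive layout rather than something stated in this file, and the helper name is invented; the point is simply "force unused register fields to all-ones".

#include <cassert>
#include <cstdint>

// Illustrative fixup: when an exclusive instruction does not use Rs or Rt2,
// the corresponding field must read as 0b11111 in the canonical encoding.
static uint32_t fixExclusiveEncoding(uint32_t encoding, bool hasRs, bool hasRt2) {
  if (!hasRs)
    encoding |= 0x1Fu << 16; // assumed Rs field, bits [20:16]
  if (!hasRt2)
    encoding |= 0x1Fu << 10; // assumed Rt2 field, bits [14:10]
  return encoding;
}

int main() {
  // An LDXR-style instruction uses neither Rs nor Rt2.
  uint32_t enc = fixExclusiveEncoding(0, /*hasRs=*/false, /*hasRt2=*/false);
  assert(((enc >> 16) & 0x1F) == 0x1F && ((enc >> 10) & 0x1F) == 0x1F);
  return 0;
}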
-def GPR64xsp0_asmoperand : AsmOperandClass { - let Name = "GPR64xsp0"; - let PredicateMethod = "isWrappedReg"; - let RenderMethod = "addRegOperands"; - let ParserMethod = "ParseLSXAddressOperand"; - // Diagnostics are provided by ParserMethod -} - -def GPR64xsp0 : RegisterOperand<GPR64xsp> { - let ParserMatchClass = GPR64xsp0_asmoperand; -} - -//===---------------------------------- -// Store-exclusive (releasing & normal) -//===---------------------------------- - -class A64I_SRexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs, - dag ins, list<dag> pat, - InstrItinClass itin> : - A64I_LDSTex_stn <size, - opcode{2}, 0, opcode{1}, opcode{0}, - outs, ins, - !strconcat(asm, "\t$Rs, $Rt, [$Rn]"), - pat, itin> { - let mayStore = 1; - let PostEncoderMethod = "fixLoadStoreExclusive<1,0>"; - let Constraints = "@earlyclobber $Rs"; -} - -multiclass A64I_SRex<string asmstr, bits<3> opcode, string prefix> { - def _byte: A64I_SRexs_impl<0b00, opcode, !strconcat(asmstr, "b"), - (outs GPR32:$Rs), (ins GPR32:$Rt, GPR64xsp0:$Rn), - [], NoItinerary>, - Sched<[WriteSt, ReadSt, ReadSt]>; - - def _hword: A64I_SRexs_impl<0b01, opcode, !strconcat(asmstr, "h"), - (outs GPR32:$Rs), (ins GPR32:$Rt, GPR64xsp0:$Rn), - [],NoItinerary>, - Sched<[WriteSt, ReadSt, ReadSt]>; - - def _word: A64I_SRexs_impl<0b10, opcode, asmstr, - (outs GPR32:$Rs), (ins GPR32:$Rt, GPR64xsp0:$Rn), - [], NoItinerary>, - Sched<[WriteSt, ReadSt, ReadSt]>; - - def _dword: A64I_SRexs_impl<0b11, opcode, asmstr, - (outs GPR32:$Rs), (ins GPR64:$Rt, GPR64xsp0:$Rn), - [], NoItinerary>, - Sched<[WriteSt, ReadSt, ReadSt]>; -} - -defm STXR : A64I_SRex<"stxr", 0b000, "STXR">; -defm STLXR : A64I_SRex<"stlxr", 0b001, "STLXR">; - -//===---------------------------------- -// Loads -//===---------------------------------- - -class A64I_LRexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs, - dag ins, list<dag> pat, - InstrItinClass itin> : - A64I_LDSTex_tn <size, - opcode{2}, 1, opcode{1}, opcode{0}, - outs, ins, - !strconcat(asm, "\t$Rt, [$Rn]"), - pat, itin> { - let mayLoad = 1; - let PostEncoderMethod = "fixLoadStoreExclusive<0,0>"; -} - -multiclass A64I_LRex<string asmstr, bits<3> opcode> { - def _byte: A64I_LRexs_impl<0b00, opcode, !strconcat(asmstr, "b"), - (outs GPR32:$Rt), (ins GPR64xsp0:$Rn), - [], NoItinerary>, - Sched<[WriteLd]>; - - def _hword: A64I_LRexs_impl<0b01, opcode, !strconcat(asmstr, "h"), - (outs GPR32:$Rt), (ins GPR64xsp0:$Rn), - [], NoItinerary>, - Sched<[WriteLd]>; - - def _word: A64I_LRexs_impl<0b10, opcode, asmstr, - (outs GPR32:$Rt), (ins GPR64xsp0:$Rn), - [], NoItinerary>, - Sched<[WriteLd]>; - - def _dword: A64I_LRexs_impl<0b11, opcode, asmstr, - (outs GPR64:$Rt), (ins GPR64xsp0:$Rn), - [], NoItinerary>, - Sched<[WriteLd]>; -} - -defm LDXR : A64I_LRex<"ldxr", 0b000>; -defm LDAXR : A64I_LRex<"ldaxr", 0b001>; -defm LDAR : A64I_LRex<"ldar", 0b101>; - -class acquiring_load<PatFrag base> - : PatFrag<(ops node:$ptr), (base node:$ptr), [{ - AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering(); - return Ordering == Acquire || Ordering == SequentiallyConsistent; -}]>; - -def atomic_load_acquire_8 : acquiring_load<atomic_load_8>; -def atomic_load_acquire_16 : acquiring_load<atomic_load_16>; -def atomic_load_acquire_32 : acquiring_load<atomic_load_32>; -def atomic_load_acquire_64 : acquiring_load<atomic_load_64>; - -def : Pat<(atomic_load_acquire_8 i64:$Rn), (LDAR_byte $Rn)>; -def : Pat<(atomic_load_acquire_16 i64:$Rn), (LDAR_hword $Rn)>; -def : Pat<(atomic_load_acquire_32 i64:$Rn), (LDAR_word $Rn)>; -def : 
Pat<(atomic_load_acquire_64 i64:$Rn), (LDAR_dword $Rn)>; - -//===---------------------------------- -// Store-release (no exclusivity) -//===---------------------------------- - -class A64I_SLexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs, - dag ins, list<dag> pat, - InstrItinClass itin> : - A64I_LDSTex_tn <size, - opcode{2}, 0, opcode{1}, opcode{0}, - outs, ins, - !strconcat(asm, "\t$Rt, [$Rn]"), - pat, itin> { - let mayStore = 1; - let PostEncoderMethod = "fixLoadStoreExclusive<0,0>"; -} - -class releasing_store<PatFrag base> - : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{ - AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering(); - return Ordering == Release || Ordering == SequentiallyConsistent; -}]>; - -def atomic_store_release_8 : releasing_store<atomic_store_8>; -def atomic_store_release_16 : releasing_store<atomic_store_16>; -def atomic_store_release_32 : releasing_store<atomic_store_32>; -def atomic_store_release_64 : releasing_store<atomic_store_64>; - -multiclass A64I_SLex<string asmstr, bits<3> opcode, string prefix> { - def _byte: A64I_SLexs_impl<0b00, opcode, !strconcat(asmstr, "b"), - (outs), (ins GPR32:$Rt, GPR64xsp0:$Rn), - [(atomic_store_release_8 i64:$Rn, i32:$Rt)], - NoItinerary>, - Sched<[WriteSt, ReadSt, ReadSt]>; - - def _hword: A64I_SLexs_impl<0b01, opcode, !strconcat(asmstr, "h"), - (outs), (ins GPR32:$Rt, GPR64xsp0:$Rn), - [(atomic_store_release_16 i64:$Rn, i32:$Rt)], - NoItinerary>, - Sched<[WriteSt, ReadSt, ReadSt]>; - - def _word: A64I_SLexs_impl<0b10, opcode, asmstr, - (outs), (ins GPR32:$Rt, GPR64xsp0:$Rn), - [(atomic_store_release_32 i64:$Rn, i32:$Rt)], - NoItinerary>, - Sched<[WriteSt, ReadSt, ReadSt]>; - - def _dword: A64I_SLexs_impl<0b11, opcode, asmstr, - (outs), (ins GPR64:$Rt, GPR64xsp0:$Rn), - [(atomic_store_release_64 i64:$Rn, i64:$Rt)], - NoItinerary>, - Sched<[WriteSt, ReadSt, ReadSt]>; -} - -defm STLR : A64I_SLex<"stlr", 0b101, "STLR">; - -//===---------------------------------- -// Store-exclusive pair (releasing & normal) -//===---------------------------------- - -class A64I_SPexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs, - dag ins, list<dag> pat, - InstrItinClass itin> : - A64I_LDSTex_stt2n <size, - opcode{2}, 0, opcode{1}, opcode{0}, - outs, ins, - !strconcat(asm, "\t$Rs, $Rt, $Rt2, [$Rn]"), - pat, itin> { - let mayStore = 1; -} - - -multiclass A64I_SPex<string asmstr, bits<3> opcode> { - def _word: A64I_SPexs_impl<0b10, opcode, asmstr, (outs), - (ins GPR32:$Rs, GPR32:$Rt, GPR32:$Rt2, - GPR64xsp0:$Rn), - [], NoItinerary>, - Sched<[WriteSt, ReadSt, ReadSt, ReadSt]>; - - def _dword: A64I_SPexs_impl<0b11, opcode, asmstr, (outs), - (ins GPR32:$Rs, GPR64:$Rt, GPR64:$Rt2, - GPR64xsp0:$Rn), - [], NoItinerary>, - Sched<[WriteSt, ReadSt, ReadSt, ReadSt]>; -} - -defm STXP : A64I_SPex<"stxp", 0b010>; -defm STLXP : A64I_SPex<"stlxp", 0b011>; - -//===---------------------------------- -// Load-exclusive pair (acquiring & normal) -//===---------------------------------- - -class A64I_LPexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs, - dag ins, list<dag> pat, - InstrItinClass itin> : - A64I_LDSTex_tt2n <size, - opcode{2}, 1, opcode{1}, opcode{0}, - outs, ins, - !strconcat(asm, "\t$Rt, $Rt2, [$Rn]"), - pat, itin>{ - let mayLoad = 1; - let DecoderMethod = "DecodeLoadPairExclusiveInstruction"; - let PostEncoderMethod = "fixLoadStoreExclusive<0,1>"; -} - -multiclass A64I_LPex<string asmstr, bits<3> opcode> { - def _word: A64I_LPexs_impl<0b10, opcode, asmstr, - (outs GPR32:$Rt, GPR32:$Rt2), 
- (ins GPR64xsp0:$Rn), - [], NoItinerary>, - Sched<[WriteLd, WriteLd, ReadLd]>; - - def _dword: A64I_LPexs_impl<0b11, opcode, asmstr, - (outs GPR64:$Rt, GPR64:$Rt2), - (ins GPR64xsp0:$Rn), - [], NoItinerary>, - Sched<[WriteLd, WriteLd, ReadLd]>; -} - -defm LDXP : A64I_LPex<"ldxp", 0b010>; -defm LDAXP : A64I_LPex<"ldaxp", 0b011>; - -//===----------------------------------------------------------------------===// -// Load-store register (unscaled immediate) instructions -//===----------------------------------------------------------------------===// -// Contains: LDURB, LDURH, LDRUSB, LDRUSH, LDRUSW, STUR, STURB, STURH and PRFUM -// -// and -// -//===----------------------------------------------------------------------===// -// Load-store register (register offset) instructions -//===----------------------------------------------------------------------===// -// Contains: LDRB, LDRH, LDRSB, LDRSH, LDRSW, STR, STRB, STRH and PRFM -// -// and -// -//===----------------------------------------------------------------------===// -// Load-store register (unsigned immediate) instructions -//===----------------------------------------------------------------------===// -// Contains: LDRB, LDRH, LDRSB, LDRSH, LDRSW, STR, STRB, STRH and PRFM -// -// and -// -//===----------------------------------------------------------------------===// -// Load-store register (immediate post-indexed) instructions -//===----------------------------------------------------------------------===// -// Contains: STRB, STRH, STR, LDRB, LDRH, LDR, LDRSB, LDRSH, LDRSW -// -// and -// -//===----------------------------------------------------------------------===// -// Load-store register (immediate pre-indexed) instructions -//===----------------------------------------------------------------------===// -// Contains: STRB, STRH, STR, LDRB, LDRH, LDR, LDRSB, LDRSH, LDRSW - -// Note that patterns are much later on in a completely separate section (they -// need ADRPxi to be defined). - -//===------------------------------- -// 1. Various operands needed -//===------------------------------- - -//===------------------------------- -// 1.1 Unsigned 12-bit immediate operands -//===------------------------------- -// The addressing mode for these instructions consists of an unsigned 12-bit -// immediate which is scaled by the size of the memory access. -// -// We represent this in the MC layer by two operands: -// 1. A base register. -// 2. A 12-bit immediate: not multiplied by access size, so "LDR x0,[x0,#8]" -// would have '1' in this field. -// This means that separate functions are needed for converting representations -// which *are* aware of the intended access size. - -// Anything that creates an MCInst (Decoding, selection and AsmParsing) has to -// know the access size via some means. An isolated operand does not have this -// information unless told from here, which means we need separate tablegen -// Operands for each access size. This multiclass takes care of instantiating -// the correct template functions in the rest of the backend. 
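// Worked example (added, not from the original file): since the operand holds
// the byte offset divided by the access size, the same written offset is
// stored differently depending on the width of the access, e.g.
//   ldr  x0, [x1, #8]   ->  dword access, UImm12 operand = 1
//   ldr  w0, [x1, #8]   ->  word access,  UImm12 operand = 2
//   ldrb w0, [x1, #8]   ->  byte access,  UImm12 operand = 8
// which is why a separate operand is instantiated per access size below.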
- -//===------------------------------- -// 1.1 Unsigned 12-bit immediate operands -//===------------------------------- - -multiclass offsets_uimm12<int MemSize, string prefix> { - def uimm12_asmoperand : AsmOperandClass { - let Name = "OffsetUImm12_" # MemSize; - let PredicateMethod = "isOffsetUImm12<" # MemSize # ">"; - let RenderMethod = "addOffsetUImm12Operands<" # MemSize # ">"; - let DiagnosticType = "LoadStoreUImm12_" # MemSize; - } - - // Pattern is really no more than an ImmLeaf, but predicated on MemSize which - // complicates things beyond TableGen's ken. - def uimm12 : Operand<i64>, - ComplexPattern<i64, 1, "SelectOffsetUImm12<" # MemSize # ">"> { - let ParserMatchClass - = !cast<AsmOperandClass>(prefix # uimm12_asmoperand); - - let PrintMethod = "printOffsetUImm12Operand<" # MemSize # ">"; - let EncoderMethod = "getOffsetUImm12OpValue<" # MemSize # ">"; - } -} - -defm byte_ : offsets_uimm12<1, "byte_">; -defm hword_ : offsets_uimm12<2, "hword_">; -defm word_ : offsets_uimm12<4, "word_">; -defm dword_ : offsets_uimm12<8, "dword_">; -defm qword_ : offsets_uimm12<16, "qword_">; - -//===------------------------------- -// 1.1 Signed 9-bit immediate operands -//===------------------------------- - -// The MCInst is expected to store the bit-wise encoding of the value, -// which amounts to lopping off the extended sign bits. -def SDXF_simm9 : SDNodeXForm<imm, [{ - return CurDAG->getTargetConstant(N->getZExtValue() & 0x1ff, MVT::i32); -}]>; - -def simm9_asmoperand : AsmOperandClass { - let Name = "SImm9"; - let PredicateMethod = "isSImm<9>"; - let RenderMethod = "addSImmOperands<9>"; - let DiagnosticType = "LoadStoreSImm9"; -} - -def simm9 : Operand<i64>, - ImmLeaf<i64, [{ return Imm >= -0x100 && Imm <= 0xff; }], - SDXF_simm9> { - let PrintMethod = "printOffsetSImm9Operand"; - let ParserMatchClass = simm9_asmoperand; -} - - -//===------------------------------- -// 1.3 Register offset extensions -//===------------------------------- - -// The assembly-syntax for these addressing-modes is: -// [<Xn|SP>, <R><m> {, <extend> {<amount>}}] -// -// The essential semantics are: -// + <amount> is a shift: #<log(transfer size)> or #0 -// + <R> can be W or X. -// + If <R> is W, <extend> can be UXTW or SXTW -// + If <R> is X, <extend> can be LSL or SXTX -// -// The trickiest of those constraints is that Rm can be either GPR32 or GPR64, -// which will need separate instructions for LLVM type-consistency. We'll also -// need separate operands, of course. -multiclass regexts<int MemSize, int RmSize, RegisterClass GPR, - string Rm, string prefix> { - def regext_asmoperand : AsmOperandClass { - let Name = "AddrRegExtend_" # MemSize # "_" # Rm; - let PredicateMethod = "isAddrRegExtend<" # MemSize # "," # RmSize # ">"; - let RenderMethod = "addAddrRegExtendOperands<" # MemSize # ">"; - let DiagnosticType = "LoadStoreExtend" # RmSize # "_" # MemSize; - } - - def regext : Operand<i64> { - let PrintMethod - = "printAddrRegExtendOperand<" # MemSize # ", " # RmSize # ">"; - - let DecoderMethod = "DecodeAddrRegExtendOperand"; - let ParserMatchClass - = !cast<AsmOperandClass>(prefix # regext_asmoperand); - } -} - -multiclass regexts_wx<int MemSize, string prefix> { - // Rm is an X-register if LSL or SXTX are specified as the shift. - defm Xm_ : regexts<MemSize, 64, GPR64, "Xm", prefix # "Xm_">; - - // Rm is a W-register if UXTW or SXTW are specified as the shift. 
- defm Wm_ : regexts<MemSize, 32, GPR32, "Wm", prefix # "Wm_">; -} - -defm byte_ : regexts_wx<1, "byte_">; -defm hword_ : regexts_wx<2, "hword_">; -defm word_ : regexts_wx<4, "word_">; -defm dword_ : regexts_wx<8, "dword_">; -defm qword_ : regexts_wx<16, "qword_">; - - -//===------------------------------ -// 2. The instructions themselves. -//===------------------------------ - -// We have the following instructions to implement: -// | | B | H | W | X | -// |-----------------+-------+-------+-------+--------| -// | unsigned str | STRB | STRH | STR | STR | -// | unsigned ldr | LDRB | LDRH | LDR | LDR | -// | signed ldr to W | LDRSB | LDRSH | - | - | -// | signed ldr to X | LDRSB | LDRSH | LDRSW | (PRFM) | - -// This will instantiate the LDR/STR instructions you'd expect to use for an -// unsigned datatype (first two rows above) or floating-point register, which is -// reasonably uniform across all access sizes. - - -//===------------------------------ -// 2.1 Regular instructions -//===------------------------------ - -// This class covers the basic unsigned or irrelevantly-signed loads and stores, -// to general-purpose and floating-point registers. - -class AddrParams<string prefix> { - Operand uimm12 = !cast<Operand>(prefix # "_uimm12"); - - Operand regextWm = !cast<Operand>(prefix # "_Wm_regext"); - Operand regextXm = !cast<Operand>(prefix # "_Xm_regext"); -} - -def byte_addrparams : AddrParams<"byte">; -def hword_addrparams : AddrParams<"hword">; -def word_addrparams : AddrParams<"word">; -def dword_addrparams : AddrParams<"dword">; -def qword_addrparams : AddrParams<"qword">; - -multiclass A64I_LDRSTR_unsigned<string prefix, bits<2> size, bit v, - bit high_opc, string asmsuffix, - RegisterClass GPR, AddrParams params> { - // Unsigned immediate - def _STR : A64I_LSunsigimm<size, v, {high_opc, 0b0}, - (outs), (ins GPR:$Rt, GPR64xsp:$Rn, params.uimm12:$UImm12), - "str" # asmsuffix # "\t$Rt, [$Rn, $UImm12]", - [], NoItinerary>, - Sched<[WriteSt, ReadSt, ReadSt]> { - let mayStore = 1; - } - def : InstAlias<"str" # asmsuffix # " $Rt, [$Rn]", - (!cast<Instruction>(prefix # "_STR") GPR:$Rt, GPR64xsp:$Rn, 0)>; - - def _LDR : A64I_LSunsigimm<size, v, {high_opc, 0b1}, - (outs GPR:$Rt), (ins GPR64xsp:$Rn, params.uimm12:$UImm12), - "ldr" # asmsuffix # "\t$Rt, [$Rn, $UImm12]", - [], NoItinerary>, - Sched<[WriteLd, ReadLd]> { - let mayLoad = 1; - } - def : InstAlias<"ldr" # asmsuffix # " $Rt, [$Rn]", - (!cast<Instruction>(prefix # "_LDR") GPR:$Rt, GPR64xsp:$Rn, 0)>; - - // Register offset (four of these: load/store and Wm/Xm). 
- let mayLoad = 1 in { - def _Wm_RegOffset_LDR : A64I_LSregoff<size, v, {high_opc, 0b1}, 0b0, - (outs GPR:$Rt), - (ins GPR64xsp:$Rn, GPR32:$Rm, params.regextWm:$Ext), - "ldr" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]", - [], NoItinerary>, - Sched<[WriteLd, ReadLd, ReadLd]>; - - def _Xm_RegOffset_LDR : A64I_LSregoff<size, v, {high_opc, 0b1}, 0b1, - (outs GPR:$Rt), - (ins GPR64xsp:$Rn, GPR64:$Rm, params.regextXm:$Ext), - "ldr" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]", - [], NoItinerary>, - Sched<[WriteLd, ReadLd, ReadLd]>; - } - def : InstAlias<"ldr" # asmsuffix # " $Rt, [$Rn, $Rm]", - (!cast<Instruction>(prefix # "_Xm_RegOffset_LDR") GPR:$Rt, GPR64xsp:$Rn, - GPR64:$Rm, 2)>; - - let mayStore = 1 in { - def _Wm_RegOffset_STR : A64I_LSregoff<size, v, {high_opc, 0b0}, 0b0, - (outs), (ins GPR:$Rt, GPR64xsp:$Rn, GPR32:$Rm, - params.regextWm:$Ext), - "str" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]", - [], NoItinerary>, - Sched<[WriteSt, ReadSt, ReadSt, ReadSt]>; - - def _Xm_RegOffset_STR : A64I_LSregoff<size, v, {high_opc, 0b0}, 0b1, - (outs), (ins GPR:$Rt, GPR64xsp:$Rn, GPR64:$Rm, - params.regextXm:$Ext), - "str" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]", - [], NoItinerary>, - Sched<[WriteSt, ReadSt, ReadSt, ReadSt]>; - } - def : InstAlias<"str" # asmsuffix # " $Rt, [$Rn, $Rm]", - (!cast<Instruction>(prefix # "_Xm_RegOffset_STR") GPR:$Rt, GPR64xsp:$Rn, - GPR64:$Rm, 2)>; - - // Unaligned immediate - def _STUR : A64I_LSunalimm<size, v, {high_opc, 0b0}, - (outs), (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9), - "stur" # asmsuffix # "\t$Rt, [$Rn, $SImm9]", - [], NoItinerary>, - Sched<[WriteSt, ReadSt, ReadSt]> { - let mayStore = 1; - } - def : InstAlias<"stur" # asmsuffix # " $Rt, [$Rn]", - (!cast<Instruction>(prefix # "_STUR") GPR:$Rt, GPR64xsp:$Rn, 0)>; - - def _LDUR : A64I_LSunalimm<size, v, {high_opc, 0b1}, - (outs GPR:$Rt), (ins GPR64xsp:$Rn, simm9:$SImm9), - "ldur" # asmsuffix # "\t$Rt, [$Rn, $SImm9]", - [], NoItinerary>, - Sched<[WriteLd, ReadLd]> { - let mayLoad = 1; - } - def : InstAlias<"ldur" # asmsuffix # " $Rt, [$Rn]", - (!cast<Instruction>(prefix # "_LDUR") GPR:$Rt, GPR64xsp:$Rn, 0)>; - - // Post-indexed - def _PostInd_STR : A64I_LSpostind<size, v, {high_opc, 0b0}, - (outs GPR64xsp:$Rn_wb), - (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9), - "str" # asmsuffix # "\t$Rt, [$Rn], $SImm9", - [], NoItinerary>, - Sched<[WriteSt, ReadSt, ReadSt]> { - let Constraints = "$Rn = $Rn_wb"; - let mayStore = 1; - - // Decoder only needed for unpredictability checking (FIXME). - let DecoderMethod = "DecodeSingleIndexedInstruction"; - } - - def _PostInd_LDR : A64I_LSpostind<size, v, {high_opc, 0b1}, - (outs GPR:$Rt, GPR64xsp:$Rn_wb), - (ins GPR64xsp:$Rn, simm9:$SImm9), - "ldr" # asmsuffix # "\t$Rt, [$Rn], $SImm9", - [], NoItinerary>, - Sched<[WriteLd, WriteLd, ReadLd]> { - let mayLoad = 1; - let Constraints = "$Rn = $Rn_wb"; - let DecoderMethod = "DecodeSingleIndexedInstruction"; - } - - // Pre-indexed - def _PreInd_STR : A64I_LSpreind<size, v, {high_opc, 0b0}, - (outs GPR64xsp:$Rn_wb), - (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9), - "str" # asmsuffix # "\t$Rt, [$Rn, $SImm9]!", - [], NoItinerary>, - Sched<[WriteSt, ReadSt, ReadSt]> { - let Constraints = "$Rn = $Rn_wb"; - let mayStore = 1; - - // Decoder only needed for unpredictability checking (FIXME). 
- let DecoderMethod = "DecodeSingleIndexedInstruction"; - } - - def _PreInd_LDR : A64I_LSpreind<size, v, {high_opc, 0b1}, - (outs GPR:$Rt, GPR64xsp:$Rn_wb), - (ins GPR64xsp:$Rn, simm9:$SImm9), - "ldr" # asmsuffix # "\t$Rt, [$Rn, $SImm9]!", - [], NoItinerary>, - Sched<[WriteLd, WriteLd, ReadLd]> { - let mayLoad = 1; - let Constraints = "$Rn = $Rn_wb"; - let DecoderMethod = "DecodeSingleIndexedInstruction"; - } - -} - -// STRB/LDRB: First define the instructions -defm LS8 - : A64I_LDRSTR_unsigned<"LS8", 0b00, 0b0, 0b0, "b", GPR32, byte_addrparams>; - -// STRH/LDRH -defm LS16 - : A64I_LDRSTR_unsigned<"LS16", 0b01, 0b0, 0b0, "h", GPR32, hword_addrparams>; - - -// STR/LDR to/from a W register -defm LS32 - : A64I_LDRSTR_unsigned<"LS32", 0b10, 0b0, 0b0, "", GPR32, word_addrparams>; - -// STR/LDR to/from an X register -defm LS64 - : A64I_LDRSTR_unsigned<"LS64", 0b11, 0b0, 0b0, "", GPR64, dword_addrparams>; - -let Predicates = [HasFPARMv8] in { -// STR/LDR to/from a B register -defm LSFP8 - : A64I_LDRSTR_unsigned<"LSFP8", 0b00, 0b1, 0b0, "", FPR8, byte_addrparams>; - -// STR/LDR to/from an H register -defm LSFP16 - : A64I_LDRSTR_unsigned<"LSFP16", 0b01, 0b1, 0b0, "", FPR16, hword_addrparams>; - -// STR/LDR to/from an S register -defm LSFP32 - : A64I_LDRSTR_unsigned<"LSFP32", 0b10, 0b1, 0b0, "", FPR32, word_addrparams>; -// STR/LDR to/from a D register -defm LSFP64 - : A64I_LDRSTR_unsigned<"LSFP64", 0b11, 0b1, 0b0, "", FPR64, dword_addrparams>; -// STR/LDR to/from a Q register -defm LSFP128 - : A64I_LDRSTR_unsigned<"LSFP128", 0b00, 0b1, 0b1, "", FPR128, - qword_addrparams>; -} - -//===------------------------------ -// 2.3 Signed loads -//===------------------------------ - -// Byte and half-word signed loads can both go into either an X or a W register, -// so it's worth factoring out. Signed word loads don't fit because there is no -// W version. 
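// Illustrative example (added, not from the original file): instantiating the
// multiclass below as, say, "defm LDRSB : A64I_LDR_signed<0b00, "b",
// byte_addrparams, "LDRSB">" yields both destination widths from one
// definition, e.g.
//   ldrsb w0, [x1, #4]    (LDRSBw)
//   ldrsb x0, [x1, #4]    (LDRSBx)
// together with the register-offset, unscaled ("ldursb") and pre/post-indexed
// variants.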
-multiclass A64I_LDR_signed<bits<2> size, string asmopcode, AddrParams params, - string prefix> { - // Unsigned offset - def w : A64I_LSunsigimm<size, 0b0, 0b11, - (outs GPR32:$Rt), - (ins GPR64xsp:$Rn, params.uimm12:$UImm12), - "ldrs" # asmopcode # "\t$Rt, [$Rn, $UImm12]", - [], NoItinerary>, - Sched<[WriteLd, ReadLd]> { - let mayLoad = 1; - } - def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn]", - (!cast<Instruction>(prefix # w) GPR32:$Rt, GPR64xsp:$Rn, 0)>; - - def x : A64I_LSunsigimm<size, 0b0, 0b10, - (outs GPR64:$Rt), - (ins GPR64xsp:$Rn, params.uimm12:$UImm12), - "ldrs" # asmopcode # "\t$Rt, [$Rn, $UImm12]", - [], NoItinerary>, - Sched<[WriteLd, ReadLd]> { - let mayLoad = 1; - } - def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn]", - (!cast<Instruction>(prefix # x) GPR64:$Rt, GPR64xsp:$Rn, 0)>; - - // Register offset - let mayLoad = 1 in { - def w_Wm_RegOffset : A64I_LSregoff<size, 0b0, 0b11, 0b0, - (outs GPR32:$Rt), - (ins GPR64xsp:$Rn, GPR32:$Rm, params.regextWm:$Ext), - "ldrs" # asmopcode # "\t$Rt, [$Rn, $Rm, $Ext]", - [], NoItinerary>, - Sched<[WriteLd, ReadLd, ReadLd]>; - - def w_Xm_RegOffset : A64I_LSregoff<size, 0b0, 0b11, 0b1, - (outs GPR32:$Rt), - (ins GPR64xsp:$Rn, GPR64:$Rm, params.regextXm:$Ext), - "ldrs" # asmopcode # "\t$Rt, [$Rn, $Rm, $Ext]", - [], NoItinerary>, - Sched<[WriteLd, ReadLd, ReadLd]>; - - def x_Wm_RegOffset : A64I_LSregoff<size, 0b0, 0b10, 0b0, - (outs GPR64:$Rt), - (ins GPR64xsp:$Rn, GPR32:$Rm, params.regextWm:$Ext), - "ldrs" # asmopcode # "\t$Rt, [$Rn, $Rm, $Ext]", - [], NoItinerary>, - Sched<[WriteLd, ReadLd, ReadLd]>; - - def x_Xm_RegOffset : A64I_LSregoff<size, 0b0, 0b10, 0b1, - (outs GPR64:$Rt), - (ins GPR64xsp:$Rn, GPR64:$Rm, params.regextXm:$Ext), - "ldrs" # asmopcode # "\t$Rt, [$Rn, $Rm, $Ext]", - [], NoItinerary>, - Sched<[WriteLd, ReadLd, ReadLd]>; - } - def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn, $Rm]", - (!cast<Instruction>(prefix # "w_Xm_RegOffset") GPR32:$Rt, GPR64xsp:$Rn, - GPR64:$Rm, 2)>; - - def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn, $Rm]", - (!cast<Instruction>(prefix # "x_Xm_RegOffset") GPR64:$Rt, GPR64xsp:$Rn, - GPR64:$Rm, 2)>; - - - let mayLoad = 1 in { - // Unaligned offset - def w_U : A64I_LSunalimm<size, 0b0, 0b11, - (outs GPR32:$Rt), - (ins GPR64xsp:$Rn, simm9:$SImm9), - "ldurs" # asmopcode # "\t$Rt, [$Rn, $SImm9]", - [], NoItinerary>, - Sched<[WriteLd, ReadLd]>; - - def x_U : A64I_LSunalimm<size, 0b0, 0b10, - (outs GPR64:$Rt), - (ins GPR64xsp:$Rn, simm9:$SImm9), - "ldurs" # asmopcode # "\t$Rt, [$Rn, $SImm9]", - [], NoItinerary>, - Sched<[WriteLd, ReadLd]>; - - - // Post-indexed - def w_PostInd : A64I_LSpostind<size, 0b0, 0b11, - (outs GPR32:$Rt, GPR64xsp:$Rn_wb), - (ins GPR64xsp:$Rn, simm9:$SImm9), - "ldrs" # asmopcode # "\t$Rt, [$Rn], $SImm9", - [], NoItinerary>, - Sched<[WriteLd, WriteLd, ReadLd]> { - let Constraints = "$Rn = $Rn_wb"; - let DecoderMethod = "DecodeSingleIndexedInstruction"; - } - - def x_PostInd : A64I_LSpostind<size, 0b0, 0b10, - (outs GPR64:$Rt, GPR64xsp:$Rn_wb), - (ins GPR64xsp:$Rn, simm9:$SImm9), - "ldrs" # asmopcode # "\t$Rt, [$Rn], $SImm9", - [], NoItinerary>, - Sched<[WriteLd, WriteLd, ReadLd]> { - let Constraints = "$Rn = $Rn_wb"; - let DecoderMethod = "DecodeSingleIndexedInstruction"; - } - - // Pre-indexed - def w_PreInd : A64I_LSpreind<size, 0b0, 0b11, - (outs GPR32:$Rt, GPR64xsp:$Rn_wb), - (ins GPR64xsp:$Rn, simm9:$SImm9), - "ldrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]!", - [], NoItinerary>, - Sched<[WriteLd, WriteLd, ReadLd]> { - let Constraints = "$Rn = $Rn_wb"; - let DecoderMethod = 
"DecodeSingleIndexedInstruction"; - } - - def x_PreInd : A64I_LSpreind<size, 0b0, 0b10, - (outs GPR64:$Rt, GPR64xsp:$Rn_wb), - (ins GPR64xsp:$Rn, simm9:$SImm9), - "ldrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]!", - [], NoItinerary>, - Sched<[WriteLd, WriteLd, ReadLd]> { - let Constraints = "$Rn = $Rn_wb"; - let DecoderMethod = "DecodeSingleIndexedInstruction"; - } - } // let mayLoad = 1 -} - -// LDRSB -defm LDRSB : A64I_LDR_signed<0b00, "b", byte_addrparams, "LDRSB">; -// LDRSH -defm LDRSH : A64I_LDR_signed<0b01, "h", hword_addrparams, "LDRSH">; - -// LDRSW: load a 32-bit register, sign-extending to 64-bits. -def LDRSWx - : A64I_LSunsigimm<0b10, 0b0, 0b10, - (outs GPR64:$Rt), - (ins GPR64xsp:$Rn, word_uimm12:$UImm12), - "ldrsw\t$Rt, [$Rn, $UImm12]", - [], NoItinerary>, - Sched<[WriteLd, ReadLd]> { - let mayLoad = 1; -} -def : InstAlias<"ldrsw $Rt, [$Rn]", (LDRSWx GPR64:$Rt, GPR64xsp:$Rn, 0)>; - -let mayLoad = 1 in { - def LDRSWx_Wm_RegOffset : A64I_LSregoff<0b10, 0b0, 0b10, 0b0, - (outs GPR64:$Rt), - (ins GPR64xsp:$Rn, GPR32:$Rm, word_Wm_regext:$Ext), - "ldrsw\t$Rt, [$Rn, $Rm, $Ext]", - [], NoItinerary>, - Sched<[WriteLd, ReadLd, ReadLd]>; - - def LDRSWx_Xm_RegOffset : A64I_LSregoff<0b10, 0b0, 0b10, 0b1, - (outs GPR64:$Rt), - (ins GPR64xsp:$Rn, GPR64:$Rm, word_Xm_regext:$Ext), - "ldrsw\t$Rt, [$Rn, $Rm, $Ext]", - [], NoItinerary>, - Sched<[WriteLd, ReadLd, ReadLd]>; -} -def : InstAlias<"ldrsw $Rt, [$Rn, $Rm]", - (LDRSWx_Xm_RegOffset GPR64:$Rt, GPR64xsp:$Rn, GPR64:$Rm, 2)>; - - -def LDURSWx - : A64I_LSunalimm<0b10, 0b0, 0b10, - (outs GPR64:$Rt), - (ins GPR64xsp:$Rn, simm9:$SImm9), - "ldursw\t$Rt, [$Rn, $SImm9]", - [], NoItinerary>, - Sched<[WriteLd, ReadLd]> { - let mayLoad = 1; -} -def : InstAlias<"ldursw $Rt, [$Rn]", (LDURSWx GPR64:$Rt, GPR64xsp:$Rn, 0)>; - -def LDRSWx_PostInd - : A64I_LSpostind<0b10, 0b0, 0b10, - (outs GPR64:$Rt, GPR64xsp:$Rn_wb), - (ins GPR64xsp:$Rn, simm9:$SImm9), - "ldrsw\t$Rt, [$Rn], $SImm9", - [], NoItinerary>, - Sched<[WriteLd, WriteLd, ReadLd]> { - let mayLoad = 1; - let Constraints = "$Rn = $Rn_wb"; - let DecoderMethod = "DecodeSingleIndexedInstruction"; -} - -def LDRSWx_PreInd : A64I_LSpreind<0b10, 0b0, 0b10, - (outs GPR64:$Rt, GPR64xsp:$Rn_wb), - (ins GPR64xsp:$Rn, simm9:$SImm9), - "ldrsw\t$Rt, [$Rn, $SImm9]!", - [], NoItinerary>, - Sched<[WriteLd, WriteLd, ReadLd]> { - let mayLoad = 1; - let Constraints = "$Rn = $Rn_wb"; - let DecoderMethod = "DecodeSingleIndexedInstruction"; -} - -//===------------------------------ -// 2.4 Prefetch operations -//===------------------------------ - -def PRFM : A64I_LSunsigimm<0b11, 0b0, 0b10, (outs), - (ins prefetch_op:$Rt, GPR64xsp:$Rn, dword_uimm12:$UImm12), - "prfm\t$Rt, [$Rn, $UImm12]", - [], NoItinerary>, - Sched<[WritePreLd, ReadPreLd]> { - let mayLoad = 1; -} -def : InstAlias<"prfm $Rt, [$Rn]", - (PRFM prefetch_op:$Rt, GPR64xsp:$Rn, 0)>; - -let mayLoad = 1 in { - def PRFM_Wm_RegOffset : A64I_LSregoff<0b11, 0b0, 0b10, 0b0, (outs), - (ins prefetch_op:$Rt, GPR64xsp:$Rn, - GPR32:$Rm, dword_Wm_regext:$Ext), - "prfm\t$Rt, [$Rn, $Rm, $Ext]", - [], NoItinerary>, - Sched<[WritePreLd, ReadPreLd]>; - def PRFM_Xm_RegOffset : A64I_LSregoff<0b11, 0b0, 0b10, 0b1, (outs), - (ins prefetch_op:$Rt, GPR64xsp:$Rn, - GPR64:$Rm, dword_Xm_regext:$Ext), - "prfm\t$Rt, [$Rn, $Rm, $Ext]", - [], NoItinerary>, - Sched<[WritePreLd, ReadPreLd]>; -} - -def : InstAlias<"prfm $Rt, [$Rn, $Rm]", - (PRFM_Xm_RegOffset prefetch_op:$Rt, GPR64xsp:$Rn, - GPR64:$Rm, 2)>; - - -def PRFUM : A64I_LSunalimm<0b11, 0b0, 0b10, (outs), - (ins prefetch_op:$Rt, GPR64xsp:$Rn, 
simm9:$SImm9), - "prfum\t$Rt, [$Rn, $SImm9]", - [], NoItinerary>, - Sched<[WritePreLd, ReadPreLd]> { - let mayLoad = 1; -} -def : InstAlias<"prfum $Rt, [$Rn]", - (PRFUM prefetch_op:$Rt, GPR64xsp:$Rn, 0)>; - -//===----------------------------------------------------------------------===// -// Load-store register (unprivileged) instructions -//===----------------------------------------------------------------------===// -// Contains: LDTRB, LDTRH, LDTRSB, LDTRSH, LDTRSW, STTR, STTRB and STTRH - -// These instructions very much mirror the "unscaled immediate" loads, but since -// there are no floating-point variants we need to split them out into their own -// section to avoid instantiation of "ldtr d0, [sp]" etc. - -multiclass A64I_LDTRSTTR<bits<2> size, string asmsuffix, RegisterClass GPR, - string prefix> { - def _UnPriv_STR : A64I_LSunpriv<size, 0b0, 0b00, - (outs), (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9), - "sttr" # asmsuffix # "\t$Rt, [$Rn, $SImm9]", - [], NoItinerary>, - Sched<[WriteLd, ReadLd]> { - let mayStore = 1; - } - - def : InstAlias<"sttr" # asmsuffix # " $Rt, [$Rn]", - (!cast<Instruction>(prefix # "_UnPriv_STR") GPR:$Rt, GPR64xsp:$Rn, 0)>; - - def _UnPriv_LDR : A64I_LSunpriv<size, 0b0, 0b01, - (outs GPR:$Rt), (ins GPR64xsp:$Rn, simm9:$SImm9), - "ldtr" # asmsuffix # "\t$Rt, [$Rn, $SImm9]", - [], NoItinerary>, - Sched<[WriteLd, ReadLd]> { - let mayLoad = 1; - } - - def : InstAlias<"ldtr" # asmsuffix # " $Rt, [$Rn]", - (!cast<Instruction>(prefix # "_UnPriv_LDR") GPR:$Rt, GPR64xsp:$Rn, 0)>; - -} - -// STTRB/LDTRB: First define the instructions -defm LS8 : A64I_LDTRSTTR<0b00, "b", GPR32, "LS8">; - -// STTRH/LDTRH -defm LS16 : A64I_LDTRSTTR<0b01, "h", GPR32, "LS16">; - -// STTR/LDTR to/from a W register -defm LS32 : A64I_LDTRSTTR<0b10, "", GPR32, "LS32">; - -// STTR/LDTR to/from an X register -defm LS64 : A64I_LDTRSTTR<0b11, "", GPR64, "LS64">; - -// Now a class for the signed instructions that can go to either 32 or 64 -// bits... -multiclass A64I_LDTR_signed<bits<2> size, string asmopcode, string prefix> { - let mayLoad = 1 in { - def w : A64I_LSunpriv<size, 0b0, 0b11, - (outs GPR32:$Rt), - (ins GPR64xsp:$Rn, simm9:$SImm9), - "ldtrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]", - [], NoItinerary>, - Sched<[WriteLd, ReadLd]>; - - def x : A64I_LSunpriv<size, 0b0, 0b10, - (outs GPR64:$Rt), - (ins GPR64xsp:$Rn, simm9:$SImm9), - "ldtrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]", - [], NoItinerary>, - Sched<[WriteLd, ReadLd]>; - } - - def : InstAlias<"ldtrs" # asmopcode # " $Rt, [$Rn]", - (!cast<Instruction>(prefix # "w") GPR32:$Rt, GPR64xsp:$Rn, 0)>; - - def : InstAlias<"ldtrs" # asmopcode # " $Rt, [$Rn]", - (!cast<Instruction>(prefix # "x") GPR64:$Rt, GPR64xsp:$Rn, 0)>; - -} - -// LDTRSB -defm LDTRSB : A64I_LDTR_signed<0b00, "b", "LDTRSB">; -// LDTRSH -defm LDTRSH : A64I_LDTR_signed<0b01, "h", "LDTRSH">; - -// And finally LDTRSW which only goes to 64 bits. 
-def LDTRSWx : A64I_LSunpriv<0b10, 0b0, 0b10, - (outs GPR64:$Rt), - (ins GPR64xsp:$Rn, simm9:$SImm9), - "ldtrsw\t$Rt, [$Rn, $SImm9]", - [], NoItinerary>, - Sched<[WriteLd, ReadLd]> { - let mayLoad = 1; -} -def : InstAlias<"ldtrsw $Rt, [$Rn]", (LDTRSWx GPR64:$Rt, GPR64xsp:$Rn, 0)>; - -//===----------------------------------------------------------------------===// -// Load-store register pair (offset) instructions -//===----------------------------------------------------------------------===// -// -// and -// -//===----------------------------------------------------------------------===// -// Load-store register pair (post-indexed) instructions -//===----------------------------------------------------------------------===// -// Contains: STP, LDP, LDPSW -// -// and -// -//===----------------------------------------------------------------------===// -// Load-store register pair (pre-indexed) instructions -//===----------------------------------------------------------------------===// -// Contains: STP, LDP, LDPSW -// -// and -// -//===----------------------------------------------------------------------===// -// Load-store non-temporal register pair (offset) instructions -//===----------------------------------------------------------------------===// -// Contains: STNP, LDNP - - -// Anything that creates an MCInst (Decoding, selection and AsmParsing) has to -// know the access size via some means. An isolated operand does not have this -// information unless told from here, which means we need separate tablegen -// Operands for each access size. This multiclass takes care of instantiating -// the correct template functions in the rest of the backend. - -multiclass offsets_simm7<string MemSize, string prefix> { - // The bare signed 7-bit immediate is used in post-indexed instructions, but - // because of the scaling performed a generic "simm7" operand isn't - // appropriate here either. 
- def simm7_asmoperand : AsmOperandClass { - let Name = "SImm7_Scaled" # MemSize; - let PredicateMethod = "isSImm7Scaled<" # MemSize # ">"; - let RenderMethod = "addSImm7ScaledOperands<" # MemSize # ">"; - let DiagnosticType = "LoadStoreSImm7_" # MemSize; - } - - def simm7 : Operand<i64> { - let PrintMethod = "printSImm7ScaledOperand<" # MemSize # ">"; - let ParserMatchClass = !cast<AsmOperandClass>(prefix # "simm7_asmoperand"); - } -} - -defm word_ : offsets_simm7<"4", "word_">; -defm dword_ : offsets_simm7<"8", "dword_">; -defm qword_ : offsets_simm7<"16", "qword_">; - -multiclass A64I_LSPsimple<bits<2> opc, bit v, RegisterClass SomeReg, - Operand simm7, string prefix> { - def _STR : A64I_LSPoffset<opc, v, 0b0, (outs), - (ins SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn, simm7:$SImm7), - "stp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary>, - Sched<[WriteLd, ReadLd]> { - let mayStore = 1; - let DecoderMethod = "DecodeLDSTPairInstruction"; - } - def : InstAlias<"stp $Rt, $Rt2, [$Rn]", - (!cast<Instruction>(prefix # "_STR") SomeReg:$Rt, - SomeReg:$Rt2, GPR64xsp:$Rn, 0)>; - - def _LDR : A64I_LSPoffset<opc, v, 0b1, - (outs SomeReg:$Rt, SomeReg:$Rt2), - (ins GPR64xsp:$Rn, simm7:$SImm7), - "ldp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary>, - Sched<[WriteLd, WriteLd, ReadLd]> { - let mayLoad = 1; - let DecoderMethod = "DecodeLDSTPairInstruction"; - } - def : InstAlias<"ldp $Rt, $Rt2, [$Rn]", - (!cast<Instruction>(prefix # "_LDR") SomeReg:$Rt, - SomeReg:$Rt2, GPR64xsp:$Rn, 0)>; - - def _PostInd_STR : A64I_LSPpostind<opc, v, 0b0, - (outs GPR64xsp:$Rn_wb), - (ins SomeReg:$Rt, SomeReg:$Rt2, - GPR64xsp:$Rn, - simm7:$SImm7), - "stp\t$Rt, $Rt2, [$Rn], $SImm7", - [], NoItinerary>, - Sched<[WriteSt, ReadSt, ReadSt, ReadSt]> { - let mayStore = 1; - let Constraints = "$Rn = $Rn_wb"; - - // Decoder only needed for unpredictability checking (FIXME). 
- let DecoderMethod = "DecodeLDSTPairInstruction"; - } - - def _PostInd_LDR : A64I_LSPpostind<opc, v, 0b1, - (outs SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn_wb), - (ins GPR64xsp:$Rn, simm7:$SImm7), - "ldp\t$Rt, $Rt2, [$Rn], $SImm7", - [], NoItinerary>, - Sched<[WriteLd, WriteLd, WriteLd, ReadLd]> { - let mayLoad = 1; - let Constraints = "$Rn = $Rn_wb"; - let DecoderMethod = "DecodeLDSTPairInstruction"; - } - - def _PreInd_STR : A64I_LSPpreind<opc, v, 0b0, (outs GPR64xsp:$Rn_wb), - (ins SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn, simm7:$SImm7), - "stp\t$Rt, $Rt2, [$Rn, $SImm7]!", - [], NoItinerary>, - Sched<[WriteSt, ReadSt, ReadSt, ReadSt]> { - let mayStore = 1; - let Constraints = "$Rn = $Rn_wb"; - let DecoderMethod = "DecodeLDSTPairInstruction"; - } - - def _PreInd_LDR : A64I_LSPpreind<opc, v, 0b1, - (outs SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn_wb), - (ins GPR64xsp:$Rn, simm7:$SImm7), - "ldp\t$Rt, $Rt2, [$Rn, $SImm7]!", - [], NoItinerary>, - Sched<[WriteLd, WriteLd, WriteLd, ReadLd]> { - let mayLoad = 1; - let Constraints = "$Rn = $Rn_wb"; - let DecoderMethod = "DecodeLDSTPairInstruction"; - } - - def _NonTemp_STR : A64I_LSPnontemp<opc, v, 0b0, (outs), - (ins SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn, simm7:$SImm7), - "stnp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary>, - Sched<[WriteSt, ReadSt, ReadSt, ReadSt]> { - let mayStore = 1; - let DecoderMethod = "DecodeLDSTPairInstruction"; - } - def : InstAlias<"stnp $Rt, $Rt2, [$Rn]", - (!cast<Instruction>(prefix # "_NonTemp_STR") SomeReg:$Rt, - SomeReg:$Rt2, GPR64xsp:$Rn, 0)>; - - def _NonTemp_LDR : A64I_LSPnontemp<opc, v, 0b1, - (outs SomeReg:$Rt, SomeReg:$Rt2), - (ins GPR64xsp:$Rn, simm7:$SImm7), - "ldnp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary>, - Sched<[WriteLd, WriteLd, ReadLd]> { - let mayLoad = 1; - let DecoderMethod = "DecodeLDSTPairInstruction"; - } - def : InstAlias<"ldnp $Rt, $Rt2, [$Rn]", - (!cast<Instruction>(prefix # "_NonTemp_LDR") SomeReg:$Rt, - SomeReg:$Rt2, GPR64xsp:$Rn, 0)>; - -} - - -defm LSPair32 : A64I_LSPsimple<0b00, 0b0, GPR32, word_simm7, "LSPair32">; -defm LSPair64 : A64I_LSPsimple<0b10, 0b0, GPR64, dword_simm7, "LSPair64">; - -let Predicates = [HasFPARMv8] in { -defm LSFPPair32 : A64I_LSPsimple<0b00, 0b1, FPR32, word_simm7, "LSFPPair32">; -defm LSFPPair64 : A64I_LSPsimple<0b01, 0b1, FPR64, dword_simm7, "LSFPPair64">; -defm LSFPPair128 : A64I_LSPsimple<0b10, 0b1, FPR128, qword_simm7, - "LSFPPair128">; -} - - -def LDPSWx : A64I_LSPoffset<0b01, 0b0, 0b1, - (outs GPR64:$Rt, GPR64:$Rt2), - (ins GPR64xsp:$Rn, word_simm7:$SImm7), - "ldpsw\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary>, - Sched<[WriteLd, WriteLd, ReadLd]> { - let mayLoad = 1; - let DecoderMethod = "DecodeLDSTPairInstruction"; -} -def : InstAlias<"ldpsw $Rt, $Rt2, [$Rn]", - (LDPSWx GPR64:$Rt, GPR64:$Rt2, GPR64xsp:$Rn, 0)>; - -def LDPSWx_PostInd : A64I_LSPpostind<0b01, 0b0, 0b1, - (outs GPR64:$Rt, GPR64:$Rt2, GPR64:$Rn_wb), - (ins GPR64xsp:$Rn, word_simm7:$SImm7), - "ldpsw\t$Rt, $Rt2, [$Rn], $SImm7", - [], NoItinerary>, - Sched<[WriteLd, WriteLd, WriteLd, ReadLd]> { - let mayLoad = 1; - let Constraints = "$Rn = $Rn_wb"; - let DecoderMethod = "DecodeLDSTPairInstruction"; -} - -def LDPSWx_PreInd : A64I_LSPpreind<0b01, 0b0, 0b1, - (outs GPR64:$Rt, GPR64:$Rt2, GPR64:$Rn_wb), - (ins GPR64xsp:$Rn, word_simm7:$SImm7), - "ldpsw\t$Rt, $Rt2, [$Rn, $SImm7]!", - [], NoItinerary>, - Sched<[WriteLd, WriteLd, WriteLd, ReadLd]> { - let mayLoad = 1; - let Constraints = "$Rn = $Rn_wb"; - let DecoderMethod = "DecodeLDSTPairInstruction"; -} - 
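// Worked example (added, not from the original file): as with the unsigned
// 12-bit offsets earlier, the pair instructions above encode the byte offset
// divided by the size of one register of the pair, e.g.
//   ldp x0, x1, [sp, #16]   ->  8-byte registers, imm7 field = 2
//   ldp w0, w1, [sp, #16]   ->  4-byte registers, imm7 field = 4
// hence the separate word_/dword_/qword_ simm7 operands.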
-//===----------------------------------------------------------------------===// -// Logical (immediate) instructions -//===----------------------------------------------------------------------===// -// Contains: AND, ORR, EOR, ANDS, + aliases TST, MOV - -multiclass logical_imm_operands<string prefix, string note, - int size, ValueType VT> { - def _asmoperand : AsmOperandClass { - let Name = "LogicalImm" # note # size; - let PredicateMethod = "isLogicalImm" # note # "<" # size # ">"; - let RenderMethod = "addLogicalImmOperands<" # size # ">"; - let DiagnosticType = "LogicalSecondSource"; - } - - def _operand - : Operand<VT>, ComplexPattern<VT, 1, "SelectLogicalImm", [imm]> { - let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_asmoperand"); - let PrintMethod = "printLogicalImmOperand<" # size # ">"; - let DecoderMethod = "DecodeLogicalImmOperand<" # size # ">"; - } -} - -defm logical_imm32 : logical_imm_operands<"logical_imm32", "", 32, i32>; -defm logical_imm64 : logical_imm_operands<"logical_imm64", "", 64, i64>; - -// The mov versions only differ in assembly parsing, where they -// exclude values representable with either MOVZ or MOVN. -defm logical_imm32_mov - : logical_imm_operands<"logical_imm32_mov", "MOV", 32, i32>; -defm logical_imm64_mov - : logical_imm_operands<"logical_imm64_mov", "MOV", 64, i64>; - - -multiclass A64I_logimmSizes<bits<2> opc, string asmop, SDNode opnode> { - def wwi : A64I_logicalimm<0b0, opc, (outs GPR32wsp:$Rd), - (ins GPR32:$Rn, logical_imm32_operand:$Imm), - !strconcat(asmop, "\t$Rd, $Rn, $Imm"), - [(set i32:$Rd, - (opnode i32:$Rn, logical_imm32_operand:$Imm))], - NoItinerary>, - Sched<[WriteALU, ReadALU]>; - - def xxi : A64I_logicalimm<0b1, opc, (outs GPR64xsp:$Rd), - (ins GPR64:$Rn, logical_imm64_operand:$Imm), - !strconcat(asmop, "\t$Rd, $Rn, $Imm"), - [(set i64:$Rd, - (opnode i64:$Rn, logical_imm64_operand:$Imm))], - NoItinerary>, - Sched<[WriteALU, ReadALU]>; -} - -defm AND : A64I_logimmSizes<0b00, "and", and>; -defm ORR : A64I_logimmSizes<0b01, "orr", or>; -defm EOR : A64I_logimmSizes<0b10, "eor", xor>; - -let Defs = [NZCV] in { - def ANDSwwi : A64I_logicalimm<0b0, 0b11, (outs GPR32:$Rd), - (ins GPR32:$Rn, logical_imm32_operand:$Imm), - "ands\t$Rd, $Rn, $Imm", - [], NoItinerary>, - Sched<[WriteALU, ReadALU]>; - - def ANDSxxi : A64I_logicalimm<0b1, 0b11, (outs GPR64:$Rd), - (ins GPR64:$Rn, logical_imm64_operand:$Imm), - "ands\t$Rd, $Rn, $Imm", - [], NoItinerary>, - Sched<[WriteALU, ReadALU]>; -} - -def : InstAlias<"tst $Rn, $Imm", - (ANDSwwi WZR, GPR32:$Rn, logical_imm32_operand:$Imm)>; -def : InstAlias<"tst $Rn, $Imm", - (ANDSxxi XZR, GPR64:$Rn, logical_imm64_operand:$Imm)>; -// FIXME: these sometimes are canonical. -def : InstAlias<"mov $Rd, $Imm", - (ORRwwi GPR32wsp:$Rd, WZR, logical_imm32_mov_operand:$Imm), 0>; -def : InstAlias<"mov $Rd, $Imm", - (ORRxxi GPR64xsp:$Rd, XZR, logical_imm64_mov_operand:$Imm), 0>; - -//===----------------------------------------------------------------------===// -// Logical (shifted register) instructions -//===----------------------------------------------------------------------===// -// Contains: AND, BIC, ORR, ORN, EOR, EON, ANDS, BICS + aliases TST, MVN, MOV - -// Operand for optimizing (icmp (and LHS, RHS), 0, SomeCode). In theory "ANDS" -// behaves differently for unsigned comparisons, so we defensively only allow -// signed or n/a as the operand. In practice "unsigned greater than 0" is "not -// equal to 0" and LLVM gives us this. 
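// Illustrative example (added, not from the original file): with the PatLeaf
// below, a comparison of (and x, y) against zero under an equality or signed
// condition, e.g. (A64setcc (and i64:$Rn, i64:$Rm), 0, ne), can be selected to
// the flag-setting form "tst $Rn, $Rm" (an ANDS whose only result is NZCV),
// whereas an unsigned condition such as "hi" fails the predicate and is not
// matched.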
-def signed_cond : PatLeaf<(cond), [{ - return !isUnsignedIntSetCC(N->get()); -}]>; - - -// These instructions share their "shift" operands with add/sub (shifted -// register instructions). They are defined there. - -// N.b. the commutable parameter is just !N. It will be first against the wall -// when the revolution comes. -multiclass logical_shifts<string prefix, bit sf, bits<2> opc, - bit N, bit commutable, - string asmop, SDPatternOperator opfrag, ValueType ty, - RegisterClass GPR, list<Register> defs> { - let isCommutable = commutable, Defs = defs in { - def _lsl : A64I_logicalshift<sf, opc, 0b00, N, - (outs GPR:$Rd), - (ins GPR:$Rn, GPR:$Rm, - !cast<Operand>("lsl_operand_" # ty):$Imm6), - !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"), - [(set ty:$Rd, (opfrag ty:$Rn, (shl ty:$Rm, - !cast<Operand>("lsl_operand_" # ty):$Imm6)) - )], - NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; - - def _lsr : A64I_logicalshift<sf, opc, 0b01, N, - (outs GPR:$Rd), - (ins GPR:$Rn, GPR:$Rm, - !cast<Operand>("lsr_operand_" # ty):$Imm6), - !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"), - [(set ty:$Rd, (opfrag ty:$Rn, (srl ty:$Rm, - !cast<Operand>("lsr_operand_" # ty):$Imm6)) - )], - NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; - - def _asr : A64I_logicalshift<sf, opc, 0b10, N, - (outs GPR:$Rd), - (ins GPR:$Rn, GPR:$Rm, - !cast<Operand>("asr_operand_" # ty):$Imm6), - !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"), - [(set ty:$Rd, (opfrag ty:$Rn, (sra ty:$Rm, - !cast<Operand>("asr_operand_" # ty):$Imm6)) - )], - NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; - - def _ror : A64I_logicalshift<sf, opc, 0b11, N, - (outs GPR:$Rd), - (ins GPR:$Rn, GPR:$Rm, - !cast<Operand>("ror_operand_" # ty):$Imm6), - !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"), - [(set ty:$Rd, (opfrag ty:$Rn, (rotr ty:$Rm, - !cast<Operand>("ror_operand_" # ty):$Imm6)) - )], - NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; - } - - def _noshift - : InstAlias<!strconcat(asmop, " $Rd, $Rn, $Rm"), - (!cast<Instruction>(prefix # "_lsl") GPR:$Rd, GPR:$Rn, - GPR:$Rm, 0)>; - - def : Pat<(opfrag ty:$Rn, ty:$Rm), - (!cast<Instruction>(prefix # "_lsl") $Rn, $Rm, 0)>; -} - -multiclass logical_sizes<string prefix, bits<2> opc, bit N, bit commutable, - string asmop, SDPatternOperator opfrag, - list<Register> defs> { - defm xxx : logical_shifts<prefix # "xxx", 0b1, opc, N, - commutable, asmop, opfrag, i64, GPR64, defs>; - defm www : logical_shifts<prefix # "www", 0b0, opc, N, - commutable, asmop, opfrag, i32, GPR32, defs>; -} - - -defm AND : logical_sizes<"AND", 0b00, 0b0, 0b1, "and", and, []>; -defm ORR : logical_sizes<"ORR", 0b01, 0b0, 0b1, "orr", or, []>; -defm EOR : logical_sizes<"EOR", 0b10, 0b0, 0b1, "eor", xor, []>; -defm ANDS : logical_sizes<"ANDS", 0b11, 0b0, 0b1, "ands", - PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), - [{ (void)N; return false; }]>, - [NZCV]>; - -defm BIC : logical_sizes<"BIC", 0b00, 0b1, 0b0, "bic", - PatFrag<(ops node:$lhs, node:$rhs), - (and node:$lhs, (not node:$rhs))>, []>; -defm ORN : logical_sizes<"ORN", 0b01, 0b1, 0b0, "orn", - PatFrag<(ops node:$lhs, node:$rhs), - (or node:$lhs, (not node:$rhs))>, []>; -defm EON : logical_sizes<"EON", 0b10, 0b1, 0b0, "eon", - PatFrag<(ops node:$lhs, node:$rhs), - (xor node:$lhs, (not node:$rhs))>, []>; -defm BICS : logical_sizes<"BICS", 0b11, 0b1, 0b0, "bics", - PatFrag<(ops node:$lhs, node:$rhs), - (and node:$lhs, (not node:$rhs)), - [{ (void)N; return false; }]>, - [NZCV]>; - -multiclass tst_shifts<string prefix, bit sf, ValueType ty, RegisterClass 
GPR> { - let isCommutable = 1, Rd = 0b11111, Defs = [NZCV] in { - def _lsl : A64I_logicalshift<sf, 0b11, 0b00, 0b0, - (outs), - (ins GPR:$Rn, GPR:$Rm, - !cast<Operand>("lsl_operand_" # ty):$Imm6), - "tst\t$Rn, $Rm, $Imm6", - [(set NZCV, (A64setcc (and ty:$Rn, (shl ty:$Rm, - !cast<Operand>("lsl_operand_" # ty):$Imm6)), - 0, signed_cond))], - NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; - - - def _lsr : A64I_logicalshift<sf, 0b11, 0b01, 0b0, - (outs), - (ins GPR:$Rn, GPR:$Rm, - !cast<Operand>("lsr_operand_" # ty):$Imm6), - "tst\t$Rn, $Rm, $Imm6", - [(set NZCV, (A64setcc (and ty:$Rn, (srl ty:$Rm, - !cast<Operand>("lsr_operand_" # ty):$Imm6)), - 0, signed_cond))], - NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; - - def _asr : A64I_logicalshift<sf, 0b11, 0b10, 0b0, - (outs), - (ins GPR:$Rn, GPR:$Rm, - !cast<Operand>("asr_operand_" # ty):$Imm6), - "tst\t$Rn, $Rm, $Imm6", - [(set NZCV, (A64setcc (and ty:$Rn, (sra ty:$Rm, - !cast<Operand>("asr_operand_" # ty):$Imm6)), - 0, signed_cond))], - NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; - - def _ror : A64I_logicalshift<sf, 0b11, 0b11, 0b0, - (outs), - (ins GPR:$Rn, GPR:$Rm, - !cast<Operand>("ror_operand_" # ty):$Imm6), - "tst\t$Rn, $Rm, $Imm6", - [(set NZCV, (A64setcc (and ty:$Rn, (rotr ty:$Rm, - !cast<Operand>("ror_operand_" # ty):$Imm6)), - 0, signed_cond))], - NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; - } - - def _noshift : InstAlias<"tst $Rn, $Rm", - (!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>; - - def : Pat<(A64setcc (and ty:$Rn, ty:$Rm), 0, signed_cond), - (!cast<Instruction>(prefix # "_lsl") $Rn, $Rm, 0)>; -} - -defm TSTxx : tst_shifts<"TSTxx", 0b1, i64, GPR64>; -defm TSTww : tst_shifts<"TSTww", 0b0, i32, GPR32>; - - -multiclass mvn_shifts<string prefix, bit sf, ValueType ty, RegisterClass GPR> { - let isCommutable = 0, Rn = 0b11111 in { - def _lsl : A64I_logicalshift<sf, 0b01, 0b00, 0b1, - (outs GPR:$Rd), - (ins GPR:$Rm, - !cast<Operand>("lsl_operand_" # ty):$Imm6), - "mvn\t$Rd, $Rm, $Imm6", - [(set ty:$Rd, (not (shl ty:$Rm, - !cast<Operand>("lsl_operand_" # ty):$Imm6)))], - NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; - - - def _lsr : A64I_logicalshift<sf, 0b01, 0b01, 0b1, - (outs GPR:$Rd), - (ins GPR:$Rm, - !cast<Operand>("lsr_operand_" # ty):$Imm6), - "mvn\t$Rd, $Rm, $Imm6", - [(set ty:$Rd, (not (srl ty:$Rm, - !cast<Operand>("lsr_operand_" # ty):$Imm6)))], - NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; - - def _asr : A64I_logicalshift<sf, 0b01, 0b10, 0b1, - (outs GPR:$Rd), - (ins GPR:$Rm, - !cast<Operand>("asr_operand_" # ty):$Imm6), - "mvn\t$Rd, $Rm, $Imm6", - [(set ty:$Rd, (not (sra ty:$Rm, - !cast<Operand>("asr_operand_" # ty):$Imm6)))], - NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; - - def _ror : A64I_logicalshift<sf, 0b01, 0b11, 0b1, - (outs GPR:$Rd), - (ins GPR:$Rm, - !cast<Operand>("ror_operand_" # ty):$Imm6), - "mvn\t$Rd, $Rm, $Imm6", - [(set ty:$Rd, (not (rotr ty:$Rm, - !cast<Operand>("lsl_operand_" # ty):$Imm6)))], - NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; - } - - def _noshift : InstAlias<"mvn $Rn, $Rm", - (!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>; - - def : Pat<(not ty:$Rm), - (!cast<Instruction>(prefix # "_lsl") $Rm, 0)>; -} - -defm MVNxx : mvn_shifts<"MVNxx", 0b1, i64, GPR64>; -defm MVNww : mvn_shifts<"MVNww", 0b0, i32, GPR32>; - -def MOVxx :InstAlias<"mov $Rd, $Rm", (ORRxxx_lsl GPR64:$Rd, XZR, GPR64:$Rm, 0)>; -def MOVww :InstAlias<"mov $Rd, $Rm", (ORRwww_lsl GPR32:$Rd, WZR, GPR32:$Rm, 0)>; - 
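// Illustrative example (added, not from the original file): via the
// MOVxx/MOVww aliases above,
//   mov x0, x1
// is accepted and encoded as
//   orr x0, xzr, x1, lsl #0     (ORRxxx_lsl)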
-//===----------------------------------------------------------------------===// -// Move wide (immediate) instructions -//===----------------------------------------------------------------------===// -// Contains: MOVN, MOVZ, MOVK + MOV aliases - -// A wide variety of different relocations are needed for variants of these -// instructions, so it turns out that we need a different operand for all of -// them. -multiclass movw_operands<string prefix, string instname, int width> { - def _imm_asmoperand : AsmOperandClass { - let Name = instname # width # "Shifted" # shift; - let PredicateMethod = "is" # instname # width # "Imm"; - let RenderMethod = "addMoveWideImmOperands"; - let ParserMethod = "ParseImmWithLSLOperand"; - let DiagnosticType = "MOVWUImm16"; - } - - def _imm : Operand<i64> { - let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_imm_asmoperand"); - let PrintMethod = "printMoveWideImmOperand"; - let EncoderMethod = "getMoveWideImmOpValue"; - let DecoderMethod = "DecodeMoveWideImmOperand<" # width # ">"; - - let MIOperandInfo = (ops uimm16:$UImm16, imm:$Shift); - } -} - -defm movn32 : movw_operands<"movn32", "MOVN", 32>; -defm movn64 : movw_operands<"movn64", "MOVN", 64>; -defm movz32 : movw_operands<"movz32", "MOVZ", 32>; -defm movz64 : movw_operands<"movz64", "MOVZ", 64>; -defm movk32 : movw_operands<"movk32", "MOVK", 32>; -defm movk64 : movw_operands<"movk64", "MOVK", 64>; - -multiclass A64I_movwSizes<bits<2> opc, string asmop, dag ins32bit, - dag ins64bit> { - - def wii : A64I_movw<0b0, opc, (outs GPR32:$Rd), ins32bit, - !strconcat(asmop, "\t$Rd, $FullImm"), - [], NoItinerary>, - Sched<[WriteALU]> { - bits<18> FullImm; - let UImm16 = FullImm{15-0}; - let Shift = FullImm{17-16}; - } - - def xii : A64I_movw<0b1, opc, (outs GPR64:$Rd), ins64bit, - !strconcat(asmop, "\t$Rd, $FullImm"), - [], NoItinerary>, - Sched<[WriteALU]> { - bits<18> FullImm; - let UImm16 = FullImm{15-0}; - let Shift = FullImm{17-16}; - } -} - -let isMoveImm = 1, isReMaterializable = 1, - isAsCheapAsAMove = 1, hasSideEffects = 0 in { - defm MOVN : A64I_movwSizes<0b00, "movn", - (ins movn32_imm:$FullImm), - (ins movn64_imm:$FullImm)>; - - // Some relocations are able to convert between a MOVZ and a MOVN. If these - // are applied the instruction must be emitted with the corresponding bits as - // 0, which means a MOVZ needs to override that bit from the default. - let PostEncoderMethod = "fixMOVZ" in - defm MOVZ : A64I_movwSizes<0b10, "movz", - (ins movz32_imm:$FullImm), - (ins movz64_imm:$FullImm)>; -} - -let Constraints = "$src = $Rd", - SchedRW = [WriteALU, ReadALU] in -defm MOVK : A64I_movwSizes<0b11, "movk", - (ins GPR32:$src, movk32_imm:$FullImm), - (ins GPR64:$src, movk64_imm:$FullImm)>; - - -// And now the "MOV" aliases. These also need their own operands because what -// they accept is completely different to what the base instructions accept. 
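// Illustrative example (added, not from the original file): the intent of
// these aliases is that, for instance,
//   mov w0, #0x10000   is accepted as   movz w0, #1, lsl #16
//   mov x0, #-1        is accepted as   movn x0, #0
// while immediates that neither MOVZ nor MOVN can produce are left to the
// logical-immediate MOV alias defined earlier (or rejected).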
-multiclass movalias_operand<string prefix, string basename, - string immpredicate, int width> { - def _asmoperand : AsmOperandClass { - let Name = basename # width # "MovAlias"; - let PredicateMethod - = "isMoveWideMovAlias<" # width # ", A64Imms::" # immpredicate # ">"; - let RenderMethod - = "addMoveWideMovAliasOperands<" # width # ", " - # "A64Imms::" # immpredicate # ">"; - } - - def _movimm : Operand<i64> { - let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_asmoperand"); - - let MIOperandInfo = (ops uimm16:$UImm16, imm:$Shift); - } -} - -defm movz32 : movalias_operand<"movz32", "MOVZ", "isMOVZImm", 32>; -defm movz64 : movalias_operand<"movz64", "MOVZ", "isMOVZImm", 64>; -defm movn32 : movalias_operand<"movn32", "MOVN", "isOnlyMOVNImm", 32>; -defm movn64 : movalias_operand<"movn64", "MOVN", "isOnlyMOVNImm", 64>; - -// FIXME: these are officially canonical aliases, but TableGen is too limited to -// print them at the moment. I believe in this case an "AliasPredicate" method -// will need to be implemented. to allow it, as well as the more generally -// useful handling of non-register, non-constant operands. -class movalias<Instruction INST, RegisterClass GPR, Operand operand> - : InstAlias<"mov $Rd, $FullImm", (INST GPR:$Rd, operand:$FullImm), 0>; - -def : movalias<MOVZwii, GPR32, movz32_movimm>; -def : movalias<MOVZxii, GPR64, movz64_movimm>; -def : movalias<MOVNwii, GPR32, movn32_movimm>; -def : movalias<MOVNxii, GPR64, movn64_movimm>; - -def movw_addressref_g0 : ComplexPattern<i64, 2, "SelectMOVWAddressRef<0>">; -def movw_addressref_g1 : ComplexPattern<i64, 2, "SelectMOVWAddressRef<1>">; -def movw_addressref_g2 : ComplexPattern<i64, 2, "SelectMOVWAddressRef<2>">; -def movw_addressref_g3 : ComplexPattern<i64, 2, "SelectMOVWAddressRef<3>">; - -def : Pat<(A64WrapperLarge movw_addressref_g3:$G3, movw_addressref_g2:$G2, - movw_addressref_g1:$G1, movw_addressref_g0:$G0), - (MOVKxii (MOVKxii (MOVKxii (MOVZxii movw_addressref_g3:$G3), - movw_addressref_g2:$G2), - movw_addressref_g1:$G1), - movw_addressref_g0:$G0)>; - -//===----------------------------------------------------------------------===// -// PC-relative addressing instructions -//===----------------------------------------------------------------------===// -// Contains: ADR, ADRP - -def adr_label : Operand<i64> { - let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_adr_prel>"; - - // This label is a 21-bit offset from PC, unscaled - let PrintMethod = "printLabelOperand<21, 1>"; - let ParserMatchClass = label_asmoperand<21, 1>; - let OperandType = "OPERAND_PCREL"; -} - -def adrp_label_asmoperand : AsmOperandClass { - let Name = "AdrpLabel"; - let RenderMethod = "addLabelOperands<21, 4096>"; - let DiagnosticType = "Label"; -} - -def adrp_label : Operand<i64> { - let EncoderMethod = "getAdrpLabelOpValue"; - - // This label is a 21-bit offset from PC, scaled by the page-size: 4096. 
- let PrintMethod = "printLabelOperand<21, 4096>"; - let ParserMatchClass = adrp_label_asmoperand; - let OperandType = "OPERAND_PCREL"; -} - -let hasSideEffects = 0 in { - def ADRxi : A64I_PCADR<0b0, (outs GPR64:$Rd), (ins adr_label:$Label), - "adr\t$Rd, $Label", [], NoItinerary>, - Sched<[WriteALUs]>; - - def ADRPxi : A64I_PCADR<0b1, (outs GPR64:$Rd), (ins adrp_label:$Label), - "adrp\t$Rd, $Label", [], NoItinerary>, - Sched<[WriteALUs]>; -} - -//===----------------------------------------------------------------------===// -// System instructions -//===----------------------------------------------------------------------===// -// Contains: HINT, CLREX, DSB, DMB, ISB, MSR, SYS, SYSL, MRS -// + aliases IC, DC, AT, TLBI, NOP, YIELD, WFE, WFI, SEV, SEVL - -// Op1 and Op2 fields are sometimes simple 3-bit unsigned immediate values. -def uimm3_asmoperand : AsmOperandClass { - let Name = "UImm3"; - let PredicateMethod = "isUImm<3>"; - let RenderMethod = "addImmOperands"; - let DiagnosticType = "UImm3"; -} - -def uimm3 : Operand<i32> { - let ParserMatchClass = uimm3_asmoperand; -} - -// The HINT alias can accept a simple unsigned 7-bit immediate. -def uimm7_asmoperand : AsmOperandClass { - let Name = "UImm7"; - let PredicateMethod = "isUImm<7>"; - let RenderMethod = "addImmOperands"; - let DiagnosticType = "UImm7"; -} - -def uimm7 : Operand<i32> { - let ParserMatchClass = uimm7_asmoperand; -} - -// Multiclass namedimm is defined with the prefetch operands. Most of these fit -// into the NamedImmMapper scheme well: they either accept a named operand or -// any immediate under a particular value (which may be 0, implying no immediate -// is allowed). -defm dbarrier : namedimm<"dbarrier", "A64DB::DBarrierMapper">; -defm isb : namedimm<"isb", "A64ISB::ISBMapper">; -defm ic : namedimm<"ic", "A64IC::ICMapper">; -defm dc : namedimm<"dc", "A64DC::DCMapper">; -defm at : namedimm<"at", "A64AT::ATMapper">; -defm tlbi : namedimm<"tlbi", "A64TLBI::TLBIMapper">; - -// However, MRS and MSR are more complicated for a few reasons: -// * There are ~1000 generic names S3_<op1>_<CRn>_<CRm>_<Op2> which have an -// implementation-defined effect -// * Most registers are shared, but some are read-only or write-only. -// * There is a variant of MSR which accepts the same register name (SPSel), -// but which would have a different encoding. - -// In principle these could be resolved in with more complicated subclasses of -// NamedImmMapper, however that imposes an overhead on other "named -// immediates". Both in concrete terms with virtual tables and in unnecessary -// abstraction. - -// The solution adopted here is to take the MRS/MSR Mappers out of the usual -// hierarchy (they're not derived from NamedImmMapper) and to add logic for -// their special situation. -def mrs_asmoperand : AsmOperandClass { - let Name = "MRS"; - let ParserMethod = "ParseSysRegOperand"; - let DiagnosticType = "MRS"; -} - -def mrs_op : Operand<i32> { - let ParserMatchClass = mrs_asmoperand; - let PrintMethod = "printMRSOperand"; - let DecoderMethod = "DecodeMRSOperand"; -} - -def msr_asmoperand : AsmOperandClass { - let Name = "MSRWithReg"; - - // Note that SPSel is valid for both this and the pstate operands, but with - // different immediate encodings. This is why these operands provide a string - // AArch64Operand rather than an immediate. The overlap is small enough that - // it could be resolved with hackery now, but who can say in future? 
- let ParserMethod = "ParseSysRegOperand"; - let DiagnosticType = "MSR"; -} - -def msr_op : Operand<i32> { - let ParserMatchClass = msr_asmoperand; - let PrintMethod = "printMSROperand"; - let DecoderMethod = "DecodeMSROperand"; -} - -def pstate_asmoperand : AsmOperandClass { - let Name = "MSRPState"; - // See comment above about parser. - let ParserMethod = "ParseSysRegOperand"; - let DiagnosticType = "MSR"; -} - -def pstate_op : Operand<i32> { - let ParserMatchClass = pstate_asmoperand; - let PrintMethod = "printNamedImmOperand<A64PState::PStateMapper>"; - let DecoderMethod = "DecodeNamedImmOperand<A64PState::PStateMapper>"; -} - -// When <CRn> is specified, an assembler should accept something like "C4", not -// the usual "#4" immediate. -def CRx_asmoperand : AsmOperandClass { - let Name = "CRx"; - let PredicateMethod = "isUImm<4>"; - let RenderMethod = "addImmOperands"; - let ParserMethod = "ParseCRxOperand"; - // Diagnostics are handled in all cases by ParseCRxOperand. -} - -def CRx : Operand<i32> { - let ParserMatchClass = CRx_asmoperand; - let PrintMethod = "printCRxOperand"; -} - - -// Finally, we can start defining the instructions. - -// HINT is straightforward, with a few aliases. -def HINTi : A64I_system<0b0, (outs), (ins uimm7:$UImm7), "hint\t$UImm7", - [], NoItinerary> { - bits<7> UImm7; - let CRm = UImm7{6-3}; - let Op2 = UImm7{2-0}; - - let Op0 = 0b00; - let Op1 = 0b011; - let CRn = 0b0010; - let Rt = 0b11111; -} - -def : InstAlias<"nop", (HINTi 0)>; -def : InstAlias<"yield", (HINTi 1)>; -def : InstAlias<"wfe", (HINTi 2)>; -def : InstAlias<"wfi", (HINTi 3)>; -def : InstAlias<"sev", (HINTi 4)>; -def : InstAlias<"sevl", (HINTi 5)>; - -// Quite a few instructions then follow a similar pattern of fixing common -// fields in the bitpattern, we'll define a helper-class for them. -class simple_sys<bits<2> op0, bits<3> op1, bits<4> crn, bits<3> op2, - Operand operand, string asmop> - : A64I_system<0b0, (outs), (ins operand:$CRm), !strconcat(asmop, "\t$CRm"), - [], NoItinerary> { - let Op0 = op0; - let Op1 = op1; - let CRn = crn; - let Op2 = op2; - let Rt = 0b11111; -} - - -def CLREXi : simple_sys<0b00, 0b011, 0b0011, 0b010, uimm4, "clrex">; -def DSBi : simple_sys<0b00, 0b011, 0b0011, 0b100, dbarrier_op, "dsb">; -def DMBi : simple_sys<0b00, 0b011, 0b0011, 0b101, dbarrier_op, "dmb">; -def ISBi : simple_sys<0b00, 0b011, 0b0011, 0b110, isb_op, "isb">; - -def : InstAlias<"clrex", (CLREXi 0b1111)>; -def : InstAlias<"isb", (ISBi 0b1111)>; - -// (DMBi 0xb) is a "DMB ISH" instruciton, appropriate for Linux SMP -// configurations at least. -def : Pat<(atomic_fence imm, imm), (DMBi 0xb)>; - -// Any SYS bitpattern can be represented with a complex and opaque "SYS" -// instruction. -def SYSiccix : A64I_system<0b0, (outs), - (ins uimm3:$Op1, CRx:$CRn, CRx:$CRm, - uimm3:$Op2, GPR64:$Rt), - "sys\t$Op1, $CRn, $CRm, $Op2, $Rt", - [], NoItinerary> { - let Op0 = 0b01; -} - -// You can skip the Xt argument whether it makes sense or not for the generic -// SYS instruction. 
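// Illustrative example (added, not from the original file): with the alias
// below,
//   sys #0, c7, c5, #0
// is accepted as if it had been written "sys #0, c7, c5, #0, xzr", i.e. with
// the Rt field encoded as 0b11111.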
-def : InstAlias<"sys $Op1, $CRn, $CRm, $Op2", - (SYSiccix uimm3:$Op1, CRx:$CRn, CRx:$CRm, uimm3:$Op2, XZR)>; - - -// But many have aliases, which obviously don't fit into -class SYSalias<dag ins, string asmstring> - : A64I_system<0b0, (outs), ins, asmstring, [], NoItinerary> { - let isAsmParserOnly = 1; - - bits<14> SysOp; - let Op0 = 0b01; - let Op1 = SysOp{13-11}; - let CRn = SysOp{10-7}; - let CRm = SysOp{6-3}; - let Op2 = SysOp{2-0}; -} - -def ICix : SYSalias<(ins ic_op:$SysOp, GPR64:$Rt), "ic\t$SysOp, $Rt">; - -def ICi : SYSalias<(ins ic_op:$SysOp), "ic\t$SysOp"> { - let Rt = 0b11111; -} - -def DCix : SYSalias<(ins dc_op:$SysOp, GPR64:$Rt), "dc\t$SysOp, $Rt">; -def ATix : SYSalias<(ins at_op:$SysOp, GPR64:$Rt), "at\t$SysOp, $Rt">; - -def TLBIix : SYSalias<(ins tlbi_op:$SysOp, GPR64:$Rt), "tlbi\t$SysOp, $Rt">; - -def TLBIi : SYSalias<(ins tlbi_op:$SysOp), "tlbi\t$SysOp"> { - let Rt = 0b11111; -} - - -def SYSLxicci : A64I_system<0b1, (outs GPR64:$Rt), - (ins uimm3:$Op1, CRx:$CRn, CRx:$CRm, uimm3:$Op2), - "sysl\t$Rt, $Op1, $CRn, $CRm, $Op2", - [], NoItinerary> { - let Op0 = 0b01; -} - -// The instructions themselves are rather simple for MSR and MRS. -def MSRix : A64I_system<0b0, (outs), (ins msr_op:$SysReg, GPR64:$Rt), - "msr\t$SysReg, $Rt", [], NoItinerary> { - bits<16> SysReg; - let Op0 = SysReg{15-14}; - let Op1 = SysReg{13-11}; - let CRn = SysReg{10-7}; - let CRm = SysReg{6-3}; - let Op2 = SysReg{2-0}; -} - -def MRSxi : A64I_system<0b1, (outs GPR64:$Rt), (ins mrs_op:$SysReg), - "mrs\t$Rt, $SysReg", [], NoItinerary> { - bits<16> SysReg; - let Op0 = SysReg{15-14}; - let Op1 = SysReg{13-11}; - let CRn = SysReg{10-7}; - let CRm = SysReg{6-3}; - let Op2 = SysReg{2-0}; -} - -def MSRii : A64I_system<0b0, (outs), (ins pstate_op:$PState, uimm4:$CRm), - "msr\t$PState, $CRm", [], NoItinerary> { - bits<6> PState; - - let Op0 = 0b00; - let Op1 = PState{5-3}; - let CRn = 0b0100; - let Op2 = PState{2-0}; - let Rt = 0b11111; -} - -//===----------------------------------------------------------------------===// -// Test & branch (immediate) instructions -//===----------------------------------------------------------------------===// -// Contains: TBZ, TBNZ - -// The bit to test is a simple unsigned 6-bit immediate in the X-register -// versions. -def uimm6 : Operand<i64> { - let ParserMatchClass = uimm6_asmoperand; -} - -def label_wid14_scal4_asmoperand : label_asmoperand<14, 4>; - -def tbimm_target : Operand<OtherVT> { - let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_tstbr>"; - - // This label is a 14-bit offset from PC, scaled by the instruction-width: 4. - let PrintMethod = "printLabelOperand<14, 4>"; - let ParserMatchClass = label_wid14_scal4_asmoperand; - - let OperandType = "OPERAND_PCREL"; -} - -def A64eq : ImmLeaf<i32, [{ return Imm == A64CC::EQ; }]>; -def A64ne : ImmLeaf<i32, [{ return Imm == A64CC::NE; }]>; - -// These instructions correspond to patterns involving "and" with a power of -// two, which we need to be able to select. 
-def tstb64_pat : ComplexPattern<i64, 1, "SelectTSTBOperand<64>">; -def tstb32_pat : ComplexPattern<i32, 1, "SelectTSTBOperand<32>">; - -let isBranch = 1, isTerminator = 1 in { - def TBZxii : A64I_TBimm<0b0, (outs), - (ins GPR64:$Rt, uimm6:$Imm, tbimm_target:$Label), - "tbz\t$Rt, $Imm, $Label", - [(A64br_cc (A64cmp (and i64:$Rt, tstb64_pat:$Imm), 0), - A64eq, bb:$Label)], - NoItinerary>, - Sched<[WriteBr]>; - - def TBNZxii : A64I_TBimm<0b1, (outs), - (ins GPR64:$Rt, uimm6:$Imm, tbimm_target:$Label), - "tbnz\t$Rt, $Imm, $Label", - [(A64br_cc (A64cmp (and i64:$Rt, tstb64_pat:$Imm), 0), - A64ne, bb:$Label)], - NoItinerary>, - Sched<[WriteBr]>; - - - // Note, these instructions overlap with the above 64-bit patterns. This is - // intentional, "tbz x3, #1, somewhere" and "tbz w3, #1, somewhere" would both - // do the same thing and are both permitted assembly. They also both have - // sensible DAG patterns. - def TBZwii : A64I_TBimm<0b0, (outs), - (ins GPR32:$Rt, uimm5:$Imm, tbimm_target:$Label), - "tbz\t$Rt, $Imm, $Label", - [(A64br_cc (A64cmp (and i32:$Rt, tstb32_pat:$Imm), 0), - A64eq, bb:$Label)], - NoItinerary>, - Sched<[WriteBr]> { - let Imm{5} = 0b0; - } - - def TBNZwii : A64I_TBimm<0b1, (outs), - (ins GPR32:$Rt, uimm5:$Imm, tbimm_target:$Label), - "tbnz\t$Rt, $Imm, $Label", - [(A64br_cc (A64cmp (and i32:$Rt, tstb32_pat:$Imm), 0), - A64ne, bb:$Label)], - NoItinerary>, - Sched<[WriteBr]> { - let Imm{5} = 0b0; - } -} - -//===----------------------------------------------------------------------===// -// Unconditional branch (immediate) instructions -//===----------------------------------------------------------------------===// -// Contains: B, BL - -def label_wid26_scal4_asmoperand : label_asmoperand<26, 4>; - -def bimm_target : Operand<OtherVT> { - let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_uncondbr>"; - - // This label is a 26-bit offset from PC, scaled by the instruction-width: 4. - let PrintMethod = "printLabelOperand<26, 4>"; - let ParserMatchClass = label_wid26_scal4_asmoperand; - - let OperandType = "OPERAND_PCREL"; -} - -def blimm_target : Operand<i64> { - let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_call>"; - - // This label is a 26-bit offset from PC, scaled by the instruction-width: 4. - let PrintMethod = "printLabelOperand<26, 4>"; - let ParserMatchClass = label_wid26_scal4_asmoperand; - - let OperandType = "OPERAND_PCREL"; -} - -class A64I_BimmImpl<bit op, string asmop, list<dag> patterns, Operand lbl_type> - : A64I_Bimm<op, (outs), (ins lbl_type:$Label), - !strconcat(asmop, "\t$Label"), patterns, - NoItinerary>, - Sched<[WriteBr]>; - -let isBranch = 1 in { - def Bimm : A64I_BimmImpl<0b0, "b", [(br bb:$Label)], bimm_target> { - let isTerminator = 1; - let isBarrier = 1; - } - - let SchedRW = [WriteBrL] in { - def BLimm : A64I_BimmImpl<0b1, "bl", - [(AArch64Call tglobaladdr:$Label)], blimm_target> { - let isCall = 1; - let Defs = [X30]; - } - } -} - -def : Pat<(AArch64Call texternalsym:$Label), (BLimm texternalsym:$Label)>; - -//===----------------------------------------------------------------------===// -// Unconditional branch (register) instructions -//===----------------------------------------------------------------------===// -// Contains: BR, BLR, RET, ERET, DRP. - -// Most of the notional opcode fields in the A64I_Breg format are fixed in A64 -// at the moment. 
-class A64I_BregImpl<bits<4> opc, - dag outs, dag ins, string asmstr, list<dag> patterns, - InstrItinClass itin = NoItinerary> - : A64I_Breg<opc, 0b11111, 0b000000, 0b00000, - outs, ins, asmstr, patterns, itin>, - Sched<[WriteBr]> { - let isBranch = 1; - let isIndirectBranch = 1; -} - -// Note that these are not marked isCall or isReturn because as far as LLVM is -// concerned they're not. "ret" is just another jump unless it has been selected -// by LLVM as the function's return. - -let isBranch = 1 in { - def BRx : A64I_BregImpl<0b0000,(outs), (ins GPR64:$Rn), - "br\t$Rn", [(brind i64:$Rn)]> { - let isBarrier = 1; - let isTerminator = 1; - } - - let SchedRW = [WriteBrL] in { - def BLRx : A64I_BregImpl<0b0001, (outs), (ins GPR64:$Rn), - "blr\t$Rn", [(AArch64Call i64:$Rn)]> { - let isBarrier = 0; - let isCall = 1; - let Defs = [X30]; - } - } - - def RETx : A64I_BregImpl<0b0010, (outs), (ins GPR64:$Rn), - "ret\t$Rn", []> { - let isBarrier = 1; - let isTerminator = 1; - let isReturn = 1; - } - - // Create a separate pseudo-instruction for codegen to use so that we don't - // flag x30 as used in every function. It'll be restored before the RET by the - // epilogue if it's legitimately used. - def RET : A64PseudoExpand<(outs), (ins), [(A64ret)], (RETx (ops X30))> { - let isTerminator = 1; - let isBarrier = 1; - let isReturn = 1; - } - - def ERET : A64I_BregImpl<0b0100, (outs), (ins), "eret", []> { - let Rn = 0b11111; - let isBarrier = 1; - let isTerminator = 1; - let isReturn = 1; - } - - def DRPS : A64I_BregImpl<0b0101, (outs), (ins), "drps", []> { - let Rn = 0b11111; - let isBarrier = 1; - } -} - -def RETAlias : InstAlias<"ret", (RETx X30)>; - - -//===----------------------------------------------------------------------===// -// Address generation patterns -//===----------------------------------------------------------------------===// - -// Primary method of address generation for the small/absolute memory model is -// an ADRP/ADR pair: -// ADRP x0, some_variable -// ADD x0, x0, #:lo12:some_variable -// -// The load/store elision of the ADD is accomplished when selecting -// addressing-modes. This just mops up the cases where that doesn't work and we -// really need an address in some register. - -// This wrapper applies a LO12 modifier to the address. Otherwise we could just -// use the same address. 
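To make that split concrete, here is a minimal standalone sketch (plain C++ arithmetic with a made-up address, not backend code) of what the ADRP/ADD pair used by the patterns below computes: ADRP materialises the 4KiB-aligned page of the symbol, and the #:lo12: addend on the ADD restores the low twelve bits.

#include <cassert>
#include <cstdint>

int main() {
  uint64_t Target = 0x123456789abcULL;            // hypothetical symbol address
  uint64_t PageBase = Target & ~uint64_t(0xfff);  // what ADRP leaves in the register
  uint64_t Lo12 = Target & uint64_t(0xfff);       // the #:lo12: addend on the ADD
  assert(PageBase + Lo12 == Target);
  return 0;
}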
- -class ADRP_ADD<SDNode Wrapper, SDNode addrop> - : Pat<(Wrapper addrop:$Hi, addrop:$Lo12, (i32 imm)), - (ADDxxi_lsl0_s (ADRPxi addrop:$Hi), addrop:$Lo12)>; - -def : ADRP_ADD<A64WrapperSmall, tblockaddress>; -def : ADRP_ADD<A64WrapperSmall, texternalsym>; -def : ADRP_ADD<A64WrapperSmall, tglobaladdr>; -def : ADRP_ADD<A64WrapperSmall, tglobaltlsaddr>; -def : ADRP_ADD<A64WrapperSmall, tjumptable>; -def : ADRP_ADD<A64WrapperSmall, tconstpool>; - -//===----------------------------------------------------------------------===// -// GOT access patterns -//===----------------------------------------------------------------------===// - -class GOTLoadSmall<SDNode addrfrag> - : Pat<(A64GOTLoad (A64WrapperSmall addrfrag:$Hi, addrfrag:$Lo12, 8)), - (LS64_LDR (ADRPxi addrfrag:$Hi), addrfrag:$Lo12)>; - -def : GOTLoadSmall<texternalsym>; -def : GOTLoadSmall<tglobaladdr>; -def : GOTLoadSmall<tglobaltlsaddr>; - -//===----------------------------------------------------------------------===// -// Tail call handling -//===----------------------------------------------------------------------===// - -let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [XSP] in { - def TC_RETURNdi - : PseudoInst<(outs), (ins i64imm:$dst, i32imm:$FPDiff), - [(AArch64tcret tglobaladdr:$dst, (i32 timm:$FPDiff))]>; - - def TC_RETURNxi - : PseudoInst<(outs), (ins tcGPR64:$dst, i32imm:$FPDiff), - [(AArch64tcret i64:$dst, (i32 timm:$FPDiff))]>; -} - -let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, - Uses = [XSP] in { - def TAIL_Bimm : A64PseudoExpand<(outs), (ins bimm_target:$Label), [], - (Bimm bimm_target:$Label)>; - - def TAIL_BRx : A64PseudoExpand<(outs), (ins tcGPR64:$Rd), [], - (BRx GPR64:$Rd)>; -} - - -def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)), - (TC_RETURNdi texternalsym:$dst, imm:$FPDiff)>; - -//===----------------------------------------------------------------------===// -// Thread local storage -//===----------------------------------------------------------------------===// - -// This is a pseudo-instruction representing the ".tlsdesccall" directive in -// assembly. Its effect is to insert an R_AARCH64_TLSDESC_CALL relocation at the -// current location. It should always be immediately followed by a BLR -// instruction, and is intended solely for relaxation by the linker. - -def : Pat<(A64threadpointer), (MRSxi 0xde82)>; - -def TLSDESCCALL : PseudoInst<(outs), (ins i64imm:$Lbl), []> { - let hasSideEffects = 1; -} - -def TLSDESC_BLRx : PseudoInst<(outs), (ins GPR64:$Rn, i64imm:$Var), - [(A64tlsdesc_blr i64:$Rn, tglobaltlsaddr:$Var)]> { - let isCall = 1; - let Defs = [X30]; -} - -def : Pat<(A64tlsdesc_blr i64:$Rn, texternalsym:$Var), - (TLSDESC_BLRx $Rn, texternalsym:$Var)>; - -//===----------------------------------------------------------------------===// -// Bitfield patterns -//===----------------------------------------------------------------------===// - -def bfi32_lsb_to_immr : SDNodeXForm<imm, [{ - return CurDAG->getTargetConstant((32 - N->getZExtValue()) % 32, MVT::i64); -}]>; - -def bfi64_lsb_to_immr : SDNodeXForm<imm, [{ - return CurDAG->getTargetConstant((64 - N->getZExtValue()) % 64, MVT::i64); -}]>; - -def bfi_width_to_imms : SDNodeXForm<imm, [{ - return CurDAG->getTargetConstant(N->getZExtValue() - 1, MVT::i64); -}]>; - - -// The simpler patterns deal with cases where no AND mask is actually needed -// (either all bits are used or the low 32 bits are used). 
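For reference, a worked example (standalone C++ mirroring the fragments embedded in the SDNodeXForms above; the concrete numbers are only illustrative) of how an (lsb, width) pair becomes the ImmR/ImmS operands used by the BFI patterns that follow.

#include <cassert>
#include <cstdint>

int main() {
  // Inserting a 4-bit field at bit 8 of a 32-bit value:
  uint64_t Lsb = 8, Width = 4;
  uint64_t ImmR = (32 - Lsb) % 32;  // bfi32_lsb_to_immr -> 24
  uint64_t ImmS = Width - 1;        // bfi_width_to_imms -> 3
  assert(ImmR == 24 && ImmS == 3);
  return 0;
}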
-let AddedComplexity = 10 in { - -def : Pat<(A64Bfi i64:$src, i64:$Rn, imm:$ImmR, imm:$ImmS), - (BFIxxii $src, $Rn, - (bfi64_lsb_to_immr (i64 imm:$ImmR)), - (bfi_width_to_imms (i64 imm:$ImmS)))>; - -def : Pat<(A64Bfi i32:$src, i32:$Rn, imm:$ImmR, imm:$ImmS), - (BFIwwii $src, $Rn, - (bfi32_lsb_to_immr (i64 imm:$ImmR)), - (bfi_width_to_imms (i64 imm:$ImmS)))>; - - -def : Pat<(and (A64Bfi i64:$src, i64:$Rn, imm:$ImmR, imm:$ImmS), - (i64 4294967295)), - (SUBREG_TO_REG (i64 0), - (BFIwwii (EXTRACT_SUBREG $src, sub_32), - (EXTRACT_SUBREG $Rn, sub_32), - (bfi32_lsb_to_immr (i64 imm:$ImmR)), - (bfi_width_to_imms (i64 imm:$ImmS))), - sub_32)>; - -} - -//===----------------------------------------------------------------------===// -// Miscellaneous patterns -//===----------------------------------------------------------------------===// - -// Truncation from 64 to 32-bits just involves renaming your register. -def : Pat<(i32 (trunc i64:$val)), (EXTRACT_SUBREG $val, sub_32)>; - -// Similarly, extension where we don't care about the high bits is -// just a rename. -def : Pat<(i64 (anyext i32:$val)), - (INSERT_SUBREG (IMPLICIT_DEF), $val, sub_32)>; - -// SELECT instructions providing f128 types need to be handled by a -// pseudo-instruction since the eventual code will need to introduce basic -// blocks and control flow. -def F128CSEL : PseudoInst<(outs FPR128:$Rd), - (ins FPR128:$Rn, FPR128:$Rm, cond_code_op:$Cond), - [(set f128:$Rd, (simple_select f128:$Rn, f128:$Rm))]> { - let Uses = [NZCV]; - let usesCustomInserter = 1; -} - -//===----------------------------------------------------------------------===// -// Load/store patterns -//===----------------------------------------------------------------------===// - -// There are lots of patterns here, because we need to allow at least three -// parameters to vary independently. -// 1. Instruction: "ldrb w9, [sp]", "ldrh w9, [sp]", ... -// 2. LLVM source: zextloadi8, anyextloadi8, ... -// 3. Address-generation: A64Wrapper, (add BASE, OFFSET), ... -// -// The biggest problem turns out to be the address-generation variable. At the -// point of instantiation we need to produce two DAGs, one for the pattern and -// one for the instruction. Doing this at the lowest level of classes doesn't -// work. -// -// Consider the simple uimm12 addressing mode, and the desire to match both (add -// GPR64xsp:$Rn, uimm12:$Offset) and GPR64xsp:$Rn, particularly on the -// instruction side. We'd need to insert either "GPR64xsp" and "uimm12" or -// "GPR64xsp" and "0" into an unknown dag. !subst is not capable of this -// operation, and PatFrags are for selection not output. -// -// As a result, the address-generation patterns are the final -// instantiations. However, we do still need to vary the operand for the address -// further down (At the point we're deciding A64WrapperSmall, we don't know -// the memory width of the operation). - -//===------------------------------ -// 1. Basic infrastructural defs -//===------------------------------ - -// First, some simple classes for !foreach and !subst to use: -class Decls { - dag pattern; -} - -def decls : Decls; -def ALIGN; -def INST; -def OFFSET; -def SHIFT; - -// You can't use !subst on an actual immediate, but you *can* use it on an -// operand record that happens to match a single immediate. So we do. 
-def imm_eq0 : ImmLeaf<i64, [{ return Imm == 0; }]>; -def imm_eq1 : ImmLeaf<i64, [{ return Imm == 1; }]>; -def imm_eq2 : ImmLeaf<i64, [{ return Imm == 2; }]>; -def imm_eq3 : ImmLeaf<i64, [{ return Imm == 3; }]>; -def imm_eq4 : ImmLeaf<i64, [{ return Imm == 4; }]>; - -// If the low bits of a pointer are known to be 0 then an "or" is just as good -// as addition for computing an offset. This fragment forwards that check for -// TableGen's use. -def add_like_or : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs), -[{ - return CurDAG->isBaseWithConstantOffset(SDValue(N, 0)); -}]>; - -// Load/store (unsigned immediate) operations with relocations against global -// symbols (for lo12) are only valid if those symbols have correct alignment -// (since the immediate offset is divided by the access scale, it can't have a -// remainder). -// -// The guaranteed alignment is provided as part of the WrapperSmall -// operation, and checked against one of these. -def any_align : ImmLeaf<i32, [{ (void)Imm; return true; }]>; -def min_align2 : ImmLeaf<i32, [{ return Imm >= 2; }]>; -def min_align4 : ImmLeaf<i32, [{ return Imm >= 4; }]>; -def min_align8 : ImmLeaf<i32, [{ return Imm >= 8; }]>; -def min_align16 : ImmLeaf<i32, [{ return Imm >= 16; }]>; - -// "Normal" load/store instructions can be used on atomic operations, provided -// the ordering parameter is at most "monotonic". Anything above that needs -// special handling with acquire/release instructions. -class simple_load<PatFrag base> - : PatFrag<(ops node:$ptr), (base node:$ptr), [{ - return cast<AtomicSDNode>(N)->getOrdering() <= Monotonic; -}]>; - -def atomic_load_simple_i8 : simple_load<atomic_load_8>; -def atomic_load_simple_i16 : simple_load<atomic_load_16>; -def atomic_load_simple_i32 : simple_load<atomic_load_32>; -def atomic_load_simple_i64 : simple_load<atomic_load_64>; - -class simple_store<PatFrag base> - : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{ - return cast<AtomicSDNode>(N)->getOrdering() <= Monotonic; -}]>; - -def atomic_store_simple_i8 : simple_store<atomic_store_8>; -def atomic_store_simple_i16 : simple_store<atomic_store_16>; -def atomic_store_simple_i32 : simple_store<atomic_store_32>; -def atomic_store_simple_i64 : simple_store<atomic_store_64>; - -//===------------------------------ -// 2. UImm12 and SImm9 -//===------------------------------ - -// These instructions have two operands providing the address so they can be -// treated similarly for most purposes. - -//===------------------------------ -// 2.1 Base patterns covering extend/truncate semantics -//===------------------------------ - -// Atomic patterns can be shared between integer operations of all sizes, a -// quick multiclass here allows reuse. -multiclass ls_atomic_pats<Instruction LOAD, Instruction STORE, dag Base, - dag Offset, dag address, ValueType transty, - ValueType sty> { - def : Pat<(!cast<PatFrag>("atomic_load_simple_" # sty) address), - (LOAD Base, Offset)>; - - def : Pat<(!cast<PatFrag>("atomic_store_simple_" # sty) address, transty:$Rt), - (STORE $Rt, Base, Offset)>; -} - -// Instructions accessing a memory chunk smaller than a register (or, in a -// pinch, the same size) have a characteristic set of patterns they want to -// match: extending loads and truncating stores. This class deals with the -// sign-neutral version of those patterns. -// -// It will be instantiated across multiple addressing-modes. 
-multiclass ls_small_pats<Instruction LOAD, Instruction STORE, - dag Base, dag Offset, - dag address, ValueType sty> - : ls_atomic_pats<LOAD, STORE, Base, Offset, address, i32, sty> { - def : Pat<(!cast<SDNode>(zextload # sty) address), (LOAD Base, Offset)>; - - def : Pat<(!cast<SDNode>(extload # sty) address), (LOAD Base, Offset)>; - - // For zero-extension to 64-bits we have to tell LLVM that the whole 64-bit - // register was actually set. - def : Pat<(i64 (!cast<SDNode>(zextload # sty) address)), - (SUBREG_TO_REG (i64 0), (LOAD Base, Offset), sub_32)>; - - def : Pat<(i64 (!cast<SDNode>(extload # sty) address)), - (SUBREG_TO_REG (i64 0), (LOAD Base, Offset), sub_32)>; - - def : Pat<(!cast<SDNode>(truncstore # sty) i32:$Rt, address), - (STORE $Rt, Base, Offset)>; - - // For truncating store from 64-bits, we have to manually tell LLVM to - // ignore the high bits of the x register. - def : Pat<(!cast<SDNode>(truncstore # sty) i64:$Rt, address), - (STORE (EXTRACT_SUBREG $Rt, sub_32), Base, Offset)>; -} - -// Next come patterns for sign-extending loads. -multiclass load_signed_pats<string T, string U, dag Base, dag Offset, - dag address, ValueType sty> { - def : Pat<(i32 (!cast<SDNode>("sextload" # sty) address)), - (!cast<Instruction>("LDRS" # T # "w" # U) Base, Offset)>; - - def : Pat<(i64 (!cast<SDNode>("sextload" # sty) address)), - (!cast<Instruction>("LDRS" # T # "x" # U) Base, Offset)>; - -} - -// and finally "natural-width" loads and stores come next. -multiclass ls_neutral_pats<Instruction LOAD, Instruction STORE, dag Base, - dag Offset, dag address, ValueType sty> { - def : Pat<(sty (load address)), (LOAD Base, Offset)>; - def : Pat<(store sty:$Rt, address), (STORE $Rt, Base, Offset)>; -} - -// Integer operations also get atomic instructions to select for. -multiclass ls_int_neutral_pats<Instruction LOAD, Instruction STORE, dag Base, - dag Offset, dag address, ValueType sty> - : ls_neutral_pats<LOAD, STORE, Base, Offset, address, sty>, - ls_atomic_pats<LOAD, STORE, Base, Offset, address, sty, sty>; - -//===------------------------------ -// 2.2. 
Addressing-mode instantiations -//===------------------------------ - -multiclass uimm12_pats<dag address, dag Base, dag Offset> { - defm : ls_small_pats<LS8_LDR, LS8_STR, Base, - !foreach(decls.pattern, Offset, - !subst(OFFSET, byte_uimm12, decls.pattern)), - !foreach(decls.pattern, address, - !subst(OFFSET, byte_uimm12, - !subst(ALIGN, any_align, decls.pattern))), - i8>; - defm : ls_small_pats<LS16_LDR, LS16_STR, Base, - !foreach(decls.pattern, Offset, - !subst(OFFSET, hword_uimm12, decls.pattern)), - !foreach(decls.pattern, address, - !subst(OFFSET, hword_uimm12, - !subst(ALIGN, min_align2, decls.pattern))), - i16>; - defm : ls_small_pats<LS32_LDR, LS32_STR, Base, - !foreach(decls.pattern, Offset, - !subst(OFFSET, word_uimm12, decls.pattern)), - !foreach(decls.pattern, address, - !subst(OFFSET, word_uimm12, - !subst(ALIGN, min_align4, decls.pattern))), - i32>; - - defm : ls_int_neutral_pats<LS32_LDR, LS32_STR, Base, - !foreach(decls.pattern, Offset, - !subst(OFFSET, word_uimm12, decls.pattern)), - !foreach(decls.pattern, address, - !subst(OFFSET, word_uimm12, - !subst(ALIGN, min_align4, decls.pattern))), - i32>; - - defm : ls_int_neutral_pats<LS64_LDR, LS64_STR, Base, - !foreach(decls.pattern, Offset, - !subst(OFFSET, dword_uimm12, decls.pattern)), - !foreach(decls.pattern, address, - !subst(OFFSET, dword_uimm12, - !subst(ALIGN, min_align8, decls.pattern))), - i64>; - - defm : ls_neutral_pats<LSFP16_LDR, LSFP16_STR, Base, - !foreach(decls.pattern, Offset, - !subst(OFFSET, hword_uimm12, decls.pattern)), - !foreach(decls.pattern, address, - !subst(OFFSET, hword_uimm12, - !subst(ALIGN, min_align2, decls.pattern))), - f16>; - - defm : ls_neutral_pats<LSFP32_LDR, LSFP32_STR, Base, - !foreach(decls.pattern, Offset, - !subst(OFFSET, word_uimm12, decls.pattern)), - !foreach(decls.pattern, address, - !subst(OFFSET, word_uimm12, - !subst(ALIGN, min_align4, decls.pattern))), - f32>; - - defm : ls_neutral_pats<LSFP64_LDR, LSFP64_STR, Base, - !foreach(decls.pattern, Offset, - !subst(OFFSET, dword_uimm12, decls.pattern)), - !foreach(decls.pattern, address, - !subst(OFFSET, dword_uimm12, - !subst(ALIGN, min_align8, decls.pattern))), - f64>; - - defm : ls_neutral_pats<LSFP128_LDR, LSFP128_STR, Base, - !foreach(decls.pattern, Offset, - !subst(OFFSET, qword_uimm12, decls.pattern)), - !foreach(decls.pattern, address, - !subst(OFFSET, qword_uimm12, - !subst(ALIGN, min_align16, decls.pattern))), - f128>; - - defm : load_signed_pats<"B", "", Base, - !foreach(decls.pattern, Offset, - !subst(OFFSET, byte_uimm12, decls.pattern)), - !foreach(decls.pattern, address, - !subst(OFFSET, byte_uimm12, - !subst(ALIGN, any_align, decls.pattern))), - i8>; - - defm : load_signed_pats<"H", "", Base, - !foreach(decls.pattern, Offset, - !subst(OFFSET, hword_uimm12, decls.pattern)), - !foreach(decls.pattern, address, - !subst(OFFSET, hword_uimm12, - !subst(ALIGN, min_align2, decls.pattern))), - i16>; - - def : Pat<(sextloadi32 !foreach(decls.pattern, address, - !subst(OFFSET, word_uimm12, - !subst(ALIGN, min_align4, decls.pattern)))), - (LDRSWx Base, !foreach(decls.pattern, Offset, - !subst(OFFSET, word_uimm12, decls.pattern)))>; -} - -// Straightforward patterns of last resort: a pointer with or without an -// appropriate offset. 
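Before the catch-all instantiations that follow, a short standalone sketch (an assumption about the encoding rule, written as plain C++ rather than taken from the backend) of why each element size above is paired with a min_align check: the 12-bit offset field is scaled by the access size, so only offsets that are a multiple of that size, up to 4095 times it, are representable.

#include <cassert>
#include <cstdint>

// An unsigned-offset load/store encodes Offset / AccessSize in a 12-bit field.
bool fitsScaledUImm12(uint64_t Offset, uint64_t AccessSize) {
  return Offset % AccessSize == 0 && Offset / AccessSize <= 4095;
}

int main() {
  assert(fitsScaledUImm12(32760, 8));  // 4095 * 8: the largest doubleword offset
  assert(!fitsScaledUImm12(4, 8));     // not a multiple of the access size
  return 0;
}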
-defm : uimm12_pats<(i64 i64:$Rn), (i64 i64:$Rn), (i64 0)>; -defm : uimm12_pats<(add i64:$Rn, OFFSET:$UImm12), - (i64 i64:$Rn), (i64 OFFSET:$UImm12)>; - -// The offset could be hidden behind an "or", of course: -defm : uimm12_pats<(add_like_or i64:$Rn, OFFSET:$UImm12), - (i64 i64:$Rn), (i64 OFFSET:$UImm12)>; - -// Global addresses under the small-absolute model should use these -// instructions. There are ELF relocations specifically for it. -defm : uimm12_pats<(A64WrapperSmall tglobaladdr:$Hi, tglobaladdr:$Lo12, ALIGN), - (ADRPxi tglobaladdr:$Hi), (i64 tglobaladdr:$Lo12)>; - -defm : uimm12_pats<(A64WrapperSmall tglobaltlsaddr:$Hi, tglobaltlsaddr:$Lo12, - ALIGN), - (ADRPxi tglobaltlsaddr:$Hi), (i64 tglobaltlsaddr:$Lo12)>; - -// External symbols that make it this far should also get standard relocations. -defm : uimm12_pats<(A64WrapperSmall texternalsym:$Hi, texternalsym:$Lo12, - ALIGN), - (ADRPxi texternalsym:$Hi), (i64 texternalsym:$Lo12)>; - -defm : uimm12_pats<(A64WrapperSmall tconstpool:$Hi, tconstpool:$Lo12, ALIGN), - (ADRPxi tconstpool:$Hi), (i64 tconstpool:$Lo12)>; - -// We also want to use uimm12 instructions for local variables at the moment. -def tframeindex_XFORM : SDNodeXForm<frameindex, [{ - int FI = cast<FrameIndexSDNode>(N)->getIndex(); - return CurDAG->getTargetFrameIndex(FI, MVT::i64); -}]>; - -defm : uimm12_pats<(i64 frameindex:$Rn), - (tframeindex_XFORM tframeindex:$Rn), (i64 0)>; - -// These can be much simpler than uimm12 because we don't have to change the operand -// type (e.g. LDURB and LDURH take the same operands). -multiclass simm9_pats<dag address, dag Base, dag Offset> { - defm : ls_small_pats<LS8_LDUR, LS8_STUR, Base, Offset, address, i8>; - defm : ls_small_pats<LS16_LDUR, LS16_STUR, Base, Offset, address, i16>; - - defm : ls_int_neutral_pats<LS32_LDUR, LS32_STUR, Base, Offset, address, i32>; - defm : ls_int_neutral_pats<LS64_LDUR, LS64_STUR, Base, Offset, address, i64>; - - defm : ls_neutral_pats<LSFP16_LDUR, LSFP16_STUR, Base, Offset, address, f16>; - defm : ls_neutral_pats<LSFP32_LDUR, LSFP32_STUR, Base, Offset, address, f32>; - defm : ls_neutral_pats<LSFP64_LDUR, LSFP64_STUR, Base, Offset, address, f64>; - defm : ls_neutral_pats<LSFP128_LDUR, LSFP128_STUR, Base, Offset, address, - f128>; - - def : Pat<(i64 (zextloadi32 address)), - (SUBREG_TO_REG (i64 0), (LS32_LDUR Base, Offset), sub_32)>; - - def : Pat<(truncstorei32 i64:$Rt, address), - (LS32_STUR (EXTRACT_SUBREG $Rt, sub_32), Base, Offset)>; - - defm : load_signed_pats<"B", "_U", Base, Offset, address, i8>; - defm : load_signed_pats<"H", "_U", Base, Offset, address, i16>; - def : Pat<(sextloadi32 address), (LDURSWx Base, Offset)>; -} - -defm : simm9_pats<(add i64:$Rn, simm9:$SImm9), - (i64 $Rn), (SDXF_simm9 simm9:$SImm9)>; - -defm : simm9_pats<(add_like_or i64:$Rn, simm9:$SImm9), - (i64 $Rn), (SDXF_simm9 simm9:$SImm9)>; - - -//===------------------------------ -// 3. Register offset patterns -//===------------------------------ - -// Atomic patterns can be shared between integer operations of all sizes, a -// quick multiclass here allows reuse.
-multiclass ro_atomic_pats<Instruction LOAD, Instruction STORE, dag Base, - dag Offset, dag Extend, dag address, - ValueType transty, ValueType sty> { - def : Pat<(!cast<PatFrag>("atomic_load_simple_" # sty) address), - (LOAD Base, Offset, Extend)>; - - def : Pat<(!cast<PatFrag>("atomic_store_simple_" # sty) address, transty:$Rt), - (STORE $Rt, Base, Offset, Extend)>; -} - -// The register offset instructions take three operands giving the instruction, -// and have an annoying split between instructions where Rm is 32-bit and -// 64-bit. So we need a special hierarchy to describe them. Other than that the -// same operations should be supported as for simm9 and uimm12 addressing. - -multiclass ro_small_pats<Instruction LOAD, Instruction STORE, - dag Base, dag Offset, dag Extend, - dag address, ValueType sty> - : ro_atomic_pats<LOAD, STORE, Base, Offset, Extend, address, i32, sty> { - def : Pat<(!cast<SDNode>(zextload # sty) address), - (LOAD Base, Offset, Extend)>; - - def : Pat<(!cast<SDNode>(extload # sty) address), - (LOAD Base, Offset, Extend)>; - - // For zero-extension to 64-bits we have to tell LLVM that the whole 64-bit - // register was actually set. - def : Pat<(i64 (!cast<SDNode>(zextload # sty) address)), - (SUBREG_TO_REG (i64 0), (LOAD Base, Offset, Extend), sub_32)>; - - def : Pat<(i64 (!cast<SDNode>(extload # sty) address)), - (SUBREG_TO_REG (i64 0), (LOAD Base, Offset, Extend), sub_32)>; - - def : Pat<(!cast<SDNode>(truncstore # sty) i32:$Rt, address), - (STORE $Rt, Base, Offset, Extend)>; - - // For truncating store from 64-bits, we have to manually tell LLVM to - // ignore the high bits of the x register. - def : Pat<(!cast<SDNode>(truncstore # sty) i64:$Rt, address), - (STORE (EXTRACT_SUBREG $Rt, sub_32), Base, Offset, Extend)>; - -} - -// Next come patterns for sign-extending loads. -multiclass ro_signed_pats<string T, string Rm, dag Base, dag Offset, dag Extend, - dag address, ValueType sty> { - def : Pat<(i32 (!cast<SDNode>("sextload" # sty) address)), - (!cast<Instruction>("LDRS" # T # "w_" # Rm # "_RegOffset") - Base, Offset, Extend)>; - - def : Pat<(i64 (!cast<SDNode>("sextload" # sty) address)), - (!cast<Instruction>("LDRS" # T # "x_" # Rm # "_RegOffset") - Base, Offset, Extend)>; -} - -// and finally "natural-width" loads and stores come next. 
-multiclass ro_neutral_pats<Instruction LOAD, Instruction STORE, - dag Base, dag Offset, dag Extend, dag address, - ValueType sty> { - def : Pat<(sty (load address)), (LOAD Base, Offset, Extend)>; - def : Pat<(store sty:$Rt, address), - (STORE $Rt, Base, Offset, Extend)>; -} - -multiclass ro_int_neutral_pats<Instruction LOAD, Instruction STORE, - dag Base, dag Offset, dag Extend, dag address, - ValueType sty> - : ro_neutral_pats<LOAD, STORE, Base, Offset, Extend, address, sty>, - ro_atomic_pats<LOAD, STORE, Base, Offset, Extend, address, sty, sty>; - -multiclass regoff_pats<string Rm, dag address, dag Base, dag Offset, - dag Extend> { - defm : ro_small_pats<!cast<Instruction>("LS8_" # Rm # "_RegOffset_LDR"), - !cast<Instruction>("LS8_" # Rm # "_RegOffset_STR"), - Base, Offset, Extend, - !foreach(decls.pattern, address, - !subst(SHIFT, imm_eq0, decls.pattern)), - i8>; - defm : ro_small_pats<!cast<Instruction>("LS16_" # Rm # "_RegOffset_LDR"), - !cast<Instruction>("LS16_" # Rm # "_RegOffset_STR"), - Base, Offset, Extend, - !foreach(decls.pattern, address, - !subst(SHIFT, imm_eq1, decls.pattern)), - i16>; - defm : ro_small_pats<!cast<Instruction>("LS32_" # Rm # "_RegOffset_LDR"), - !cast<Instruction>("LS32_" # Rm # "_RegOffset_STR"), - Base, Offset, Extend, - !foreach(decls.pattern, address, - !subst(SHIFT, imm_eq2, decls.pattern)), - i32>; - - defm : ro_int_neutral_pats< - !cast<Instruction>("LS32_" # Rm # "_RegOffset_LDR"), - !cast<Instruction>("LS32_" # Rm # "_RegOffset_STR"), - Base, Offset, Extend, - !foreach(decls.pattern, address, - !subst(SHIFT, imm_eq2, decls.pattern)), - i32>; - - defm : ro_int_neutral_pats< - !cast<Instruction>("LS64_" # Rm # "_RegOffset_LDR"), - !cast<Instruction>("LS64_" # Rm # "_RegOffset_STR"), - Base, Offset, Extend, - !foreach(decls.pattern, address, - !subst(SHIFT, imm_eq3, decls.pattern)), - i64>; - - defm : ro_neutral_pats<!cast<Instruction>("LSFP16_" # Rm # "_RegOffset_LDR"), - !cast<Instruction>("LSFP16_" # Rm # "_RegOffset_STR"), - Base, Offset, Extend, - !foreach(decls.pattern, address, - !subst(SHIFT, imm_eq1, decls.pattern)), - f16>; - - defm : ro_neutral_pats<!cast<Instruction>("LSFP32_" # Rm # "_RegOffset_LDR"), - !cast<Instruction>("LSFP32_" # Rm # "_RegOffset_STR"), - Base, Offset, Extend, - !foreach(decls.pattern, address, - !subst(SHIFT, imm_eq2, decls.pattern)), - f32>; - - defm : ro_neutral_pats<!cast<Instruction>("LSFP64_" # Rm # "_RegOffset_LDR"), - !cast<Instruction>("LSFP64_" # Rm # "_RegOffset_STR"), - Base, Offset, Extend, - !foreach(decls.pattern, address, - !subst(SHIFT, imm_eq3, decls.pattern)), - f64>; - - defm : ro_neutral_pats<!cast<Instruction>("LSFP128_" # Rm # "_RegOffset_LDR"), - !cast<Instruction>("LSFP128_" # Rm # "_RegOffset_STR"), - Base, Offset, Extend, - !foreach(decls.pattern, address, - !subst(SHIFT, imm_eq4, decls.pattern)), - f128>; - - defm : ro_signed_pats<"B", Rm, Base, Offset, Extend, - !foreach(decls.pattern, address, - !subst(SHIFT, imm_eq0, decls.pattern)), - i8>; - - defm : ro_signed_pats<"H", Rm, Base, Offset, Extend, - !foreach(decls.pattern, address, - !subst(SHIFT, imm_eq1, decls.pattern)), - i16>; - - def : Pat<(sextloadi32 !foreach(decls.pattern, address, - !subst(SHIFT, imm_eq2, decls.pattern))), - (!cast<Instruction>("LDRSWx_" # Rm # "_RegOffset") - Base, Offset, Extend)>; -} - - -// Finally we're in a position to tell LLVM exactly what addresses are reachable -// using register-offset instructions. Essentially a base plus a possibly -// extended, possibly shifted (by access size) offset. 
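As a standalone model of the addresses the instantiations below describe (an illustration only, not the selection code itself): a 64-bit base plus an index that is either a 64-bit register or a sign- or zero-extended 32-bit register, optionally shifted left by log2 of the access size.

#include <cstdint>

// Address computed by the Wm register-offset forms instantiated below.
uint64_t regOffsetAddress(uint64_t Base, int32_t Wm, bool SignExtend,
                          bool Scaled, unsigned Log2Size) {
  uint64_t Index = SignExtend ? uint64_t(int64_t(Wm)) : uint64_t(uint32_t(Wm));
  return Base + (Scaled ? Index << Log2Size : Index);
}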
- -defm : regoff_pats<"Wm", (add i64:$Rn, (sext i32:$Rm)), - (i64 i64:$Rn), (i32 i32:$Rm), (i64 6)>; - -defm : regoff_pats<"Wm", (add i64:$Rn, (shl (sext i32:$Rm), SHIFT)), - (i64 i64:$Rn), (i32 i32:$Rm), (i64 7)>; - -defm : regoff_pats<"Wm", (add i64:$Rn, (zext i32:$Rm)), - (i64 i64:$Rn), (i32 i32:$Rm), (i64 2)>; - -defm : regoff_pats<"Wm", (add i64:$Rn, (shl (zext i32:$Rm), SHIFT)), - (i64 i64:$Rn), (i32 i32:$Rm), (i64 3)>; - -defm : regoff_pats<"Xm", (add i64:$Rn, i64:$Rm), - (i64 i64:$Rn), (i64 i64:$Rm), (i64 2)>; - -defm : regoff_pats<"Xm", (add i64:$Rn, (shl i64:$Rm, SHIFT)), - (i64 i64:$Rn), (i64 i64:$Rm), (i64 3)>; - -//===----------------------------------------------------------------------===// -// Advanced SIMD (NEON) Support -// - -include "AArch64InstrNEON.td" diff --git a/llvm/lib/Target/AArch64/AArch64InstrNEON.td b/llvm/lib/Target/AArch64/AArch64InstrNEON.td deleted file mode 100644 index 01a59a1a6a8..00000000000 --- a/llvm/lib/Target/AArch64/AArch64InstrNEON.td +++ /dev/null @@ -1,9474 +0,0 @@ -//===-- AArch64InstrNEON.td - NEON support for AArch64 -----*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file describes the AArch64 NEON instruction set. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// NEON-specific DAG Nodes. -//===----------------------------------------------------------------------===// - -// (outs Result), (ins Imm, OpCmode) -def SDT_Neon_movi : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVT<1, i32>]>; - -def Neon_movi : SDNode<"AArch64ISD::NEON_MOVIMM", SDT_Neon_movi>; - -def Neon_mvni : SDNode<"AArch64ISD::NEON_MVNIMM", SDT_Neon_movi>; - -// (outs Result), (ins Imm) -def Neon_fmovi : SDNode<"AArch64ISD::NEON_FMOVIMM", SDTypeProfile<1, 1, - [SDTCisVec<0>, SDTCisVT<1, i32>]>>; - -// (outs Result), (ins LHS, RHS, CondCode) -def Neon_cmp : SDNode<"AArch64ISD::NEON_CMP", SDTypeProfile<1, 3, - [SDTCisVec<0>, SDTCisSameAs<1, 2>]>>; - -// (outs Result), (ins LHS, 0/0.0 constant, CondCode) -def Neon_cmpz : SDNode<"AArch64ISD::NEON_CMPZ", SDTypeProfile<1, 3, - [SDTCisVec<0>, SDTCisVec<1>]>>; - -// (outs Result), (ins LHS, RHS) -def Neon_tst : SDNode<"AArch64ISD::NEON_TST", SDTypeProfile<1, 2, - [SDTCisVec<0>, SDTCisSameAs<1, 2>]>>; - -def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, - SDTCisVT<2, i32>]>; -def Neon_sqrshlImm : SDNode<"AArch64ISD::NEON_QSHLs", SDTARMVSH>; -def Neon_uqrshlImm : SDNode<"AArch64ISD::NEON_QSHLu", SDTARMVSH>; - -def SDTPERMUTE : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, - SDTCisSameAs<0, 2>]>; -def Neon_uzp1 : SDNode<"AArch64ISD::NEON_UZP1", SDTPERMUTE>; -def Neon_uzp2 : SDNode<"AArch64ISD::NEON_UZP2", SDTPERMUTE>; -def Neon_zip1 : SDNode<"AArch64ISD::NEON_ZIP1", SDTPERMUTE>; -def Neon_zip2 : SDNode<"AArch64ISD::NEON_ZIP2", SDTPERMUTE>; -def Neon_trn1 : SDNode<"AArch64ISD::NEON_TRN1", SDTPERMUTE>; -def Neon_trn2 : SDNode<"AArch64ISD::NEON_TRN2", SDTPERMUTE>; - -def SDTVSHUF : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>; -def Neon_rev64 : SDNode<"AArch64ISD::NEON_REV64", SDTVSHUF>; -def Neon_rev32 : SDNode<"AArch64ISD::NEON_REV32", SDTVSHUF>; -def Neon_rev16 : SDNode<"AArch64ISD::NEON_REV16", SDTVSHUF>; -def Neon_vdup : 
SDNode<"AArch64ISD::NEON_VDUP", SDTypeProfile<1, 1, - [SDTCisVec<0>]>>; -def Neon_vduplane : SDNode<"AArch64ISD::NEON_VDUPLANE", SDTypeProfile<1, 2, - [SDTCisVec<0>, SDTCisVec<1>, SDTCisVT<2, i64>]>>; -def Neon_vextract : SDNode<"AArch64ISD::NEON_VEXTRACT", SDTypeProfile<1, 3, - [SDTCisVec<0>, SDTCisSameAs<0, 1>, - SDTCisSameAs<0, 2>, SDTCisVT<3, i64>]>>; - -//===----------------------------------------------------------------------===// -// Addressing-mode instantiations -//===----------------------------------------------------------------------===// - -multiclass ls_64_pats<dag address, dag Base, dag Offset, ValueType Ty> { -defm : ls_neutral_pats<LSFP64_LDR, LSFP64_STR, Base, - !foreach(decls.pattern, Offset, - !subst(OFFSET, dword_uimm12, decls.pattern)), - !foreach(decls.pattern, address, - !subst(OFFSET, dword_uimm12, - !subst(ALIGN, min_align8, decls.pattern))), - Ty>; -} - -multiclass ls_128_pats<dag address, dag Base, dag Offset, ValueType Ty> { -defm : ls_neutral_pats<LSFP128_LDR, LSFP128_STR, Base, - !foreach(decls.pattern, Offset, - !subst(OFFSET, qword_uimm12, decls.pattern)), - !foreach(decls.pattern, address, - !subst(OFFSET, qword_uimm12, - !subst(ALIGN, min_align16, decls.pattern))), - Ty>; -} - -multiclass uimm12_neon_pats<dag address, dag Base, dag Offset> { - defm : ls_64_pats<address, Base, Offset, v8i8>; - defm : ls_64_pats<address, Base, Offset, v4i16>; - defm : ls_64_pats<address, Base, Offset, v2i32>; - defm : ls_64_pats<address, Base, Offset, v1i64>; - defm : ls_64_pats<address, Base, Offset, v2f32>; - defm : ls_64_pats<address, Base, Offset, v1f64>; - - defm : ls_128_pats<address, Base, Offset, v16i8>; - defm : ls_128_pats<address, Base, Offset, v8i16>; - defm : ls_128_pats<address, Base, Offset, v4i32>; - defm : ls_128_pats<address, Base, Offset, v2i64>; - defm : ls_128_pats<address, Base, Offset, v4f32>; - defm : ls_128_pats<address, Base, Offset, v2f64>; -} - -defm : uimm12_neon_pats<(A64WrapperSmall - tconstpool:$Hi, tconstpool:$Lo12, ALIGN), - (ADRPxi tconstpool:$Hi), (i64 tconstpool:$Lo12)>; - -//===----------------------------------------------------------------------===// -// Multiclasses -//===----------------------------------------------------------------------===// - -multiclass NeonI_3VSame_B_sizes<bit u, bits<2> size, bits<5> opcode, - string asmop, SDPatternOperator opnode8B, - SDPatternOperator opnode16B, - bit Commutable = 0> { - let isCommutable = Commutable in { - def _8B : NeonI_3VSame<0b0, u, size, opcode, - (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm), - asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b", - [(set (v8i8 VPR64:$Rd), - (v8i8 (opnode8B (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def _16B : NeonI_3VSame<0b1, u, size, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm), - asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b", - [(set (v16i8 VPR128:$Rd), - (v16i8 (opnode16B (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - } - -} - -multiclass NeonI_3VSame_HS_sizes<bit u, bits<5> opcode, - string asmop, SDPatternOperator opnode, - bit Commutable = 0> { - let isCommutable = Commutable in { - def _4H : NeonI_3VSame<0b0, u, 0b01, opcode, - (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm), - asmop # "\t$Rd.4h, $Rn.4h, $Rm.4h", - [(set (v4i16 VPR64:$Rd), - (v4i16 (opnode (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def _8H : NeonI_3VSame<0b1, u, 0b01, opcode, - (outs VPR128:$Rd), (ins 
VPR128:$Rn, VPR128:$Rm), - asmop # "\t$Rd.8h, $Rn.8h, $Rm.8h", - [(set (v8i16 VPR128:$Rd), - (v8i16 (opnode (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def _2S : NeonI_3VSame<0b0, u, 0b10, opcode, - (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm), - asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s", - [(set (v2i32 VPR64:$Rd), - (v2i32 (opnode (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def _4S : NeonI_3VSame<0b1, u, 0b10, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm), - asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s", - [(set (v4i32 VPR128:$Rd), - (v4i32 (opnode (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - } -} -multiclass NeonI_3VSame_BHS_sizes<bit u, bits<5> opcode, - string asmop, SDPatternOperator opnode, - bit Commutable = 0> - : NeonI_3VSame_HS_sizes<u, opcode, asmop, opnode, Commutable> { - let isCommutable = Commutable in { - def _8B : NeonI_3VSame<0b0, u, 0b00, opcode, - (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm), - asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b", - [(set (v8i8 VPR64:$Rd), - (v8i8 (opnode (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def _16B : NeonI_3VSame<0b1, u, 0b00, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm), - asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b", - [(set (v16i8 VPR128:$Rd), - (v16i8 (opnode (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - } -} - -multiclass NeonI_3VSame_BHSD_sizes<bit u, bits<5> opcode, - string asmop, SDPatternOperator opnode, - bit Commutable = 0> - : NeonI_3VSame_BHS_sizes<u, opcode, asmop, opnode, Commutable> { - let isCommutable = Commutable in { - def _2D : NeonI_3VSame<0b1, u, 0b11, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm), - asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d", - [(set (v2i64 VPR128:$Rd), - (v2i64 (opnode (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - } -} - -// Multiclass NeonI_3VSame_SD_sizes: Operand types are floating point types, -// but Result types can be integer or floating point types. 
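The result type is a parameter because the same encoding family covers both arithmetic (floating-point in, floating-point out) and comparisons (floating-point in, integer lane-mask out). A one-lane sketch of the latter, assuming the usual all-ones/all-zeros mask convention rather than quoting any particular instruction:

#include <cstdint>

// One lane of a vector floating-point compare: float operands, integer result.
uint32_t cmpLaneEQ(float A, float B) {
  return A == B ? 0xFFFFFFFFu : 0u;
}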
-multiclass NeonI_3VSame_SD_sizes<bit u, bit size, bits<5> opcode, - string asmop, SDPatternOperator opnode, - ValueType ResTy2S, ValueType ResTy4S, - ValueType ResTy2D, bit Commutable = 0> { - let isCommutable = Commutable in { - def _2S : NeonI_3VSame<0b0, u, {size, 0b0}, opcode, - (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm), - asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s", - [(set (ResTy2S VPR64:$Rd), - (ResTy2S (opnode (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def _4S : NeonI_3VSame<0b1, u, {size, 0b0}, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm), - asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s", - [(set (ResTy4S VPR128:$Rd), - (ResTy4S (opnode (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def _2D : NeonI_3VSame<0b1, u, {size, 0b1}, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm), - asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d", - [(set (ResTy2D VPR128:$Rd), - (ResTy2D (opnode (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - } -} - -//===----------------------------------------------------------------------===// -// Instruction Definitions -//===----------------------------------------------------------------------===// - -// Vector Arithmetic Instructions - -// Vector Add (Integer and Floating-Point) - -defm ADDvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b10000, "add", add, 1>; -defm FADDvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11010, "fadd", fadd, - v2f32, v4f32, v2f64, 1>; - -// Patterns to match add of v1i8/v1i16/v1i32 types -def : Pat<(v1i8 (add FPR8:$Rn, FPR8:$Rm)), - (EXTRACT_SUBREG - (ADDvvv_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8), - (SUBREG_TO_REG (i64 0), FPR8:$Rm, sub_8)), - sub_8)>; -def : Pat<(v1i16 (add FPR16:$Rn, FPR16:$Rm)), - (EXTRACT_SUBREG - (ADDvvv_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16), - (SUBREG_TO_REG (i64 0), FPR16:$Rm, sub_16)), - sub_16)>; -def : Pat<(v1i32 (add FPR32:$Rn, FPR32:$Rm)), - (EXTRACT_SUBREG - (ADDvvv_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32), - (SUBREG_TO_REG (i64 0), FPR32:$Rm, sub_32)), - sub_32)>; - -// Vector Sub (Integer and Floating-Point) - -defm SUBvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b10000, "sub", sub, 0>; -defm FSUBvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11010, "fsub", fsub, - v2f32, v4f32, v2f64, 0>; - -// Patterns to match sub of v1i8/v1i16/v1i32 types -def : Pat<(v1i8 (sub FPR8:$Rn, FPR8:$Rm)), - (EXTRACT_SUBREG - (SUBvvv_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8), - (SUBREG_TO_REG (i64 0), FPR8:$Rm, sub_8)), - sub_8)>; -def : Pat<(v1i16 (sub FPR16:$Rn, FPR16:$Rm)), - (EXTRACT_SUBREG - (SUBvvv_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16), - (SUBREG_TO_REG (i64 0), FPR16:$Rm, sub_16)), - sub_16)>; -def : Pat<(v1i32 (sub FPR32:$Rn, FPR32:$Rm)), - (EXTRACT_SUBREG - (SUBvvv_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32), - (SUBREG_TO_REG (i64 0), FPR32:$Rm, sub_32)), - sub_32)>; - -// Vector Multiply (Integer and Floating-Point) - -let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in { -defm MULvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10011, "mul", mul, 1>; -defm FMULvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11011, "fmul", fmul, - v2f32, v4f32, v2f64, 1>; -} - -// Patterns to match mul of v1i8/v1i16/v1i32 types -def : Pat<(v1i8 (mul FPR8:$Rn, FPR8:$Rm)), - (EXTRACT_SUBREG - (MULvvv_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8), - (SUBREG_TO_REG (i64 0), FPR8:$Rm, sub_8)), - sub_8)>; -def : Pat<(v1i16 (mul FPR16:$Rn, FPR16:$Rm)), - (EXTRACT_SUBREG - (MULvvv_4H 
(SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16), - (SUBREG_TO_REG (i64 0), FPR16:$Rm, sub_16)), - sub_16)>; -def : Pat<(v1i32 (mul FPR32:$Rn, FPR32:$Rm)), - (EXTRACT_SUBREG - (MULvvv_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32), - (SUBREG_TO_REG (i64 0), FPR32:$Rm, sub_32)), - sub_32)>; - -// Vector Multiply (Polynomial) - -let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in { -defm PMULvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b10011, "pmul", - int_arm_neon_vmulp, int_arm_neon_vmulp, 1>; -} - -// Vector Multiply-accumulate and Multiply-subtract (Integer) - -// class NeonI_3VSame_Constraint_impl: NeonI_3VSame with no data type and -// two operands constraints. -class NeonI_3VSame_Constraint_impl<string asmop, string asmlane, - RegisterOperand VPRC, ValueType OpTy, bit q, bit u, bits<2> size, - bits<5> opcode, SDPatternOperator opnode> - : NeonI_3VSame<q, u, size, opcode, - (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, VPRC:$Rm), - asmop # "\t$Rd" # asmlane # ", $Rn" # asmlane # ", $Rm" # asmlane, - [(set (OpTy VPRC:$Rd), - (OpTy (opnode (OpTy VPRC:$src), (OpTy VPRC:$Rn), (OpTy VPRC:$Rm))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]> { - let Constraints = "$src = $Rd"; -} - -def Neon_mla : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm), - (add node:$Ra, (mul node:$Rn, node:$Rm))>; - -def Neon_mls : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm), - (sub node:$Ra, (mul node:$Rn, node:$Rm))>; - - -let SchedRW = [WriteFPMAC, ReadFPMAC, ReadFPMAC] in { -def MLAvvv_8B: NeonI_3VSame_Constraint_impl<"mla", ".8b", VPR64, v8i8, - 0b0, 0b0, 0b00, 0b10010, Neon_mla>; -def MLAvvv_16B: NeonI_3VSame_Constraint_impl<"mla", ".16b", VPR128, v16i8, - 0b1, 0b0, 0b00, 0b10010, Neon_mla>; -def MLAvvv_4H: NeonI_3VSame_Constraint_impl<"mla", ".4h", VPR64, v4i16, - 0b0, 0b0, 0b01, 0b10010, Neon_mla>; -def MLAvvv_8H: NeonI_3VSame_Constraint_impl<"mla", ".8h", VPR128, v8i16, - 0b1, 0b0, 0b01, 0b10010, Neon_mla>; -def MLAvvv_2S: NeonI_3VSame_Constraint_impl<"mla", ".2s", VPR64, v2i32, - 0b0, 0b0, 0b10, 0b10010, Neon_mla>; -def MLAvvv_4S: NeonI_3VSame_Constraint_impl<"mla", ".4s", VPR128, v4i32, - 0b1, 0b0, 0b10, 0b10010, Neon_mla>; - -def MLSvvv_8B: NeonI_3VSame_Constraint_impl<"mls", ".8b", VPR64, v8i8, - 0b0, 0b1, 0b00, 0b10010, Neon_mls>; -def MLSvvv_16B: NeonI_3VSame_Constraint_impl<"mls", ".16b", VPR128, v16i8, - 0b1, 0b1, 0b00, 0b10010, Neon_mls>; -def MLSvvv_4H: NeonI_3VSame_Constraint_impl<"mls", ".4h", VPR64, v4i16, - 0b0, 0b1, 0b01, 0b10010, Neon_mls>; -def MLSvvv_8H: NeonI_3VSame_Constraint_impl<"mls", ".8h", VPR128, v8i16, - 0b1, 0b1, 0b01, 0b10010, Neon_mls>; -def MLSvvv_2S: NeonI_3VSame_Constraint_impl<"mls", ".2s", VPR64, v2i32, - 0b0, 0b1, 0b10, 0b10010, Neon_mls>; -def MLSvvv_4S: NeonI_3VSame_Constraint_impl<"mls", ".4s", VPR128, v4i32, - 0b1, 0b1, 0b10, 0b10010, Neon_mls>; -} - -// Vector Multiply-accumulate and Multiply-subtract (Floating Point) - -def Neon_fmla : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm), - (fadd node:$Ra, (fmul_su node:$Rn, node:$Rm))>; - -def Neon_fmls : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm), - (fsub node:$Ra, (fmul_su node:$Rn, node:$Rm))>; - -let Predicates = [HasNEON, UseFusedMAC], - SchedRW = [WriteFPMAC, ReadFPMAC, ReadFPMAC] in { -def FMLAvvv_2S: NeonI_3VSame_Constraint_impl<"fmla", ".2s", VPR64, v2f32, - 0b0, 0b0, 0b00, 0b11001, Neon_fmla>; -def FMLAvvv_4S: NeonI_3VSame_Constraint_impl<"fmla", ".4s", VPR128, v4f32, - 0b1, 0b0, 0b00, 0b11001, Neon_fmla>; -def FMLAvvv_2D: NeonI_3VSame_Constraint_impl<"fmla", ".2d", VPR128, v2f64, - 0b1, 0b0, 0b01, 0b11001, 
Neon_fmla>; - -def FMLSvvv_2S: NeonI_3VSame_Constraint_impl<"fmls", ".2s", VPR64, v2f32, - 0b0, 0b0, 0b10, 0b11001, Neon_fmls>; -def FMLSvvv_4S: NeonI_3VSame_Constraint_impl<"fmls", ".4s", VPR128, v4f32, - 0b1, 0b0, 0b10, 0b11001, Neon_fmls>; -def FMLSvvv_2D: NeonI_3VSame_Constraint_impl<"fmls", ".2d", VPR128, v2f64, - 0b1, 0b0, 0b11, 0b11001, Neon_fmls>; -} - -// We're also allowed to match the fma instruction regardless of compile -// options. -def : Pat<(v2f32 (fma VPR64:$Rn, VPR64:$Rm, VPR64:$Ra)), - (FMLAvvv_2S VPR64:$Ra, VPR64:$Rn, VPR64:$Rm)>; -def : Pat<(v4f32 (fma VPR128:$Rn, VPR128:$Rm, VPR128:$Ra)), - (FMLAvvv_4S VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>; -def : Pat<(v2f64 (fma VPR128:$Rn, VPR128:$Rm, VPR128:$Ra)), - (FMLAvvv_2D VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>; - -def : Pat<(v2f32 (fma (fneg VPR64:$Rn), VPR64:$Rm, VPR64:$Ra)), - (FMLSvvv_2S VPR64:$Ra, VPR64:$Rn, VPR64:$Rm)>; -def : Pat<(v4f32 (fma (fneg VPR128:$Rn), VPR128:$Rm, VPR128:$Ra)), - (FMLSvvv_4S VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>; -def : Pat<(v2f64 (fma (fneg VPR128:$Rn), VPR128:$Rm, VPR128:$Ra)), - (FMLSvvv_2D VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>; - -// Vector Divide (Floating-Point) - -let SchedRW = [WriteFPDiv, ReadFPDiv, ReadFPDiv] in { -defm FDIVvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11111, "fdiv", fdiv, - v2f32, v4f32, v2f64, 0>; -} - -// Vector Bitwise Operations - -// Vector Bitwise AND - -defm ANDvvv : NeonI_3VSame_B_sizes<0b0, 0b00, 0b00011, "and", and, and, 1>; - -// Vector Bitwise Exclusive OR - -defm EORvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b00011, "eor", xor, xor, 1>; - -// Vector Bitwise OR - -defm ORRvvv : NeonI_3VSame_B_sizes<0b0, 0b10, 0b00011, "orr", or, or, 1>; - -// ORR disassembled as MOV if Vn==Vm - -// Vector Move - register -// Alias for ORR if Vn=Vm. -def : NeonInstAlias<"mov $Rd.8b, $Rn.8b", - (ORRvvv_8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rn)>; -def : NeonInstAlias<"mov $Rd.16b, $Rn.16b", - (ORRvvv_16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rn)>; - -// The MOVI instruction takes two immediate operands. The first is the -// immediate encoding, while the second is the cmode. A cmode of 14, or -// 0b1110, produces a MOVI operation, rather than a MVNI, ORR, or BIC. 
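With that cmode the 8-bit immediate is replicated into every byte lane (the behaviour assumed by the Neon_AllZero/Neon_AllOne fragments below), so an immediate of 0 yields the all-zero vector and 255 the all-ones vector. A quick standalone check of one 64-bit half:

#include <cassert>
#include <cstdint>

int main() {
  auto replicateByte = [](uint8_t Imm) {
    return uint64_t(Imm) * 0x0101010101010101ULL;  // copy the byte into each lane
  };
  assert(replicateByte(0) == 0x0000000000000000ULL);
  assert(replicateByte(255) == 0xFFFFFFFFFFFFFFFFULL);
  return 0;
}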
-def Neon_AllZero : PatFrag<(ops), (Neon_movi (i32 0), (i32 14))>; -def Neon_AllOne : PatFrag<(ops), (Neon_movi (i32 255), (i32 14))>; - -def Neon_not8B : PatFrag<(ops node:$in), - (xor node:$in, (bitconvert (v8i8 Neon_AllOne)))>; -def Neon_not16B : PatFrag<(ops node:$in), - (xor node:$in, (bitconvert (v16i8 Neon_AllOne)))>; - -def Neon_orn8B : PatFrag<(ops node:$Rn, node:$Rm), - (or node:$Rn, (Neon_not8B node:$Rm))>; - -def Neon_orn16B : PatFrag<(ops node:$Rn, node:$Rm), - (or node:$Rn, (Neon_not16B node:$Rm))>; - -def Neon_bic8B : PatFrag<(ops node:$Rn, node:$Rm), - (and node:$Rn, (Neon_not8B node:$Rm))>; - -def Neon_bic16B : PatFrag<(ops node:$Rn, node:$Rm), - (and node:$Rn, (Neon_not16B node:$Rm))>; - - -// Vector Bitwise OR NOT - register - -defm ORNvvv : NeonI_3VSame_B_sizes<0b0, 0b11, 0b00011, "orn", - Neon_orn8B, Neon_orn16B, 0>; - -// Vector Bitwise Bit Clear (AND NOT) - register - -defm BICvvv : NeonI_3VSame_B_sizes<0b0, 0b01, 0b00011, "bic", - Neon_bic8B, Neon_bic16B, 0>; - -multiclass Neon_bitwise2V_patterns<SDPatternOperator opnode8B, - SDPatternOperator opnode16B, - Instruction INST8B, - Instruction INST16B> { - def : Pat<(v2i32 (opnode8B VPR64:$Rn, VPR64:$Rm)), - (INST8B VPR64:$Rn, VPR64:$Rm)>; - def : Pat<(v4i16 (opnode8B VPR64:$Rn, VPR64:$Rm)), - (INST8B VPR64:$Rn, VPR64:$Rm)>; - def : Pat<(v1i64 (opnode8B VPR64:$Rn, VPR64:$Rm)), - (INST8B VPR64:$Rn, VPR64:$Rm)>; - def : Pat<(v4i32 (opnode16B VPR128:$Rn, VPR128:$Rm)), - (INST16B VPR128:$Rn, VPR128:$Rm)>; - def : Pat<(v8i16 (opnode16B VPR128:$Rn, VPR128:$Rm)), - (INST16B VPR128:$Rn, VPR128:$Rm)>; - def : Pat<(v2i64 (opnode16B VPR128:$Rn, VPR128:$Rm)), - (INST16B VPR128:$Rn, VPR128:$Rm)>; -} - -// Additional patterns for bitwise instructions AND, EOR, ORR, BIC, ORN -defm : Neon_bitwise2V_patterns<and, and, ANDvvv_8B, ANDvvv_16B>; -defm : Neon_bitwise2V_patterns<or, or, ORRvvv_8B, ORRvvv_16B>; -defm : Neon_bitwise2V_patterns<xor, xor, EORvvv_8B, EORvvv_16B>; -defm : Neon_bitwise2V_patterns<Neon_bic8B, Neon_bic16B, BICvvv_8B, BICvvv_16B>; -defm : Neon_bitwise2V_patterns<Neon_orn8B, Neon_orn16B, ORNvvv_8B, ORNvvv_16B>; - -// Vector Bitwise Select -def BSLvvv_8B : NeonI_3VSame_Constraint_impl<"bsl", ".8b", VPR64, v8i8, - 0b0, 0b1, 0b01, 0b00011, vselect>; - -def BSLvvv_16B : NeonI_3VSame_Constraint_impl<"bsl", ".16b", VPR128, v16i8, - 0b1, 0b1, 0b01, 0b00011, vselect>; - -multiclass Neon_bitwise3V_patterns<SDPatternOperator opnode, - Instruction INST8B, - Instruction INST16B> { - // Disassociate type from instruction definition - def : Pat<(v8i8 (opnode (v8i8 VPR64:$src), VPR64:$Rn, VPR64:$Rm)), - (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>; - def : Pat<(v2i32 (opnode (v2i32 VPR64:$src), VPR64:$Rn, VPR64:$Rm)), - (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>; - def : Pat<(v2f32 (opnode (v2i32 VPR64:$src), VPR64:$Rn, VPR64:$Rm)), - (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>; - def : Pat<(v4i16 (opnode (v4i16 VPR64:$src), VPR64:$Rn, VPR64:$Rm)), - (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>; - def : Pat<(v1i64 (opnode (v1i64 VPR64:$src), VPR64:$Rn, VPR64:$Rm)), - (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>; - def : Pat<(v1f64 (opnode (v1i64 VPR64:$src), VPR64:$Rn, VPR64:$Rm)), - (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>; - def : Pat<(v16i8 (opnode (v16i8 VPR128:$src), VPR128:$Rn, VPR128:$Rm)), - (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>; - def : Pat<(v4i32 (opnode (v4i32 VPR128:$src), VPR128:$Rn, VPR128:$Rm)), - (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>; - def : Pat<(v8i16 (opnode (v8i16 VPR128:$src), VPR128:$Rn, 
VPR128:$Rm)), - (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>; - def : Pat<(v2i64 (opnode (v2i64 VPR128:$src), VPR128:$Rn, VPR128:$Rm)), - (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>; - def : Pat<(v2f64 (opnode (v2i64 VPR128:$src), VPR128:$Rn, VPR128:$Rm)), - (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>; - def : Pat<(v4f32 (opnode (v4i32 VPR128:$src), VPR128:$Rn, VPR128:$Rm)), - (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>; - - // Allow to match BSL instruction pattern with non-constant operand - def : Pat<(v8i8 (or (and VPR64:$Rn, VPR64:$Rd), - (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))), - (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>; - def : Pat<(v4i16 (or (and VPR64:$Rn, VPR64:$Rd), - (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))), - (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>; - def : Pat<(v2i32 (or (and VPR64:$Rn, VPR64:$Rd), - (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))), - (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>; - def : Pat<(v1i64 (or (and VPR64:$Rn, VPR64:$Rd), - (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))), - (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>; - def : Pat<(v16i8 (or (and VPR128:$Rn, VPR128:$Rd), - (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))), - (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>; - def : Pat<(v8i16 (or (and VPR128:$Rn, VPR128:$Rd), - (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))), - (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>; - def : Pat<(v4i32 (or (and VPR128:$Rn, VPR128:$Rd), - (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))), - (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>; - def : Pat<(v2i64 (or (and VPR128:$Rn, VPR128:$Rd), - (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))), - (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>; - - // Allow to match llvm.arm.* intrinsics. - def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 VPR64:$src), - (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))), - (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>; - def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 VPR64:$src), - (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))), - (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>; - def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 VPR64:$src), - (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))), - (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>; - def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 VPR64:$src), - (v1i64 VPR64:$Rn), (v1i64 VPR64:$Rm))), - (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>; - def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 VPR64:$src), - (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))), - (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>; - def : Pat<(v1f64 (int_arm_neon_vbsl (v1f64 VPR64:$src), - (v1f64 VPR64:$Rn), (v1f64 VPR64:$Rm))), - (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>; - def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 VPR128:$src), - (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))), - (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>; - def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 VPR128:$src), - (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))), - (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>; - def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 VPR128:$src), - (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))), - (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>; - def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 VPR128:$src), - (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))), - (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>; - def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 VPR128:$src), - (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))), - (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>; - def : Pat<(v2f64 (int_arm_neon_vbsl (v2f64 VPR128:$src), - (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))), - (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>; -} - -// Additional patterns for bitwise 
instruction BSL -defm: Neon_bitwise3V_patterns<vselect, BSLvvv_8B, BSLvvv_16B>; - -def Neon_NoBSLop : PatFrag<(ops node:$src, node:$Rn, node:$Rm), - (vselect node:$src, node:$Rn, node:$Rm), - [{ (void)N; return false; }]>; - -// Vector Bitwise Insert if True - -def BITvvv_8B : NeonI_3VSame_Constraint_impl<"bit", ".8b", VPR64, v8i8, - 0b0, 0b1, 0b10, 0b00011, Neon_NoBSLop>; -def BITvvv_16B : NeonI_3VSame_Constraint_impl<"bit", ".16b", VPR128, v16i8, - 0b1, 0b1, 0b10, 0b00011, Neon_NoBSLop>; - -// Vector Bitwise Insert if False - -def BIFvvv_8B : NeonI_3VSame_Constraint_impl<"bif", ".8b", VPR64, v8i8, - 0b0, 0b1, 0b11, 0b00011, Neon_NoBSLop>; -def BIFvvv_16B : NeonI_3VSame_Constraint_impl<"bif", ".16b", VPR128, v16i8, - 0b1, 0b1, 0b11, 0b00011, Neon_NoBSLop>; - -// Vector Absolute Difference and Accumulate (Signed, Unsigned) - -def Neon_uaba : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm), - (add node:$Ra, (int_arm_neon_vabdu node:$Rn, node:$Rm))>; -def Neon_saba : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm), - (add node:$Ra, (int_arm_neon_vabds node:$Rn, node:$Rm))>; - -// Vector Absolute Difference and Accumulate (Unsigned) -def UABAvvv_8B : NeonI_3VSame_Constraint_impl<"uaba", ".8b", VPR64, v8i8, - 0b0, 0b1, 0b00, 0b01111, Neon_uaba>; -def UABAvvv_16B : NeonI_3VSame_Constraint_impl<"uaba", ".16b", VPR128, v16i8, - 0b1, 0b1, 0b00, 0b01111, Neon_uaba>; -def UABAvvv_4H : NeonI_3VSame_Constraint_impl<"uaba", ".4h", VPR64, v4i16, - 0b0, 0b1, 0b01, 0b01111, Neon_uaba>; -def UABAvvv_8H : NeonI_3VSame_Constraint_impl<"uaba", ".8h", VPR128, v8i16, - 0b1, 0b1, 0b01, 0b01111, Neon_uaba>; -def UABAvvv_2S : NeonI_3VSame_Constraint_impl<"uaba", ".2s", VPR64, v2i32, - 0b0, 0b1, 0b10, 0b01111, Neon_uaba>; -def UABAvvv_4S : NeonI_3VSame_Constraint_impl<"uaba", ".4s", VPR128, v4i32, - 0b1, 0b1, 0b10, 0b01111, Neon_uaba>; - -// Vector Absolute Difference and Accumulate (Signed) -def SABAvvv_8B : NeonI_3VSame_Constraint_impl<"saba", ".8b", VPR64, v8i8, - 0b0, 0b0, 0b00, 0b01111, Neon_saba>; -def SABAvvv_16B : NeonI_3VSame_Constraint_impl<"saba", ".16b", VPR128, v16i8, - 0b1, 0b0, 0b00, 0b01111, Neon_saba>; -def SABAvvv_4H : NeonI_3VSame_Constraint_impl<"saba", ".4h", VPR64, v4i16, - 0b0, 0b0, 0b01, 0b01111, Neon_saba>; -def SABAvvv_8H : NeonI_3VSame_Constraint_impl<"saba", ".8h", VPR128, v8i16, - 0b1, 0b0, 0b01, 0b01111, Neon_saba>; -def SABAvvv_2S : NeonI_3VSame_Constraint_impl<"saba", ".2s", VPR64, v2i32, - 0b0, 0b0, 0b10, 0b01111, Neon_saba>; -def SABAvvv_4S : NeonI_3VSame_Constraint_impl<"saba", ".4s", VPR128, v4i32, - 0b1, 0b0, 0b10, 0b01111, Neon_saba>; - - -// Vector Absolute Difference (Signed, Unsigned) -defm UABDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01110, "uabd", int_arm_neon_vabdu, 0>; -defm SABDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01110, "sabd", int_arm_neon_vabds, 0>; - -// Vector Absolute Difference (Floating Point) -defm FABDvvv: NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11010, "fabd", - int_arm_neon_vabds, v2f32, v4f32, v2f64, 0>; - -// Vector Reciprocal Step (Floating Point) -defm FRECPSvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11111, "frecps", - int_arm_neon_vrecps, - v2f32, v4f32, v2f64, 0>; - -// Vector Reciprocal Square Root Step (Floating Point) -defm FRSQRTSvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11111, "frsqrts", - int_arm_neon_vrsqrts, - v2f32, v4f32, v2f64, 0>; - -// Vector Comparisons - -def Neon_cmeq : PatFrag<(ops node:$lhs, node:$rhs), - (Neon_cmp node:$lhs, node:$rhs, SETEQ)>; -def Neon_cmphs : PatFrag<(ops node:$lhs, node:$rhs), - (Neon_cmp node:$lhs, node:$rhs, SETUGE)>; -def Neon_cmge : 
PatFrag<(ops node:$lhs, node:$rhs), - (Neon_cmp node:$lhs, node:$rhs, SETGE)>; -def Neon_cmhi : PatFrag<(ops node:$lhs, node:$rhs), - (Neon_cmp node:$lhs, node:$rhs, SETUGT)>; -def Neon_cmgt : PatFrag<(ops node:$lhs, node:$rhs), - (Neon_cmp node:$lhs, node:$rhs, SETGT)>; - -// NeonI_compare_aliases class: swaps register operands to implement -// comparison aliases, e.g., CMLE is alias for CMGE with operands reversed. -class NeonI_compare_aliases<string asmop, string asmlane, - Instruction inst, RegisterOperand VPRC> - : NeonInstAlias<asmop # "\t$Rd" # asmlane #", $Rn" # asmlane # - ", $Rm" # asmlane, - (inst VPRC:$Rd, VPRC:$Rm, VPRC:$Rn), 0b0>; - -// Vector Comparisons (Integer) - -// Vector Compare Mask Equal (Integer) -let isCommutable =1 in { -defm CMEQvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b10001, "cmeq", Neon_cmeq, 0>; -} - -// Vector Compare Mask Higher or Same (Unsigned Integer) -defm CMHSvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00111, "cmhs", Neon_cmphs, 0>; - -// Vector Compare Mask Greater Than or Equal (Integer) -defm CMGEvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00111, "cmge", Neon_cmge, 0>; - -// Vector Compare Mask Higher (Unsigned Integer) -defm CMHIvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00110, "cmhi", Neon_cmhi, 0>; - -// Vector Compare Mask Greater Than (Integer) -defm CMGTvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00110, "cmgt", Neon_cmgt, 0>; - -// Vector Compare Mask Bitwise Test (Integer) -defm CMTSTvvv: NeonI_3VSame_BHSD_sizes<0b0, 0b10001, "cmtst", Neon_tst, 0>; - -// Vector Compare Mask Less or Same (Unsigned Integer) -// CMLS is alias for CMHS with operands reversed. -def CMLSvvv_8B : NeonI_compare_aliases<"cmls", ".8b", CMHSvvv_8B, VPR64>; -def CMLSvvv_16B : NeonI_compare_aliases<"cmls", ".16b", CMHSvvv_16B, VPR128>; -def CMLSvvv_4H : NeonI_compare_aliases<"cmls", ".4h", CMHSvvv_4H, VPR64>; -def CMLSvvv_8H : NeonI_compare_aliases<"cmls", ".8h", CMHSvvv_8H, VPR128>; -def CMLSvvv_2S : NeonI_compare_aliases<"cmls", ".2s", CMHSvvv_2S, VPR64>; -def CMLSvvv_4S : NeonI_compare_aliases<"cmls", ".4s", CMHSvvv_4S, VPR128>; -def CMLSvvv_2D : NeonI_compare_aliases<"cmls", ".2d", CMHSvvv_2D, VPR128>; - -// Vector Compare Mask Less Than or Equal (Integer) -// CMLE is alias for CMGE with operands reversed. -def CMLEvvv_8B : NeonI_compare_aliases<"cmle", ".8b", CMGEvvv_8B, VPR64>; -def CMLEvvv_16B : NeonI_compare_aliases<"cmle", ".16b", CMGEvvv_16B, VPR128>; -def CMLEvvv_4H : NeonI_compare_aliases<"cmle", ".4h", CMGEvvv_4H, VPR64>; -def CMLEvvv_8H : NeonI_compare_aliases<"cmle", ".8h", CMGEvvv_8H, VPR128>; -def CMLEvvv_2S : NeonI_compare_aliases<"cmle", ".2s", CMGEvvv_2S, VPR64>; -def CMLEvvv_4S : NeonI_compare_aliases<"cmle", ".4s", CMGEvvv_4S, VPR128>; -def CMLEvvv_2D : NeonI_compare_aliases<"cmle", ".2d", CMGEvvv_2D, VPR128>; - -// Vector Compare Mask Lower (Unsigned Integer) -// CMLO is alias for CMHI with operands reversed. -def CMLOvvv_8B : NeonI_compare_aliases<"cmlo", ".8b", CMHIvvv_8B, VPR64>; -def CMLOvvv_16B : NeonI_compare_aliases<"cmlo", ".16b", CMHIvvv_16B, VPR128>; -def CMLOvvv_4H : NeonI_compare_aliases<"cmlo", ".4h", CMHIvvv_4H, VPR64>; -def CMLOvvv_8H : NeonI_compare_aliases<"cmlo", ".8h", CMHIvvv_8H, VPR128>; -def CMLOvvv_2S : NeonI_compare_aliases<"cmlo", ".2s", CMHIvvv_2S, VPR64>; -def CMLOvvv_4S : NeonI_compare_aliases<"cmlo", ".4s", CMHIvvv_4S, VPR128>; -def CMLOvvv_2D : NeonI_compare_aliases<"cmlo", ".2d", CMHIvvv_2D, VPR128>; - -// Vector Compare Mask Less Than (Integer) -// CMLT is alias for CMGT with operands reversed. 
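Editor's aside on the alias classes just defined (the CMLT aliases themselves follow immediately below): each NeonI_compare_aliases instance simply swaps the two source registers, so e.g. "cmlt vd.8b, vn.8b, vm.8b" assembles as "cmgt vd.8b, vm.8b, vn.8b". A minimal scalar sketch of that identity, purely illustrative C++ and not backend code; the helpers cmgt/cmhs/cmlt/cmls are hypothetical stand-ins for a single 8-bit lane of the corresponding instructions:

    #include <cstdint>
    #include <cstdio>

    // One 8-bit lane of CMGT (signed greater-than) and CMHS (unsigned higher-or-same):
    // the result lane is all-ones when the condition holds, all-zeros otherwise.
    static uint8_t cmgt(int8_t n, int8_t m)   { return n > m  ? 0xff : 0x00; }
    static uint8_t cmhs(uint8_t n, uint8_t m) { return n >= m ? 0xff : 0x00; }

    // CMLT Vd, Vn, Vm is the same operation as CMGT Vd, Vm, Vn (sources swapped);
    // likewise CMLS maps onto CMHS.  That swap is all the alias definitions do.
    static uint8_t cmlt(int8_t n, int8_t m)   { return cmgt(m, n); }
    static uint8_t cmls(uint8_t n, uint8_t m) { return cmhs(m, n); }

    int main() {
      std::printf("cmlt(-3, 5) = 0x%02x, cmls(7, 7) = 0x%02x\n",
                  (unsigned)cmlt(-3, 5), (unsigned)cmls(7, 7)); // both print 0xff
    }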
-def CMLTvvv_8B : NeonI_compare_aliases<"cmlt", ".8b", CMGTvvv_8B, VPR64>; -def CMLTvvv_16B : NeonI_compare_aliases<"cmlt", ".16b", CMGTvvv_16B, VPR128>; -def CMLTvvv_4H : NeonI_compare_aliases<"cmlt", ".4h", CMGTvvv_4H, VPR64>; -def CMLTvvv_8H : NeonI_compare_aliases<"cmlt", ".8h", CMGTvvv_8H, VPR128>; -def CMLTvvv_2S : NeonI_compare_aliases<"cmlt", ".2s", CMGTvvv_2S, VPR64>; -def CMLTvvv_4S : NeonI_compare_aliases<"cmlt", ".4s", CMGTvvv_4S, VPR128>; -def CMLTvvv_2D : NeonI_compare_aliases<"cmlt", ".2d", CMGTvvv_2D, VPR128>; - - -def neon_uimm0_asmoperand : AsmOperandClass -{ - let Name = "UImm0"; - let PredicateMethod = "isUImm<0>"; - let RenderMethod = "addImmOperands"; -} - -def neon_uimm0 : Operand<i32>, ImmLeaf<i32, [{return Imm == 0;}]> { - let ParserMatchClass = neon_uimm0_asmoperand; - let PrintMethod = "printNeonUImm0Operand"; - -} - -multiclass NeonI_cmpz_sizes<bit u, bits<5> opcode, string asmop, CondCode CC> -{ - def _8B : NeonI_2VMisc<0b0, u, 0b00, opcode, - (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm), - asmop # "\t$Rd.8b, $Rn.8b, $Imm", - [(set (v8i8 VPR64:$Rd), - (v8i8 (Neon_cmpz (v8i8 VPR64:$Rn), (i32 imm:$Imm), CC)))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def _16B : NeonI_2VMisc<0b1, u, 0b00, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm), - asmop # "\t$Rd.16b, $Rn.16b, $Imm", - [(set (v16i8 VPR128:$Rd), - (v16i8 (Neon_cmpz (v16i8 VPR128:$Rn), (i32 imm:$Imm), CC)))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def _4H : NeonI_2VMisc<0b0, u, 0b01, opcode, - (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm), - asmop # "\t$Rd.4h, $Rn.4h, $Imm", - [(set (v4i16 VPR64:$Rd), - (v4i16 (Neon_cmpz (v4i16 VPR64:$Rn), (i32 imm:$Imm), CC)))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def _8H : NeonI_2VMisc<0b1, u, 0b01, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm), - asmop # "\t$Rd.8h, $Rn.8h, $Imm", - [(set (v8i16 VPR128:$Rd), - (v8i16 (Neon_cmpz (v8i16 VPR128:$Rn), (i32 imm:$Imm), CC)))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def _2S : NeonI_2VMisc<0b0, u, 0b10, opcode, - (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm), - asmop # "\t$Rd.2s, $Rn.2s, $Imm", - [(set (v2i32 VPR64:$Rd), - (v2i32 (Neon_cmpz (v2i32 VPR64:$Rn), (i32 imm:$Imm), CC)))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def _4S : NeonI_2VMisc<0b1, u, 0b10, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm), - asmop # "\t$Rd.4s, $Rn.4s, $Imm", - [(set (v4i32 VPR128:$Rd), - (v4i32 (Neon_cmpz (v4i32 VPR128:$Rn), (i32 imm:$Imm), CC)))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def _2D : NeonI_2VMisc<0b1, u, 0b11, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm), - asmop # "\t$Rd.2d, $Rn.2d, $Imm", - [(set (v2i64 VPR128:$Rd), - (v2i64 (Neon_cmpz (v2i64 VPR128:$Rn), (i32 imm:$Imm), CC)))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; -} - -// Vector Compare Mask Equal to Zero (Integer) -defm CMEQvvi : NeonI_cmpz_sizes<0b0, 0b01001, "cmeq", SETEQ>; - -// Vector Compare Mask Greater Than or Equal to Zero (Signed Integer) -defm CMGEvvi : NeonI_cmpz_sizes<0b1, 0b01000, "cmge", SETGE>; - -// Vector Compare Mask Greater Than Zero (Signed Integer) -defm CMGTvvi : NeonI_cmpz_sizes<0b0, 0b01000, "cmgt", SETGT>; - -// Vector Compare Mask Less Than or Equal To Zero (Signed Integer) -defm CMLEvvi : NeonI_cmpz_sizes<0b1, 0b01001, "cmle", SETLE>; - -// Vector Compare Mask Less Than Zero (Signed Integer) -defm CMLTvvi : NeonI_cmpz_sizes<0b0, 0b01010, "cmlt", SETLT>; - -// Vector 
Comparisons (Floating Point) - -// Vector Compare Mask Equal (Floating Point) -let isCommutable =1 in { -defm FCMEQvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11100, "fcmeq", Neon_cmeq, - v2i32, v4i32, v2i64, 0>; -} - -// Vector Compare Mask Greater Than Or Equal (Floating Point) -defm FCMGEvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11100, "fcmge", Neon_cmge, - v2i32, v4i32, v2i64, 0>; - -// Vector Compare Mask Greater Than (Floating Point) -defm FCMGTvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11100, "fcmgt", Neon_cmgt, - v2i32, v4i32, v2i64, 0>; - -// Vector Compare Mask Less Than Or Equal (Floating Point) -// FCMLE is alias for FCMGE with operands reversed. -def FCMLEvvv_2S : NeonI_compare_aliases<"fcmle", ".2s", FCMGEvvv_2S, VPR64>; -def FCMLEvvv_4S : NeonI_compare_aliases<"fcmle", ".4s", FCMGEvvv_4S, VPR128>; -def FCMLEvvv_2D : NeonI_compare_aliases<"fcmle", ".2d", FCMGEvvv_2D, VPR128>; - -// Vector Compare Mask Less Than (Floating Point) -// FCMLT is alias for FCMGT with operands reversed. -def FCMLTvvv_2S : NeonI_compare_aliases<"fcmlt", ".2s", FCMGTvvv_2S, VPR64>; -def FCMLTvvv_4S : NeonI_compare_aliases<"fcmlt", ".4s", FCMGTvvv_4S, VPR128>; -def FCMLTvvv_2D : NeonI_compare_aliases<"fcmlt", ".2d", FCMGTvvv_2D, VPR128>; - -def fpzero_izero_asmoperand : AsmOperandClass { - let Name = "FPZeroIZero"; - let ParserMethod = "ParseFPImm0AndImm0Operand"; - let DiagnosticType = "FPZero"; -} - -def fpzz32 : Operand<f32>, - ComplexPattern<f32, 1, "SelectFPZeroOperand", [fpimm]> { - let ParserMatchClass = fpzero_izero_asmoperand; - let PrintMethod = "printFPZeroOperand"; - let DecoderMethod = "DecodeFPZeroOperand"; -} - -multiclass NeonI_fpcmpz_sizes<bit u, bit size, bits<5> opcode, - string asmop, CondCode CC> -{ - def _2S : NeonI_2VMisc<0b0, u, {size, 0b0}, opcode, - (outs VPR64:$Rd), (ins VPR64:$Rn, fpzz32:$FPImm), - asmop # "\t$Rd.2s, $Rn.2s, $FPImm", - [(set (v2i32 VPR64:$Rd), - (v2i32 (Neon_cmpz (v2f32 VPR64:$Rn), (f32 fpzz32:$FPImm), CC)))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def _4S : NeonI_2VMisc<0b1, u, {size, 0b0}, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn, fpzz32:$FPImm), - asmop # "\t$Rd.4s, $Rn.4s, $FPImm", - [(set (v4i32 VPR128:$Rd), - (v4i32 (Neon_cmpz (v4f32 VPR128:$Rn), (f32 fpzz32:$FPImm), CC)))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def _2D : NeonI_2VMisc<0b1, u, {size, 0b1}, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn, fpzz32:$FPImm), - asmop # "\t$Rd.2d, $Rn.2d, $FPImm", - [(set (v2i64 VPR128:$Rd), - (v2i64 (Neon_cmpz (v2f64 VPR128:$Rn), (f32 fpzz32:$FPImm), CC)))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; -} - -// Vector Compare Mask Equal to Zero (Floating Point) -defm FCMEQvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01101, "fcmeq", SETEQ>; - -// Vector Compare Mask Greater Than or Equal to Zero (Floating Point) -defm FCMGEvvi : NeonI_fpcmpz_sizes<0b1, 0b1, 0b01100, "fcmge", SETGE>; - -// Vector Compare Mask Greater Than Zero (Floating Point) -defm FCMGTvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01100, "fcmgt", SETGT>; - -// Vector Compare Mask Less Than or Equal To Zero (Floating Point) -defm FCMLEvvi : NeonI_fpcmpz_sizes<0b1, 0b1, 0b01101, "fcmle", SETLE>; - -// Vector Compare Mask Less Than Zero (Floating Point) -defm FCMLTvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01110, "fcmlt", SETLT>; - -// Vector Absolute Comparisons (Floating Point) - -// Vector Absolute Compare Mask Greater Than Or Equal (Floating Point) -defm FACGEvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11101, "facge", - int_arm_neon_vacge, - v2i32, v4i32, v2i64, 0>; - -// Vector Absolute 
Compare Mask Greater Than (Floating Point) -defm FACGTvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11101, "facgt", - int_arm_neon_vacgt, - v2i32, v4i32, v2i64, 0>; - -// Vector Absolute Compare Mask Less Than Or Equal (Floating Point) -// FACLE is alias for FACGE with operands reversed. -def FACLEvvv_2S : NeonI_compare_aliases<"facle", ".2s", FACGEvvv_2S, VPR64>; -def FACLEvvv_4S : NeonI_compare_aliases<"facle", ".4s", FACGEvvv_4S, VPR128>; -def FACLEvvv_2D : NeonI_compare_aliases<"facle", ".2d", FACGEvvv_2D, VPR128>; - -// Vector Absolute Compare Mask Less Than (Floating Point) -// FACLT is alias for FACGT with operands reversed. -def FACLTvvv_2S : NeonI_compare_aliases<"faclt", ".2s", FACGTvvv_2S, VPR64>; -def FACLTvvv_4S : NeonI_compare_aliases<"faclt", ".4s", FACGTvvv_4S, VPR128>; -def FACLTvvv_2D : NeonI_compare_aliases<"faclt", ".2d", FACGTvvv_2D, VPR128>; - -// Vector halving add (Integer Signed, Unsigned) -defm SHADDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b00000, "shadd", - int_arm_neon_vhadds, 1>; -defm UHADDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b00000, "uhadd", - int_arm_neon_vhaddu, 1>; - -// Vector halving sub (Integer Signed, Unsigned) -defm SHSUBvvv : NeonI_3VSame_BHS_sizes<0b0, 0b00100, "shsub", - int_arm_neon_vhsubs, 0>; -defm UHSUBvvv : NeonI_3VSame_BHS_sizes<0b1, 0b00100, "uhsub", - int_arm_neon_vhsubu, 0>; - -// Vector rouding halving add (Integer Signed, Unsigned) -defm SRHADDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b00010, "srhadd", - int_arm_neon_vrhadds, 1>; -defm URHADDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b00010, "urhadd", - int_arm_neon_vrhaddu, 1>; - -// Vector Saturating add (Integer Signed, Unsigned) -defm SQADDvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00001, "sqadd", - int_arm_neon_vqadds, 1>; -defm UQADDvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00001, "uqadd", - int_arm_neon_vqaddu, 1>; - -// Vector Saturating sub (Integer Signed, Unsigned) -defm SQSUBvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00101, "sqsub", - int_arm_neon_vqsubs, 1>; -defm UQSUBvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00101, "uqsub", - int_arm_neon_vqsubu, 1>; - -// Vector Shift Left (Signed and Unsigned Integer) -defm SSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01000, "sshl", - int_arm_neon_vshifts, 1>; -defm USHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01000, "ushl", - int_arm_neon_vshiftu, 1>; - -// Vector Saturating Shift Left (Signed and Unsigned Integer) -defm SQSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01001, "sqshl", - int_arm_neon_vqshifts, 1>; -defm UQSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01001, "uqshl", - int_arm_neon_vqshiftu, 1>; - -// Vector Rouding Shift Left (Signed and Unsigned Integer) -defm SRSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01010, "srshl", - int_arm_neon_vrshifts, 1>; -defm URSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01010, "urshl", - int_arm_neon_vrshiftu, 1>; - -// Vector Saturating Rouding Shift Left (Signed and Unsigned Integer) -defm SQRSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01011, "sqrshl", - int_arm_neon_vqrshifts, 1>; -defm UQRSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01011, "uqrshl", - int_arm_neon_vqrshiftu, 1>; - -// Vector Maximum (Signed and Unsigned Integer) -defm SMAXvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01100, "smax", int_arm_neon_vmaxs, 1>; -defm UMAXvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01100, "umax", int_arm_neon_vmaxu, 1>; - -// Vector Minimum (Signed and Unsigned Integer) -defm SMINvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01101, "smin", int_arm_neon_vmins, 1>; -defm UMINvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01101, "umin", int_arm_neon_vminu, 1>; - -// Vector Maximum (Floating Point) -defm FMAXvvv : 
NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11110, "fmax", - int_arm_neon_vmaxs, - v2f32, v4f32, v2f64, 1>; - -// Vector Minimum (Floating Point) -defm FMINvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11110, "fmin", - int_arm_neon_vmins, - v2f32, v4f32, v2f64, 1>; - -// Vector maxNum (Floating Point) - prefer a number over a quiet NaN) -defm FMAXNMvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11000, "fmaxnm", - int_aarch64_neon_vmaxnm, - v2f32, v4f32, v2f64, 1>; - -// Vector minNum (Floating Point) - prefer a number over a quiet NaN) -defm FMINNMvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11000, "fminnm", - int_aarch64_neon_vminnm, - v2f32, v4f32, v2f64, 1>; - -// Vector Maximum Pairwise (Signed and Unsigned Integer) -defm SMAXPvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10100, "smaxp", int_arm_neon_vpmaxs, 1>; -defm UMAXPvvv : NeonI_3VSame_BHS_sizes<0b1, 0b10100, "umaxp", int_arm_neon_vpmaxu, 1>; - -// Vector Minimum Pairwise (Signed and Unsigned Integer) -defm SMINPvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10101, "sminp", int_arm_neon_vpmins, 1>; -defm UMINPvvv : NeonI_3VSame_BHS_sizes<0b1, 0b10101, "uminp", int_arm_neon_vpminu, 1>; - -// Vector Maximum Pairwise (Floating Point) -defm FMAXPvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11110, "fmaxp", - int_arm_neon_vpmaxs, v2f32, v4f32, v2f64, 1>; - -// Vector Minimum Pairwise (Floating Point) -defm FMINPvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11110, "fminp", - int_arm_neon_vpmins, v2f32, v4f32, v2f64, 1>; - -// Vector maxNum Pairwise (Floating Point) - prefer a number over a quiet NaN) -defm FMAXNMPvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11000, "fmaxnmp", - int_aarch64_neon_vpmaxnm, - v2f32, v4f32, v2f64, 1>; - -// Vector minNum Pairwise (Floating Point) - prefer a number over a quiet NaN) -defm FMINNMPvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11000, "fminnmp", - int_aarch64_neon_vpminnm, - v2f32, v4f32, v2f64, 1>; - -// Vector Addition Pairwise (Integer) -defm ADDP : NeonI_3VSame_BHSD_sizes<0b0, 0b10111, "addp", int_arm_neon_vpadd, 1>; - -// Vector Addition Pairwise (Floating Point) -defm FADDP : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11010, "faddp", - int_arm_neon_vpadd, - v2f32, v4f32, v2f64, 1>; - -let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in { -// Vector Saturating Doubling Multiply High -defm SQDMULHvvv : NeonI_3VSame_HS_sizes<0b0, 0b10110, "sqdmulh", - int_arm_neon_vqdmulh, 1>; - -// Vector Saturating Rouding Doubling Multiply High -defm SQRDMULHvvv : NeonI_3VSame_HS_sizes<0b1, 0b10110, "sqrdmulh", - int_arm_neon_vqrdmulh, 1>; - -// Vector Multiply Extended (Floating Point) -defm FMULXvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11011, "fmulx", - int_aarch64_neon_vmulx, - v2f32, v4f32, v2f64, 1>; -} - -// Patterns to match llvm.aarch64.* intrinsic for -// ADDP, SMINP, UMINP, SMAXP, UMAXP having i32 as output -class Neon_VectorPair_v2i32_pattern<SDPatternOperator opnode, Instruction INST> - : Pat<(v1i32 (opnode (v2i32 VPR64:$Rn))), - (EXTRACT_SUBREG - (v2i32 (INST (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rn))), - sub_32)>; - -def : Neon_VectorPair_v2i32_pattern<int_aarch64_neon_sminv, SMINPvvv_2S>; -def : Neon_VectorPair_v2i32_pattern<int_aarch64_neon_uminv, UMINPvvv_2S>; -def : Neon_VectorPair_v2i32_pattern<int_aarch64_neon_smaxv, SMAXPvvv_2S>; -def : Neon_VectorPair_v2i32_pattern<int_aarch64_neon_umaxv, UMAXPvvv_2S>; -def : Neon_VectorPair_v2i32_pattern<int_aarch64_neon_vaddv, ADDP_2S>; - -// Vector Immediate Instructions - -multiclass neon_mov_imm_shift_asmoperands<string PREFIX> -{ - def _asmoperand : AsmOperandClass - { - let Name = "NeonMovImmShift" # PREFIX; - let RenderMethod = 
"addNeonMovImmShift" # PREFIX # "Operands"; - let PredicateMethod = "isNeonMovImmShift" # PREFIX; - } -} - -// Definition of vector immediates shift operands - -// The selectable use-cases extract the shift operation -// information from the OpCmode fields encoded in the immediate. -def neon_mod_shift_imm_XFORM : SDNodeXForm<imm, [{ - uint64_t OpCmode = N->getZExtValue(); - unsigned ShiftImm; - unsigned ShiftOnesIn; - unsigned HasShift = - A64Imms::decodeNeonModShiftImm(OpCmode, ShiftImm, ShiftOnesIn); - if (!HasShift) return SDValue(); - return CurDAG->getTargetConstant(ShiftImm, MVT::i32); -}]>; - -// Vector immediates shift operands which accept LSL and MSL -// shift operators with shift value in the range of 0, 8, 16, 24 (LSL), -// or 0, 8 (LSLH) or 8, 16 (MSL). -defm neon_mov_imm_LSL : neon_mov_imm_shift_asmoperands<"LSL">; -defm neon_mov_imm_MSL : neon_mov_imm_shift_asmoperands<"MSL">; -// LSLH restricts shift amount to 0, 8 out of 0, 8, 16, 24 -defm neon_mov_imm_LSLH : neon_mov_imm_shift_asmoperands<"LSLH">; - -multiclass neon_mov_imm_shift_operands<string PREFIX, - string HALF, string ISHALF, code pred> -{ - def _operand : Operand<i32>, ImmLeaf<i32, pred, neon_mod_shift_imm_XFORM> - { - let PrintMethod = - "printNeonMovImmShiftOperand<A64SE::" # PREFIX # ", " # ISHALF # ">"; - let DecoderMethod = - "DecodeNeonMovImmShiftOperand<A64SE::" # PREFIX # ", " # ISHALF # ">"; - let ParserMatchClass = - !cast<AsmOperandClass>("neon_mov_imm_" # PREFIX # HALF # "_asmoperand"); - } -} - -defm neon_mov_imm_LSL : neon_mov_imm_shift_operands<"LSL", "", "false", [{ - unsigned ShiftImm; - unsigned ShiftOnesIn; - unsigned HasShift = - A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn); - return (HasShift && !ShiftOnesIn); -}]>; - -defm neon_mov_imm_MSL : neon_mov_imm_shift_operands<"MSL", "", "false", [{ - unsigned ShiftImm; - unsigned ShiftOnesIn; - unsigned HasShift = - A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn); - return (HasShift && ShiftOnesIn); -}]>; - -defm neon_mov_imm_LSLH : neon_mov_imm_shift_operands<"LSL", "H", "true", [{ - unsigned ShiftImm; - unsigned ShiftOnesIn; - unsigned HasShift = - A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn); - return (HasShift && !ShiftOnesIn); -}]>; - -def neon_uimm1_asmoperand : AsmOperandClass -{ - let Name = "UImm1"; - let PredicateMethod = "isUImm<1>"; - let RenderMethod = "addImmOperands"; -} - -def neon_uimm2_asmoperand : AsmOperandClass -{ - let Name = "UImm2"; - let PredicateMethod = "isUImm<2>"; - let RenderMethod = "addImmOperands"; -} - -def neon_uimm8_asmoperand : AsmOperandClass -{ - let Name = "UImm8"; - let PredicateMethod = "isUImm<8>"; - let RenderMethod = "addImmOperands"; -} - -def neon_uimm8 : Operand<i32>, ImmLeaf<i32, [{(void)Imm; return true;}]> { - let ParserMatchClass = neon_uimm8_asmoperand; - let PrintMethod = "printUImmHexOperand"; -} - -def neon_uimm64_mask_asmoperand : AsmOperandClass -{ - let Name = "NeonUImm64Mask"; - let PredicateMethod = "isNeonUImm64Mask"; - let RenderMethod = "addNeonUImm64MaskOperands"; -} - -// MCOperand for 64-bit bytemask with each byte having only the -// value 0x00 and 0xff is encoded as an unsigned 8-bit value -def neon_uimm64_mask : Operand<i32>, ImmLeaf<i32, [{(void)Imm; return true;}]> { - let ParserMatchClass = neon_uimm64_mask_asmoperand; - let PrintMethod = "printNeonUImm64MaskOperand"; -} - -multiclass NeonI_mov_imm_lsl_sizes<string asmop, bit op, - SDPatternOperator opnode> -{ - // shift zeros, per word - def _2S : NeonI_1VModImm<0b0, op, - (outs 
VPR64:$Rd), - (ins neon_uimm8:$Imm, - neon_mov_imm_LSL_operand:$Simm), - !strconcat(asmop, "\t$Rd.2s, $Imm$Simm"), - [(set (v2i32 VPR64:$Rd), - (v2i32 (opnode (timm:$Imm), - (neon_mov_imm_LSL_operand:$Simm))))], - NoItinerary>, - Sched<[WriteFPALU]> { - bits<2> Simm; - let cmode = {0b0, Simm{1}, Simm{0}, 0b0}; - } - - def _4S : NeonI_1VModImm<0b1, op, - (outs VPR128:$Rd), - (ins neon_uimm8:$Imm, - neon_mov_imm_LSL_operand:$Simm), - !strconcat(asmop, "\t$Rd.4s, $Imm$Simm"), - [(set (v4i32 VPR128:$Rd), - (v4i32 (opnode (timm:$Imm), - (neon_mov_imm_LSL_operand:$Simm))))], - NoItinerary>, - Sched<[WriteFPALU]> { - bits<2> Simm; - let cmode = {0b0, Simm{1}, Simm{0}, 0b0}; - } - - // shift zeros, per halfword - def _4H : NeonI_1VModImm<0b0, op, - (outs VPR64:$Rd), - (ins neon_uimm8:$Imm, - neon_mov_imm_LSLH_operand:$Simm), - !strconcat(asmop, "\t$Rd.4h, $Imm$Simm"), - [(set (v4i16 VPR64:$Rd), - (v4i16 (opnode (timm:$Imm), - (neon_mov_imm_LSLH_operand:$Simm))))], - NoItinerary>, - Sched<[WriteFPALU]> { - bit Simm; - let cmode = {0b1, 0b0, Simm, 0b0}; - } - - def _8H : NeonI_1VModImm<0b1, op, - (outs VPR128:$Rd), - (ins neon_uimm8:$Imm, - neon_mov_imm_LSLH_operand:$Simm), - !strconcat(asmop, "\t$Rd.8h, $Imm$Simm"), - [(set (v8i16 VPR128:$Rd), - (v8i16 (opnode (timm:$Imm), - (neon_mov_imm_LSLH_operand:$Simm))))], - NoItinerary>, - Sched<[WriteFPALU]> { - bit Simm; - let cmode = {0b1, 0b0, Simm, 0b0}; - } -} - -multiclass NeonI_mov_imm_with_constraint_lsl_sizes<string asmop, bit op, - SDPatternOperator opnode, - SDPatternOperator neonopnode> -{ - let Constraints = "$src = $Rd" in { - // shift zeros, per word - def _2S : NeonI_1VModImm<0b0, op, - (outs VPR64:$Rd), - (ins VPR64:$src, neon_uimm8:$Imm, - neon_mov_imm_LSL_operand:$Simm), - !strconcat(asmop, "\t$Rd.2s, $Imm$Simm"), - [(set (v2i32 VPR64:$Rd), - (v2i32 (opnode (v2i32 VPR64:$src), - (v2i32 (neonopnode timm:$Imm, - neon_mov_imm_LSL_operand:$Simm)))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]> { - bits<2> Simm; - let cmode = {0b0, Simm{1}, Simm{0}, 0b1}; - } - - def _4S : NeonI_1VModImm<0b1, op, - (outs VPR128:$Rd), - (ins VPR128:$src, neon_uimm8:$Imm, - neon_mov_imm_LSL_operand:$Simm), - !strconcat(asmop, "\t$Rd.4s, $Imm$Simm"), - [(set (v4i32 VPR128:$Rd), - (v4i32 (opnode (v4i32 VPR128:$src), - (v4i32 (neonopnode timm:$Imm, - neon_mov_imm_LSL_operand:$Simm)))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]> { - bits<2> Simm; - let cmode = {0b0, Simm{1}, Simm{0}, 0b1}; - } - - // shift zeros, per halfword - def _4H : NeonI_1VModImm<0b0, op, - (outs VPR64:$Rd), - (ins VPR64:$src, neon_uimm8:$Imm, - neon_mov_imm_LSLH_operand:$Simm), - !strconcat(asmop, "\t$Rd.4h, $Imm$Simm"), - [(set (v4i16 VPR64:$Rd), - (v4i16 (opnode (v4i16 VPR64:$src), - (v4i16 (neonopnode timm:$Imm, - neon_mov_imm_LSL_operand:$Simm)))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]> { - bit Simm; - let cmode = {0b1, 0b0, Simm, 0b1}; - } - - def _8H : NeonI_1VModImm<0b1, op, - (outs VPR128:$Rd), - (ins VPR128:$src, neon_uimm8:$Imm, - neon_mov_imm_LSLH_operand:$Simm), - !strconcat(asmop, "\t$Rd.8h, $Imm$Simm"), - [(set (v8i16 VPR128:$Rd), - (v8i16 (opnode (v8i16 VPR128:$src), - (v8i16 (neonopnode timm:$Imm, - neon_mov_imm_LSL_operand:$Simm)))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]> { - bit Simm; - let cmode = {0b1, 0b0, Simm, 0b1}; - } - } -} - -multiclass NeonI_mov_imm_msl_sizes<string asmop, bit op, - SDPatternOperator opnode> -{ - // shift ones, per word - def _2S : NeonI_1VModImm<0b0, op, - (outs VPR64:$Rd), - (ins neon_uimm8:$Imm, - 
neon_mov_imm_MSL_operand:$Simm), - !strconcat(asmop, "\t$Rd.2s, $Imm$Simm"), - [(set (v2i32 VPR64:$Rd), - (v2i32 (opnode (timm:$Imm), - (neon_mov_imm_MSL_operand:$Simm))))], - NoItinerary>, - Sched<[WriteFPALU]> { - bit Simm; - let cmode = {0b1, 0b1, 0b0, Simm}; - } - - def _4S : NeonI_1VModImm<0b1, op, - (outs VPR128:$Rd), - (ins neon_uimm8:$Imm, - neon_mov_imm_MSL_operand:$Simm), - !strconcat(asmop, "\t$Rd.4s, $Imm$Simm"), - [(set (v4i32 VPR128:$Rd), - (v4i32 (opnode (timm:$Imm), - (neon_mov_imm_MSL_operand:$Simm))))], - NoItinerary>, - Sched<[WriteFPALU]> { - bit Simm; - let cmode = {0b1, 0b1, 0b0, Simm}; - } -} - -// Vector Move Immediate Shifted -let isReMaterializable = 1 in { -defm MOVIvi_lsl : NeonI_mov_imm_lsl_sizes<"movi", 0b0, Neon_movi>; -} - -// Vector Move Inverted Immediate Shifted -let isReMaterializable = 1 in { -defm MVNIvi_lsl : NeonI_mov_imm_lsl_sizes<"mvni", 0b1, Neon_mvni>; -} - -// Vector Bitwise Bit Clear (AND NOT) - immediate -let isReMaterializable = 1 in { -defm BICvi_lsl : NeonI_mov_imm_with_constraint_lsl_sizes<"bic", 0b1, - and, Neon_mvni>; -} - -// Vector Bitwise OR - immedidate - -let isReMaterializable = 1 in { -defm ORRvi_lsl : NeonI_mov_imm_with_constraint_lsl_sizes<"orr", 0b0, - or, Neon_movi>; -} - -// Additional patterns for Vector Bitwise Bit Clear (AND NOT) - immedidate -// LowerBUILD_VECTOR favors lowering MOVI over MVNI. -// BIC immediate instructions selection requires additional patterns to -// transform Neon_movi operands into BIC immediate operands - -def neon_mov_imm_LSLH_transform_XFORM : SDNodeXForm<imm, [{ - uint64_t OpCmode = N->getZExtValue(); - unsigned ShiftImm; - unsigned ShiftOnesIn; - (void)A64Imms::decodeNeonModShiftImm(OpCmode, ShiftImm, ShiftOnesIn); - // LSLH restricts shift amount to 0, 8 which are encoded as 0 and 1 - // Transform encoded shift amount 0 to 1 and 1 to 0. 
- return CurDAG->getTargetConstant(!ShiftImm, MVT::i32); -}]>; - -def neon_mov_imm_LSLH_transform_operand - : ImmLeaf<i32, [{ - unsigned ShiftImm; - unsigned ShiftOnesIn; - unsigned HasShift = - A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn); - return (HasShift && !ShiftOnesIn); }], - neon_mov_imm_LSLH_transform_XFORM>; - -// Transform (and A, (4h Neon_movi 0xff)) -> BIC 4h (A, 0xff, LSL 8) -// Transform (and A, (4h Neon_movi 0xff LSL #8)) -> BIC 4h (A, 0xff) -def : Pat<(v4i16 (and VPR64:$src, - (v4i16 (Neon_movi 255, - neon_mov_imm_LSLH_transform_operand:$Simm)))), - (BICvi_lsl_4H VPR64:$src, 255, - neon_mov_imm_LSLH_transform_operand:$Simm)>; - -// Transform (and A, (8h Neon_movi 8h 0xff)) -> BIC 8h (A, 0xff, LSL 8) -// Transform (and A, (8h Neon_movi 0xff LSL #8)) -> BIC 8h (A, 0xff) -def : Pat<(v8i16 (and VPR128:$src, - (v8i16 (Neon_movi 255, - neon_mov_imm_LSLH_transform_operand:$Simm)))), - (BICvi_lsl_8H VPR128:$src, 255, - neon_mov_imm_LSLH_transform_operand:$Simm)>; - -def : Pat<(v8i8 (and VPR64:$src, - (bitconvert(v4i16 (Neon_movi 255, - neon_mov_imm_LSLH_transform_operand:$Simm))))), - (BICvi_lsl_4H VPR64:$src, 255, - neon_mov_imm_LSLH_transform_operand:$Simm)>; -def : Pat<(v2i32 (and VPR64:$src, - (bitconvert(v4i16 (Neon_movi 255, - neon_mov_imm_LSLH_transform_operand:$Simm))))), - (BICvi_lsl_4H VPR64:$src, 255, - neon_mov_imm_LSLH_transform_operand:$Simm)>; -def : Pat<(v1i64 (and VPR64:$src, - (bitconvert(v4i16 (Neon_movi 255, - neon_mov_imm_LSLH_transform_operand:$Simm))))), - (BICvi_lsl_4H VPR64:$src, 255, - neon_mov_imm_LSLH_transform_operand:$Simm)>; - -def : Pat<(v16i8 (and VPR128:$src, - (bitconvert(v8i16 (Neon_movi 255, - neon_mov_imm_LSLH_transform_operand:$Simm))))), - (BICvi_lsl_8H VPR128:$src, 255, - neon_mov_imm_LSLH_transform_operand:$Simm)>; -def : Pat<(v4i32 (and VPR128:$src, - (bitconvert(v8i16 (Neon_movi 255, - neon_mov_imm_LSLH_transform_operand:$Simm))))), - (BICvi_lsl_8H VPR128:$src, 255, - neon_mov_imm_LSLH_transform_operand:$Simm)>; -def : Pat<(v2i64 (and VPR128:$src, - (bitconvert(v8i16 (Neon_movi 255, - neon_mov_imm_LSLH_transform_operand:$Simm))))), - (BICvi_lsl_8H VPR128:$src, 255, - neon_mov_imm_LSLH_transform_operand:$Simm)>; - -multiclass Neon_bitwiseVi_patterns<SDPatternOperator opnode, - SDPatternOperator neonopnode, - Instruction INST4H, - Instruction INST8H, - Instruction INST2S, - Instruction INST4S> { - def : Pat<(v8i8 (opnode VPR64:$src, - (bitconvert(v4i16 (neonopnode timm:$Imm, - neon_mov_imm_LSLH_operand:$Simm))))), - (INST4H VPR64:$src, neon_uimm8:$Imm, - neon_mov_imm_LSLH_operand:$Simm)>; - def : Pat<(v2i32 (opnode VPR64:$src, - (bitconvert(v4i16 (neonopnode timm:$Imm, - neon_mov_imm_LSLH_operand:$Simm))))), - (INST4H VPR64:$src, neon_uimm8:$Imm, - neon_mov_imm_LSLH_operand:$Simm)>; - def : Pat<(v1i64 (opnode VPR64:$src, - (bitconvert(v4i16 (neonopnode timm:$Imm, - neon_mov_imm_LSLH_operand:$Simm))))), - (INST4H VPR64:$src, neon_uimm8:$Imm, - neon_mov_imm_LSLH_operand:$Simm)>; - - def : Pat<(v16i8 (opnode VPR128:$src, - (bitconvert(v8i16 (neonopnode timm:$Imm, - neon_mov_imm_LSLH_operand:$Simm))))), - (INST8H VPR128:$src, neon_uimm8:$Imm, - neon_mov_imm_LSLH_operand:$Simm)>; - def : Pat<(v4i32 (opnode VPR128:$src, - (bitconvert(v8i16 (neonopnode timm:$Imm, - neon_mov_imm_LSLH_operand:$Simm))))), - (INST8H VPR128:$src, neon_uimm8:$Imm, - neon_mov_imm_LSLH_operand:$Simm)>; - def : Pat<(v2i64 (opnode VPR128:$src, - (bitconvert(v8i16 (neonopnode timm:$Imm, - neon_mov_imm_LSLH_operand:$Simm))))), - (INST8H VPR128:$src, 
neon_uimm8:$Imm, - neon_mov_imm_LSLH_operand:$Simm)>; - - def : Pat<(v8i8 (opnode VPR64:$src, - (bitconvert(v2i32 (neonopnode timm:$Imm, - neon_mov_imm_LSLH_operand:$Simm))))), - (INST2S VPR64:$src, neon_uimm8:$Imm, - neon_mov_imm_LSLH_operand:$Simm)>; - def : Pat<(v4i16 (opnode VPR64:$src, - (bitconvert(v2i32 (neonopnode timm:$Imm, - neon_mov_imm_LSLH_operand:$Simm))))), - (INST2S VPR64:$src, neon_uimm8:$Imm, - neon_mov_imm_LSLH_operand:$Simm)>; - def : Pat<(v1i64 (opnode VPR64:$src, - (bitconvert(v2i32 (neonopnode timm:$Imm, - neon_mov_imm_LSLH_operand:$Simm))))), - (INST2S VPR64:$src, neon_uimm8:$Imm, - neon_mov_imm_LSLH_operand:$Simm)>; - - def : Pat<(v16i8 (opnode VPR128:$src, - (bitconvert(v4i32 (neonopnode timm:$Imm, - neon_mov_imm_LSLH_operand:$Simm))))), - (INST4S VPR128:$src, neon_uimm8:$Imm, - neon_mov_imm_LSLH_operand:$Simm)>; - def : Pat<(v8i16 (opnode VPR128:$src, - (bitconvert(v4i32 (neonopnode timm:$Imm, - neon_mov_imm_LSLH_operand:$Simm))))), - (INST4S VPR128:$src, neon_uimm8:$Imm, - neon_mov_imm_LSLH_operand:$Simm)>; - def : Pat<(v2i64 (opnode VPR128:$src, - (bitconvert(v4i32 (neonopnode timm:$Imm, - neon_mov_imm_LSLH_operand:$Simm))))), - (INST4S VPR128:$src, neon_uimm8:$Imm, - neon_mov_imm_LSLH_operand:$Simm)>; -} - -// Additional patterns for Vector Vector Bitwise Bit Clear (AND NOT) - immediate -defm : Neon_bitwiseVi_patterns<and, Neon_mvni, BICvi_lsl_4H, BICvi_lsl_8H, - BICvi_lsl_2S, BICvi_lsl_4S>; - -// Additional patterns for Vector Bitwise OR - immedidate -defm : Neon_bitwiseVi_patterns<or, Neon_movi, ORRvi_lsl_4H, ORRvi_lsl_8H, - ORRvi_lsl_2S, ORRvi_lsl_4S>; - - -// Vector Move Immediate Masked -let isReMaterializable = 1 in { -defm MOVIvi_msl : NeonI_mov_imm_msl_sizes<"movi", 0b0, Neon_movi>; -} - -// Vector Move Inverted Immediate Masked -let isReMaterializable = 1 in { -defm MVNIvi_msl : NeonI_mov_imm_msl_sizes<"mvni", 0b1, Neon_mvni>; -} - -class NeonI_mov_imm_lsl_aliases<string asmop, string asmlane, - Instruction inst, RegisterOperand VPRC> - : NeonInstAlias<!strconcat(asmop, "\t$Rd," # asmlane # ", $Imm"), - (inst VPRC:$Rd, neon_uimm8:$Imm, 0), 0b0>; - -// Aliases for Vector Move Immediate Shifted -def : NeonI_mov_imm_lsl_aliases<"movi", ".2s", MOVIvi_lsl_2S, VPR64>; -def : NeonI_mov_imm_lsl_aliases<"movi", ".4s", MOVIvi_lsl_4S, VPR128>; -def : NeonI_mov_imm_lsl_aliases<"movi", ".4h", MOVIvi_lsl_4H, VPR64>; -def : NeonI_mov_imm_lsl_aliases<"movi", ".8h", MOVIvi_lsl_8H, VPR128>; - -// Aliases for Vector Move Inverted Immediate Shifted -def : NeonI_mov_imm_lsl_aliases<"mvni", ".2s", MVNIvi_lsl_2S, VPR64>; -def : NeonI_mov_imm_lsl_aliases<"mvni", ".4s", MVNIvi_lsl_4S, VPR128>; -def : NeonI_mov_imm_lsl_aliases<"mvni", ".4h", MVNIvi_lsl_4H, VPR64>; -def : NeonI_mov_imm_lsl_aliases<"mvni", ".8h", MVNIvi_lsl_8H, VPR128>; - -// Aliases for Vector Bitwise Bit Clear (AND NOT) - immediate -def : NeonI_mov_imm_lsl_aliases<"bic", ".2s", BICvi_lsl_2S, VPR64>; -def : NeonI_mov_imm_lsl_aliases<"bic", ".4s", BICvi_lsl_4S, VPR128>; -def : NeonI_mov_imm_lsl_aliases<"bic", ".4h", BICvi_lsl_4H, VPR64>; -def : NeonI_mov_imm_lsl_aliases<"bic", ".8h", BICvi_lsl_8H, VPR128>; - -// Aliases for Vector Bitwise OR - immedidate -def : NeonI_mov_imm_lsl_aliases<"orr", ".2s", ORRvi_lsl_2S, VPR64>; -def : NeonI_mov_imm_lsl_aliases<"orr", ".4s", ORRvi_lsl_4S, VPR128>; -def : NeonI_mov_imm_lsl_aliases<"orr", ".4h", ORRvi_lsl_4H, VPR64>; -def : NeonI_mov_imm_lsl_aliases<"orr", ".8h", ORRvi_lsl_8H, VPR128>; - -// Vector Move Immediate - per byte -let isReMaterializable = 1 in { -def MOVIvi_8B 
: NeonI_1VModImm<0b0, 0b0, - (outs VPR64:$Rd), (ins neon_uimm8:$Imm), - "movi\t$Rd.8b, $Imm", - [(set (v8i8 VPR64:$Rd), - (v8i8 (Neon_movi (timm:$Imm), (i32 imm))))], - NoItinerary>, - Sched<[WriteFPALU]> { - let cmode = 0b1110; -} - -def MOVIvi_16B : NeonI_1VModImm<0b1, 0b0, - (outs VPR128:$Rd), (ins neon_uimm8:$Imm), - "movi\t$Rd.16b, $Imm", - [(set (v16i8 VPR128:$Rd), - (v16i8 (Neon_movi (timm:$Imm), (i32 imm))))], - NoItinerary>, - Sched<[WriteFPALU]> { - let cmode = 0b1110; -} -} - -// Vector Move Immediate - bytemask, per double word -let isReMaterializable = 1 in { -def MOVIvi_2D : NeonI_1VModImm<0b1, 0b1, - (outs VPR128:$Rd), (ins neon_uimm64_mask:$Imm), - "movi\t $Rd.2d, $Imm", - [(set (v2i64 VPR128:$Rd), - (v2i64 (Neon_movi (timm:$Imm), (i32 imm))))], - NoItinerary>, - Sched<[WriteFPALU]> { - let cmode = 0b1110; -} -} - -// Vector Move Immediate - bytemask, one doubleword - -let isReMaterializable = 1 in { -def MOVIdi : NeonI_1VModImm<0b0, 0b1, - (outs FPR64:$Rd), (ins neon_uimm64_mask:$Imm), - "movi\t $Rd, $Imm", - [(set (v1i64 FPR64:$Rd), - (v1i64 (Neon_movi (timm:$Imm), (i32 imm))))], - NoItinerary>, - Sched<[WriteFPALU]> { - let cmode = 0b1110; -} -} - -// Vector Floating Point Move Immediate - -class NeonI_FMOV_impl<string asmlane, RegisterOperand VPRC, ValueType OpTy, - Operand immOpType, bit q, bit op> - : NeonI_1VModImm<q, op, - (outs VPRC:$Rd), (ins immOpType:$Imm), - "fmov\t$Rd" # asmlane # ", $Imm", - [(set (OpTy VPRC:$Rd), - (OpTy (Neon_fmovi (timm:$Imm))))], - NoItinerary>, - Sched<[WriteFPALU]> { - let cmode = 0b1111; - } - -let isReMaterializable = 1 in { -def FMOVvi_2S : NeonI_FMOV_impl<".2s", VPR64, v2f32, fmov32_operand, 0b0, 0b0>; -def FMOVvi_4S : NeonI_FMOV_impl<".4s", VPR128, v4f32, fmov32_operand, 0b1, 0b0>; -def FMOVvi_2D : NeonI_FMOV_impl<".2d", VPR128, v2f64, fmov64_operand, 0b1, 0b1>; -} - -// Vector Shift (Immediate) - -// Shift Right/Left Immediate - The immh:immb field of these shifts are encoded -// as follows: -// -// Offset Encoding -// 8 immh:immb<6:3> = '0001xxx', <imm> is encoded in immh:immb<2:0> -// 16 immh:immb<6:4> = '001xxxx', <imm> is encoded in immh:immb<3:0> -// 32 immh:immb<6:5> = '01xxxxx', <imm> is encoded in immh:immb<4:0> -// 64 immh:immb<6> = '1xxxxxx', <imm> is encoded in immh:immb<5:0> -// -// The shift right immediate amount, in the range 1 to element bits, is computed -// as Offset - UInt(immh:immb). The shift left immediate amount, in the range 0 -// to element bits - 1, is computed as UInt(immh:immb) - Offset. 
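To restate the immh:immb scheme above in terms of the full 7-bit immh:immb value: the position of the leading one selects the element size, the shift-right amount is (2 * element bits) - UInt(immh:immb), and the shift-left amount is UInt(immh:immb) - element bits. A minimal stand-alone sketch of that decode follows; it is illustrative C++ only, and the helper name decodeVectorShiftImm is hypothetical, not part of this backend:

    #include <cassert>
    #include <cstdio>

    struct VecShiftImm {
      unsigned ElemBits; // 8, 16, 32 or 64, from the leading one of immh
      unsigned ShrAmt;   // shift right amount, in 1 .. ElemBits
      unsigned ShlAmt;   // shift left amount, in 0 .. ElemBits - 1
    };

    // ImmHB is the 7-bit concatenation immh:immb (immh = bits 6..3, immb = bits 2..0).
    static VecShiftImm decodeVectorShiftImm(unsigned ImmHB) {
      assert(ImmHB >= 8 && ImmHB < 128 && "immh must be non-zero");
      unsigned ElemBits = 8;
      while (ImmHB >= 2 * ElemBits) // leading one of immh picks the element size
        ElemBits *= 2;
      return {ElemBits, 2 * ElemBits - ImmHB, ImmHB - ElemBits};
    }

    int main() {
      // immh:immb = 0b0001001 (9): 8-bit lanes, i.e. "sshr #7" or "shl #1".
      VecShiftImm S = decodeVectorShiftImm(0b0001001);
      std::printf("esize=%u shr=#%u shl=#%u\n", S.ElemBits, S.ShrAmt, S.ShlAmt);
    }

The example value matches the first row of the table above: '0001xxx' denotes 8-bit elements, and the low bits carry the encoded shift.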
- -class shr_imm_asmoperands<string OFFSET> : AsmOperandClass { - let Name = "ShrImm" # OFFSET; - let RenderMethod = "addImmOperands"; - let DiagnosticType = "ShrImm" # OFFSET; -} - -class shr_imm<string OFFSET> : Operand<i32> { - let EncoderMethod = "getShiftRightImm" # OFFSET; - let DecoderMethod = "DecodeShiftRightImm" # OFFSET; - let ParserMatchClass = - !cast<AsmOperandClass>("shr_imm" # OFFSET # "_asmoperand"); -} - -def shr_imm8_asmoperand : shr_imm_asmoperands<"8">; -def shr_imm16_asmoperand : shr_imm_asmoperands<"16">; -def shr_imm32_asmoperand : shr_imm_asmoperands<"32">; -def shr_imm64_asmoperand : shr_imm_asmoperands<"64">; - -def shr_imm8 : shr_imm<"8">, ImmLeaf<i32, [{return Imm > 0 && Imm <= 8;}]>; -def shr_imm16 : shr_imm<"16">, ImmLeaf<i32, [{return Imm > 0 && Imm <= 16;}]>; -def shr_imm32 : shr_imm<"32">, ImmLeaf<i32, [{return Imm > 0 && Imm <= 32;}]>; -def shr_imm64 : shr_imm<"64">, ImmLeaf<i32, [{return Imm > 0 && Imm <= 64;}]>; - -class shl_imm_asmoperands<string OFFSET> : AsmOperandClass { - let Name = "ShlImm" # OFFSET; - let RenderMethod = "addImmOperands"; - let DiagnosticType = "ShlImm" # OFFSET; -} - -class shl_imm<string OFFSET> : Operand<i32> { - let EncoderMethod = "getShiftLeftImm" # OFFSET; - let DecoderMethod = "DecodeShiftLeftImm" # OFFSET; - let ParserMatchClass = - !cast<AsmOperandClass>("shl_imm" # OFFSET # "_asmoperand"); -} - -def shl_imm8_asmoperand : shl_imm_asmoperands<"8">; -def shl_imm16_asmoperand : shl_imm_asmoperands<"16">; -def shl_imm32_asmoperand : shl_imm_asmoperands<"32">; -def shl_imm64_asmoperand : shl_imm_asmoperands<"64">; - -def shl_imm8 : shl_imm<"8">, ImmLeaf<i32, [{return Imm >= 0 && Imm < 8;}]>; -def shl_imm16 : shl_imm<"16">, ImmLeaf<i32, [{return Imm >= 0 && Imm < 16;}]>; -def shl_imm32 : shl_imm<"32">, ImmLeaf<i32, [{return Imm >= 0 && Imm < 32;}]>; -def shl_imm64 : shl_imm<"64">, ImmLeaf<i32, [{return Imm >= 0 && Imm < 64;}]>; - -class N2VShift<bit q, bit u, bits<5> opcode, string asmop, string T, - RegisterOperand VPRC, ValueType Ty, Operand ImmTy, SDNode OpNode> - : NeonI_2VShiftImm<q, u, opcode, - (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm), - asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm", - [(set (Ty VPRC:$Rd), - (Ty (OpNode (Ty VPRC:$Rn), - (Ty (Neon_vdup (i32 ImmTy:$Imm))))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - -multiclass NeonI_N2VShL<bit u, bits<5> opcode, string asmop> { - // 64-bit vector types. - def _8B : N2VShift<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shl_imm8, shl> { - let Inst{22-19} = 0b0001; // immh:immb = 0001xxx - } - - def _4H : N2VShift<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shl_imm16, shl> { - let Inst{22-20} = 0b001; // immh:immb = 001xxxx - } - - def _2S : N2VShift<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shl_imm32, shl> { - let Inst{22-21} = 0b01; // immh:immb = 01xxxxx - } - - // 128-bit vector types. 
- def _16B : N2VShift<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shl_imm8, shl> { - let Inst{22-19} = 0b0001; // immh:immb = 0001xxx - } - - def _8H : N2VShift<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shl_imm16, shl> { - let Inst{22-20} = 0b001; // immh:immb = 001xxxx - } - - def _4S : N2VShift<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shl_imm32, shl> { - let Inst{22-21} = 0b01; // immh:immb = 01xxxxx - } - - def _2D : N2VShift<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shl_imm64, shl> { - let Inst{22} = 0b1; // immh:immb = 1xxxxxx - } -} - -multiclass NeonI_N2VShR<bit u, bits<5> opcode, string asmop, SDNode OpNode> { - def _8B : N2VShift<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8, - OpNode> { - let Inst{22-19} = 0b0001; - } - - def _4H : N2VShift<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16, - OpNode> { - let Inst{22-20} = 0b001; - } - - def _2S : N2VShift<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32, - OpNode> { - let Inst{22-21} = 0b01; - } - - def _16B : N2VShift<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8, - OpNode> { - let Inst{22-19} = 0b0001; - } - - def _8H : N2VShift<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16, - OpNode> { - let Inst{22-20} = 0b001; - } - - def _4S : N2VShift<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32, - OpNode> { - let Inst{22-21} = 0b01; - } - - def _2D : N2VShift<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64, - OpNode> { - let Inst{22} = 0b1; - } -} - -// Shift left - -defm SHLvvi : NeonI_N2VShL<0b0, 0b01010, "shl">; - -// Additional patterns to match vector shift left by immediate. -// (v1i8/v1i16/v1i32 types) -def : Pat<(v1i8 (shl (v1i8 FPR8:$Rn), - (v1i8 (Neon_vdup (i32 (shl_imm8:$Imm)))))), - (EXTRACT_SUBREG - (SHLvvi_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8), - shl_imm8:$Imm), - sub_8)>; -def : Pat<(v1i16 (shl (v1i16 FPR16:$Rn), - (v1i16 (Neon_vdup (i32 (shl_imm16:$Imm)))))), - (EXTRACT_SUBREG - (SHLvvi_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16), - shl_imm16:$Imm), - sub_16)>; -def : Pat<(v1i32 (shl (v1i32 FPR32:$Rn), - (v1i32 (Neon_vdup (i32 (shl_imm32:$Imm)))))), - (EXTRACT_SUBREG - (SHLvvi_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32), - shl_imm32:$Imm), - sub_32)>; - -// Shift right -defm SSHRvvi : NeonI_N2VShR<0b0, 0b00000, "sshr", sra>; -defm USHRvvi : NeonI_N2VShR<0b1, 0b00000, "ushr", srl>; - -// Additional patterns to match vector shift right by immediate. 
-// (v1i8/v1i16/v1i32 types) -def : Pat<(v1i8 (sra (v1i8 FPR8:$Rn), - (v1i8 (Neon_vdup (i32 (shr_imm8:$Imm)))))), - (EXTRACT_SUBREG - (SSHRvvi_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8), - shr_imm8:$Imm), - sub_8)>; -def : Pat<(v1i16 (sra (v1i16 FPR16:$Rn), - (v1i16 (Neon_vdup (i32 (shr_imm16:$Imm)))))), - (EXTRACT_SUBREG - (SSHRvvi_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16), - shr_imm16:$Imm), - sub_16)>; -def : Pat<(v1i32 (sra (v1i32 FPR32:$Rn), - (v1i32 (Neon_vdup (i32 (shr_imm32:$Imm)))))), - (EXTRACT_SUBREG - (SSHRvvi_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32), - shr_imm32:$Imm), - sub_32)>; -def : Pat<(v1i8 (srl (v1i8 FPR8:$Rn), - (v1i8 (Neon_vdup (i32 (shr_imm8:$Imm)))))), - (EXTRACT_SUBREG - (USHRvvi_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8), - shr_imm8:$Imm), - sub_8)>; -def : Pat<(v1i16 (srl (v1i16 FPR16:$Rn), - (v1i16 (Neon_vdup (i32 (shr_imm16:$Imm)))))), - (EXTRACT_SUBREG - (USHRvvi_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16), - shr_imm16:$Imm), - sub_16)>; -def : Pat<(v1i32 (srl (v1i32 FPR32:$Rn), - (v1i32 (Neon_vdup (i32 (shr_imm32:$Imm)))))), - (EXTRACT_SUBREG - (USHRvvi_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32), - shr_imm32:$Imm), - sub_32)>; - -def Neon_High16B : PatFrag<(ops node:$in), - (extract_subvector (v16i8 node:$in), (iPTR 8))>; -def Neon_High8H : PatFrag<(ops node:$in), - (extract_subvector (v8i16 node:$in), (iPTR 4))>; -def Neon_High4S : PatFrag<(ops node:$in), - (extract_subvector (v4i32 node:$in), (iPTR 2))>; -def Neon_High2D : PatFrag<(ops node:$in), - (extract_subvector (v2i64 node:$in), (iPTR 1))>; -def Neon_High4float : PatFrag<(ops node:$in), - (extract_subvector (v4f32 node:$in), (iPTR 2))>; -def Neon_High2double : PatFrag<(ops node:$in), - (extract_subvector (v2f64 node:$in), (iPTR 1))>; - -def Neon_Low16B : PatFrag<(ops node:$in), - (v8i8 (extract_subvector (v16i8 node:$in), - (iPTR 0)))>; -def Neon_Low8H : PatFrag<(ops node:$in), - (v4i16 (extract_subvector (v8i16 node:$in), - (iPTR 0)))>; -def Neon_Low4S : PatFrag<(ops node:$in), - (v2i32 (extract_subvector (v4i32 node:$in), - (iPTR 0)))>; -def Neon_Low2D : PatFrag<(ops node:$in), - (v1i64 (extract_subvector (v2i64 node:$in), - (iPTR 0)))>; -def Neon_Low4float : PatFrag<(ops node:$in), - (v2f32 (extract_subvector (v4f32 node:$in), - (iPTR 0)))>; -def Neon_Low2double : PatFrag<(ops node:$in), - (v1f64 (extract_subvector (v2f64 node:$in), - (iPTR 0)))>; - -class N2VShiftLong<bit q, bit u, bits<5> opcode, string asmop, string DestT, - string SrcT, ValueType DestTy, ValueType SrcTy, - Operand ImmTy, SDPatternOperator ExtOp> - : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd), - (ins VPR64:$Rn, ImmTy:$Imm), - asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm", - [(set (DestTy VPR128:$Rd), - (DestTy (shl - (DestTy (ExtOp (SrcTy VPR64:$Rn))), - (DestTy (Neon_vdup (i32 ImmTy:$Imm))))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - -class N2VShiftLongHigh<bit q, bit u, bits<5> opcode, string asmop, string DestT, - string SrcT, ValueType DestTy, ValueType SrcTy, - int StartIndex, Operand ImmTy, - SDPatternOperator ExtOp, PatFrag getTop> - : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd), - (ins VPR128:$Rn, ImmTy:$Imm), - asmop # "2\t$Rd." # DestT # ", $Rn." 
# SrcT # ", $Imm", - [(set (DestTy VPR128:$Rd), - (DestTy (shl - (DestTy (ExtOp - (SrcTy (getTop VPR128:$Rn)))), - (DestTy (Neon_vdup (i32 ImmTy:$Imm))))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - -multiclass NeonI_N2VShLL<string prefix, bit u, bits<5> opcode, string asmop, - SDNode ExtOp> { - // 64-bit vector types. - def _8B : N2VShiftLong<0b0, u, opcode, asmop, "8h", "8b", v8i16, v8i8, - shl_imm8, ExtOp> { - let Inst{22-19} = 0b0001; // immh:immb = 0001xxx - } - - def _4H : N2VShiftLong<0b0, u, opcode, asmop, "4s", "4h", v4i32, v4i16, - shl_imm16, ExtOp> { - let Inst{22-20} = 0b001; // immh:immb = 001xxxx - } - - def _2S : N2VShiftLong<0b0, u, opcode, asmop, "2d", "2s", v2i64, v2i32, - shl_imm32, ExtOp> { - let Inst{22-21} = 0b01; // immh:immb = 01xxxxx - } - - // 128-bit vector types - def _16B : N2VShiftLongHigh<0b1, u, opcode, asmop, "8h", "16b", v8i16, v8i8, - 8, shl_imm8, ExtOp, Neon_High16B> { - let Inst{22-19} = 0b0001; // immh:immb = 0001xxx - } - - def _8H : N2VShiftLongHigh<0b1, u, opcode, asmop, "4s", "8h", v4i32, v4i16, - 4, shl_imm16, ExtOp, Neon_High8H> { - let Inst{22-20} = 0b001; // immh:immb = 001xxxx - } - - def _4S : N2VShiftLongHigh<0b1, u, opcode, asmop, "2d", "4s", v2i64, v2i32, - 2, shl_imm32, ExtOp, Neon_High4S> { - let Inst{22-21} = 0b01; // immh:immb = 01xxxxx - } - - // Use other patterns to match when the immediate is 0. - def : Pat<(v8i16 (ExtOp (v8i8 VPR64:$Rn))), - (!cast<Instruction>(prefix # "_8B") VPR64:$Rn, 0)>; - - def : Pat<(v4i32 (ExtOp (v4i16 VPR64:$Rn))), - (!cast<Instruction>(prefix # "_4H") VPR64:$Rn, 0)>; - - def : Pat<(v2i64 (ExtOp (v2i32 VPR64:$Rn))), - (!cast<Instruction>(prefix # "_2S") VPR64:$Rn, 0)>; - - def : Pat<(v8i16 (ExtOp (v8i8 (Neon_High16B VPR128:$Rn)))), - (!cast<Instruction>(prefix # "_16B") VPR128:$Rn, 0)>; - - def : Pat<(v4i32 (ExtOp (v4i16 (Neon_High8H VPR128:$Rn)))), - (!cast<Instruction>(prefix # "_8H") VPR128:$Rn, 0)>; - - def : Pat<(v2i64 (ExtOp (v2i32 (Neon_High4S VPR128:$Rn)))), - (!cast<Instruction>(prefix # "_4S") VPR128:$Rn, 0)>; -} - -// Shift left long -defm SSHLLvvi : NeonI_N2VShLL<"SSHLLvvi", 0b0, 0b10100, "sshll", sext>; -defm USHLLvvi : NeonI_N2VShLL<"USHLLvvi", 0b1, 0b10100, "ushll", zext>; - -class NeonI_ext_len_alias<string asmop, string lane, string laneOp, - Instruction inst, RegisterOperand VPRC, - RegisterOperand VPRCOp> - : NeonInstAlias<asmop # "\t$Rd" # lane #", $Rn" # laneOp, - (inst VPRC:$Rd, VPRCOp:$Rn, 0), 0b0>; - -// Signed integer lengthen (vector) is alias for SSHLL Vd, Vn, #0 -// Signed integer lengthen (vector, second part) is alias for SSHLL2 Vd, Vn, #0 -// FIXME: This is actually the preferred syntax but TableGen can't deal with -// custom printing of aliases. -def SXTLvv_8B : NeonI_ext_len_alias<"sxtl", ".8h", ".8b", SSHLLvvi_8B, VPR128, VPR64>; -def SXTLvv_4H : NeonI_ext_len_alias<"sxtl", ".4s", ".4h", SSHLLvvi_4H, VPR128, VPR64>; -def SXTLvv_2S : NeonI_ext_len_alias<"sxtl", ".2d", ".2s", SSHLLvvi_2S, VPR128, VPR64>; -def SXTL2vv_16B : NeonI_ext_len_alias<"sxtl2", ".8h", ".16b", SSHLLvvi_16B, VPR128, VPR128>; -def SXTL2vv_8H : NeonI_ext_len_alias<"sxtl2", ".4s", ".8h", SSHLLvvi_8H, VPR128, VPR128>; -def SXTL2vv_4S : NeonI_ext_len_alias<"sxtl2", ".2d", ".4s", SSHLLvvi_4S, VPR128, VPR128>; - -// Unsigned integer lengthen (vector) is alias for USHLL Vd, Vn, #0 -// Unsigned integer lengthen (vector, second part) is alias for USHLL2 Vd, Vn, #0 -// FIXME: This is actually the preferred syntax but TableGen can't deal with -// custom printing of aliases. 
-def UXTLvv_8B : NeonI_ext_len_alias<"uxtl", ".8h", ".8b", USHLLvvi_8B, VPR128, VPR64>; -def UXTLvv_4H : NeonI_ext_len_alias<"uxtl", ".4s", ".4h", USHLLvvi_4H, VPR128, VPR64>; -def UXTLvv_2S : NeonI_ext_len_alias<"uxtl", ".2d", ".2s", USHLLvvi_2S, VPR128, VPR64>; -def UXTL2vv_16B : NeonI_ext_len_alias<"uxtl2", ".8h", ".16b", USHLLvvi_16B, VPR128, VPR128>; -def UXTL2vv_8H : NeonI_ext_len_alias<"uxtl2", ".4s", ".8h", USHLLvvi_8H, VPR128, VPR128>; -def UXTL2vv_4S : NeonI_ext_len_alias<"uxtl2", ".2d", ".4s", USHLLvvi_4S, VPR128, VPR128>; - -def : Pat<(v8i16 (anyext (v8i8 VPR64:$Rn))), (USHLLvvi_8B VPR64:$Rn, 0)>; -def : Pat<(v4i32 (anyext (v4i16 VPR64:$Rn))), (USHLLvvi_4H VPR64:$Rn, 0)>; -def : Pat<(v2i64 (anyext (v2i32 VPR64:$Rn))), (USHLLvvi_2S VPR64:$Rn, 0)>; - -// Rounding/Saturating shift -class N2VShift_RQ<bit q, bit u, bits<5> opcode, string asmop, string T, - RegisterOperand VPRC, ValueType Ty, Operand ImmTy, - SDPatternOperator OpNode> - : NeonI_2VShiftImm<q, u, opcode, - (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm), - asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm", - [(set (Ty VPRC:$Rd), (Ty (OpNode (Ty VPRC:$Rn), - (i32 ImmTy:$Imm))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - -// shift right (vector by immediate) -multiclass NeonI_N2VShR_RQ<bit u, bits<5> opcode, string asmop, - SDPatternOperator OpNode> { - def _8B : N2VShift_RQ<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8, - OpNode> { - let Inst{22-19} = 0b0001; - } - - def _4H : N2VShift_RQ<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16, - OpNode> { - let Inst{22-20} = 0b001; - } - - def _2S : N2VShift_RQ<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32, - OpNode> { - let Inst{22-21} = 0b01; - } - - def _16B : N2VShift_RQ<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8, - OpNode> { - let Inst{22-19} = 0b0001; - } - - def _8H : N2VShift_RQ<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16, - OpNode> { - let Inst{22-20} = 0b001; - } - - def _4S : N2VShift_RQ<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32, - OpNode> { - let Inst{22-21} = 0b01; - } - - def _2D : N2VShift_RQ<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64, - OpNode> { - let Inst{22} = 0b1; - } -} - -multiclass NeonI_N2VShL_Q<bit u, bits<5> opcode, string asmop, - SDPatternOperator OpNode> { - // 64-bit vector types. - def _8B : N2VShift_RQ<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shl_imm8, - OpNode> { - let Inst{22-19} = 0b0001; - } - - def _4H : N2VShift_RQ<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shl_imm16, - OpNode> { - let Inst{22-20} = 0b001; - } - - def _2S : N2VShift_RQ<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shl_imm32, - OpNode> { - let Inst{22-21} = 0b01; - } - - // 128-bit vector types. 
- def _16B : N2VShift_RQ<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shl_imm8, - OpNode> { - let Inst{22-19} = 0b0001; - } - - def _8H : N2VShift_RQ<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shl_imm16, - OpNode> { - let Inst{22-20} = 0b001; - } - - def _4S : N2VShift_RQ<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shl_imm32, - OpNode> { - let Inst{22-21} = 0b01; - } - - def _2D : N2VShift_RQ<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shl_imm64, - OpNode> { - let Inst{22} = 0b1; - } -} - -// Rounding shift right -defm SRSHRvvi : NeonI_N2VShR_RQ<0b0, 0b00100, "srshr", - int_aarch64_neon_vsrshr>; -defm URSHRvvi : NeonI_N2VShR_RQ<0b1, 0b00100, "urshr", - int_aarch64_neon_vurshr>; - -// Saturating shift left unsigned -defm SQSHLUvvi : NeonI_N2VShL_Q<0b1, 0b01100, "sqshlu", int_aarch64_neon_vsqshlu>; - -// Saturating shift left -defm SQSHLvvi : NeonI_N2VShL_Q<0b0, 0b01110, "sqshl", Neon_sqrshlImm>; -defm UQSHLvvi : NeonI_N2VShL_Q<0b1, 0b01110, "uqshl", Neon_uqrshlImm>; - -class N2VShiftAdd<bit q, bit u, bits<5> opcode, string asmop, string T, - RegisterOperand VPRC, ValueType Ty, Operand ImmTy, - SDNode OpNode> - : NeonI_2VShiftImm<q, u, opcode, - (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm), - asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm", - [(set (Ty VPRC:$Rd), (Ty (add (Ty VPRC:$src), - (Ty (OpNode (Ty VPRC:$Rn), - (Ty (Neon_vdup (i32 ImmTy:$Imm))))))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { - let Constraints = "$src = $Rd"; -} - -// Shift Right accumulate -multiclass NeonI_N2VShRAdd<bit u, bits<5> opcode, string asmop, SDNode OpNode> { - def _8B : N2VShiftAdd<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8, - OpNode> { - let Inst{22-19} = 0b0001; - } - - def _4H : N2VShiftAdd<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16, - OpNode> { - let Inst{22-20} = 0b001; - } - - def _2S : N2VShiftAdd<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32, - OpNode> { - let Inst{22-21} = 0b01; - } - - def _16B : N2VShiftAdd<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8, - OpNode> { - let Inst{22-19} = 0b0001; - } - - def _8H : N2VShiftAdd<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16, - OpNode> { - let Inst{22-20} = 0b001; - } - - def _4S : N2VShiftAdd<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32, - OpNode> { - let Inst{22-21} = 0b01; - } - - def _2D : N2VShiftAdd<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64, - OpNode> { - let Inst{22} = 0b1; - } -} - -// Shift right and accumulate -defm SSRAvvi : NeonI_N2VShRAdd<0, 0b00010, "ssra", sra>; -defm USRAvvi : NeonI_N2VShRAdd<1, 0b00010, "usra", srl>; - -// Rounding shift accumulate -class N2VShiftAdd_R<bit q, bit u, bits<5> opcode, string asmop, string T, - RegisterOperand VPRC, ValueType Ty, Operand ImmTy, - SDPatternOperator OpNode> - : NeonI_2VShiftImm<q, u, opcode, - (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm), - asmop # "\t$Rd." # T # ", $Rn." 
# T # ", $Imm", - [(set (Ty VPRC:$Rd), (Ty (add (Ty VPRC:$src), - (Ty (OpNode (Ty VPRC:$Rn), (i32 ImmTy:$Imm))))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { - let Constraints = "$src = $Rd"; -} - -multiclass NeonI_N2VShRAdd_R<bit u, bits<5> opcode, string asmop, - SDPatternOperator OpNode> { - def _8B : N2VShiftAdd_R<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8, - OpNode> { - let Inst{22-19} = 0b0001; - } - - def _4H : N2VShiftAdd_R<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16, - OpNode> { - let Inst{22-20} = 0b001; - } - - def _2S : N2VShiftAdd_R<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32, - OpNode> { - let Inst{22-21} = 0b01; - } - - def _16B : N2VShiftAdd_R<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8, - OpNode> { - let Inst{22-19} = 0b0001; - } - - def _8H : N2VShiftAdd_R<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16, - OpNode> { - let Inst{22-20} = 0b001; - } - - def _4S : N2VShiftAdd_R<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32, - OpNode> { - let Inst{22-21} = 0b01; - } - - def _2D : N2VShiftAdd_R<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64, - OpNode> { - let Inst{22} = 0b1; - } -} - -// Rounding shift right and accumulate -defm SRSRAvvi : NeonI_N2VShRAdd_R<0, 0b00110, "srsra", int_aarch64_neon_vsrshr>; -defm URSRAvvi : NeonI_N2VShRAdd_R<1, 0b00110, "ursra", int_aarch64_neon_vurshr>; - -// Shift insert by immediate -class N2VShiftIns<bit q, bit u, bits<5> opcode, string asmop, string T, - RegisterOperand VPRC, ValueType Ty, Operand ImmTy, - SDPatternOperator OpNode> - : NeonI_2VShiftImm<q, u, opcode, - (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm), - asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm", - [(set (Ty VPRC:$Rd), (Ty (OpNode (Ty VPRC:$src), (Ty VPRC:$Rn), - (i32 ImmTy:$Imm))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { - let Constraints = "$src = $Rd"; -} - -// shift left insert (vector by immediate) -multiclass NeonI_N2VShLIns<bit u, bits<5> opcode, string asmop> { - def _8B : N2VShiftIns<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shl_imm8, - int_aarch64_neon_vsli> { - let Inst{22-19} = 0b0001; - } - - def _4H : N2VShiftIns<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shl_imm16, - int_aarch64_neon_vsli> { - let Inst{22-20} = 0b001; - } - - def _2S : N2VShiftIns<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shl_imm32, - int_aarch64_neon_vsli> { - let Inst{22-21} = 0b01; - } - - // 128-bit vector types - def _16B : N2VShiftIns<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shl_imm8, - int_aarch64_neon_vsli> { - let Inst{22-19} = 0b0001; - } - - def _8H : N2VShiftIns<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shl_imm16, - int_aarch64_neon_vsli> { - let Inst{22-20} = 0b001; - } - - def _4S : N2VShiftIns<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shl_imm32, - int_aarch64_neon_vsli> { - let Inst{22-21} = 0b01; - } - - def _2D : N2VShiftIns<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shl_imm64, - int_aarch64_neon_vsli> { - let Inst{22} = 0b1; - } -} - -// shift right insert (vector by immediate) -multiclass NeonI_N2VShRIns<bit u, bits<5> opcode, string asmop> { - // 64-bit vector types. 
- def _8B : N2VShiftIns<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8, - int_aarch64_neon_vsri> { - let Inst{22-19} = 0b0001; - } - - def _4H : N2VShiftIns<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16, - int_aarch64_neon_vsri> { - let Inst{22-20} = 0b001; - } - - def _2S : N2VShiftIns<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32, - int_aarch64_neon_vsri> { - let Inst{22-21} = 0b01; - } - - // 128-bit vector types - def _16B : N2VShiftIns<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8, - int_aarch64_neon_vsri> { - let Inst{22-19} = 0b0001; - } - - def _8H : N2VShiftIns<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16, - int_aarch64_neon_vsri> { - let Inst{22-20} = 0b001; - } - - def _4S : N2VShiftIns<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32, - int_aarch64_neon_vsri> { - let Inst{22-21} = 0b01; - } - - def _2D : N2VShiftIns<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64, - int_aarch64_neon_vsri> { - let Inst{22} = 0b1; - } -} - -// Shift left and insert -defm SLIvvi : NeonI_N2VShLIns<0b1, 0b01010, "sli">; - -// Shift right and insert -defm SRIvvi : NeonI_N2VShRIns<0b1, 0b01000, "sri">; - -class N2VShR_Narrow<bit q, bit u, bits<5> opcode, string asmop, string DestT, - string SrcT, Operand ImmTy> - : NeonI_2VShiftImm<q, u, opcode, - (outs VPR64:$Rd), (ins VPR128:$Rn, ImmTy:$Imm), - asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - -class N2VShR_Narrow_Hi<bit q, bit u, bits<5> opcode, string asmop, string DestT, - string SrcT, Operand ImmTy> - : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd), - (ins VPR128:$src, VPR128:$Rn, ImmTy:$Imm), - asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { - let Constraints = "$src = $Rd"; -} - -// left long shift by immediate -multiclass NeonI_N2VShR_Narrow<bit u, bits<5> opcode, string asmop> { - def _8B : N2VShR_Narrow<0b0, u, opcode, asmop, "8b", "8h", shr_imm8> { - let Inst{22-19} = 0b0001; - } - - def _4H : N2VShR_Narrow<0b0, u, opcode, asmop, "4h", "4s", shr_imm16> { - let Inst{22-20} = 0b001; - } - - def _2S : N2VShR_Narrow<0b0, u, opcode, asmop, "2s", "2d", shr_imm32> { - let Inst{22-21} = 0b01; - } - - // Shift Narrow High - def _16B : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "16b", "8h", - shr_imm8> { - let Inst{22-19} = 0b0001; - } - - def _8H : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "8h", "4s", - shr_imm16> { - let Inst{22-20} = 0b001; - } - - def _4S : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "4s", "2d", - shr_imm32> { - let Inst{22-21} = 0b01; - } -} - -// Shift right narrow -defm SHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10000, "shrn">; - -// Shift right narrow (prefix Q is saturating, prefix R is rounding) -defm QSHRUNvvi :NeonI_N2VShR_Narrow<0b1, 0b10000, "sqshrun">; -defm RSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10001, "rshrn">; -defm QRSHRUNvvi : NeonI_N2VShR_Narrow<0b1, 0b10001, "sqrshrun">; -defm SQSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10010, "sqshrn">; -defm UQSHRNvvi : NeonI_N2VShR_Narrow<0b1, 0b10010, "uqshrn">; -defm SQRSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10011, "sqrshrn">; -defm UQRSHRNvvi : NeonI_N2VShR_Narrow<0b1, 0b10011, "uqrshrn">; - -def Neon_combine_2D : PatFrag<(ops node:$Rm, node:$Rn), - (v2i64 (concat_vectors (v1i64 node:$Rm), - (v1i64 node:$Rn)))>; -def Neon_combine_8H : PatFrag<(ops node:$Rm, node:$Rn), - (v8i16 (concat_vectors (v4i16 node:$Rm), - (v4i16 node:$Rn)))>; -def Neon_combine_4S : PatFrag<(ops node:$Rm, node:$Rn), - (v4i32 
(concat_vectors (v2i32 node:$Rm), - (v2i32 node:$Rn)))>; -def Neon_combine_4f : PatFrag<(ops node:$Rm, node:$Rn), - (v4f32 (concat_vectors (v2f32 node:$Rm), - (v2f32 node:$Rn)))>; -def Neon_combine_2d : PatFrag<(ops node:$Rm, node:$Rn), - (v2f64 (concat_vectors (v1f64 node:$Rm), - (v1f64 node:$Rn)))>; - -def Neon_lshrImm8H : PatFrag<(ops node:$lhs, node:$rhs), - (v8i16 (srl (v8i16 node:$lhs), - (v8i16 (Neon_vdup (i32 node:$rhs)))))>; -def Neon_lshrImm4S : PatFrag<(ops node:$lhs, node:$rhs), - (v4i32 (srl (v4i32 node:$lhs), - (v4i32 (Neon_vdup (i32 node:$rhs)))))>; -def Neon_lshrImm2D : PatFrag<(ops node:$lhs, node:$rhs), - (v2i64 (srl (v2i64 node:$lhs), - (v2i64 (Neon_vdup (i32 node:$rhs)))))>; -def Neon_ashrImm8H : PatFrag<(ops node:$lhs, node:$rhs), - (v8i16 (sra (v8i16 node:$lhs), - (v8i16 (Neon_vdup (i32 node:$rhs)))))>; -def Neon_ashrImm4S : PatFrag<(ops node:$lhs, node:$rhs), - (v4i32 (sra (v4i32 node:$lhs), - (v4i32 (Neon_vdup (i32 node:$rhs)))))>; -def Neon_ashrImm2D : PatFrag<(ops node:$lhs, node:$rhs), - (v2i64 (sra (v2i64 node:$lhs), - (v2i64 (Neon_vdup (i32 node:$rhs)))))>; - -// Normal shift right narrow is matched by IR (srl/sra, trunc, concat_vectors) -multiclass Neon_shiftNarrow_patterns<string shr> { - def : Pat<(v8i8 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm8H") VPR128:$Rn, - (i32 shr_imm8:$Imm)))), - (SHRNvvi_8B VPR128:$Rn, imm:$Imm)>; - def : Pat<(v4i16 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm4S") VPR128:$Rn, - (i32 shr_imm16:$Imm)))), - (SHRNvvi_4H VPR128:$Rn, imm:$Imm)>; - def : Pat<(v2i32 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm2D") VPR128:$Rn, - (i32 shr_imm32:$Imm)))), - (SHRNvvi_2S VPR128:$Rn, imm:$Imm)>; - - def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert - (v8i8 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm8H") - VPR128:$Rn, (i32 shr_imm8:$Imm))))))), - (SHRNvvi_16B (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)), - VPR128:$Rn, imm:$Imm)>; - def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert - (v4i16 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm4S") - VPR128:$Rn, (i32 shr_imm16:$Imm))))))), - (SHRNvvi_8H (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64), - VPR128:$Rn, imm:$Imm)>; - def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert - (v2i32 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm2D") - VPR128:$Rn, (i32 shr_imm32:$Imm))))))), - (SHRNvvi_4S (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64), - VPR128:$Rn, imm:$Imm)>; -} - -multiclass Neon_shiftNarrow_QR_patterns<SDPatternOperator op, string prefix> { - def : Pat<(v8i8 (op (v8i16 VPR128:$Rn), shr_imm8:$Imm)), - (!cast<Instruction>(prefix # "_8B") VPR128:$Rn, imm:$Imm)>; - def : Pat<(v4i16 (op (v4i32 VPR128:$Rn), shr_imm16:$Imm)), - (!cast<Instruction>(prefix # "_4H") VPR128:$Rn, imm:$Imm)>; - def : Pat<(v2i32 (op (v2i64 VPR128:$Rn), shr_imm32:$Imm)), - (!cast<Instruction>(prefix # "_2S") VPR128:$Rn, imm:$Imm)>; - - def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), - (v1i64 (bitconvert (v8i8 - (op (v8i16 VPR128:$Rn), shr_imm8:$Imm))))), - (!cast<Instruction>(prefix # "_16B") - (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64), - VPR128:$Rn, imm:$Imm)>; - def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), - (v1i64 (bitconvert (v4i16 - (op (v4i32 VPR128:$Rn), shr_imm16:$Imm))))), - (!cast<Instruction>(prefix # "_8H") - (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64), - VPR128:$Rn, imm:$Imm)>; - def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), - (v1i64 (bitconvert (v2i32 - (op (v2i64 VPR128:$Rn), shr_imm32:$Imm))))), - (!cast<Instruction>(prefix # "_4S") - (SUBREG_TO_REG (i64 0), VPR64:$src, 
sub_64), - VPR128:$Rn, imm:$Imm)>; -} - -defm : Neon_shiftNarrow_patterns<"lshr">; -defm : Neon_shiftNarrow_patterns<"ashr">; - -defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqshrun, "QSHRUNvvi">; -defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vrshrn, "RSHRNvvi">; -defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqrshrun, "QRSHRUNvvi">; -defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqshrn, "SQSHRNvvi">; -defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vuqshrn, "UQSHRNvvi">; -defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqrshrn, "SQRSHRNvvi">; -defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vuqrshrn, "UQRSHRNvvi">; - -// Convert fix-point and float-pointing -class N2VCvt_Fx<bit q, bit u, bits<5> opcode, string asmop, string T, - RegisterOperand VPRC, ValueType DestTy, ValueType SrcTy, - Operand ImmTy, SDPatternOperator IntOp> - : NeonI_2VShiftImm<q, u, opcode, - (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm), - asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm", - [(set (DestTy VPRC:$Rd), (DestTy (IntOp (SrcTy VPRC:$Rn), - (i32 ImmTy:$Imm))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - -multiclass NeonI_N2VCvt_Fx2fp<bit u, bits<5> opcode, string asmop, - SDPatternOperator IntOp> { - def _2S : N2VCvt_Fx<0, u, opcode, asmop, "2s", VPR64, v2f32, v2i32, - shr_imm32, IntOp> { - let Inst{22-21} = 0b01; - } - - def _4S : N2VCvt_Fx<1, u, opcode, asmop, "4s", VPR128, v4f32, v4i32, - shr_imm32, IntOp> { - let Inst{22-21} = 0b01; - } - - def _2D : N2VCvt_Fx<1, u, opcode, asmop, "2d", VPR128, v2f64, v2i64, - shr_imm64, IntOp> { - let Inst{22} = 0b1; - } -} - -multiclass NeonI_N2VCvt_Fp2fx<bit u, bits<5> opcode, string asmop, - SDPatternOperator IntOp> { - def _2S : N2VCvt_Fx<0, u, opcode, asmop, "2s", VPR64, v2i32, v2f32, - shr_imm32, IntOp> { - let Inst{22-21} = 0b01; - } - - def _4S : N2VCvt_Fx<1, u, opcode, asmop, "4s", VPR128, v4i32, v4f32, - shr_imm32, IntOp> { - let Inst{22-21} = 0b01; - } - - def _2D : N2VCvt_Fx<1, u, opcode, asmop, "2d", VPR128, v2i64, v2f64, - shr_imm64, IntOp> { - let Inst{22} = 0b1; - } -} - -// Convert fixed-point to floating-point -defm VCVTxs2f : NeonI_N2VCvt_Fx2fp<0, 0b11100, "scvtf", - int_arm_neon_vcvtfxs2fp>; -defm VCVTxu2f : NeonI_N2VCvt_Fx2fp<1, 0b11100, "ucvtf", - int_arm_neon_vcvtfxu2fp>; - -// Convert floating-point to fixed-point -defm VCVTf2xs : NeonI_N2VCvt_Fp2fx<0, 0b11111, "fcvtzs", - int_arm_neon_vcvtfp2fxs>; -defm VCVTf2xu : NeonI_N2VCvt_Fp2fx<1, 0b11111, "fcvtzu", - int_arm_neon_vcvtfp2fxu>; - -multiclass Neon_sshll2_0<SDNode ext> -{ - def _v8i8 : PatFrag<(ops node:$Rn), - (v8i16 (ext (v8i8 (Neon_High16B node:$Rn))))>; - def _v4i16 : PatFrag<(ops node:$Rn), - (v4i32 (ext (v4i16 (Neon_High8H node:$Rn))))>; - def _v2i32 : PatFrag<(ops node:$Rn), - (v2i64 (ext (v2i32 (Neon_High4S node:$Rn))))>; -} - -defm NI_sext_high : Neon_sshll2_0<sext>; -defm NI_zext_high : Neon_sshll2_0<zext>; - - -//===----------------------------------------------------------------------===// -// Multiclasses for NeonI_Across -//===----------------------------------------------------------------------===// - -// Variant 1 - -multiclass NeonI_2VAcross_1<bit u, bits<5> opcode, - string asmop, SDPatternOperator opnode> -{ - def _1h8b: NeonI_2VAcross<0b0, u, 0b00, opcode, - (outs FPR16:$Rd), (ins VPR64:$Rn), - asmop # "\t$Rd, $Rn.8b", - [(set (v1i16 FPR16:$Rd), - (v1i16 (opnode (v8i8 VPR64:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def _1h16b: NeonI_2VAcross<0b1, u, 0b00, opcode, - (outs FPR16:$Rd), (ins 
VPR128:$Rn), - asmop # "\t$Rd, $Rn.16b", - [(set (v1i16 FPR16:$Rd), - (v1i16 (opnode (v16i8 VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def _1s4h: NeonI_2VAcross<0b0, u, 0b01, opcode, - (outs FPR32:$Rd), (ins VPR64:$Rn), - asmop # "\t$Rd, $Rn.4h", - [(set (v1i32 FPR32:$Rd), - (v1i32 (opnode (v4i16 VPR64:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def _1s8h: NeonI_2VAcross<0b1, u, 0b01, opcode, - (outs FPR32:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd, $Rn.8h", - [(set (v1i32 FPR32:$Rd), - (v1i32 (opnode (v8i16 VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - // _1d2s doesn't exist! - - def _1d4s: NeonI_2VAcross<0b1, u, 0b10, opcode, - (outs FPR64:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd, $Rn.4s", - [(set (v1i64 FPR64:$Rd), - (v1i64 (opnode (v4i32 VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; -} - -defm SADDLV : NeonI_2VAcross_1<0b0, 0b00011, "saddlv", int_aarch64_neon_saddlv>; -defm UADDLV : NeonI_2VAcross_1<0b1, 0b00011, "uaddlv", int_aarch64_neon_uaddlv>; - -// Variant 2 - -multiclass NeonI_2VAcross_2<bit u, bits<5> opcode, - string asmop, SDPatternOperator opnode> -{ - def _1b8b: NeonI_2VAcross<0b0, u, 0b00, opcode, - (outs FPR8:$Rd), (ins VPR64:$Rn), - asmop # "\t$Rd, $Rn.8b", - [(set (v1i8 FPR8:$Rd), - (v1i8 (opnode (v8i8 VPR64:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def _1b16b: NeonI_2VAcross<0b1, u, 0b00, opcode, - (outs FPR8:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd, $Rn.16b", - [(set (v1i8 FPR8:$Rd), - (v1i8 (opnode (v16i8 VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def _1h4h: NeonI_2VAcross<0b0, u, 0b01, opcode, - (outs FPR16:$Rd), (ins VPR64:$Rn), - asmop # "\t$Rd, $Rn.4h", - [(set (v1i16 FPR16:$Rd), - (v1i16 (opnode (v4i16 VPR64:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def _1h8h: NeonI_2VAcross<0b1, u, 0b01, opcode, - (outs FPR16:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd, $Rn.8h", - [(set (v1i16 FPR16:$Rd), - (v1i16 (opnode (v8i16 VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - // _1s2s doesn't exist! 
- - def _1s4s: NeonI_2VAcross<0b1, u, 0b10, opcode, - (outs FPR32:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd, $Rn.4s", - [(set (v1i32 FPR32:$Rd), - (v1i32 (opnode (v4i32 VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; -} - -defm SMAXV : NeonI_2VAcross_2<0b0, 0b01010, "smaxv", int_aarch64_neon_smaxv>; -defm UMAXV : NeonI_2VAcross_2<0b1, 0b01010, "umaxv", int_aarch64_neon_umaxv>; - -defm SMINV : NeonI_2VAcross_2<0b0, 0b11010, "sminv", int_aarch64_neon_sminv>; -defm UMINV : NeonI_2VAcross_2<0b1, 0b11010, "uminv", int_aarch64_neon_uminv>; - -defm ADDV : NeonI_2VAcross_2<0b0, 0b11011, "addv", int_aarch64_neon_vaddv>; - -// Variant 3 - -multiclass NeonI_2VAcross_3<bit u, bits<5> opcode, bits<2> size, - string asmop, SDPatternOperator opnode> { - def _1s4s: NeonI_2VAcross<0b1, u, size, opcode, - (outs FPR32:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd, $Rn.4s", - [(set (f32 FPR32:$Rd), - (f32 (opnode (v4f32 VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; -} - -defm FMAXNMV : NeonI_2VAcross_3<0b1, 0b01100, 0b00, "fmaxnmv", - int_aarch64_neon_vmaxnmv>; -defm FMINNMV : NeonI_2VAcross_3<0b1, 0b01100, 0b10, "fminnmv", - int_aarch64_neon_vminnmv>; - -defm FMAXV : NeonI_2VAcross_3<0b1, 0b01111, 0b00, "fmaxv", - int_aarch64_neon_vmaxv>; -defm FMINV : NeonI_2VAcross_3<0b1, 0b01111, 0b10, "fminv", - int_aarch64_neon_vminv>; - -// The followings are for instruction class (Perm) - -class NeonI_Permute<bit q, bits<2> size, bits<3> opcode, - string asmop, RegisterOperand OpVPR, string OpS, - SDPatternOperator opnode, ValueType Ty> - : NeonI_Perm<q, size, opcode, - (outs OpVPR:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm), - asmop # "\t$Rd." # OpS # ", $Rn." # OpS # ", $Rm." # OpS, - [(set (Ty OpVPR:$Rd), - (Ty (opnode (Ty OpVPR:$Rn), (Ty OpVPR:$Rm))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - -multiclass NeonI_Perm_pat<bits<3> opcode, string asmop, - SDPatternOperator opnode> { - def _8b : NeonI_Permute<0b0, 0b00, opcode, asmop, - VPR64, "8b", opnode, v8i8>; - def _16b : NeonI_Permute<0b1, 0b00, opcode, asmop, - VPR128, "16b",opnode, v16i8>; - def _4h : NeonI_Permute<0b0, 0b01, opcode, asmop, - VPR64, "4h", opnode, v4i16>; - def _8h : NeonI_Permute<0b1, 0b01, opcode, asmop, - VPR128, "8h", opnode, v8i16>; - def _2s : NeonI_Permute<0b0, 0b10, opcode, asmop, - VPR64, "2s", opnode, v2i32>; - def _4s : NeonI_Permute<0b1, 0b10, opcode, asmop, - VPR128, "4s", opnode, v4i32>; - def _2d : NeonI_Permute<0b1, 0b11, opcode, asmop, - VPR128, "2d", opnode, v2i64>; -} - -defm UZP1vvv : NeonI_Perm_pat<0b001, "uzp1", Neon_uzp1>; -defm TRN1vvv : NeonI_Perm_pat<0b010, "trn1", Neon_trn1>; -defm ZIP1vvv : NeonI_Perm_pat<0b011, "zip1", Neon_zip1>; -defm UZP2vvv : NeonI_Perm_pat<0b101, "uzp2", Neon_uzp2>; -defm TRN2vvv : NeonI_Perm_pat<0b110, "trn2", Neon_trn2>; -defm ZIP2vvv : NeonI_Perm_pat<0b111, "zip2", Neon_zip2>; - -multiclass NeonI_Perm_float_pat<string INS, SDPatternOperator opnode> { - def : Pat<(v2f32 (opnode (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))), - (!cast<Instruction>(INS # "_2s") VPR64:$Rn, VPR64:$Rm)>; - - def : Pat<(v4f32 (opnode (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))), - (!cast<Instruction>(INS # "_4s") VPR128:$Rn, VPR128:$Rm)>; - - def : Pat<(v2f64 (opnode (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))), - (!cast<Instruction>(INS # "_2d") VPR128:$Rn, VPR128:$Rm)>; -} - -defm : NeonI_Perm_float_pat<"UZP1vvv", Neon_uzp1>; -defm : NeonI_Perm_float_pat<"UZP2vvv", Neon_uzp2>; -defm : NeonI_Perm_float_pat<"ZIP1vvv", Neon_zip1>; -defm : NeonI_Perm_float_pat<"ZIP2vvv", Neon_zip2>; -defm 
: NeonI_Perm_float_pat<"TRN1vvv", Neon_trn1>; -defm : NeonI_Perm_float_pat<"TRN2vvv", Neon_trn2>; - -// The followings are for instruction class (3V Diff) - -// normal long/long2 pattern -class NeonI_3VDL<bit q, bit u, bits<2> size, bits<4> opcode, - string asmop, string ResS, string OpS, - SDPatternOperator opnode, SDPatternOperator ext, - RegisterOperand OpVPR, - ValueType ResTy, ValueType OpTy> - : NeonI_3VDiff<q, u, size, opcode, - (outs VPR128:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm), - asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS, - [(set (ResTy VPR128:$Rd), - (ResTy (opnode (ResTy (ext (OpTy OpVPR:$Rn))), - (ResTy (ext (OpTy OpVPR:$Rm))))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - -multiclass NeonI_3VDL_s<bit u, bits<4> opcode, - string asmop, SDPatternOperator opnode, - bit Commutable = 0> { - let isCommutable = Commutable in { - def _8h8b : NeonI_3VDL<0b0, u, 0b00, opcode, asmop, "8h", "8b", - opnode, sext, VPR64, v8i16, v8i8>; - def _4s4h : NeonI_3VDL<0b0, u, 0b01, opcode, asmop, "4s", "4h", - opnode, sext, VPR64, v4i32, v4i16>; - def _2d2s : NeonI_3VDL<0b0, u, 0b10, opcode, asmop, "2d", "2s", - opnode, sext, VPR64, v2i64, v2i32>; - } -} - -multiclass NeonI_3VDL2_s<bit u, bits<4> opcode, string asmop, - SDPatternOperator opnode, bit Commutable = 0> { - let isCommutable = Commutable in { - def _8h16b : NeonI_3VDL<0b1, u, 0b00, opcode, asmop, "8h", "16b", - opnode, NI_sext_high_v8i8, VPR128, v8i16, v16i8>; - def _4s8h : NeonI_3VDL<0b1, u, 0b01, opcode, asmop, "4s", "8h", - opnode, NI_sext_high_v4i16, VPR128, v4i32, v8i16>; - def _2d4s : NeonI_3VDL<0b1, u, 0b10, opcode, asmop, "2d", "4s", - opnode, NI_sext_high_v2i32, VPR128, v2i64, v4i32>; - } -} - -multiclass NeonI_3VDL_u<bit u, bits<4> opcode, string asmop, - SDPatternOperator opnode, bit Commutable = 0> { - let isCommutable = Commutable in { - def _8h8b : NeonI_3VDL<0b0, u, 0b00, opcode, asmop, "8h", "8b", - opnode, zext, VPR64, v8i16, v8i8>; - def _4s4h : NeonI_3VDL<0b0, u, 0b01, opcode, asmop, "4s", "4h", - opnode, zext, VPR64, v4i32, v4i16>; - def _2d2s : NeonI_3VDL<0b0, u, 0b10, opcode, asmop, "2d", "2s", - opnode, zext, VPR64, v2i64, v2i32>; - } -} - -multiclass NeonI_3VDL2_u<bit u, bits<4> opcode, string asmop, - SDPatternOperator opnode, bit Commutable = 0> { - let isCommutable = Commutable in { - def _8h16b : NeonI_3VDL<0b1, u, 0b00, opcode, asmop, "8h", "16b", - opnode, NI_zext_high_v8i8, VPR128, v8i16, v16i8>; - def _4s8h : NeonI_3VDL<0b1, u, 0b01, opcode, asmop, "4s", "8h", - opnode, NI_zext_high_v4i16, VPR128, v4i32, v8i16>; - def _2d4s : NeonI_3VDL<0b1, u, 0b10, opcode, asmop, "2d", "4s", - opnode, NI_zext_high_v2i32, VPR128, v2i64, v4i32>; - } -} - -defm SADDLvvv : NeonI_3VDL_s<0b0, 0b0000, "saddl", add, 1>; -defm UADDLvvv : NeonI_3VDL_u<0b1, 0b0000, "uaddl", add, 1>; - -defm SADDL2vvv : NeonI_3VDL2_s<0b0, 0b0000, "saddl2", add, 1>; -defm UADDL2vvv : NeonI_3VDL2_u<0b1, 0b0000, "uaddl2", add, 1>; - -defm SSUBLvvv : NeonI_3VDL_s<0b0, 0b0010, "ssubl", sub, 0>; -defm USUBLvvv : NeonI_3VDL_u<0b1, 0b0010, "usubl", sub, 0>; - -defm SSUBL2vvv : NeonI_3VDL2_s<0b0, 0b0010, "ssubl2", sub, 0>; -defm USUBL2vvv : NeonI_3VDL2_u<0b1, 0b0010, "usubl2", sub, 0>; - -// normal wide/wide2 pattern -class NeonI_3VDW<bit q, bit u, bits<2> size, bits<4> opcode, - string asmop, string ResS, string OpS, - SDPatternOperator opnode, SDPatternOperator ext, - RegisterOperand OpVPR, - ValueType ResTy, ValueType OpTy> - : NeonI_3VDiff<q, u, size, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn, OpVPR:$Rm), - asmop # 
"\t$Rd." # ResS # ", $Rn." # ResS # ", $Rm." # OpS, - [(set (ResTy VPR128:$Rd), - (ResTy (opnode (ResTy VPR128:$Rn), - (ResTy (ext (OpTy OpVPR:$Rm))))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - -multiclass NeonI_3VDW_s<bit u, bits<4> opcode, string asmop, - SDPatternOperator opnode> { - def _8h8b : NeonI_3VDW<0b0, u, 0b00, opcode, asmop, "8h", "8b", - opnode, sext, VPR64, v8i16, v8i8>; - def _4s4h : NeonI_3VDW<0b0, u, 0b01, opcode, asmop, "4s", "4h", - opnode, sext, VPR64, v4i32, v4i16>; - def _2d2s : NeonI_3VDW<0b0, u, 0b10, opcode, asmop, "2d", "2s", - opnode, sext, VPR64, v2i64, v2i32>; -} - -defm SADDWvvv : NeonI_3VDW_s<0b0, 0b0001, "saddw", add>; -defm SSUBWvvv : NeonI_3VDW_s<0b0, 0b0011, "ssubw", sub>; - -multiclass NeonI_3VDW2_s<bit u, bits<4> opcode, string asmop, - SDPatternOperator opnode> { - def _8h16b : NeonI_3VDW<0b1, u, 0b00, opcode, asmop, "8h", "16b", - opnode, NI_sext_high_v8i8, VPR128, v8i16, v16i8>; - def _4s8h : NeonI_3VDW<0b1, u, 0b01, opcode, asmop, "4s", "8h", - opnode, NI_sext_high_v4i16, VPR128, v4i32, v8i16>; - def _2d4s : NeonI_3VDW<0b1, u, 0b10, opcode, asmop, "2d", "4s", - opnode, NI_sext_high_v2i32, VPR128, v2i64, v4i32>; -} - -defm SADDW2vvv : NeonI_3VDW2_s<0b0, 0b0001, "saddw2", add>; -defm SSUBW2vvv : NeonI_3VDW2_s<0b0, 0b0011, "ssubw2", sub>; - -multiclass NeonI_3VDW_u<bit u, bits<4> opcode, string asmop, - SDPatternOperator opnode> { - def _8h8b : NeonI_3VDW<0b0, u, 0b00, opcode, asmop, "8h", "8b", - opnode, zext, VPR64, v8i16, v8i8>; - def _4s4h : NeonI_3VDW<0b0, u, 0b01, opcode, asmop, "4s", "4h", - opnode, zext, VPR64, v4i32, v4i16>; - def _2d2s : NeonI_3VDW<0b0, u, 0b10, opcode, asmop, "2d", "2s", - opnode, zext, VPR64, v2i64, v2i32>; -} - -defm UADDWvvv : NeonI_3VDW_u<0b1, 0b0001, "uaddw", add>; -defm USUBWvvv : NeonI_3VDW_u<0b1, 0b0011, "usubw", sub>; - -multiclass NeonI_3VDW2_u<bit u, bits<4> opcode, string asmop, - SDPatternOperator opnode> { - def _8h16b : NeonI_3VDW<0b1, u, 0b00, opcode, asmop, "8h", "16b", - opnode, NI_zext_high_v8i8, VPR128, v8i16, v16i8>; - def _4s8h : NeonI_3VDW<0b1, u, 0b01, opcode, asmop, "4s", "8h", - opnode, NI_zext_high_v4i16, VPR128, v4i32, v8i16>; - def _2d4s : NeonI_3VDW<0b1, u, 0b10, opcode, asmop, "2d", "4s", - opnode, NI_zext_high_v2i32, VPR128, v2i64, v4i32>; -} - -defm UADDW2vvv : NeonI_3VDW2_u<0b1, 0b0001, "uaddw2", add>; -defm USUBW2vvv : NeonI_3VDW2_u<0b1, 0b0011, "usubw2", sub>; - -// Get the high half part of the vector element. -multiclass NeonI_get_high { - def _8h : PatFrag<(ops node:$Rn), - (v8i8 (trunc (v8i16 (srl (v8i16 node:$Rn), - (v8i16 (Neon_vdup (i32 8)))))))>; - def _4s : PatFrag<(ops node:$Rn), - (v4i16 (trunc (v4i32 (srl (v4i32 node:$Rn), - (v4i32 (Neon_vdup (i32 16)))))))>; - def _2d : PatFrag<(ops node:$Rn), - (v2i32 (trunc (v2i64 (srl (v2i64 node:$Rn), - (v2i64 (Neon_vdup (i32 32)))))))>; -} - -defm NI_get_hi : NeonI_get_high; - -// pattern for addhn/subhn with 2 operands -class NeonI_3VDN_addhn_2Op<bit q, bit u, bits<2> size, bits<4> opcode, - string asmop, string ResS, string OpS, - SDPatternOperator opnode, SDPatternOperator get_hi, - ValueType ResTy, ValueType OpTy> - : NeonI_3VDiff<q, u, size, opcode, - (outs VPR64:$Rd), (ins VPR128:$Rn, VPR128:$Rm), - asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." 
# OpS, - [(set (ResTy VPR64:$Rd), - (ResTy (get_hi - (OpTy (opnode (OpTy VPR128:$Rn), - (OpTy VPR128:$Rm))))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - -multiclass NeonI_3VDN_addhn_2Op<bit u, bits<4> opcode, string asmop, - SDPatternOperator opnode, bit Commutable = 0> { - let isCommutable = Commutable in { - def _8b8h : NeonI_3VDN_addhn_2Op<0b0, u, 0b00, opcode, asmop, "8b", "8h", - opnode, NI_get_hi_8h, v8i8, v8i16>; - def _4h4s : NeonI_3VDN_addhn_2Op<0b0, u, 0b01, opcode, asmop, "4h", "4s", - opnode, NI_get_hi_4s, v4i16, v4i32>; - def _2s2d : NeonI_3VDN_addhn_2Op<0b0, u, 0b10, opcode, asmop, "2s", "2d", - opnode, NI_get_hi_2d, v2i32, v2i64>; - } -} - -defm ADDHNvvv : NeonI_3VDN_addhn_2Op<0b0, 0b0100, "addhn", add, 1>; -defm SUBHNvvv : NeonI_3VDN_addhn_2Op<0b0, 0b0110, "subhn", sub, 0>; - -// pattern for operation with 2 operands -class NeonI_3VD_2Op<bit q, bit u, bits<2> size, bits<4> opcode, - string asmop, string ResS, string OpS, - SDPatternOperator opnode, - RegisterOperand ResVPR, RegisterOperand OpVPR, - ValueType ResTy, ValueType OpTy> - : NeonI_3VDiff<q, u, size, opcode, - (outs ResVPR:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm), - asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS, - [(set (ResTy ResVPR:$Rd), - (ResTy (opnode (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - -// normal narrow pattern -multiclass NeonI_3VDN_2Op<bit u, bits<4> opcode, string asmop, - SDPatternOperator opnode, bit Commutable = 0> { - let isCommutable = Commutable in { - def _8b8h : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8b", "8h", - opnode, VPR64, VPR128, v8i8, v8i16>; - def _4h4s : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4h", "4s", - opnode, VPR64, VPR128, v4i16, v4i32>; - def _2s2d : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2s", "2d", - opnode, VPR64, VPR128, v2i32, v2i64>; - } -} - -defm RADDHNvvv : NeonI_3VDN_2Op<0b1, 0b0100, "raddhn", int_arm_neon_vraddhn, 1>; -defm RSUBHNvvv : NeonI_3VDN_2Op<0b1, 0b0110, "rsubhn", int_arm_neon_vrsubhn, 0>; - -// pattern for acle intrinsic with 3 operands -class NeonI_3VDN_3Op<bit q, bit u, bits<2> size, bits<4> opcode, - string asmop, string ResS, string OpS> - : NeonI_3VDiff<q, u, size, opcode, - (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn, VPR128:$Rm), - asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS, - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]> { - let Constraints = "$src = $Rd"; - let neverHasSideEffects = 1; -} - -multiclass NeonI_3VDN_3Op_v1<bit u, bits<4> opcode, string asmop> { - def _16b8h : NeonI_3VDN_3Op<0b1, u, 0b00, opcode, asmop, "16b", "8h">; - def _8h4s : NeonI_3VDN_3Op<0b1, u, 0b01, opcode, asmop, "8h", "4s">; - def _4s2d : NeonI_3VDN_3Op<0b1, u, 0b10, opcode, asmop, "4s", "2d">; -} - -defm ADDHN2vvv : NeonI_3VDN_3Op_v1<0b0, 0b0100, "addhn2">; -defm SUBHN2vvv : NeonI_3VDN_3Op_v1<0b0, 0b0110, "subhn2">; - -defm RADDHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0100, "raddhn2">; -defm RSUBHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0110, "rsubhn2">; - -// Patterns have to be separate because there's a SUBREG_TO_REG in the output -// part. 
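Note: the narrow-high-half patterns below pair a previously narrowed low half ($src) with a freshly narrowed high half, which is why the output pattern wraps the 64-bit source in SUBREG_TO_REG to place it into the low half of a 128-bit register. A minimal C sketch of the same idiom, assuming the ACLE intrinsics from <arm_neon.h> that typically lower to addhn/addhn2 (the function and variable names are illustrative only):

#include <arm_neon.h>

int8x16_t narrow_both_halves(int16x8_t lo_a, int16x8_t lo_b,
                             int16x8_t hi_a, int16x8_t hi_b) {
  /* addhn: keep the high half of each 16-bit sum, producing the low 8 bytes. */
  int8x8_t lo = vaddhn_s16(lo_a, lo_b);
  /* addhn2: narrow into the high 8 bytes while preserving 'lo' as the low half. */
  return vaddhn_high_s16(lo, hi_a, hi_b);
}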
-class NarrowHighHalfPat<Instruction INST, ValueType DstTy, ValueType SrcTy, - SDPatternOperator coreop> - : Pat<(Neon_combine_2D (v1i64 VPR64:$src), - (v1i64 (bitconvert (DstTy (coreop (SrcTy VPR128:$Rn), - (SrcTy VPR128:$Rm)))))), - (INST (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64), - VPR128:$Rn, VPR128:$Rm)>; - -// addhn2 patterns -def : NarrowHighHalfPat<ADDHN2vvv_16b8h, v8i8, v8i16, - BinOpFrag<(NI_get_hi_8h (add node:$LHS, node:$RHS))>>; -def : NarrowHighHalfPat<ADDHN2vvv_8h4s, v4i16, v4i32, - BinOpFrag<(NI_get_hi_4s (add node:$LHS, node:$RHS))>>; -def : NarrowHighHalfPat<ADDHN2vvv_4s2d, v2i32, v2i64, - BinOpFrag<(NI_get_hi_2d (add node:$LHS, node:$RHS))>>; - -// subhn2 patterns -def : NarrowHighHalfPat<SUBHN2vvv_16b8h, v8i8, v8i16, - BinOpFrag<(NI_get_hi_8h (sub node:$LHS, node:$RHS))>>; -def : NarrowHighHalfPat<SUBHN2vvv_8h4s, v4i16, v4i32, - BinOpFrag<(NI_get_hi_4s (sub node:$LHS, node:$RHS))>>; -def : NarrowHighHalfPat<SUBHN2vvv_4s2d, v2i32, v2i64, - BinOpFrag<(NI_get_hi_2d (sub node:$LHS, node:$RHS))>>; - -// raddhn2 patterns -def : NarrowHighHalfPat<RADDHN2vvv_16b8h, v8i8, v8i16, int_arm_neon_vraddhn>; -def : NarrowHighHalfPat<RADDHN2vvv_8h4s, v4i16, v4i32, int_arm_neon_vraddhn>; -def : NarrowHighHalfPat<RADDHN2vvv_4s2d, v2i32, v2i64, int_arm_neon_vraddhn>; - -// rsubhn2 patterns -def : NarrowHighHalfPat<RSUBHN2vvv_16b8h, v8i8, v8i16, int_arm_neon_vrsubhn>; -def : NarrowHighHalfPat<RSUBHN2vvv_8h4s, v4i16, v4i32, int_arm_neon_vrsubhn>; -def : NarrowHighHalfPat<RSUBHN2vvv_4s2d, v2i32, v2i64, int_arm_neon_vrsubhn>; - -// pattern that need to extend result -class NeonI_3VDL_Ext<bit q, bit u, bits<2> size, bits<4> opcode, - string asmop, string ResS, string OpS, - SDPatternOperator opnode, - RegisterOperand OpVPR, - ValueType ResTy, ValueType OpTy, ValueType OpSTy> - : NeonI_3VDiff<q, u, size, opcode, - (outs VPR128:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm), - asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." 
# OpS, - [(set (ResTy VPR128:$Rd), - (ResTy (zext (OpSTy (opnode (OpTy OpVPR:$Rn), - (OpTy OpVPR:$Rm))))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - -multiclass NeonI_3VDL_zext<bit u, bits<4> opcode, string asmop, - SDPatternOperator opnode, bit Commutable = 0> { - let isCommutable = Commutable in { - def _8h8b : NeonI_3VDL_Ext<0b0, u, 0b00, opcode, asmop, "8h", "8b", - opnode, VPR64, v8i16, v8i8, v8i8>; - def _4s4h : NeonI_3VDL_Ext<0b0, u, 0b01, opcode, asmop, "4s", "4h", - opnode, VPR64, v4i32, v4i16, v4i16>; - def _2d2s : NeonI_3VDL_Ext<0b0, u, 0b10, opcode, asmop, "2d", "2s", - opnode, VPR64, v2i64, v2i32, v2i32>; - } -} - -defm SABDLvvv : NeonI_3VDL_zext<0b0, 0b0111, "sabdl", int_arm_neon_vabds, 1>; -defm UABDLvvv : NeonI_3VDL_zext<0b1, 0b0111, "uabdl", int_arm_neon_vabdu, 1>; - -multiclass NeonI_Op_High<SDPatternOperator op> { - def _16B : PatFrag<(ops node:$Rn, node:$Rm), - (op (v8i8 (Neon_High16B node:$Rn)), - (v8i8 (Neon_High16B node:$Rm)))>; - def _8H : PatFrag<(ops node:$Rn, node:$Rm), - (op (v4i16 (Neon_High8H node:$Rn)), - (v4i16 (Neon_High8H node:$Rm)))>; - def _4S : PatFrag<(ops node:$Rn, node:$Rm), - (op (v2i32 (Neon_High4S node:$Rn)), - (v2i32 (Neon_High4S node:$Rm)))>; -} - -defm NI_sabdl_hi : NeonI_Op_High<int_arm_neon_vabds>; -defm NI_uabdl_hi : NeonI_Op_High<int_arm_neon_vabdu>; -defm NI_smull_hi : NeonI_Op_High<int_arm_neon_vmulls>; -defm NI_umull_hi : NeonI_Op_High<int_arm_neon_vmullu>; -defm NI_qdmull_hi : NeonI_Op_High<int_arm_neon_vqdmull>; -defm NI_pmull_hi : NeonI_Op_High<int_arm_neon_vmullp>; - -multiclass NeonI_3VDL_Abd_u<bit u, bits<4> opcode, string asmop, string opnode, - bit Commutable = 0> { - let isCommutable = Commutable in { - def _8h8b : NeonI_3VDL_Ext<0b1, u, 0b00, opcode, asmop, "8h", "16b", - !cast<PatFrag>(opnode # "_16B"), - VPR128, v8i16, v16i8, v8i8>; - def _4s4h : NeonI_3VDL_Ext<0b1, u, 0b01, opcode, asmop, "4s", "8h", - !cast<PatFrag>(opnode # "_8H"), - VPR128, v4i32, v8i16, v4i16>; - def _2d2s : NeonI_3VDL_Ext<0b1, u, 0b10, opcode, asmop, "2d", "4s", - !cast<PatFrag>(opnode # "_4S"), - VPR128, v2i64, v4i32, v2i32>; - } -} - -defm SABDL2vvv : NeonI_3VDL_Abd_u<0b0, 0b0111, "sabdl2", "NI_sabdl_hi", 1>; -defm UABDL2vvv : NeonI_3VDL_Abd_u<0b1, 0b0111, "uabdl2", "NI_uabdl_hi", 1>; - -// For pattern that need two operators being chained. -class NeonI_3VDL_Aba<bit q, bit u, bits<2> size, bits<4> opcode, - string asmop, string ResS, string OpS, - SDPatternOperator opnode, SDPatternOperator subop, - RegisterOperand OpVPR, - ValueType ResTy, ValueType OpTy, ValueType OpSTy> - : NeonI_3VDiff<q, u, size, opcode, - (outs VPR128:$Rd), (ins VPR128:$src, OpVPR:$Rn, OpVPR:$Rm), - asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." 
# OpS, - [(set (ResTy VPR128:$Rd), - (ResTy (opnode - (ResTy VPR128:$src), - (ResTy (zext (OpSTy (subop (OpTy OpVPR:$Rn), - (OpTy OpVPR:$Rm))))))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]> { - let Constraints = "$src = $Rd"; -} - -multiclass NeonI_3VDL_Aba_v1<bit u, bits<4> opcode, string asmop, - SDPatternOperator opnode, SDPatternOperator subop>{ - def _8h8b : NeonI_3VDL_Aba<0b0, u, 0b00, opcode, asmop, "8h", "8b", - opnode, subop, VPR64, v8i16, v8i8, v8i8>; - def _4s4h : NeonI_3VDL_Aba<0b0, u, 0b01, opcode, asmop, "4s", "4h", - opnode, subop, VPR64, v4i32, v4i16, v4i16>; - def _2d2s : NeonI_3VDL_Aba<0b0, u, 0b10, opcode, asmop, "2d", "2s", - opnode, subop, VPR64, v2i64, v2i32, v2i32>; -} - -defm SABALvvv : NeonI_3VDL_Aba_v1<0b0, 0b0101, "sabal", - add, int_arm_neon_vabds>; -defm UABALvvv : NeonI_3VDL_Aba_v1<0b1, 0b0101, "uabal", - add, int_arm_neon_vabdu>; - -multiclass NeonI_3VDL2_Aba_v1<bit u, bits<4> opcode, string asmop, - SDPatternOperator opnode, string subop> { - def _8h8b : NeonI_3VDL_Aba<0b1, u, 0b00, opcode, asmop, "8h", "16b", - opnode, !cast<PatFrag>(subop # "_16B"), - VPR128, v8i16, v16i8, v8i8>; - def _4s4h : NeonI_3VDL_Aba<0b1, u, 0b01, opcode, asmop, "4s", "8h", - opnode, !cast<PatFrag>(subop # "_8H"), - VPR128, v4i32, v8i16, v4i16>; - def _2d2s : NeonI_3VDL_Aba<0b1, u, 0b10, opcode, asmop, "2d", "4s", - opnode, !cast<PatFrag>(subop # "_4S"), - VPR128, v2i64, v4i32, v2i32>; -} - -defm SABAL2vvv : NeonI_3VDL2_Aba_v1<0b0, 0b0101, "sabal2", add, - "NI_sabdl_hi">; -defm UABAL2vvv : NeonI_3VDL2_Aba_v1<0b1, 0b0101, "uabal2", add, - "NI_uabdl_hi">; - -// Long pattern with 2 operands -multiclass NeonI_3VDL_2Op<bit u, bits<4> opcode, string asmop, - SDPatternOperator opnode, bit Commutable = 0> { - let isCommutable = Commutable, - SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in { - def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b", - opnode, VPR128, VPR64, v8i16, v8i8>; - def _4s4h : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4s", "4h", - opnode, VPR128, VPR64, v4i32, v4i16>; - def _2d2s : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2d", "2s", - opnode, VPR128, VPR64, v2i64, v2i32>; - } -} - -defm SMULLvvv : NeonI_3VDL_2Op<0b0, 0b1100, "smull", int_arm_neon_vmulls, 1>; -defm UMULLvvv : NeonI_3VDL_2Op<0b1, 0b1100, "umull", int_arm_neon_vmullu, 1>; - -class NeonI_3VDL2_2Op_mull<bit q, bit u, bits<2> size, bits<4> opcode, - string asmop, string ResS, string OpS, - SDPatternOperator opnode, - ValueType ResTy, ValueType OpTy> - : NeonI_3VDiff<q, u, size, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm), - asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." 
# OpS, - [(set (ResTy VPR128:$Rd), - (ResTy (opnode (OpTy VPR128:$Rn), (OpTy VPR128:$Rm))))], - NoItinerary>, - Sched<[WriteFPMul, ReadFPMul, ReadFPMul]>; - -multiclass NeonI_3VDL2_2Op_mull_v1<bit u, bits<4> opcode, string asmop, - string opnode, bit Commutable = 0> { - let isCommutable = Commutable in { - def _8h16b : NeonI_3VDL2_2Op_mull<0b1, u, 0b00, opcode, asmop, "8h", "16b", - !cast<PatFrag>(opnode # "_16B"), - v8i16, v16i8>; - def _4s8h : NeonI_3VDL2_2Op_mull<0b1, u, 0b01, opcode, asmop, "4s", "8h", - !cast<PatFrag>(opnode # "_8H"), - v4i32, v8i16>; - def _2d4s : NeonI_3VDL2_2Op_mull<0b1, u, 0b10, opcode, asmop, "2d", "4s", - !cast<PatFrag>(opnode # "_4S"), - v2i64, v4i32>; - } -} - -defm SMULL2vvv : NeonI_3VDL2_2Op_mull_v1<0b0, 0b1100, "smull2", - "NI_smull_hi", 1>; -defm UMULL2vvv : NeonI_3VDL2_2Op_mull_v1<0b1, 0b1100, "umull2", - "NI_umull_hi", 1>; - -// Long pattern with 3 operands -class NeonI_3VDL_3Op<bit q, bit u, bits<2> size, bits<4> opcode, - string asmop, string ResS, string OpS, - SDPatternOperator opnode, - ValueType ResTy, ValueType OpTy> - : NeonI_3VDiff<q, u, size, opcode, - (outs VPR128:$Rd), (ins VPR128:$src, VPR64:$Rn, VPR64:$Rm), - asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS, - [(set (ResTy VPR128:$Rd), - (ResTy (opnode - (ResTy VPR128:$src), - (OpTy VPR64:$Rn), (OpTy VPR64:$Rm))))], - NoItinerary>, - Sched<[WriteFPMAC, ReadFPMAC, ReadFPMAC, ReadFPMAC]> { - let Constraints = "$src = $Rd"; -} - -multiclass NeonI_3VDL_3Op_v1<bit u, bits<4> opcode, string asmop, - SDPatternOperator opnode> { - def _8h8b : NeonI_3VDL_3Op<0b0, u, 0b00, opcode, asmop, "8h", "8b", - opnode, v8i16, v8i8>; - def _4s4h : NeonI_3VDL_3Op<0b0, u, 0b01, opcode, asmop, "4s", "4h", - opnode, v4i32, v4i16>; - def _2d2s : NeonI_3VDL_3Op<0b0, u, 0b10, opcode, asmop, "2d", "2s", - opnode, v2i64, v2i32>; -} - -def Neon_smlal : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm), - (add node:$Rd, - (int_arm_neon_vmulls node:$Rn, node:$Rm))>; - -def Neon_umlal : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm), - (add node:$Rd, - (int_arm_neon_vmullu node:$Rn, node:$Rm))>; - -def Neon_smlsl : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm), - (sub node:$Rd, - (int_arm_neon_vmulls node:$Rn, node:$Rm))>; - -def Neon_umlsl : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm), - (sub node:$Rd, - (int_arm_neon_vmullu node:$Rn, node:$Rm))>; - -defm SMLALvvv : NeonI_3VDL_3Op_v1<0b0, 0b1000, "smlal", Neon_smlal>; -defm UMLALvvv : NeonI_3VDL_3Op_v1<0b1, 0b1000, "umlal", Neon_umlal>; - -defm SMLSLvvv : NeonI_3VDL_3Op_v1<0b0, 0b1010, "smlsl", Neon_smlsl>; -defm UMLSLvvv : NeonI_3VDL_3Op_v1<0b1, 0b1010, "umlsl", Neon_umlsl>; - -class NeonI_3VDL2_3Op_mlas<bit q, bit u, bits<2> size, bits<4> opcode, - string asmop, string ResS, string OpS, - SDPatternOperator subop, SDPatternOperator opnode, - RegisterOperand OpVPR, - ValueType ResTy, ValueType OpTy> - : NeonI_3VDiff<q, u, size, opcode, - (outs VPR128:$Rd), (ins VPR128:$src, OpVPR:$Rn, OpVPR:$Rm), - asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." 
# OpS, - [(set (ResTy VPR128:$Rd), - (ResTy (subop - (ResTy VPR128:$src), - (ResTy (opnode (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm))))))], - NoItinerary>, - Sched<[WriteFPMAC, ReadFPMAC, ReadFPMAC, ReadFPMAC]> { - let Constraints = "$src = $Rd"; -} - -multiclass NeonI_3VDL2_3Op_mlas_v1<bit u, bits<4> opcode, string asmop, - SDPatternOperator subop, string opnode> { - def _8h16b : NeonI_3VDL2_3Op_mlas<0b1, u, 0b00, opcode, asmop, "8h", "16b", - subop, !cast<PatFrag>(opnode # "_16B"), - VPR128, v8i16, v16i8>; - def _4s8h : NeonI_3VDL2_3Op_mlas<0b1, u, 0b01, opcode, asmop, "4s", "8h", - subop, !cast<PatFrag>(opnode # "_8H"), - VPR128, v4i32, v8i16>; - def _2d4s : NeonI_3VDL2_3Op_mlas<0b1, u, 0b10, opcode, asmop, "2d", "4s", - subop, !cast<PatFrag>(opnode # "_4S"), - VPR128, v2i64, v4i32>; -} - -defm SMLAL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b0, 0b1000, "smlal2", - add, "NI_smull_hi">; -defm UMLAL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b1, 0b1000, "umlal2", - add, "NI_umull_hi">; - -defm SMLSL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b0, 0b1010, "smlsl2", - sub, "NI_smull_hi">; -defm UMLSL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b1, 0b1010, "umlsl2", - sub, "NI_umull_hi">; - -multiclass NeonI_3VDL_qdmlal_3Op_v2<bit u, bits<4> opcode, string asmop, - SDPatternOperator opnode> { - def _4s4h : NeonI_3VDL2_3Op_mlas<0b0, u, 0b01, opcode, asmop, "4s", "4h", - opnode, int_arm_neon_vqdmull, - VPR64, v4i32, v4i16>; - def _2d2s : NeonI_3VDL2_3Op_mlas<0b0, u, 0b10, opcode, asmop, "2d", "2s", - opnode, int_arm_neon_vqdmull, - VPR64, v2i64, v2i32>; -} - -defm SQDMLALvvv : NeonI_3VDL_qdmlal_3Op_v2<0b0, 0b1001, "sqdmlal", - int_arm_neon_vqadds>; -defm SQDMLSLvvv : NeonI_3VDL_qdmlal_3Op_v2<0b0, 0b1011, "sqdmlsl", - int_arm_neon_vqsubs>; - -multiclass NeonI_3VDL_v2<bit u, bits<4> opcode, string asmop, - SDPatternOperator opnode, bit Commutable = 0> { - let isCommutable = Commutable in { - def _4s4h : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4s", "4h", - opnode, VPR128, VPR64, v4i32, v4i16>; - def _2d2s : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2d", "2s", - opnode, VPR128, VPR64, v2i64, v2i32>; - } -} - -let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in { -defm SQDMULLvvv : NeonI_3VDL_v2<0b0, 0b1101, "sqdmull", - int_arm_neon_vqdmull, 1>; -} - -multiclass NeonI_3VDL2_2Op_mull_v2<bit u, bits<4> opcode, string asmop, - string opnode, bit Commutable = 0> { - let isCommutable = Commutable in { - def _4s8h : NeonI_3VDL2_2Op_mull<0b1, u, 0b01, opcode, asmop, "4s", "8h", - !cast<PatFrag>(opnode # "_8H"), - v4i32, v8i16>; - def _2d4s : NeonI_3VDL2_2Op_mull<0b1, u, 0b10, opcode, asmop, "2d", "4s", - !cast<PatFrag>(opnode # "_4S"), - v2i64, v4i32>; - } -} - -defm SQDMULL2vvv : NeonI_3VDL2_2Op_mull_v2<0b0, 0b1101, "sqdmull2", - "NI_qdmull_hi", 1>; - -multiclass NeonI_3VDL2_3Op_qdmlal_v2<bit u, bits<4> opcode, string asmop, - SDPatternOperator opnode> { - def _4s8h : NeonI_3VDL2_3Op_mlas<0b1, u, 0b01, opcode, asmop, "4s", "8h", - opnode, NI_qdmull_hi_8H, - VPR128, v4i32, v8i16>; - def _2d4s : NeonI_3VDL2_3Op_mlas<0b1, u, 0b10, opcode, asmop, "2d", "4s", - opnode, NI_qdmull_hi_4S, - VPR128, v2i64, v4i32>; -} - -defm SQDMLAL2vvv : NeonI_3VDL2_3Op_qdmlal_v2<0b0, 0b1001, "sqdmlal2", - int_arm_neon_vqadds>; -defm SQDMLSL2vvv : NeonI_3VDL2_3Op_qdmlal_v2<0b0, 0b1011, "sqdmlsl2", - int_arm_neon_vqsubs>; - -multiclass NeonI_3VDL_v3<bit u, bits<4> opcode, string asmop, - SDPatternOperator opnode_8h8b, - SDPatternOperator opnode_1q1d, bit Commutable = 0> { - let isCommutable = Commutable in { - def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b", - 
opnode_8h8b, VPR128, VPR64, v8i16, v8i8>; - - def _1q1d : NeonI_3VD_2Op<0b0, u, 0b11, opcode, asmop, "1q", "1d", - opnode_1q1d, VPR128, VPR64, v16i8, v1i64>; - } -} - -let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in -defm PMULLvvv : NeonI_3VDL_v3<0b0, 0b1110, "pmull", int_arm_neon_vmullp, - int_aarch64_neon_vmull_p64, 1>; - -multiclass NeonI_3VDL2_2Op_mull_v3<bit u, bits<4> opcode, string asmop, - string opnode, bit Commutable = 0> { - let isCommutable = Commutable in { - def _8h16b : NeonI_3VDL2_2Op_mull<0b1, u, 0b00, opcode, asmop, "8h", "16b", - !cast<PatFrag>(opnode # "_16B"), - v8i16, v16i8>; - - def _1q2d : - NeonI_3VDiff<0b1, u, 0b11, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm), - asmop # "\t$Rd.1q, $Rn.2d, $Rm.2d", - [(set (v16i8 VPR128:$Rd), - (v16i8 (int_aarch64_neon_vmull_p64 - (v1i64 (scalar_to_vector - (i64 (vector_extract (v2i64 VPR128:$Rn), 1)))), - (v1i64 (scalar_to_vector - (i64 (vector_extract (v2i64 VPR128:$Rm), 1)))))))], - NoItinerary>, - Sched<[WriteFPMul, ReadFPMul, ReadFPMul]>; - } - - def : Pat<(v16i8 (int_aarch64_neon_vmull_p64 - (v1i64 (extract_subvector (v2i64 VPR128:$Rn), (i64 1))), - (v1i64 (extract_subvector (v2i64 VPR128:$Rm), (i64 1))))), - (!cast<Instruction>(NAME # "_1q2d") VPR128:$Rn, VPR128:$Rm)>; -} - -defm PMULL2vvv : NeonI_3VDL2_2Op_mull_v3<0b0, 0b1110, "pmull2", "NI_pmull_hi", - 1>; - -// End of implementation for instruction class (3V Diff) - -// The followings are vector load/store multiple N-element structure -// (class SIMD lselem). - -// ld1: load multiple 1-element structure to 1/2/3/4 registers. -// ld2/ld3/ld4: load multiple N-element structure to N registers (N = 2, 3, 4). -// The structure consists of a sequence of sets of N values. -// The first element of the structure is placed in the first lane -// of the first first vector, the second element in the first lane -// of the second vector, and so on. -// E.g. LD1_3V_2S will load 32-bit elements {A, B, C, D, E, F} sequentially into -// the three 64-bit vectors list {BA, DC, FE}. -// E.g. LD3_2S will load 32-bit elements {A, B, C, D, E, F} into the three -// 64-bit vectors list {DA, EB, FC}. -// Store instructions store multiple structure to N registers like load. 
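The comment above contrasts the contiguous ld1 layout with the de-interleaving ld2/ld3/ld4 layout. A minimal C sketch of the two layouts, assuming the ACLE intrinsics from <arm_neon.h> that typically lower to these instructions (the function names are illustrative, not from this backend):

#include <arm_neon.h>

/* Contiguous load (ld1): {A,B,C,D} ends up in one vector in memory order. */
int32x4_t load_contiguous(const int32_t *p) {
  return vld1q_s32(p);
}

/* De-interleaving load (ld3): {A,B,C,D,E,F} becomes {A,D}, {B,E}, {C,F},
   matching the LD3_2S example in the comment above. */
int32x2x3_t load_deinterleaved(const int32_t *p) {
  return vld3_s32(p);
}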
- - -class NeonI_LDVList<bit q, bits<4> opcode, bits<2> size, - RegisterOperand VecList, string asmop> - : NeonI_LdStMult<q, 1, opcode, size, - (outs VecList:$Rt), (ins GPR64xsp:$Rn), - asmop # "\t$Rt, [$Rn]", - [], - NoItinerary>, - Sched<[WriteVecLd, ReadVecLd]> { - let mayLoad = 1; - let neverHasSideEffects = 1; -} - -multiclass LDVList_BHSD<bits<4> opcode, string List, string asmop> { - def _8B : NeonI_LDVList<0, opcode, 0b00, - !cast<RegisterOperand>(List # "8B_operand"), asmop>; - - def _4H : NeonI_LDVList<0, opcode, 0b01, - !cast<RegisterOperand>(List # "4H_operand"), asmop>; - - def _2S : NeonI_LDVList<0, opcode, 0b10, - !cast<RegisterOperand>(List # "2S_operand"), asmop>; - - def _16B : NeonI_LDVList<1, opcode, 0b00, - !cast<RegisterOperand>(List # "16B_operand"), asmop>; - - def _8H : NeonI_LDVList<1, opcode, 0b01, - !cast<RegisterOperand>(List # "8H_operand"), asmop>; - - def _4S : NeonI_LDVList<1, opcode, 0b10, - !cast<RegisterOperand>(List # "4S_operand"), asmop>; - - def _2D : NeonI_LDVList<1, opcode, 0b11, - !cast<RegisterOperand>(List # "2D_operand"), asmop>; -} - -// Load multiple N-element structure to N consecutive registers (N = 1,2,3,4) -defm LD1 : LDVList_BHSD<0b0111, "VOne", "ld1">; -def LD1_1D : NeonI_LDVList<0, 0b0111, 0b11, VOne1D_operand, "ld1">; - -defm LD2 : LDVList_BHSD<0b1000, "VPair", "ld2">; - -defm LD3 : LDVList_BHSD<0b0100, "VTriple", "ld3">; - -defm LD4 : LDVList_BHSD<0b0000, "VQuad", "ld4">; - -// Load multiple 1-element structure to N consecutive registers (N = 2,3,4) -defm LD1x2 : LDVList_BHSD<0b1010, "VPair", "ld1">; -def LD1x2_1D : NeonI_LDVList<0, 0b1010, 0b11, VPair1D_operand, "ld1">; - -defm LD1x3 : LDVList_BHSD<0b0110, "VTriple", "ld1">; -def LD1x3_1D : NeonI_LDVList<0, 0b0110, 0b11, VTriple1D_operand, "ld1">; - -defm LD1x4 : LDVList_BHSD<0b0010, "VQuad", "ld1">; -def LD1x4_1D : NeonI_LDVList<0, 0b0010, 0b11, VQuad1D_operand, "ld1">; - -class NeonI_STVList<bit q, bits<4> opcode, bits<2> size, - RegisterOperand VecList, string asmop> - : NeonI_LdStMult<q, 0, opcode, size, - (outs), (ins GPR64xsp:$Rn, VecList:$Rt), - asmop # "\t$Rt, [$Rn]", - [], - NoItinerary>, - Sched<[WriteVecSt, ReadVecSt, ReadVecSt]> { - let mayStore = 1; - let neverHasSideEffects = 1; -} - -multiclass STVList_BHSD<bits<4> opcode, string List, string asmop> { - def _8B : NeonI_STVList<0, opcode, 0b00, - !cast<RegisterOperand>(List # "8B_operand"), asmop>; - - def _4H : NeonI_STVList<0, opcode, 0b01, - !cast<RegisterOperand>(List # "4H_operand"), asmop>; - - def _2S : NeonI_STVList<0, opcode, 0b10, - !cast<RegisterOperand>(List # "2S_operand"), asmop>; - - def _16B : NeonI_STVList<1, opcode, 0b00, - !cast<RegisterOperand>(List # "16B_operand"), asmop>; - - def _8H : NeonI_STVList<1, opcode, 0b01, - !cast<RegisterOperand>(List # "8H_operand"), asmop>; - - def _4S : NeonI_STVList<1, opcode, 0b10, - !cast<RegisterOperand>(List # "4S_operand"), asmop>; - - def _2D : NeonI_STVList<1, opcode, 0b11, - !cast<RegisterOperand>(List # "2D_operand"), asmop>; -} - -// Store multiple N-element structures from N registers (N = 1,2,3,4) -defm ST1 : STVList_BHSD<0b0111, "VOne", "st1">; -def ST1_1D : NeonI_STVList<0, 0b0111, 0b11, VOne1D_operand, "st1">; - -defm ST2 : STVList_BHSD<0b1000, "VPair", "st2">; - -defm ST3 : STVList_BHSD<0b0100, "VTriple", "st3">; - -defm ST4 : STVList_BHSD<0b0000, "VQuad", "st4">; - -// Store multiple 1-element structures from N consecutive registers (N = 2,3,4) -defm ST1x2 : STVList_BHSD<0b1010, "VPair", "st1">; -def ST1x2_1D : NeonI_STVList<0, 0b1010, 0b11, 
VPair1D_operand, "st1">; - -defm ST1x3 : STVList_BHSD<0b0110, "VTriple", "st1">; -def ST1x3_1D : NeonI_STVList<0, 0b0110, 0b11, VTriple1D_operand, "st1">; - -defm ST1x4 : STVList_BHSD<0b0010, "VQuad", "st1">; -def ST1x4_1D : NeonI_STVList<0, 0b0010, 0b11, VQuad1D_operand, "st1">; - -def : Pat<(v2f64 (load GPR64xsp:$addr)), (LD1_2D GPR64xsp:$addr)>; -def : Pat<(v2i64 (load GPR64xsp:$addr)), (LD1_2D GPR64xsp:$addr)>; - -def : Pat<(v4f32 (load GPR64xsp:$addr)), (LD1_4S GPR64xsp:$addr)>; -def : Pat<(v4i32 (load GPR64xsp:$addr)), (LD1_4S GPR64xsp:$addr)>; - -def : Pat<(v8i16 (load GPR64xsp:$addr)), (LD1_8H GPR64xsp:$addr)>; -def : Pat<(v16i8 (load GPR64xsp:$addr)), (LD1_16B GPR64xsp:$addr)>; - -def : Pat<(v1f64 (load GPR64xsp:$addr)), (LD1_1D GPR64xsp:$addr)>; -def : Pat<(v1i64 (load GPR64xsp:$addr)), (LD1_1D GPR64xsp:$addr)>; - -def : Pat<(v2f32 (load GPR64xsp:$addr)), (LD1_2S GPR64xsp:$addr)>; -def : Pat<(v2i32 (load GPR64xsp:$addr)), (LD1_2S GPR64xsp:$addr)>; - -def : Pat<(v4i16 (load GPR64xsp:$addr)), (LD1_4H GPR64xsp:$addr)>; -def : Pat<(v8i8 (load GPR64xsp:$addr)), (LD1_8B GPR64xsp:$addr)>; - -def : Pat<(store (v2i64 VPR128:$value), GPR64xsp:$addr), - (ST1_2D GPR64xsp:$addr, VPR128:$value)>; -def : Pat<(store (v2f64 VPR128:$value), GPR64xsp:$addr), - (ST1_2D GPR64xsp:$addr, VPR128:$value)>; - -def : Pat<(store (v4i32 VPR128:$value), GPR64xsp:$addr), - (ST1_4S GPR64xsp:$addr, VPR128:$value)>; -def : Pat<(store (v4f32 VPR128:$value), GPR64xsp:$addr), - (ST1_4S GPR64xsp:$addr, VPR128:$value)>; - -def : Pat<(store (v8i16 VPR128:$value), GPR64xsp:$addr), - (ST1_8H GPR64xsp:$addr, VPR128:$value)>; -def : Pat<(store (v16i8 VPR128:$value), GPR64xsp:$addr), - (ST1_16B GPR64xsp:$addr, VPR128:$value)>; - -def : Pat<(store (v1i64 VPR64:$value), GPR64xsp:$addr), - (ST1_1D GPR64xsp:$addr, VPR64:$value)>; -def : Pat<(store (v1f64 VPR64:$value), GPR64xsp:$addr), - (ST1_1D GPR64xsp:$addr, VPR64:$value)>; - -def : Pat<(store (v2i32 VPR64:$value), GPR64xsp:$addr), - (ST1_2S GPR64xsp:$addr, VPR64:$value)>; -def : Pat<(store (v2f32 VPR64:$value), GPR64xsp:$addr), - (ST1_2S GPR64xsp:$addr, VPR64:$value)>; - -def : Pat<(store (v4i16 VPR64:$value), GPR64xsp:$addr), - (ST1_4H GPR64xsp:$addr, VPR64:$value)>; -def : Pat<(store (v8i8 VPR64:$value), GPR64xsp:$addr), - (ST1_8B GPR64xsp:$addr, VPR64:$value)>; - -// Match load/store of v1i8/v1i16/v1i32 type to FPR8/FPR16/FPR32 load/store. -// FIXME: for now we have v1i8, v1i16, v1i32 legal types, if they are illegal, -// these patterns are not needed any more. 
-def : Pat<(v1i8 (load GPR64xsp:$addr)), (LSFP8_LDR $addr, 0)>; -def : Pat<(v1i16 (load GPR64xsp:$addr)), (LSFP16_LDR $addr, 0)>; -def : Pat<(v1i32 (load GPR64xsp:$addr)), (LSFP32_LDR $addr, 0)>; - -def : Pat<(store (v1i8 FPR8:$value), GPR64xsp:$addr), - (LSFP8_STR $value, $addr, 0)>; -def : Pat<(store (v1i16 FPR16:$value), GPR64xsp:$addr), - (LSFP16_STR $value, $addr, 0)>; -def : Pat<(store (v1i32 FPR32:$value), GPR64xsp:$addr), - (LSFP32_STR $value, $addr, 0)>; - - -// End of vector load/store multiple N-element structure(class SIMD lselem) - -// The followings are post-index vector load/store multiple N-element -// structure(class SIMD lselem-post) -def exact1_asmoperand : AsmOperandClass { - let Name = "Exact1"; - let PredicateMethod = "isExactImm<1>"; - let RenderMethod = "addImmOperands"; -} -def uimm_exact1 : Operand<i32>, ImmLeaf<i32, [{return Imm == 1;}]> { - let ParserMatchClass = exact1_asmoperand; -} - -def exact2_asmoperand : AsmOperandClass { - let Name = "Exact2"; - let PredicateMethod = "isExactImm<2>"; - let RenderMethod = "addImmOperands"; -} -def uimm_exact2 : Operand<i32>, ImmLeaf<i32, [{return Imm == 2;}]> { - let ParserMatchClass = exact2_asmoperand; -} - -def exact3_asmoperand : AsmOperandClass { - let Name = "Exact3"; - let PredicateMethod = "isExactImm<3>"; - let RenderMethod = "addImmOperands"; -} -def uimm_exact3 : Operand<i32>, ImmLeaf<i32, [{return Imm == 3;}]> { - let ParserMatchClass = exact3_asmoperand; -} - -def exact4_asmoperand : AsmOperandClass { - let Name = "Exact4"; - let PredicateMethod = "isExactImm<4>"; - let RenderMethod = "addImmOperands"; -} -def uimm_exact4 : Operand<i32>, ImmLeaf<i32, [{return Imm == 4;}]> { - let ParserMatchClass = exact4_asmoperand; -} - -def exact6_asmoperand : AsmOperandClass { - let Name = "Exact6"; - let PredicateMethod = "isExactImm<6>"; - let RenderMethod = "addImmOperands"; -} -def uimm_exact6 : Operand<i32>, ImmLeaf<i32, [{return Imm == 6;}]> { - let ParserMatchClass = exact6_asmoperand; -} - -def exact8_asmoperand : AsmOperandClass { - let Name = "Exact8"; - let PredicateMethod = "isExactImm<8>"; - let RenderMethod = "addImmOperands"; -} -def uimm_exact8 : Operand<i32>, ImmLeaf<i32, [{return Imm == 8;}]> { - let ParserMatchClass = exact8_asmoperand; -} - -def exact12_asmoperand : AsmOperandClass { - let Name = "Exact12"; - let PredicateMethod = "isExactImm<12>"; - let RenderMethod = "addImmOperands"; -} -def uimm_exact12 : Operand<i32>, ImmLeaf<i32, [{return Imm == 12;}]> { - let ParserMatchClass = exact12_asmoperand; -} - -def exact16_asmoperand : AsmOperandClass { - let Name = "Exact16"; - let PredicateMethod = "isExactImm<16>"; - let RenderMethod = "addImmOperands"; -} -def uimm_exact16 : Operand<i32>, ImmLeaf<i32, [{return Imm == 16;}]> { - let ParserMatchClass = exact16_asmoperand; -} - -def exact24_asmoperand : AsmOperandClass { - let Name = "Exact24"; - let PredicateMethod = "isExactImm<24>"; - let RenderMethod = "addImmOperands"; -} -def uimm_exact24 : Operand<i32>, ImmLeaf<i32, [{return Imm == 24;}]> { - let ParserMatchClass = exact24_asmoperand; -} - -def exact32_asmoperand : AsmOperandClass { - let Name = "Exact32"; - let PredicateMethod = "isExactImm<32>"; - let RenderMethod = "addImmOperands"; -} -def uimm_exact32 : Operand<i32>, ImmLeaf<i32, [{return Imm == 32;}]> { - let ParserMatchClass = exact32_asmoperand; -} - -def exact48_asmoperand : AsmOperandClass { - let Name = "Exact48"; - let PredicateMethod = "isExactImm<48>"; - let RenderMethod = "addImmOperands"; -} -def uimm_exact48 : Operand<i32>, 
ImmLeaf<i32, [{return Imm == 48;}]> { - let ParserMatchClass = exact48_asmoperand; -} - -def exact64_asmoperand : AsmOperandClass { - let Name = "Exact64"; - let PredicateMethod = "isExactImm<64>"; - let RenderMethod = "addImmOperands"; -} -def uimm_exact64 : Operand<i32>, ImmLeaf<i32, [{return Imm == 64;}]> { - let ParserMatchClass = exact64_asmoperand; -} - -multiclass NeonI_LDWB_VList<bit q, bits<4> opcode, bits<2> size, - RegisterOperand VecList, Operand ImmTy, - string asmop> { - let Constraints = "$Rn = $wb", mayLoad = 1, neverHasSideEffects = 1, - DecoderMethod = "DecodeVLDSTPostInstruction" in { - def _fixed : NeonI_LdStMult_Post<q, 1, opcode, size, - (outs VecList:$Rt, GPR64xsp:$wb), - (ins GPR64xsp:$Rn, ImmTy:$amt), - asmop # "\t$Rt, [$Rn], $amt", - [], - NoItinerary>, - Sched<[WriteVecLd, WriteVecLd, ReadVecLd]> { - let Rm = 0b11111; - } - - def _register : NeonI_LdStMult_Post<q, 1, opcode, size, - (outs VecList:$Rt, GPR64xsp:$wb), - (ins GPR64xsp:$Rn, GPR64noxzr:$Rm), - asmop # "\t$Rt, [$Rn], $Rm", - [], - NoItinerary>, - Sched<[WriteVecLd, WriteVecLd, ReadVecLd, ReadVecLd]>; - } -} - -multiclass LDWB_VList_BHSD<bits<4> opcode, string List, Operand ImmTy, - Operand ImmTy2, string asmop> { - defm _8B : NeonI_LDWB_VList<0, opcode, 0b00, - !cast<RegisterOperand>(List # "8B_operand"), - ImmTy, asmop>; - - defm _4H : NeonI_LDWB_VList<0, opcode, 0b01, - !cast<RegisterOperand>(List # "4H_operand"), - ImmTy, asmop>; - - defm _2S : NeonI_LDWB_VList<0, opcode, 0b10, - !cast<RegisterOperand>(List # "2S_operand"), - ImmTy, asmop>; - - defm _16B : NeonI_LDWB_VList<1, opcode, 0b00, - !cast<RegisterOperand>(List # "16B_operand"), - ImmTy2, asmop>; - - defm _8H : NeonI_LDWB_VList<1, opcode, 0b01, - !cast<RegisterOperand>(List # "8H_operand"), - ImmTy2, asmop>; - - defm _4S : NeonI_LDWB_VList<1, opcode, 0b10, - !cast<RegisterOperand>(List # "4S_operand"), - ImmTy2, asmop>; - - defm _2D : NeonI_LDWB_VList<1, opcode, 0b11, - !cast<RegisterOperand>(List # "2D_operand"), - ImmTy2, asmop>; -} - -// Post-index load multiple N-element structures from N registers (N = 1,2,3,4) -defm LD1WB : LDWB_VList_BHSD<0b0111, "VOne", uimm_exact8, uimm_exact16, "ld1">; -defm LD1WB_1D : NeonI_LDWB_VList<0, 0b0111, 0b11, VOne1D_operand, uimm_exact8, - "ld1">; - -defm LD2WB : LDWB_VList_BHSD<0b1000, "VPair", uimm_exact16, uimm_exact32, "ld2">; - -defm LD3WB : LDWB_VList_BHSD<0b0100, "VTriple", uimm_exact24, uimm_exact48, - "ld3">; - -defm LD4WB : LDWB_VList_BHSD<0b0000, "VQuad", uimm_exact32, uimm_exact64, "ld4">; - -// Post-index load multiple 1-element structures from N consecutive registers -// (N = 2,3,4) -defm LD1x2WB : LDWB_VList_BHSD<0b1010, "VPair", uimm_exact16, uimm_exact32, - "ld1">; -defm LD1x2WB_1D : NeonI_LDWB_VList<0, 0b1010, 0b11, VPair1D_operand, - uimm_exact16, "ld1">; - -defm LD1x3WB : LDWB_VList_BHSD<0b0110, "VTriple", uimm_exact24, uimm_exact48, - "ld1">; -defm LD1x3WB_1D : NeonI_LDWB_VList<0, 0b0110, 0b11, VTriple1D_operand, - uimm_exact24, "ld1">; - -defm LD1x4WB : LDWB_VList_BHSD<0b0010, "VQuad", uimm_exact32, uimm_exact64, - "ld1">; -defm LD1x4WB_1D : NeonI_LDWB_VList<0, 0b0010, 0b11, VQuad1D_operand, - uimm_exact32, "ld1">; - -multiclass NeonI_STWB_VList<bit q, bits<4> opcode, bits<2> size, - RegisterOperand VecList, Operand ImmTy, - string asmop> { - let Constraints = "$Rn = $wb", mayStore = 1, neverHasSideEffects = 1, - DecoderMethod = "DecodeVLDSTPostInstruction" in { - def _fixed : NeonI_LdStMult_Post<q, 0, opcode, size, - (outs GPR64xsp:$wb), - (ins GPR64xsp:$Rn, ImmTy:$amt, 
VecList:$Rt), - asmop # "\t$Rt, [$Rn], $amt", - [], - NoItinerary>, - Sched<[WriteVecSt, ReadVecSt, ReadVecSt]> { - let Rm = 0b11111; - } - - def _register : NeonI_LdStMult_Post<q, 0, opcode, size, - (outs GPR64xsp:$wb), - (ins GPR64xsp:$Rn, GPR64noxzr:$Rm, VecList:$Rt), - asmop # "\t$Rt, [$Rn], $Rm", - [], - NoItinerary>, - Sched<[WriteVecSt, ReadVecSt, ReadVecSt, ReadVecSt]>; - } -} - -multiclass STWB_VList_BHSD<bits<4> opcode, string List, Operand ImmTy, - Operand ImmTy2, string asmop> { - defm _8B : NeonI_STWB_VList<0, opcode, 0b00, - !cast<RegisterOperand>(List # "8B_operand"), ImmTy, asmop>; - - defm _4H : NeonI_STWB_VList<0, opcode, 0b01, - !cast<RegisterOperand>(List # "4H_operand"), - ImmTy, asmop>; - - defm _2S : NeonI_STWB_VList<0, opcode, 0b10, - !cast<RegisterOperand>(List # "2S_operand"), - ImmTy, asmop>; - - defm _16B : NeonI_STWB_VList<1, opcode, 0b00, - !cast<RegisterOperand>(List # "16B_operand"), - ImmTy2, asmop>; - - defm _8H : NeonI_STWB_VList<1, opcode, 0b01, - !cast<RegisterOperand>(List # "8H_operand"), - ImmTy2, asmop>; - - defm _4S : NeonI_STWB_VList<1, opcode, 0b10, - !cast<RegisterOperand>(List # "4S_operand"), - ImmTy2, asmop>; - - defm _2D : NeonI_STWB_VList<1, opcode, 0b11, - !cast<RegisterOperand>(List # "2D_operand"), - ImmTy2, asmop>; -} - -// Post-index store multiple N-element structures from N registers (N = 1,2,3,4) -defm ST1WB : STWB_VList_BHSD<0b0111, "VOne", uimm_exact8, uimm_exact16, "st1">; -defm ST1WB_1D : NeonI_STWB_VList<0, 0b0111, 0b11, VOne1D_operand, uimm_exact8, - "st1">; - -defm ST2WB : STWB_VList_BHSD<0b1000, "VPair", uimm_exact16, uimm_exact32, "st2">; - -defm ST3WB : STWB_VList_BHSD<0b0100, "VTriple", uimm_exact24, uimm_exact48, - "st3">; - -defm ST4WB : STWB_VList_BHSD<0b0000, "VQuad", uimm_exact32, uimm_exact64, "st4">; - -// Post-index store multiple 1-element structures from N consecutive registers -// (N = 2,3,4) -defm ST1x2WB : STWB_VList_BHSD<0b1010, "VPair", uimm_exact16, uimm_exact32, - "st1">; -defm ST1x2WB_1D : NeonI_STWB_VList<0, 0b1010, 0b11, VPair1D_operand, - uimm_exact16, "st1">; - -defm ST1x3WB : STWB_VList_BHSD<0b0110, "VTriple", uimm_exact24, uimm_exact48, - "st1">; -defm ST1x3WB_1D : NeonI_STWB_VList<0, 0b0110, 0b11, VTriple1D_operand, - uimm_exact24, "st1">; - -defm ST1x4WB : STWB_VList_BHSD<0b0010, "VQuad", uimm_exact32, uimm_exact64, - "st1">; -defm ST1x4WB_1D : NeonI_STWB_VList<0, 0b0010, 0b11, VQuad1D_operand, - uimm_exact32, "st1">; - -// End of post-index vector load/store multiple N-element structure -// (class SIMD lselem-post) - -// The following are vector load/store single N-element structure -// (class SIMD lsone).
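As a rough illustration (not part of the deleted .td source), the "load one element and replicate to all lanes" operations covered by this class (the LD1R patterns defined further below) correspond to a splatted scalar load at the source level. A minimal C sketch using the standard ACLE NEON intrinsics; the function name is hypothetical:

#include <arm_neon.h>

/* Hypothetical example: load one float and broadcast it to all four lanes.
   With the LD1R patterns defined below, the load-plus-duplicate is expected
   to select to a single instruction, roughly "ld1r {v0.4s}, [x0]", rather
   than a scalar load followed by a separate dup. */
float32x4_t splat_load(const float *p) {
  return vld1q_dup_f32(p);
}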
-def neon_uimm0_bare : Operand<i64>, - ImmLeaf<i64, [{return Imm == 0;}]> { - let ParserMatchClass = neon_uimm0_asmoperand; - let PrintMethod = "printUImmBareOperand"; -} - -def neon_uimm1_bare : Operand<i64>, - ImmLeaf<i64, [{return Imm < 2;}]> { - let ParserMatchClass = neon_uimm1_asmoperand; - let PrintMethod = "printUImmBareOperand"; -} - -def neon_uimm2_bare : Operand<i64>, - ImmLeaf<i64, [{return Imm < 4;}]> { - let ParserMatchClass = neon_uimm2_asmoperand; - let PrintMethod = "printUImmBareOperand"; -} - -def neon_uimm3_bare : Operand<i64>, - ImmLeaf<i64, [{return Imm < 8;}]> { - let ParserMatchClass = uimm3_asmoperand; - let PrintMethod = "printUImmBareOperand"; -} - -def neon_uimm4_bare : Operand<i64>, - ImmLeaf<i64, [{return Imm < 16;}]> { - let ParserMatchClass = uimm4_asmoperand; - let PrintMethod = "printUImmBareOperand"; -} - -class NeonI_LDN_Dup<bit q, bit r, bits<3> opcode, bits<2> size, - RegisterOperand VecList, string asmop> - : NeonI_LdOne_Dup<q, r, opcode, size, - (outs VecList:$Rt), (ins GPR64xsp:$Rn), - asmop # "\t$Rt, [$Rn]", - [], - NoItinerary>, - Sched<[WriteVecLd, ReadVecLd]> { - let mayLoad = 1; - let neverHasSideEffects = 1; -} - -multiclass LDN_Dup_BHSD<bit r, bits<3> opcode, string List, string asmop> { - def _8B : NeonI_LDN_Dup<0, r, opcode, 0b00, - !cast<RegisterOperand>(List # "8B_operand"), asmop>; - - def _4H : NeonI_LDN_Dup<0, r, opcode, 0b01, - !cast<RegisterOperand>(List # "4H_operand"), asmop>; - - def _2S : NeonI_LDN_Dup<0, r, opcode, 0b10, - !cast<RegisterOperand>(List # "2S_operand"), asmop>; - - def _1D : NeonI_LDN_Dup<0, r, opcode, 0b11, - !cast<RegisterOperand>(List # "1D_operand"), asmop>; - - def _16B : NeonI_LDN_Dup<1, r, opcode, 0b00, - !cast<RegisterOperand>(List # "16B_operand"), asmop>; - - def _8H : NeonI_LDN_Dup<1, r, opcode, 0b01, - !cast<RegisterOperand>(List # "8H_operand"), asmop>; - - def _4S : NeonI_LDN_Dup<1, r, opcode, 0b10, - !cast<RegisterOperand>(List # "4S_operand"), asmop>; - - def _2D : NeonI_LDN_Dup<1, r, opcode, 0b11, - !cast<RegisterOperand>(List # "2D_operand"), asmop>; -} - -// Load single 1-element structure to all lanes of 1 register -defm LD1R : LDN_Dup_BHSD<0b0, 0b110, "VOne", "ld1r">; - -// Load single N-element structure to all lanes of N consecutive -// registers (N = 2,3,4) -defm LD2R : LDN_Dup_BHSD<0b1, 0b110, "VPair", "ld2r">; -defm LD3R : LDN_Dup_BHSD<0b0, 0b111, "VTriple", "ld3r">; -defm LD4R : LDN_Dup_BHSD<0b1, 0b111, "VQuad", "ld4r">; - - -class LD1R_pattern <ValueType VTy, ValueType DTy, PatFrag LoadOp, - Instruction INST> - : Pat<(VTy (Neon_vdup (DTy (LoadOp GPR64xsp:$Rn)))), - (VTy (INST GPR64xsp:$Rn))>; - -// Match all LD1R instructions -def : LD1R_pattern<v8i8, i32, extloadi8, LD1R_8B>; - -def : LD1R_pattern<v16i8, i32, extloadi8, LD1R_16B>; - -def : LD1R_pattern<v4i16, i32, extloadi16, LD1R_4H>; - -def : LD1R_pattern<v8i16, i32, extloadi16, LD1R_8H>; - -def : LD1R_pattern<v2i32, i32, load, LD1R_2S>; -def : LD1R_pattern<v2f32, f32, load, LD1R_2S>; - -def : LD1R_pattern<v4i32, i32, load, LD1R_4S>; -def : LD1R_pattern<v4f32, f32, load, LD1R_4S>; - -def : LD1R_pattern<v2i64, i64, load, LD1R_2D>; -def : LD1R_pattern<v2f64, f64, load, LD1R_2D>; - -class LD1R_pattern_v1 <ValueType VTy, ValueType DTy, PatFrag LoadOp, - Instruction INST> - : Pat<(VTy (scalar_to_vector (DTy (LoadOp GPR64xsp:$Rn)))), - (VTy (INST GPR64xsp:$Rn))>; - -def : LD1R_pattern_v1<v1i64, i64, load, LD1R_1D>; -def : LD1R_pattern_v1<v1f64, f64, load, LD1R_1D>; - -multiclass VectorList_Bare_BHSD<string PREFIX, int Count, - RegisterClass 
RegList> { - defm B : VectorList_operands<PREFIX, "B", Count, RegList>; - defm H : VectorList_operands<PREFIX, "H", Count, RegList>; - defm S : VectorList_operands<PREFIX, "S", Count, RegList>; - defm D : VectorList_operands<PREFIX, "D", Count, RegList>; -} - -// Special vector list operand of 128-bit vectors with bare layout. -// i.e. only show ".b", ".h", ".s", ".d" -defm VOne : VectorList_Bare_BHSD<"VOne", 1, FPR128>; -defm VPair : VectorList_Bare_BHSD<"VPair", 2, QPair>; -defm VTriple : VectorList_Bare_BHSD<"VTriple", 3, QTriple>; -defm VQuad : VectorList_Bare_BHSD<"VQuad", 4, QQuad>; - -class NeonI_LDN_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList, - Operand ImmOp, string asmop> - : NeonI_LdStOne_Lane<1, r, op2_1, op0, - (outs VList:$Rt), - (ins GPR64xsp:$Rn, VList:$src, ImmOp:$lane), - asmop # "\t$Rt[$lane], [$Rn]", - [], - NoItinerary>, - Sched<[WriteVecLd, ReadVecLd, ReadVecLd]> { - let mayLoad = 1; - let neverHasSideEffects = 1; - let hasExtraDefRegAllocReq = 1; - let Constraints = "$src = $Rt"; -} - -multiclass LDN_Lane_BHSD<bit r, bit op0, string List, string asmop> { - def _B : NeonI_LDN_Lane<r, 0b00, op0, - !cast<RegisterOperand>(List # "B_operand"), - neon_uimm4_bare, asmop> { - let Inst{12-10} = lane{2-0}; - let Inst{30} = lane{3}; - } - - def _H : NeonI_LDN_Lane<r, 0b01, op0, - !cast<RegisterOperand>(List # "H_operand"), - neon_uimm3_bare, asmop> { - let Inst{12-10} = {lane{1}, lane{0}, 0b0}; - let Inst{30} = lane{2}; - } - - def _S : NeonI_LDN_Lane<r, 0b10, op0, - !cast<RegisterOperand>(List # "S_operand"), - neon_uimm2_bare, asmop> { - let Inst{12-10} = {lane{0}, 0b0, 0b0}; - let Inst{30} = lane{1}; - } - - def _D : NeonI_LDN_Lane<r, 0b10, op0, - !cast<RegisterOperand>(List # "D_operand"), - neon_uimm1_bare, asmop> { - let Inst{12-10} = 0b001; - let Inst{30} = lane{0}; - } -} - -// Load single 1-element structure to one lane of 1 register. 
-defm LD1LN : LDN_Lane_BHSD<0b0, 0b0, "VOne", "ld1">; - -// Load single N-element structure to one lane of N consecutive registers -// (N = 2,3,4) -defm LD2LN : LDN_Lane_BHSD<0b1, 0b0, "VPair", "ld2">; -defm LD3LN : LDN_Lane_BHSD<0b0, 0b1, "VTriple", "ld3">; -defm LD4LN : LDN_Lane_BHSD<0b1, 0b1, "VQuad", "ld4">; - -multiclass LD1LN_patterns<ValueType VTy, ValueType VTy2, ValueType DTy, - Operand ImmOp, Operand ImmOp2, PatFrag LoadOp, - Instruction INST> { - def : Pat<(VTy (vector_insert (VTy VPR64:$src), - (DTy (LoadOp GPR64xsp:$Rn)), (ImmOp:$lane))), - (VTy (EXTRACT_SUBREG - (INST GPR64xsp:$Rn, - (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64), - ImmOp:$lane), - sub_64))>; - - def : Pat<(VTy2 (vector_insert (VTy2 VPR128:$src), - (DTy (LoadOp GPR64xsp:$Rn)), (ImmOp2:$lane))), - (VTy2 (INST GPR64xsp:$Rn, VPR128:$src, ImmOp2:$lane))>; -} - -// Match all LD1LN instructions -defm : LD1LN_patterns<v8i8, v16i8, i32, neon_uimm3_bare, neon_uimm4_bare, - extloadi8, LD1LN_B>; - -defm : LD1LN_patterns<v4i16, v8i16, i32, neon_uimm2_bare, neon_uimm3_bare, - extloadi16, LD1LN_H>; - -defm : LD1LN_patterns<v2i32, v4i32, i32, neon_uimm1_bare, neon_uimm2_bare, - load, LD1LN_S>; -defm : LD1LN_patterns<v2f32, v4f32, f32, neon_uimm1_bare, neon_uimm2_bare, - load, LD1LN_S>; - -defm : LD1LN_patterns<v1i64, v2i64, i64, neon_uimm0_bare, neon_uimm1_bare, - load, LD1LN_D>; -defm : LD1LN_patterns<v1f64, v2f64, f64, neon_uimm0_bare, neon_uimm1_bare, - load, LD1LN_D>; - -class NeonI_STN_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList, - Operand ImmOp, string asmop> - : NeonI_LdStOne_Lane<0, r, op2_1, op0, - (outs), (ins GPR64xsp:$Rn, VList:$Rt, ImmOp:$lane), - asmop # "\t$Rt[$lane], [$Rn]", - [], - NoItinerary>, - Sched<[WriteVecSt, ReadVecSt, ReadVecSt]> { - let mayStore = 1; - let neverHasSideEffects = 1; - let hasExtraDefRegAllocReq = 1; -} - -multiclass STN_Lane_BHSD<bit r, bit op0, string List, string asmop> { - def _B : NeonI_STN_Lane<r, 0b00, op0, - !cast<RegisterOperand>(List # "B_operand"), - neon_uimm4_bare, asmop> { - let Inst{12-10} = lane{2-0}; - let Inst{30} = lane{3}; - } - - def _H : NeonI_STN_Lane<r, 0b01, op0, - !cast<RegisterOperand>(List # "H_operand"), - neon_uimm3_bare, asmop> { - let Inst{12-10} = {lane{1}, lane{0}, 0b0}; - let Inst{30} = lane{2}; - } - - def _S : NeonI_STN_Lane<r, 0b10, op0, - !cast<RegisterOperand>(List # "S_operand"), - neon_uimm2_bare, asmop> { - let Inst{12-10} = {lane{0}, 0b0, 0b0}; - let Inst{30} = lane{1}; - } - - def _D : NeonI_STN_Lane<r, 0b10, op0, - !cast<RegisterOperand>(List # "D_operand"), - neon_uimm1_bare, asmop>{ - let Inst{12-10} = 0b001; - let Inst{30} = lane{0}; - } -} - -// Store single 1-element structure from one lane of 1 register. 
-defm ST1LN : STN_Lane_BHSD<0b0, 0b0, "VOne", "st1">; - -// Store single N-element structure from one lane of N consecutive registers -// (N = 2,3,4) -defm ST2LN : STN_Lane_BHSD<0b1, 0b0, "VPair", "st2">; -defm ST3LN : STN_Lane_BHSD<0b0, 0b1, "VTriple", "st3">; -defm ST4LN : STN_Lane_BHSD<0b1, 0b1, "VQuad", "st4">; - -multiclass ST1LN_patterns<ValueType VTy, ValueType VTy2, ValueType DTy, - Operand ImmOp, Operand ImmOp2, PatFrag StoreOp, - Instruction INST> { - def : Pat<(StoreOp (DTy (vector_extract (VTy VPR64:$Rt), ImmOp:$lane)), - GPR64xsp:$Rn), - (INST GPR64xsp:$Rn, - (SUBREG_TO_REG (i64 0), VPR64:$Rt, sub_64), - ImmOp:$lane)>; - - def : Pat<(StoreOp (DTy (vector_extract (VTy2 VPR128:$Rt), ImmOp2:$lane)), - GPR64xsp:$Rn), - (INST GPR64xsp:$Rn, VPR128:$Rt, ImmOp2:$lane)>; -} - -// Match all ST1LN instructions -defm : ST1LN_patterns<v8i8, v16i8, i32, neon_uimm3_bare, neon_uimm4_bare, - truncstorei8, ST1LN_B>; - -defm : ST1LN_patterns<v4i16, v8i16, i32, neon_uimm2_bare, neon_uimm3_bare, - truncstorei16, ST1LN_H>; - -defm : ST1LN_patterns<v2i32, v4i32, i32, neon_uimm1_bare, neon_uimm2_bare, - store, ST1LN_S>; -defm : ST1LN_patterns<v2f32, v4f32, f32, neon_uimm1_bare, neon_uimm2_bare, - store, ST1LN_S>; - -defm : ST1LN_patterns<v1i64, v2i64, i64, neon_uimm0_bare, neon_uimm1_bare, - store, ST1LN_D>; -defm : ST1LN_patterns<v1f64, v2f64, f64, neon_uimm0_bare, neon_uimm1_bare, - store, ST1LN_D>; - -// End of vector load/store single N-element structure (class SIMD lsone). - - -// The following are post-index load/store single N-element instructions -// (class SIMD lsone-post) - -multiclass NeonI_LDN_WB_Dup<bit q, bit r, bits<3> opcode, bits<2> size, - RegisterOperand VecList, Operand ImmTy, - string asmop> { - let mayLoad = 1, neverHasSideEffects = 1, Constraints = "$wb = $Rn", - DecoderMethod = "DecodeVLDSTLanePostInstruction" in { - def _fixed : NeonI_LdOne_Dup_Post<q, r, opcode, size, - (outs VecList:$Rt, GPR64xsp:$wb), - (ins GPR64xsp:$Rn, ImmTy:$amt), - asmop # "\t$Rt, [$Rn], $amt", - [], - NoItinerary>, - Sched<[WriteVecLd, WriteVecLd, ReadVecLd]> { - let Rm = 0b11111; - } - - def _register : NeonI_LdOne_Dup_Post<q, r, opcode, size, - (outs VecList:$Rt, GPR64xsp:$wb), - (ins GPR64xsp:$Rn, GPR64noxzr:$Rm), - asmop # "\t$Rt, [$Rn], $Rm", - [], - NoItinerary>, - Sched<[WriteVecLd, WriteVecLd, ReadVecLd, ReadVecLd]>; - } -} - -multiclass LDWB_Dup_BHSD<bit r, bits<3> opcode, string List, string asmop, - Operand uimm_b, Operand uimm_h, - Operand uimm_s, Operand uimm_d> { - defm _8B : NeonI_LDN_WB_Dup<0, r, opcode, 0b00, - !cast<RegisterOperand>(List # "8B_operand"), - uimm_b, asmop>; - - defm _4H : NeonI_LDN_WB_Dup<0, r, opcode, 0b01, - !cast<RegisterOperand>(List # "4H_operand"), - uimm_h, asmop>; - - defm _2S : NeonI_LDN_WB_Dup<0, r, opcode, 0b10, - !cast<RegisterOperand>(List # "2S_operand"), - uimm_s, asmop>; - - defm _1D : NeonI_LDN_WB_Dup<0, r, opcode, 0b11, - !cast<RegisterOperand>(List # "1D_operand"), - uimm_d, asmop>; - - defm _16B : NeonI_LDN_WB_Dup<1, r, opcode, 0b00, - !cast<RegisterOperand>(List # "16B_operand"), - uimm_b, asmop>; - - defm _8H : NeonI_LDN_WB_Dup<1, r, opcode, 0b01, - !cast<RegisterOperand>(List # "8H_operand"), - uimm_h, asmop>; - - defm _4S : NeonI_LDN_WB_Dup<1, r, opcode, 0b10, - !cast<RegisterOperand>(List # "4S_operand"), - uimm_s, asmop>; - - defm _2D : NeonI_LDN_WB_Dup<1, r, opcode, 0b11, - !cast<RegisterOperand>(List # "2D_operand"), - uimm_d, asmop>; -} - -// Post-index load single 1-element structure to all lanes of 1 register -defm LD1R_WB : 
LDWB_Dup_BHSD<0b0, 0b110, "VOne", "ld1r", uimm_exact1, - uimm_exact2, uimm_exact4, uimm_exact8>; - -// Post-index load single N-element structure to all lanes of N consecutive -// registers (N = 2,3,4) -defm LD2R_WB : LDWB_Dup_BHSD<0b1, 0b110, "VPair", "ld2r", uimm_exact2, - uimm_exact4, uimm_exact8, uimm_exact16>; -defm LD3R_WB : LDWB_Dup_BHSD<0b0, 0b111, "VTriple", "ld3r", uimm_exact3, - uimm_exact6, uimm_exact12, uimm_exact24>; -defm LD4R_WB : LDWB_Dup_BHSD<0b1, 0b111, "VQuad", "ld4r", uimm_exact4, - uimm_exact8, uimm_exact16, uimm_exact32>; - -let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1, - Constraints = "$Rn = $wb, $Rt = $src", - DecoderMethod = "DecodeVLDSTLanePostInstruction" in { - class LDN_WBFx_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList, - Operand ImmTy, Operand ImmOp, string asmop> - : NeonI_LdStOne_Lane_Post<1, r, op2_1, op0, - (outs VList:$Rt, GPR64xsp:$wb), - (ins GPR64xsp:$Rn, ImmTy:$amt, - VList:$src, ImmOp:$lane), - asmop # "\t$Rt[$lane], [$Rn], $amt", - [], - NoItinerary>, - Sched<[WriteVecLd, WriteVecLd, ReadVecLd, ReadVecLd]> { - let Rm = 0b11111; - } - - class LDN_WBReg_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList, - Operand ImmTy, Operand ImmOp, string asmop> - : NeonI_LdStOne_Lane_Post<1, r, op2_1, op0, - (outs VList:$Rt, GPR64xsp:$wb), - (ins GPR64xsp:$Rn, GPR64noxzr:$Rm, - VList:$src, ImmOp:$lane), - asmop # "\t$Rt[$lane], [$Rn], $Rm", - [], - NoItinerary>, - Sched<[WriteVecLd, WriteVecLd, ReadVecLd, ReadVecLd, ReadVecLd]>; -} - -multiclass LD_Lane_WB_BHSD<bit r, bit op0, string List, string asmop, - Operand uimm_b, Operand uimm_h, - Operand uimm_s, Operand uimm_d> { - def _B_fixed : LDN_WBFx_Lane<r, 0b00, op0, - !cast<RegisterOperand>(List # "B_operand"), - uimm_b, neon_uimm4_bare, asmop> { - let Inst{12-10} = lane{2-0}; - let Inst{30} = lane{3}; - } - - def _B_register : LDN_WBReg_Lane<r, 0b00, op0, - !cast<RegisterOperand>(List # "B_operand"), - uimm_b, neon_uimm4_bare, asmop> { - let Inst{12-10} = lane{2-0}; - let Inst{30} = lane{3}; - } - - def _H_fixed : LDN_WBFx_Lane<r, 0b01, op0, - !cast<RegisterOperand>(List # "H_operand"), - uimm_h, neon_uimm3_bare, asmop> { - let Inst{12-10} = {lane{1}, lane{0}, 0b0}; - let Inst{30} = lane{2}; - } - - def _H_register : LDN_WBReg_Lane<r, 0b01, op0, - !cast<RegisterOperand>(List # "H_operand"), - uimm_h, neon_uimm3_bare, asmop> { - let Inst{12-10} = {lane{1}, lane{0}, 0b0}; - let Inst{30} = lane{2}; - } - - def _S_fixed : LDN_WBFx_Lane<r, 0b10, op0, - !cast<RegisterOperand>(List # "S_operand"), - uimm_s, neon_uimm2_bare, asmop> { - let Inst{12-10} = {lane{0}, 0b0, 0b0}; - let Inst{30} = lane{1}; - } - - def _S_register : LDN_WBReg_Lane<r, 0b10, op0, - !cast<RegisterOperand>(List # "S_operand"), - uimm_s, neon_uimm2_bare, asmop> { - let Inst{12-10} = {lane{0}, 0b0, 0b0}; - let Inst{30} = lane{1}; - } - - def _D_fixed : LDN_WBFx_Lane<r, 0b10, op0, - !cast<RegisterOperand>(List # "D_operand"), - uimm_d, neon_uimm1_bare, asmop> { - let Inst{12-10} = 0b001; - let Inst{30} = lane{0}; - } - - def _D_register : LDN_WBReg_Lane<r, 0b10, op0, - !cast<RegisterOperand>(List # "D_operand"), - uimm_d, neon_uimm1_bare, asmop> { - let Inst{12-10} = 0b001; - let Inst{30} = lane{0}; - } -} - -// Post-index load single 1-element structure to one lane of 1 register. 
-defm LD1LN_WB : LD_Lane_WB_BHSD<0b0, 0b0, "VOne", "ld1", uimm_exact1, - uimm_exact2, uimm_exact4, uimm_exact8>; - -// Post-index load single N-element structure to one lane of N consecutive -// registers -// (N = 2,3,4) -defm LD2LN_WB : LD_Lane_WB_BHSD<0b1, 0b0, "VPair", "ld2", uimm_exact2, - uimm_exact4, uimm_exact8, uimm_exact16>; -defm LD3LN_WB : LD_Lane_WB_BHSD<0b0, 0b1, "VTriple", "ld3", uimm_exact3, - uimm_exact6, uimm_exact12, uimm_exact24>; -defm LD4LN_WB : LD_Lane_WB_BHSD<0b1, 0b1, "VQuad", "ld4", uimm_exact4, - uimm_exact8, uimm_exact16, uimm_exact32>; - -let mayStore = 1, neverHasSideEffects = 1, - hasExtraDefRegAllocReq = 1, Constraints = "$Rn = $wb", - DecoderMethod = "DecodeVLDSTLanePostInstruction" in { - class STN_WBFx_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList, - Operand ImmTy, Operand ImmOp, string asmop> - : NeonI_LdStOne_Lane_Post<0, r, op2_1, op0, - (outs GPR64xsp:$wb), - (ins GPR64xsp:$Rn, ImmTy:$amt, - VList:$Rt, ImmOp:$lane), - asmop # "\t$Rt[$lane], [$Rn], $amt", - [], - NoItinerary>, - Sched<[WriteVecSt, ReadVecSt, ReadVecSt]> { - let Rm = 0b11111; - } - - class STN_WBReg_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList, - Operand ImmTy, Operand ImmOp, string asmop> - : NeonI_LdStOne_Lane_Post<0, r, op2_1, op0, - (outs GPR64xsp:$wb), - (ins GPR64xsp:$Rn, GPR64noxzr:$Rm, VList:$Rt, - ImmOp:$lane), - asmop # "\t$Rt[$lane], [$Rn], $Rm", - [], - NoItinerary>, - Sched<[WriteVecSt, ReadVecSt, ReadVecSt, ReadVecSt]>; -} - -multiclass ST_Lane_WB_BHSD<bit r, bit op0, string List, string asmop, - Operand uimm_b, Operand uimm_h, - Operand uimm_s, Operand uimm_d> { - def _B_fixed : STN_WBFx_Lane<r, 0b00, op0, - !cast<RegisterOperand>(List # "B_operand"), - uimm_b, neon_uimm4_bare, asmop> { - let Inst{12-10} = lane{2-0}; - let Inst{30} = lane{3}; - } - - def _B_register : STN_WBReg_Lane<r, 0b00, op0, - !cast<RegisterOperand>(List # "B_operand"), - uimm_b, neon_uimm4_bare, asmop> { - let Inst{12-10} = lane{2-0}; - let Inst{30} = lane{3}; - } - - def _H_fixed : STN_WBFx_Lane<r, 0b01, op0, - !cast<RegisterOperand>(List # "H_operand"), - uimm_h, neon_uimm3_bare, asmop> { - let Inst{12-10} = {lane{1}, lane{0}, 0b0}; - let Inst{30} = lane{2}; - } - - def _H_register : STN_WBReg_Lane<r, 0b01, op0, - !cast<RegisterOperand>(List # "H_operand"), - uimm_h, neon_uimm3_bare, asmop> { - let Inst{12-10} = {lane{1}, lane{0}, 0b0}; - let Inst{30} = lane{2}; - } - - def _S_fixed : STN_WBFx_Lane<r, 0b10, op0, - !cast<RegisterOperand>(List # "S_operand"), - uimm_s, neon_uimm2_bare, asmop> { - let Inst{12-10} = {lane{0}, 0b0, 0b0}; - let Inst{30} = lane{1}; - } - - def _S_register : STN_WBReg_Lane<r, 0b10, op0, - !cast<RegisterOperand>(List # "S_operand"), - uimm_s, neon_uimm2_bare, asmop> { - let Inst{12-10} = {lane{0}, 0b0, 0b0}; - let Inst{30} = lane{1}; - } - - def _D_fixed : STN_WBFx_Lane<r, 0b10, op0, - !cast<RegisterOperand>(List # "D_operand"), - uimm_d, neon_uimm1_bare, asmop> { - let Inst{12-10} = 0b001; - let Inst{30} = lane{0}; - } - - def _D_register : STN_WBReg_Lane<r, 0b10, op0, - !cast<RegisterOperand>(List # "D_operand"), - uimm_d, neon_uimm1_bare, asmop> { - let Inst{12-10} = 0b001; - let Inst{30} = lane{0}; - } -} - -// Post-index store single 1-element structure from one lane of 1 register. 
-defm ST1LN_WB : ST_Lane_WB_BHSD<0b0, 0b0, "VOne", "st1", uimm_exact1, - uimm_exact2, uimm_exact4, uimm_exact8>; - -// Post-index store single N-element structure from one lane of N consecutive -// registers (N = 2,3,4) -defm ST2LN_WB : ST_Lane_WB_BHSD<0b1, 0b0, "VPair", "st2", uimm_exact2, - uimm_exact4, uimm_exact8, uimm_exact16>; -defm ST3LN_WB : ST_Lane_WB_BHSD<0b0, 0b1, "VTriple", "st3", uimm_exact3, - uimm_exact6, uimm_exact12, uimm_exact24>; -defm ST4LN_WB : ST_Lane_WB_BHSD<0b1, 0b1, "VQuad", "st4", uimm_exact4, - uimm_exact8, uimm_exact16, uimm_exact32>; - -// End of post-index load/store single N-element instructions -// (class SIMD lsone-post) - -// Neon Scalar instructions implementation -// Scalar Three Same - -class NeonI_Scalar3Same_size<bit u, bits<2> size, bits<5> opcode, string asmop, - RegisterClass FPRC> - : NeonI_Scalar3Same<u, size, opcode, - (outs FPRC:$Rd), (ins FPRC:$Rn, FPRC:$Rm), - !strconcat(asmop, "\t$Rd, $Rn, $Rm"), - [], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - -class NeonI_Scalar3Same_D_size<bit u, bits<5> opcode, string asmop> - : NeonI_Scalar3Same_size<u, 0b11, opcode, asmop, FPR64>; - -multiclass NeonI_Scalar3Same_HS_sizes<bit u, bits<5> opcode, string asmop, - bit Commutable = 0> { - let isCommutable = Commutable in { - def hhh : NeonI_Scalar3Same_size<u, 0b01, opcode, asmop, FPR16>; - def sss : NeonI_Scalar3Same_size<u, 0b10, opcode, asmop, FPR32>; - } -} - -multiclass NeonI_Scalar3Same_SD_sizes<bit u, bit size_high, bits<5> opcode, - string asmop, bit Commutable = 0> { - let isCommutable = Commutable in { - def sss : NeonI_Scalar3Same_size<u, {size_high, 0b0}, opcode, asmop, FPR32>; - def ddd : NeonI_Scalar3Same_size<u, {size_high, 0b1}, opcode, asmop, FPR64>; - } -} - -multiclass NeonI_Scalar3Same_BHSD_sizes<bit u, bits<5> opcode, - string asmop, bit Commutable = 0> { - let isCommutable = Commutable in { - def bbb : NeonI_Scalar3Same_size<u, 0b00, opcode, asmop, FPR8>; - def hhh : NeonI_Scalar3Same_size<u, 0b01, opcode, asmop, FPR16>; - def sss : NeonI_Scalar3Same_size<u, 0b10, opcode, asmop, FPR32>; - def ddd : NeonI_Scalar3Same_size<u, 0b11, opcode, asmop, FPR64>; - } -} - -multiclass Neon_Scalar3Same_D_size_patterns<SDPatternOperator opnode, - Instruction INSTD> { - def : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))), - (INSTD FPR64:$Rn, FPR64:$Rm)>; -} - -multiclass Neon_Scalar3Same_BHSD_size_patterns<SDPatternOperator opnode, - Instruction INSTB, - Instruction INSTH, - Instruction INSTS, - Instruction INSTD> - : Neon_Scalar3Same_D_size_patterns<opnode, INSTD> { - def: Pat<(v1i8 (opnode (v1i8 FPR8:$Rn), (v1i8 FPR8:$Rm))), - (INSTB FPR8:$Rn, FPR8:$Rm)>; - def: Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))), - (INSTH FPR16:$Rn, FPR16:$Rm)>; - def: Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))), - (INSTS FPR32:$Rn, FPR32:$Rm)>; -} - -multiclass Neon_Scalar3Same_HS_size_patterns<SDPatternOperator opnode, - Instruction INSTH, - Instruction INSTS> { - def : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))), - (INSTH FPR16:$Rn, FPR16:$Rm)>; - def : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))), - (INSTS FPR32:$Rn, FPR32:$Rm)>; -} - -multiclass Neon_Scalar3Same_SD_size_patterns<SDPatternOperator opnode, - ValueType SResTy, ValueType STy, - Instruction INSTS, ValueType DResTy, - ValueType DTy, Instruction INSTD> { - def : Pat<(SResTy (opnode (STy FPR32:$Rn), (STy FPR32:$Rm))), - (INSTS FPR32:$Rn, FPR32:$Rm)>; - def : Pat<(DResTy (opnode (DTy FPR64:$Rn), (DTy FPR64:$Rm))), - (INSTD 
FPR64:$Rn, FPR64:$Rm)>; -} - -class Neon_Scalar3Same_cmp_V1_D_size_patterns<CondCode CC, - Instruction INSTD> - : Pat<(v1i64 (Neon_cmp (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm), CC)), - (INSTD FPR64:$Rn, FPR64:$Rm)>; - -// Scalar Three Different - -class NeonI_Scalar3Diff_size<bit u, bits<2> size, bits<4> opcode, string asmop, - RegisterClass FPRCD, RegisterClass FPRCS> - : NeonI_Scalar3Diff<u, size, opcode, - (outs FPRCD:$Rd), (ins FPRCS:$Rn, FPRCS:$Rm), - !strconcat(asmop, "\t$Rd, $Rn, $Rm"), - [], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - -multiclass NeonI_Scalar3Diff_HS_size<bit u, bits<4> opcode, string asmop> { - def shh : NeonI_Scalar3Diff_size<u, 0b01, opcode, asmop, FPR32, FPR16>; - def dss : NeonI_Scalar3Diff_size<u, 0b10, opcode, asmop, FPR64, FPR32>; -} - -multiclass NeonI_Scalar3Diff_ml_HS_size<bit u, bits<4> opcode, string asmop> { - let Constraints = "$Src = $Rd" in { - def shh : NeonI_Scalar3Diff<u, 0b01, opcode, - (outs FPR32:$Rd), (ins FPR32:$Src, FPR16:$Rn, FPR16:$Rm), - !strconcat(asmop, "\t$Rd, $Rn, $Rm"), - [], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]>; - def dss : NeonI_Scalar3Diff<u, 0b10, opcode, - (outs FPR64:$Rd), (ins FPR64:$Src, FPR32:$Rn, FPR32:$Rm), - !strconcat(asmop, "\t$Rd, $Rn, $Rm"), - [], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]>; - } -} - -multiclass Neon_Scalar3Diff_HS_size_patterns<SDPatternOperator opnode, - Instruction INSTH, - Instruction INSTS> { - def : Pat<(v1i32 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))), - (INSTH FPR16:$Rn, FPR16:$Rm)>; - def : Pat<(v1i64 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))), - (INSTS FPR32:$Rn, FPR32:$Rm)>; -} - -multiclass Neon_Scalar3Diff_ml_HS_size_patterns<SDPatternOperator opnode, - Instruction INSTH, - Instruction INSTS> { - def : Pat<(v1i32 (opnode (v1i32 FPR32:$Src), (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))), - (INSTH FPR32:$Src, FPR16:$Rn, FPR16:$Rm)>; - def : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))), - (INSTS FPR64:$Src, FPR32:$Rn, FPR32:$Rm)>; -} - -// Scalar Two Registers Miscellaneous - -class NeonI_Scalar2SameMisc_size<bit u, bits<2> size, bits<5> opcode, string asmop, - RegisterClass FPRCD, RegisterClass FPRCS> - : NeonI_Scalar2SameMisc<u, size, opcode, - (outs FPRCD:$Rd), (ins FPRCS:$Rn), - !strconcat(asmop, "\t$Rd, $Rn"), - [], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - -multiclass NeonI_Scalar2SameMisc_SD_size<bit u, bit size_high, bits<5> opcode, - string asmop> { - def ss : NeonI_Scalar2SameMisc_size<u, {size_high, 0b0}, opcode, asmop, FPR32, - FPR32>; - def dd : NeonI_Scalar2SameMisc_size<u, {size_high, 0b1}, opcode, asmop, FPR64, - FPR64>; -} - -multiclass NeonI_Scalar2SameMisc_D_size<bit u, bits<5> opcode, string asmop> { - def dd : NeonI_Scalar2SameMisc_size<u, 0b11, opcode, asmop, FPR64, FPR64>; -} - -multiclass NeonI_Scalar2SameMisc_BHSD_size<bit u, bits<5> opcode, string asmop> - : NeonI_Scalar2SameMisc_D_size<u, opcode, asmop> { - def bb : NeonI_Scalar2SameMisc_size<u, 0b00, opcode, asmop, FPR8, FPR8>; - def hh : NeonI_Scalar2SameMisc_size<u, 0b01, opcode, asmop, FPR16, FPR16>; - def ss : NeonI_Scalar2SameMisc_size<u, 0b10, opcode, asmop, FPR32, FPR32>; -} - -class NeonI_Scalar2SameMisc_fcvtxn_D_size<bit u, bits<5> opcode, string asmop> - : NeonI_Scalar2SameMisc_size<u, 0b01, opcode, asmop, FPR32, FPR64>; - -multiclass NeonI_Scalar2SameMisc_narrow_HSD_size<bit u, bits<5> opcode, - string asmop> { - def bh : NeonI_Scalar2SameMisc_size<u, 0b00, opcode, asmop, FPR8, 
FPR16>; - def hs : NeonI_Scalar2SameMisc_size<u, 0b01, opcode, asmop, FPR16, FPR32>; - def sd : NeonI_Scalar2SameMisc_size<u, 0b10, opcode, asmop, FPR32, FPR64>; -} - -class NeonI_Scalar2SameMisc_accum_size<bit u, bits<2> size, bits<5> opcode, - string asmop, RegisterClass FPRC> - : NeonI_Scalar2SameMisc<u, size, opcode, - (outs FPRC:$Rd), (ins FPRC:$Src, FPRC:$Rn), - !strconcat(asmop, "\t$Rd, $Rn"), - [], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - -multiclass NeonI_Scalar2SameMisc_accum_BHSD_size<bit u, bits<5> opcode, - string asmop> { - - let Constraints = "$Src = $Rd" in { - def bb : NeonI_Scalar2SameMisc_accum_size<u, 0b00, opcode, asmop, FPR8>; - def hh : NeonI_Scalar2SameMisc_accum_size<u, 0b01, opcode, asmop, FPR16>; - def ss : NeonI_Scalar2SameMisc_accum_size<u, 0b10, opcode, asmop, FPR32>; - def dd : NeonI_Scalar2SameMisc_accum_size<u, 0b11, opcode, asmop, FPR64>; - } -} - -class Neon_Scalar2SameMisc_fcvtxn_D_size_patterns<SDPatternOperator opnode, - Instruction INSTD> - : Pat<(f32 (opnode (f64 FPR64:$Rn))), - (INSTD FPR64:$Rn)>; - -multiclass Neon_Scalar2SameMisc_fcvt_SD_size_patterns<SDPatternOperator opnode, - Instruction INSTS, - Instruction INSTD> { - def : Pat<(v1i32 (opnode (f32 FPR32:$Rn))), - (INSTS FPR32:$Rn)>; - def : Pat<(v1i64 (opnode (f64 FPR64:$Rn))), - (INSTD FPR64:$Rn)>; -} - -class Neon_Scalar2SameMisc_vcvt_D_size_patterns<SDPatternOperator opnode, - Instruction INSTD> - : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn))), - (INSTD FPR64:$Rn)>; - -multiclass Neon_Scalar2SameMisc_cvt_SD_size_patterns<SDPatternOperator opnode, - Instruction INSTS, - Instruction INSTD> { - def : Pat<(f32 (opnode (v1i32 FPR32:$Rn))), - (INSTS FPR32:$Rn)>; - def : Pat<(f64 (opnode (v1i64 FPR64:$Rn))), - (INSTD FPR64:$Rn)>; -} - -multiclass Neon_Scalar2SameMisc_SD_size_patterns<SDPatternOperator opnode, - Instruction INSTS, - Instruction INSTD> { - def : Pat<(f32 (opnode (f32 FPR32:$Rn))), - (INSTS FPR32:$Rn)>; - def : Pat<(f64 (opnode (f64 FPR64:$Rn))), - (INSTD FPR64:$Rn)>; -} - -class Neon_Scalar2SameMisc_V1_D_size_patterns<SDPatternOperator opnode, - Instruction INSTD> - : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))), - (INSTD FPR64:$Rn)>; - -class NeonI_Scalar2SameMisc_cmpz_D_size<bit u, bits<5> opcode, string asmop> - : NeonI_Scalar2SameMisc<u, 0b11, opcode, - (outs FPR64:$Rd), (ins FPR64:$Rn, neon_uimm0:$Imm), - !strconcat(asmop, "\t$Rd, $Rn, $Imm"), - [], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - -multiclass NeonI_Scalar2SameMisc_cmpz_SD_size<bit u, bits<5> opcode, - string asmop> { - def ssi : NeonI_Scalar2SameMisc<u, 0b10, opcode, - (outs FPR32:$Rd), (ins FPR32:$Rn, fpzz32:$FPImm), - !strconcat(asmop, "\t$Rd, $Rn, $FPImm"), - [], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - def ddi : NeonI_Scalar2SameMisc<u, 0b11, opcode, - (outs FPR64:$Rd), (ins FPR64:$Rn, fpzz32:$FPImm), - !strconcat(asmop, "\t$Rd, $Rn, $FPImm"), - [], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; -} - -class Neon_Scalar2SameMisc_cmpz_D_size_patterns<SDPatternOperator opnode, - Instruction INSTD> - : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), - (v1i64 (bitconvert (v8i8 Neon_AllZero))))), - (INSTD FPR64:$Rn, 0)>; - -class Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<CondCode CC, - Instruction INSTD> - : Pat<(v1i64 (Neon_cmpz (v1i64 FPR64:$Rn), - (i32 neon_uimm0:$Imm), CC)), - (INSTD FPR64:$Rn, neon_uimm0:$Imm)>; - -multiclass Neon_Scalar2SameMisc_cmpz_SD_size_patterns<SDPatternOperator opnode, - CondCode CC, - Instruction INSTS, - Instruction INSTD> { - def : Pat<(v1i32 (opnode (f32 
FPR32:$Rn), (f32 fpzz32:$FPImm))), - (INSTS FPR32:$Rn, fpzz32:$FPImm)>; - def : Pat<(v1i64 (opnode (f64 FPR64:$Rn), (f32 fpzz32:$FPImm))), - (INSTD FPR64:$Rn, fpzz32:$FPImm)>; - def : Pat<(v1i64 (Neon_cmpz (v1f64 FPR64:$Rn), (f32 fpzz32:$FPImm), CC)), - (INSTD FPR64:$Rn, fpzz32:$FPImm)>; -} - -multiclass Neon_Scalar2SameMisc_D_size_patterns<SDPatternOperator opnode, - Instruction INSTD> { - def : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn))), - (INSTD FPR64:$Rn)>; -} - -multiclass Neon_Scalar2SameMisc_BHSD_size_patterns<SDPatternOperator opnode, - Instruction INSTB, - Instruction INSTH, - Instruction INSTS, - Instruction INSTD> - : Neon_Scalar2SameMisc_D_size_patterns<opnode, INSTD> { - def : Pat<(v1i8 (opnode (v1i8 FPR8:$Rn))), - (INSTB FPR8:$Rn)>; - def : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn))), - (INSTH FPR16:$Rn)>; - def : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn))), - (INSTS FPR32:$Rn)>; -} - -multiclass Neon_Scalar2SameMisc_narrow_HSD_size_patterns< - SDPatternOperator opnode, - Instruction INSTH, - Instruction INSTS, - Instruction INSTD> { - def : Pat<(v1i8 (opnode (v1i16 FPR16:$Rn))), - (INSTH FPR16:$Rn)>; - def : Pat<(v1i16 (opnode (v1i32 FPR32:$Rn))), - (INSTS FPR32:$Rn)>; - def : Pat<(v1i32 (opnode (v1i64 FPR64:$Rn))), - (INSTD FPR64:$Rn)>; - -} - -multiclass Neon_Scalar2SameMisc_accum_BHSD_size_patterns< - SDPatternOperator opnode, - Instruction INSTB, - Instruction INSTH, - Instruction INSTS, - Instruction INSTD> { - def : Pat<(v1i8 (opnode (v1i8 FPR8:$Src), (v1i8 FPR8:$Rn))), - (INSTB FPR8:$Src, FPR8:$Rn)>; - def : Pat<(v1i16 (opnode (v1i16 FPR16:$Src), (v1i16 FPR16:$Rn))), - (INSTH FPR16:$Src, FPR16:$Rn)>; - def : Pat<(v1i32 (opnode (v1i32 FPR32:$Src), (v1i32 FPR32:$Rn))), - (INSTS FPR32:$Src, FPR32:$Rn)>; - def : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn))), - (INSTD FPR64:$Src, FPR64:$Rn)>; -} - -// Scalar Shift By Immediate - -class NeonI_ScalarShiftImm_size<bit u, bits<5> opcode, string asmop, - RegisterClass FPRC, Operand ImmTy> - : NeonI_ScalarShiftImm<u, opcode, - (outs FPRC:$Rd), (ins FPRC:$Rn, ImmTy:$Imm), - !strconcat(asmop, "\t$Rd, $Rn, $Imm"), - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - -multiclass NeonI_ScalarShiftRightImm_D_size<bit u, bits<5> opcode, - string asmop> { - def ddi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR64, shr_imm64> { - bits<6> Imm; - let Inst{22} = 0b1; // immh:immb = 1xxxxxx - let Inst{21-16} = Imm; - } -} - -multiclass NeonI_ScalarShiftRightImm_BHSD_size<bit u, bits<5> opcode, - string asmop> - : NeonI_ScalarShiftRightImm_D_size<u, opcode, asmop> { - def bbi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR8, shr_imm8> { - bits<3> Imm; - let Inst{22-19} = 0b0001; // immh:immb = 0001xxx - let Inst{18-16} = Imm; - } - def hhi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR16, shr_imm16> { - bits<4> Imm; - let Inst{22-20} = 0b001; // immh:immb = 001xxxx - let Inst{19-16} = Imm; - } - def ssi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR32, shr_imm32> { - bits<5> Imm; - let Inst{22-21} = 0b01; // immh:immb = 01xxxxx - let Inst{20-16} = Imm; - } -} - -multiclass NeonI_ScalarShiftLeftImm_D_size<bit u, bits<5> opcode, - string asmop> { - def ddi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR64, shl_imm64> { - bits<6> Imm; - let Inst{22} = 0b1; // immh:immb = 1xxxxxx - let Inst{21-16} = Imm; - } -} - -multiclass NeonI_ScalarShiftLeftImm_BHSD_size<bit u, bits<5> opcode, - string asmop> - : NeonI_ScalarShiftLeftImm_D_size<u, opcode, asmop> { - def bbi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR8, shl_imm8> { - 
bits<3> Imm; - let Inst{22-19} = 0b0001; // immh:immb = 0001xxx - let Inst{18-16} = Imm; - } - def hhi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR16, shl_imm16> { - bits<4> Imm; - let Inst{22-20} = 0b001; // immh:immb = 001xxxx - let Inst{19-16} = Imm; - } - def ssi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR32, shl_imm32> { - bits<5> Imm; - let Inst{22-21} = 0b01; // immh:immb = 01xxxxx - let Inst{20-16} = Imm; - } -} - -class NeonI_ScalarShiftRightImm_accum_D_size<bit u, bits<5> opcode, string asmop> - : NeonI_ScalarShiftImm<u, opcode, - (outs FPR64:$Rd), - (ins FPR64:$Src, FPR64:$Rn, shr_imm64:$Imm), - !strconcat(asmop, "\t$Rd, $Rn, $Imm"), - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { - bits<6> Imm; - let Inst{22} = 0b1; // immh:immb = 1xxxxxx - let Inst{21-16} = Imm; - let Constraints = "$Src = $Rd"; -} - -class NeonI_ScalarShiftLeftImm_accum_D_size<bit u, bits<5> opcode, string asmop> - : NeonI_ScalarShiftImm<u, opcode, - (outs FPR64:$Rd), - (ins FPR64:$Src, FPR64:$Rn, shl_imm64:$Imm), - !strconcat(asmop, "\t$Rd, $Rn, $Imm"), - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { - bits<6> Imm; - let Inst{22} = 0b1; // immh:immb = 1xxxxxx - let Inst{21-16} = Imm; - let Constraints = "$Src = $Rd"; -} - -class NeonI_ScalarShiftImm_narrow_size<bit u, bits<5> opcode, string asmop, - RegisterClass FPRCD, RegisterClass FPRCS, - Operand ImmTy> - : NeonI_ScalarShiftImm<u, opcode, - (outs FPRCD:$Rd), (ins FPRCS:$Rn, ImmTy:$Imm), - !strconcat(asmop, "\t$Rd, $Rn, $Imm"), - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - -multiclass NeonI_ScalarShiftImm_narrow_HSD_size<bit u, bits<5> opcode, - string asmop> { - def bhi : NeonI_ScalarShiftImm_narrow_size<u, opcode, asmop, FPR8, FPR16, - shr_imm8> { - bits<3> Imm; - let Inst{22-19} = 0b0001; // immh:immb = 0001xxx - let Inst{18-16} = Imm; - } - def hsi : NeonI_ScalarShiftImm_narrow_size<u, opcode, asmop, FPR16, FPR32, - shr_imm16> { - bits<4> Imm; - let Inst{22-20} = 0b001; // immh:immb = 001xxxx - let Inst{19-16} = Imm; - } - def sdi : NeonI_ScalarShiftImm_narrow_size<u, opcode, asmop, FPR32, FPR64, - shr_imm32> { - bits<5> Imm; - let Inst{22-21} = 0b01; // immh:immb = 01xxxxx - let Inst{20-16} = Imm; - } -} - -multiclass NeonI_ScalarShiftImm_cvt_SD_size<bit u, bits<5> opcode, string asmop> { - def ssi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR32, shr_imm32> { - bits<5> Imm; - let Inst{22-21} = 0b01; // immh:immb = 01xxxxx - let Inst{20-16} = Imm; - } - def ddi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR64, shr_imm64> { - bits<6> Imm; - let Inst{22} = 0b1; // immh:immb = 1xxxxxx - let Inst{21-16} = Imm; - } -} - -multiclass Neon_ScalarShiftRImm_D_size_patterns<SDPatternOperator opnode, - Instruction INSTD> { - def ddi : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))), - (INSTD FPR64:$Rn, imm:$Imm)>; -} - -multiclass Neon_ScalarShiftLImm_D_size_patterns<SDPatternOperator opnode, - Instruction INSTD> { - def ddi : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (i32 shl_imm64:$Imm))), - (INSTD FPR64:$Rn, imm:$Imm)>; -} - -class Neon_ScalarShiftLImm_V1_D_size_patterns<SDPatternOperator opnode, - Instruction INSTD> - : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), - (v1i64 (Neon_vdup (i32 shl_imm64:$Imm))))), - (INSTD FPR64:$Rn, imm:$Imm)>; - -class Neon_ScalarShiftRImm_V1_D_size_patterns<SDPatternOperator opnode, - Instruction INSTD> - : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), - (v1i64 (Neon_vdup (i32 shr_imm64:$Imm))))), - (INSTD FPR64:$Rn, imm:$Imm)>; - -multiclass 
Neon_ScalarShiftLImm_BHSD_size_patterns<SDPatternOperator opnode, - Instruction INSTB, - Instruction INSTH, - Instruction INSTS, - Instruction INSTD> - : Neon_ScalarShiftLImm_D_size_patterns<opnode, INSTD> { - def bbi : Pat<(v1i8 (opnode (v1i8 FPR8:$Rn), (i32 shl_imm8:$Imm))), - (INSTB FPR8:$Rn, imm:$Imm)>; - def hhi : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (i32 shl_imm16:$Imm))), - (INSTH FPR16:$Rn, imm:$Imm)>; - def ssi : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (i32 shl_imm32:$Imm))), - (INSTS FPR32:$Rn, imm:$Imm)>; -} - -class Neon_ScalarShiftLImm_accum_D_size_patterns<SDPatternOperator opnode, - Instruction INSTD> - : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn), - (i32 shl_imm64:$Imm))), - (INSTD FPR64:$Src, FPR64:$Rn, imm:$Imm)>; - -class Neon_ScalarShiftRImm_accum_D_size_patterns<SDPatternOperator opnode, - Instruction INSTD> - : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn), - (i32 shr_imm64:$Imm))), - (INSTD FPR64:$Src, FPR64:$Rn, imm:$Imm)>; - -multiclass Neon_ScalarShiftImm_narrow_HSD_size_patterns< - SDPatternOperator opnode, - Instruction INSTH, - Instruction INSTS, - Instruction INSTD> { - def bhi : Pat<(v1i8 (opnode (v1i16 FPR16:$Rn), (i32 shr_imm16:$Imm))), - (INSTH FPR16:$Rn, imm:$Imm)>; - def hsi : Pat<(v1i16 (opnode (v1i32 FPR32:$Rn), (i32 shr_imm32:$Imm))), - (INSTS FPR32:$Rn, imm:$Imm)>; - def sdi : Pat<(v1i32 (opnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))), - (INSTD FPR64:$Rn, imm:$Imm)>; -} - -multiclass Neon_ScalarShiftImm_scvtf_SD_size_patterns<SDPatternOperator opnode, - Instruction INSTS, - Instruction INSTD> { - def ssi : Pat<(f32 (opnode (v1i32 FPR32:$Rn), (i32 shr_imm32:$Imm))), - (INSTS FPR32:$Rn, imm:$Imm)>; - def ddi : Pat<(f64 (opnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))), - (INSTD FPR64:$Rn, imm:$Imm)>; -} - -multiclass Neon_ScalarShiftImm_fcvts_SD_size_patterns<SDPatternOperator opnode, - Instruction INSTS, - Instruction INSTD> { - def ssi : Pat<(v1i32 (opnode (f32 FPR32:$Rn), (i32 shr_imm32:$Imm))), - (INSTS FPR32:$Rn, imm:$Imm)>; - def ddi : Pat<(v1i64 (opnode (f64 FPR64:$Rn), (i32 shr_imm64:$Imm))), - (INSTD FPR64:$Rn, imm:$Imm)>; -} - -// Scalar Signed Shift Right (Immediate) -defm SSHR : NeonI_ScalarShiftRightImm_D_size<0b0, 0b00000, "sshr">; -defm : Neon_ScalarShiftRImm_D_size_patterns<int_aarch64_neon_vshrds_n, SSHRddi>; -// Pattern to match llvm.arm.* intrinsic. -def : Neon_ScalarShiftRImm_V1_D_size_patterns<sra, SSHRddi>; - -// Scalar Unsigned Shift Right (Immediate) -defm USHR : NeonI_ScalarShiftRightImm_D_size<0b1, 0b00000, "ushr">; -defm : Neon_ScalarShiftRImm_D_size_patterns<int_aarch64_neon_vshrdu_n, USHRddi>; -// Pattern to match llvm.arm.* intrinsic. 
-def : Neon_ScalarShiftRImm_V1_D_size_patterns<srl, USHRddi>; - -// Scalar Signed Rounding Shift Right (Immediate) -defm SRSHR : NeonI_ScalarShiftRightImm_D_size<0b0, 0b00100, "srshr">; -defm : Neon_ScalarShiftRImm_D_size_patterns<int_aarch64_neon_vsrshr, SRSHRddi>; - -// Scalar Unsigned Rounding Shift Right (Immediate) -defm URSHR : NeonI_ScalarShiftRightImm_D_size<0b1, 0b00100, "urshr">; -defm : Neon_ScalarShiftRImm_D_size_patterns<int_aarch64_neon_vurshr, URSHRddi>; - -// Scalar Signed Shift Right and Accumulate (Immediate) -def SSRA : NeonI_ScalarShiftRightImm_accum_D_size<0b0, 0b00010, "ssra">; -def : Neon_ScalarShiftRImm_accum_D_size_patterns - <int_aarch64_neon_vsrads_n, SSRA>; - -// Scalar Unsigned Shift Right and Accumulate (Immediate) -def USRA : NeonI_ScalarShiftRightImm_accum_D_size<0b1, 0b00010, "usra">; -def : Neon_ScalarShiftRImm_accum_D_size_patterns - <int_aarch64_neon_vsradu_n, USRA>; - -// Scalar Signed Rounding Shift Right and Accumulate (Immediate) -def SRSRA : NeonI_ScalarShiftRightImm_accum_D_size<0b0, 0b00110, "srsra">; -def : Neon_ScalarShiftRImm_accum_D_size_patterns - <int_aarch64_neon_vrsrads_n, SRSRA>; - -// Scalar Unsigned Rounding Shift Right and Accumulate (Immediate) -def URSRA : NeonI_ScalarShiftRightImm_accum_D_size<0b1, 0b00110, "ursra">; -def : Neon_ScalarShiftRImm_accum_D_size_patterns - <int_aarch64_neon_vrsradu_n, URSRA>; - -// Scalar Shift Left (Immediate) -defm SHL : NeonI_ScalarShiftLeftImm_D_size<0b0, 0b01010, "shl">; -defm : Neon_ScalarShiftLImm_D_size_patterns<int_aarch64_neon_vshld_n, SHLddi>; -// Pattern to match llvm.arm.* intrinsic. -def : Neon_ScalarShiftLImm_V1_D_size_patterns<shl, SHLddi>; - -// Signed Saturating Shift Left (Immediate) -defm SQSHL : NeonI_ScalarShiftLeftImm_BHSD_size<0b0, 0b01110, "sqshl">; -defm : Neon_ScalarShiftLImm_BHSD_size_patterns<int_aarch64_neon_vqshls_n, - SQSHLbbi, SQSHLhhi, - SQSHLssi, SQSHLddi>; -// Pattern to match llvm.arm.* intrinsic. -defm : Neon_ScalarShiftLImm_D_size_patterns<Neon_sqrshlImm, SQSHLddi>; - -// Unsigned Saturating Shift Left (Immediate) -defm UQSHL : NeonI_ScalarShiftLeftImm_BHSD_size<0b1, 0b01110, "uqshl">; -defm : Neon_ScalarShiftLImm_BHSD_size_patterns<int_aarch64_neon_vqshlu_n, - UQSHLbbi, UQSHLhhi, - UQSHLssi, UQSHLddi>; -// Pattern to match llvm.arm.* intrinsic. 
-defm : Neon_ScalarShiftLImm_D_size_patterns<Neon_uqrshlImm, UQSHLddi>; - -// Signed Saturating Shift Left Unsigned (Immediate) -defm SQSHLU : NeonI_ScalarShiftLeftImm_BHSD_size<0b1, 0b01100, "sqshlu">; -defm : Neon_ScalarShiftLImm_BHSD_size_patterns<int_aarch64_neon_vsqshlu, - SQSHLUbbi, SQSHLUhhi, - SQSHLUssi, SQSHLUddi>; - -// Shift Right And Insert (Immediate) -def SRI : NeonI_ScalarShiftRightImm_accum_D_size<0b1, 0b01000, "sri">; -def : Neon_ScalarShiftRImm_accum_D_size_patterns - <int_aarch64_neon_vsri, SRI>; - -// Shift Left And Insert (Immediate) -def SLI : NeonI_ScalarShiftLeftImm_accum_D_size<0b1, 0b01010, "sli">; -def : Neon_ScalarShiftLImm_accum_D_size_patterns - <int_aarch64_neon_vsli, SLI>; - -// Signed Saturating Shift Right Narrow (Immediate) -defm SQSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b0, 0b10010, "sqshrn">; -defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqshrn, - SQSHRNbhi, SQSHRNhsi, - SQSHRNsdi>; - -// Unsigned Saturating Shift Right Narrow (Immediate) -defm UQSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10010, "uqshrn">; -defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vuqshrn, - UQSHRNbhi, UQSHRNhsi, - UQSHRNsdi>; - -// Signed Saturating Rounded Shift Right Narrow (Immediate) -defm SQRSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b0, 0b10011, "sqrshrn">; -defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqrshrn, - SQRSHRNbhi, SQRSHRNhsi, - SQRSHRNsdi>; - -// Unsigned Saturating Rounded Shift Right Narrow (Immediate) -defm UQRSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10011, "uqrshrn">; -defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vuqrshrn, - UQRSHRNbhi, UQRSHRNhsi, - UQRSHRNsdi>; - -// Signed Saturating Shift Right Unsigned Narrow (Immediate) -defm SQSHRUN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10000, "sqshrun">; -defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqshrun, - SQSHRUNbhi, SQSHRUNhsi, - SQSHRUNsdi>; - -// Signed Saturating Rounded Shift Right Unsigned Narrow (Immediate) -defm SQRSHRUN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10001, "sqrshrun">; -defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqrshrun, - SQRSHRUNbhi, SQRSHRUNhsi, - SQRSHRUNsdi>; - -// Scalar Signed Fixed-point Convert To Floating-Point (Immediate) -defm SCVTF_N : NeonI_ScalarShiftImm_cvt_SD_size<0b0, 0b11100, "scvtf">; -defm : Neon_ScalarShiftImm_scvtf_SD_size_patterns<int_aarch64_neon_vcvtfxs2fp_n, - SCVTF_Nssi, SCVTF_Nddi>; - -// Scalar Unsigned Fixed-point Convert To Floating-Point (Immediate) -defm UCVTF_N : NeonI_ScalarShiftImm_cvt_SD_size<0b1, 0b11100, "ucvtf">; -defm : Neon_ScalarShiftImm_scvtf_SD_size_patterns<int_aarch64_neon_vcvtfxu2fp_n, - UCVTF_Nssi, UCVTF_Nddi>; - -// Scalar Floating-point Convert To Signed Fixed-point (Immediate) -defm FCVTZS_N : NeonI_ScalarShiftImm_cvt_SD_size<0b0, 0b11111, "fcvtzs">; -defm : Neon_ScalarShiftImm_fcvts_SD_size_patterns<int_aarch64_neon_vcvtfp2fxs_n, - FCVTZS_Nssi, FCVTZS_Nddi>; - -// Scalar Floating-point Convert To Unsigned Fixed-point (Immediate) -defm FCVTZU_N : NeonI_ScalarShiftImm_cvt_SD_size<0b1, 0b11111, "fcvtzu">; -defm : Neon_ScalarShiftImm_fcvts_SD_size_patterns<int_aarch64_neon_vcvtfp2fxu_n, - FCVTZU_Nssi, FCVTZU_Nddi>; - -// Patterns For Convert Instructions Between v1f64 and v1i64 -class Neon_ScalarShiftImm_cvtf_v1f64_pattern<SDPatternOperator opnode, - Instruction INST> - : Pat<(v1f64 (opnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))), - (INST FPR64:$Rn, 
imm:$Imm)>; - -class Neon_ScalarShiftImm_fcvt_v1f64_pattern<SDPatternOperator opnode, - Instruction INST> - : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn), (i32 shr_imm64:$Imm))), - (INST FPR64:$Rn, imm:$Imm)>; - -def : Neon_ScalarShiftImm_cvtf_v1f64_pattern<int_arm_neon_vcvtfxs2fp, - SCVTF_Nddi>; - -def : Neon_ScalarShiftImm_cvtf_v1f64_pattern<int_arm_neon_vcvtfxu2fp, - UCVTF_Nddi>; - -def : Neon_ScalarShiftImm_fcvt_v1f64_pattern<int_arm_neon_vcvtfp2fxs, - FCVTZS_Nddi>; - -def : Neon_ScalarShiftImm_fcvt_v1f64_pattern<int_arm_neon_vcvtfp2fxu, - FCVTZU_Nddi>; - -// Scalar Integer Add -let isCommutable = 1 in { -def ADDddd : NeonI_Scalar3Same_D_size<0b0, 0b10000, "add">; -} - -// Scalar Integer Sub -def SUBddd : NeonI_Scalar3Same_D_size<0b1, 0b10000, "sub">; - -// Pattern for Scalar Integer Add and Sub with D register only -defm : Neon_Scalar3Same_D_size_patterns<add, ADDddd>; -defm : Neon_Scalar3Same_D_size_patterns<sub, SUBddd>; - -// Patterns to match llvm.aarch64.* intrinsic for Scalar Add, Sub -defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vaddds, ADDddd>; -defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vadddu, ADDddd>; -defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vsubds, SUBddd>; -defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vsubdu, SUBddd>; - -// Scalar Integer Saturating Add (Signed, Unsigned) -defm SQADD : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00001, "sqadd", 1>; -defm UQADD : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00001, "uqadd", 1>; - -// Scalar Integer Saturating Sub (Signed, Unsigned) -defm SQSUB : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00101, "sqsub", 0>; -defm UQSUB : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00101, "uqsub", 0>; - - -// Patterns to match llvm.aarch64.* intrinsic for -// Scalar Integer Saturating Add, Sub (Signed, Unsigned) -defm : Neon_Scalar3Same_BHSD_size_patterns<int_arm_neon_vqadds, SQADDbbb, - SQADDhhh, SQADDsss, SQADDddd>; -defm : Neon_Scalar3Same_BHSD_size_patterns<int_arm_neon_vqaddu, UQADDbbb, - UQADDhhh, UQADDsss, UQADDddd>; -defm : Neon_Scalar3Same_BHSD_size_patterns<int_arm_neon_vqsubs, SQSUBbbb, - SQSUBhhh, SQSUBsss, SQSUBddd>; -defm : Neon_Scalar3Same_BHSD_size_patterns<int_arm_neon_vqsubu, UQSUBbbb, - UQSUBhhh, UQSUBsss, UQSUBddd>; - -// Scalar Integer Saturating Doubling Multiply Half High -let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul, ReadFPMul] in -defm SQDMULH : NeonI_Scalar3Same_HS_sizes<0b0, 0b10110, "sqdmulh", 1>; - -// Scalar Integer Saturating Rounding Doubling Multiply Half High -let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in { -defm SQRDMULH : NeonI_Scalar3Same_HS_sizes<0b1, 0b10110, "sqrdmulh", 1>; -} - -// Patterns to match llvm.arm.* intrinsic for -// Scalar Integer Saturating Doubling Multiply Half High and -// Scalar Integer Saturating Rounding Doubling Multiply Half High -defm : Neon_Scalar3Same_HS_size_patterns<int_arm_neon_vqdmulh, SQDMULHhhh, - SQDMULHsss>; -defm : Neon_Scalar3Same_HS_size_patterns<int_arm_neon_vqrdmulh, SQRDMULHhhh, - SQRDMULHsss>; - -let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul, ReadFPMul] in { -// Scalar Floating-point Multiply Extended -defm FMULX : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11011, "fmulx", 1>; -} - -// Scalar Floating-point Reciprocal Step -defm FRECPS : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11111, "frecps", 0>; -defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_vrecps, f32, f32, - FRECPSsss, f64, f64, FRECPSddd>; -def : Pat<(v1f64 (int_arm_neon_vrecps (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), - (FRECPSddd FPR64:$Rn, FPR64:$Rm)>; - -// Scalar 
Floating-point Reciprocal Square Root Step -defm FRSQRTS : NeonI_Scalar3Same_SD_sizes<0b0, 0b1, 0b11111, "frsqrts", 0>; -defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_vrsqrts, f32, f32, - FRSQRTSsss, f64, f64, FRSQRTSddd>; -def : Pat<(v1f64 (int_arm_neon_vrsqrts (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), - (FRSQRTSddd FPR64:$Rn, FPR64:$Rm)>; -def : Pat<(v1f64 (fsqrt (v1f64 FPR64:$Rn))), (FSQRTdd FPR64:$Rn)>; - -// Patterns to match llvm.aarch64.* intrinsic for -// Scalar Floating-point Multiply Extended, -multiclass Neon_Scalar3Same_MULX_SD_size_patterns<SDPatternOperator opnode, - Instruction INSTS, - Instruction INSTD> { - def : Pat<(f32 (opnode (f32 FPR32:$Rn), (f32 FPR32:$Rm))), - (INSTS FPR32:$Rn, FPR32:$Rm)>; - def : Pat<(f64 (opnode (f64 FPR64:$Rn), (f64 FPR64:$Rm))), - (INSTD FPR64:$Rn, FPR64:$Rm)>; -} - -defm : Neon_Scalar3Same_MULX_SD_size_patterns<int_aarch64_neon_vmulx, - FMULXsss, FMULXddd>; -def : Pat<(v1f64 (int_aarch64_neon_vmulx (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), - (FMULXddd FPR64:$Rn, FPR64:$Rm)>; - -// Scalar Integer Shift Left (Signed, Unsigned) -def SSHLddd : NeonI_Scalar3Same_D_size<0b0, 0b01000, "sshl">; -def USHLddd : NeonI_Scalar3Same_D_size<0b1, 0b01000, "ushl">; - -// Patterns to match llvm.arm.* intrinsic for -// Scalar Integer Shift Left (Signed, Unsigned) -defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vshifts, SSHLddd>; -defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vshiftu, USHLddd>; - -// Patterns to match llvm.aarch64.* intrinsic for -// Scalar Integer Shift Left (Signed, Unsigned) -defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vshlds, SSHLddd>; -defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vshldu, USHLddd>; - -// Scalar Integer Saturating Shift Left (Signed, Unsigned) -defm SQSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01001, "sqshl", 0>; -defm UQSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01001, "uqshl", 0>; - -// Patterns to match llvm.aarch64.* intrinsic for -// Scalar Integer Saturating Shift Left (Signed, Unsigned) -defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqshls, SQSHLbbb, - SQSHLhhh, SQSHLsss, SQSHLddd>; -defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqshlu, UQSHLbbb, - UQSHLhhh, UQSHLsss, UQSHLddd>; - -// Patterns to match llvm.arm.* intrinsic for -// Scalar Integer Saturating Shift Left (Signed, Unsigned) -defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqshifts, SQSHLddd>; -defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqshiftu, UQSHLddd>; - -// Scalar Integer Rounding Shift Left (Signed, Unsigned) -def SRSHLddd: NeonI_Scalar3Same_D_size<0b0, 0b01010, "srshl">; -def URSHLddd: NeonI_Scalar3Same_D_size<0b1, 0b01010, "urshl">; - -// Patterns to match llvm.aarch64.* intrinsic for -// Scalar Integer Rounding Shift Left (Signed, Unsigned) -defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vrshlds, SRSHLddd>; -defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vrshldu, URSHLddd>; - -// Patterns to match llvm.arm.* intrinsic for -// Scalar Integer Rounding Shift Left (Signed, Unsigned) -defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vrshifts, SRSHLddd>; -defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vrshiftu, URSHLddd>; - -// Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned) -defm SQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01011, "sqrshl", 0>; -defm UQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01011, "uqrshl", 0>; - -// Patterns to match llvm.aarch64.* intrinsic for -// Scalar Integer Saturating Rounding Shift Left (Signed, 
Unsigned) -defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqrshls, SQRSHLbbb, - SQRSHLhhh, SQRSHLsss, SQRSHLddd>; -defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqrshlu, UQRSHLbbb, - UQRSHLhhh, UQRSHLsss, UQRSHLddd>; - -// Patterns to match llvm.arm.* intrinsic for -// Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned) -defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqrshifts, SQRSHLddd>; -defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqrshiftu, UQRSHLddd>; - -let SchedRW = [WriteFPMAC, ReadFPMAC, ReadFPMAC, ReadFPMAC] in { -// Signed Saturating Doubling Multiply-Add Long -defm SQDMLAL : NeonI_Scalar3Diff_ml_HS_size<0b0, 0b1001, "sqdmlal">; -} -defm : Neon_Scalar3Diff_ml_HS_size_patterns<int_aarch64_neon_vqdmlal, - SQDMLALshh, SQDMLALdss>; - -// Signed Saturating Doubling Multiply-Subtract Long -let SchedRW = [WriteFPMAC, ReadFPMAC, ReadFPMAC, ReadFPMAC] in { -defm SQDMLSL : NeonI_Scalar3Diff_ml_HS_size<0b0, 0b1011, "sqdmlsl">; -} -defm : Neon_Scalar3Diff_ml_HS_size_patterns<int_aarch64_neon_vqdmlsl, - SQDMLSLshh, SQDMLSLdss>; - -// Signed Saturating Doubling Multiply Long -let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul, ReadFPMul] in { -defm SQDMULL : NeonI_Scalar3Diff_HS_size<0b0, 0b1101, "sqdmull">; -} -defm : Neon_Scalar3Diff_HS_size_patterns<int_arm_neon_vqdmull, - SQDMULLshh, SQDMULLdss>; - -// Scalar Signed Integer Convert To Floating-point -defm SCVTF : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11101, "scvtf">; -defm : Neon_Scalar2SameMisc_cvt_SD_size_patterns<int_aarch64_neon_vcvtint2fps, - SCVTFss, SCVTFdd>; - -// Scalar Unsigned Integer Convert To Floating-point -defm UCVTF : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11101, "ucvtf">; -defm : Neon_Scalar2SameMisc_cvt_SD_size_patterns<int_aarch64_neon_vcvtint2fpu, - UCVTFss, UCVTFdd>; - -// Scalar Floating-point Converts -def FCVTXN : NeonI_Scalar2SameMisc_fcvtxn_D_size<0b1, 0b10110, "fcvtxn">; -def : Neon_Scalar2SameMisc_fcvtxn_D_size_patterns<int_aarch64_neon_fcvtxn, - FCVTXN>; - -defm FCVTNS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11010, "fcvtns">; -defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtns, - FCVTNSss, FCVTNSdd>; -def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtns, FCVTNSdd>; - -defm FCVTNU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11010, "fcvtnu">; -defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtnu, - FCVTNUss, FCVTNUdd>; -def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtnu, FCVTNUdd>; - -defm FCVTMS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11011, "fcvtms">; -defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtms, - FCVTMSss, FCVTMSdd>; -def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtms, FCVTMSdd>; - -defm FCVTMU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11011, "fcvtmu">; -defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtmu, - FCVTMUss, FCVTMUdd>; -def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtmu, FCVTMUdd>; - -defm FCVTAS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11100, "fcvtas">; -defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtas, - FCVTASss, FCVTASdd>; -def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtas, FCVTASdd>; - -defm FCVTAU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11100, "fcvtau">; -defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtau, - FCVTAUss, FCVTAUdd>; -def : 
Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtau, FCVTAUdd>; - -defm FCVTPS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11010, "fcvtps">; -defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtps, - FCVTPSss, FCVTPSdd>; -def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtps, FCVTPSdd>; - -defm FCVTPU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11010, "fcvtpu">; -defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtpu, - FCVTPUss, FCVTPUdd>; -def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtpu, FCVTPUdd>; - -defm FCVTZS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11011, "fcvtzs">; -defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtzs, - FCVTZSss, FCVTZSdd>; -def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_aarch64_neon_vcvtzs, - FCVTZSdd>; - -defm FCVTZU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11011, "fcvtzu">; -defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtzu, - FCVTZUss, FCVTZUdd>; -def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_aarch64_neon_vcvtzu, - FCVTZUdd>; - -// Patterns For Convert Instructions Between v1f64 and v1i64 -class Neon_Scalar2SameMisc_cvtf_v1f64_pattern<SDPatternOperator opnode, - Instruction INST> - : Pat<(v1f64 (opnode (v1i64 FPR64:$Rn))), (INST FPR64:$Rn)>; - -class Neon_Scalar2SameMisc_fcvt_v1f64_pattern<SDPatternOperator opnode, - Instruction INST> - : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn))), (INST FPR64:$Rn)>; - -def : Neon_Scalar2SameMisc_cvtf_v1f64_pattern<sint_to_fp, SCVTFdd>; -def : Neon_Scalar2SameMisc_cvtf_v1f64_pattern<uint_to_fp, UCVTFdd>; - -def : Neon_Scalar2SameMisc_fcvt_v1f64_pattern<fp_to_sint, FCVTZSdd>; -def : Neon_Scalar2SameMisc_fcvt_v1f64_pattern<fp_to_uint, FCVTZUdd>; - -// Scalar Floating-point Reciprocal Estimate -defm FRECPE : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11101, "frecpe">; -defm : Neon_Scalar2SameMisc_SD_size_patterns<int_aarch64_neon_vrecpe, - FRECPEss, FRECPEdd>; -def : Neon_Scalar2SameMisc_V1_D_size_patterns<int_arm_neon_vrecpe, - FRECPEdd>; - -// Scalar Floating-point Reciprocal Exponent -defm FRECPX : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11111, "frecpx">; -defm : Neon_Scalar2SameMisc_SD_size_patterns<int_aarch64_neon_vrecpx, - FRECPXss, FRECPXdd>; - -// Scalar Floating-point Reciprocal Square Root Estimate -defm FRSQRTE: NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11101, "frsqrte">; -defm : Neon_Scalar2SameMisc_SD_size_patterns<int_aarch64_neon_vrsqrte, - FRSQRTEss, FRSQRTEdd>; -def : Neon_Scalar2SameMisc_V1_D_size_patterns<int_arm_neon_vrsqrte, - FRSQRTEdd>; - -// Scalar Floating-point Round -class Neon_ScalarFloatRound_pattern<SDPatternOperator opnode, Instruction INST> - : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))), (INST FPR64:$Rn)>; - -def : Neon_ScalarFloatRound_pattern<fceil, FRINTPdd>; -def : Neon_ScalarFloatRound_pattern<ffloor, FRINTMdd>; -def : Neon_ScalarFloatRound_pattern<ftrunc, FRINTZdd>; -def : Neon_ScalarFloatRound_pattern<frint, FRINTXdd>; -def : Neon_ScalarFloatRound_pattern<fnearbyint, FRINTIdd>; -def : Neon_ScalarFloatRound_pattern<frnd, FRINTAdd>; -def : Neon_ScalarFloatRound_pattern<int_aarch64_neon_frintn, FRINTNdd>; - -// Scalar Integer Compare - -// Scalar Compare Bitwise Equal -def CMEQddd: NeonI_Scalar3Same_D_size<0b1, 0b10001, "cmeq">; -defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vceq, CMEQddd>; - -class Neon_Scalar3Same_cmp_D_size_v1_patterns<SDPatternOperator opnode, - Instruction INSTD, - CondCode CC> - : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 
FPR64:$Rm), CC)), - (INSTD FPR64:$Rn, FPR64:$Rm)>; - -def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMEQddd, SETEQ>; - -// Scalar Compare Signed Greather Than Or Equal -def CMGEddd: NeonI_Scalar3Same_D_size<0b0, 0b00111, "cmge">; -defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vcge, CMGEddd>; -def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMGEddd, SETGE>; - -// Scalar Compare Unsigned Higher Or Same -def CMHSddd: NeonI_Scalar3Same_D_size<0b1, 0b00111, "cmhs">; -defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vchs, CMHSddd>; -def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMHSddd, SETUGE>; - -// Scalar Compare Unsigned Higher -def CMHIddd: NeonI_Scalar3Same_D_size<0b1, 0b00110, "cmhi">; -defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vchi, CMHIddd>; -def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMHIddd, SETUGT>; - -// Scalar Compare Signed Greater Than -def CMGTddd: NeonI_Scalar3Same_D_size<0b0, 0b00110, "cmgt">; -defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vcgt, CMGTddd>; -def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMGTddd, SETGT>; - -// Scalar Compare Bitwise Test Bits -def CMTSTddd: NeonI_Scalar3Same_D_size<0b0, 0b10001, "cmtst">; -defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vtstd, CMTSTddd>; -defm : Neon_Scalar3Same_D_size_patterns<Neon_tst, CMTSTddd>; - -// Scalar Compare Bitwise Equal To Zero -def CMEQddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01001, "cmeq">; -def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vceq, - CMEQddi>; -def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETEQ, CMEQddi>; - -// Scalar Compare Signed Greather Than Or Equal To Zero -def CMGEddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b1, 0b01000, "cmge">; -def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcge, - CMGEddi>; -def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETGE, CMGEddi>; - -// Scalar Compare Signed Greater Than Zero -def CMGTddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01000, "cmgt">; -def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcgt, - CMGTddi>; -def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETGT, CMGTddi>; - -// Scalar Compare Signed Less Than Or Equal To Zero -def CMLEddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b1, 0b01001, "cmle">; -def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vclez, - CMLEddi>; -def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETLE, CMLEddi>; - -// Scalar Compare Less Than Zero -def CMLTddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01010, "cmlt">; -def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcltz, - CMLTddi>; -def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETLT, CMLTddi>; - -// Scalar Floating-point Compare - -// Scalar Floating-point Compare Mask Equal -defm FCMEQ: NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11100, "fcmeq">; -defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_fceq, v1i32, f32, - FCMEQsss, v1i64, f64, FCMEQddd>; -def : Neon_Scalar3Same_cmp_V1_D_size_patterns<SETEQ, FCMEQddd>; - -// Scalar Floating-point Compare Mask Equal To Zero -defm FCMEQZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01101, "fcmeq">; -defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_fceq, SETEQ, - FCMEQZssi, FCMEQZddi>; - -// Scalar Floating-point Compare Mask Greater Than Or Equal -defm FCMGE: NeonI_Scalar3Same_SD_sizes<0b1, 0b0, 0b11100, "fcmge">; -defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_fcge, v1i32, f32, - FCMGEsss, v1i64, f64, FCMGEddd>; 
-def : Neon_Scalar3Same_cmp_V1_D_size_patterns<SETGE, FCMGEddd>; - -// Scalar Floating-point Compare Mask Greater Than Or Equal To Zero -defm FCMGEZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b1, 0b01100, "fcmge">; -defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_fcge, SETGE, - FCMGEZssi, FCMGEZddi>; - -// Scalar Floating-point Compare Mask Greather Than -defm FCMGT: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11100, "fcmgt">; -defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_fcgt, v1i32, f32, - FCMGTsss, v1i64, f64, FCMGTddd>; -def : Neon_Scalar3Same_cmp_V1_D_size_patterns<SETGT, FCMGTddd>; - -// Scalar Floating-point Compare Mask Greather Than Zero -defm FCMGTZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01100, "fcmgt">; -defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_fcgt, SETGT, - FCMGTZssi, FCMGTZddi>; - -// Scalar Floating-point Compare Mask Less Than Or Equal To Zero -defm FCMLEZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b1, 0b01101, "fcmle">; -defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_fclez, SETLE, - FCMLEZssi, FCMLEZddi>; - -// Scalar Floating-point Compare Mask Less Than Zero -defm FCMLTZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01110, "fcmlt">; -defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_fcltz, SETLT, - FCMLTZssi, FCMLTZddi>; - -// Scalar Floating-point Absolute Compare Mask Greater Than Or Equal -defm FACGE: NeonI_Scalar3Same_SD_sizes<0b1, 0b0, 0b11101, "facge">; -defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_fcage, v1i32, f32, - FACGEsss, v1i64, f64, FACGEddd>; -def : Pat<(v1i64 (int_arm_neon_vacge (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), - (FACGEddd FPR64:$Rn, FPR64:$Rm)>; - -// Scalar Floating-point Absolute Compare Mask Greater Than -defm FACGT: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11101, "facgt">; -defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_fcagt, v1i32, f32, - FACGTsss, v1i64, f64, FACGTddd>; -def : Pat<(v1i64 (int_arm_neon_vacgt (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), - (FACGTddd FPR64:$Rn, FPR64:$Rm)>; - -// Scalar Floating-point Absolute Difference -defm FABD: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11010, "fabd">; -defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_vabd, f32, f32, - FABDsss, f64, f64, FABDddd>; - -// Scalar Absolute Value -defm ABS : NeonI_Scalar2SameMisc_D_size<0b0, 0b01011, "abs">; -defm : Neon_Scalar2SameMisc_D_size_patterns<int_aarch64_neon_vabs, ABSdd>; - -// Scalar Signed Saturating Absolute Value -defm SQABS : NeonI_Scalar2SameMisc_BHSD_size<0b0, 0b00111, "sqabs">; -defm : Neon_Scalar2SameMisc_BHSD_size_patterns<int_arm_neon_vqabs, - SQABSbb, SQABShh, SQABSss, SQABSdd>; - -// Scalar Negate -defm NEG : NeonI_Scalar2SameMisc_D_size<0b1, 0b01011, "neg">; -defm : Neon_Scalar2SameMisc_D_size_patterns<int_aarch64_neon_vneg, NEGdd>; - -// Scalar Signed Saturating Negate -defm SQNEG : NeonI_Scalar2SameMisc_BHSD_size<0b1, 0b00111, "sqneg">; -defm : Neon_Scalar2SameMisc_BHSD_size_patterns<int_arm_neon_vqneg, - SQNEGbb, SQNEGhh, SQNEGss, SQNEGdd>; - -// Scalar Signed Saturating Accumulated of Unsigned Value -defm SUQADD : NeonI_Scalar2SameMisc_accum_BHSD_size<0b0, 0b00011, "suqadd">; -defm : Neon_Scalar2SameMisc_accum_BHSD_size_patterns<int_aarch64_neon_vuqadd, - SUQADDbb, SUQADDhh, - SUQADDss, SUQADDdd>; - -// Scalar Unsigned Saturating Accumulated of Signed Value -defm USQADD : NeonI_Scalar2SameMisc_accum_BHSD_size<0b1, 0b00011, "usqadd">; -defm : Neon_Scalar2SameMisc_accum_BHSD_size_patterns<int_aarch64_neon_vsqadd, - USQADDbb, USQADDhh, 
- USQADDss, USQADDdd>; - -def : Pat<(v1i64 (int_aarch64_neon_suqadd (v1i64 FPR64:$Src), - (v1i64 FPR64:$Rn))), - (SUQADDdd FPR64:$Src, FPR64:$Rn)>; - -def : Pat<(v1i64 (int_aarch64_neon_usqadd (v1i64 FPR64:$Src), - (v1i64 FPR64:$Rn))), - (USQADDdd FPR64:$Src, FPR64:$Rn)>; - -def : Pat<(v1i64 (int_arm_neon_vabs (v1i64 FPR64:$Rn))), - (ABSdd FPR64:$Rn)>; - -def : Pat<(v1i64 (int_arm_neon_vqabs (v1i64 FPR64:$Rn))), - (SQABSdd FPR64:$Rn)>; - -def : Pat<(v1i64 (int_arm_neon_vqneg (v1i64 FPR64:$Rn))), - (SQNEGdd FPR64:$Rn)>; - -def : Pat<(v1i64 (sub (v1i64 (bitconvert (v8i8 Neon_AllZero))), - (v1i64 FPR64:$Rn))), - (NEGdd FPR64:$Rn)>; - -// Scalar Signed Saturating Extract Unsigned Narrow -defm SQXTUN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b1, 0b10010, "sqxtun">; -defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovnsu, - SQXTUNbh, SQXTUNhs, - SQXTUNsd>; - -// Scalar Signed Saturating Extract Narrow -defm SQXTN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b0, 0b10100, "sqxtn">; -defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovns, - SQXTNbh, SQXTNhs, - SQXTNsd>; - -// Scalar Unsigned Saturating Extract Narrow -defm UQXTN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b1, 0b10100, "uqxtn">; -defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovnu, - UQXTNbh, UQXTNhs, - UQXTNsd>; - -// Scalar Reduce Pairwise - -multiclass NeonI_ScalarPair_D_sizes<bit u, bit size, bits<5> opcode, - string asmop, bit Commutable = 0> { - let isCommutable = Commutable in { - def _D_2D : NeonI_ScalarPair<u, {size, 0b1}, opcode, - (outs FPR64:$Rd), (ins VPR128:$Rn), - !strconcat(asmop, "\t$Rd, $Rn.2d"), - [], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - } -} - -multiclass NeonI_ScalarPair_SD_sizes<bit u, bit size, bits<5> opcode, - string asmop, bit Commutable = 0> - : NeonI_ScalarPair_D_sizes<u, size, opcode, asmop, Commutable> { - let isCommutable = Commutable in { - def _S_2S : NeonI_ScalarPair<u, {size, 0b0}, opcode, - (outs FPR32:$Rd), (ins VPR64:$Rn), - !strconcat(asmop, "\t$Rd, $Rn.2s"), - [], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - } -} - -// Scalar Reduce Addition Pairwise (Integer) with -// Pattern to match llvm.arm.* intrinsic -defm ADDPvv : NeonI_ScalarPair_D_sizes<0b0, 0b1, 0b11011, "addp", 0>; - -// Pattern to match llvm.aarch64.* intrinsic for -// Scalar Reduce Addition Pairwise (Integer) -def : Pat<(v1i64 (int_aarch64_neon_vpadd (v2i64 VPR128:$Rn))), - (ADDPvv_D_2D VPR128:$Rn)>; -def : Pat<(v1i64 (int_aarch64_neon_vaddv (v2i64 VPR128:$Rn))), - (ADDPvv_D_2D VPR128:$Rn)>; - -// Scalar Reduce Addition Pairwise (Floating Point) -defm FADDPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01101, "faddp", 0>; - -// Scalar Reduce Maximum Pairwise (Floating Point) -defm FMAXPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01111, "fmaxp", 0>; - -// Scalar Reduce Minimum Pairwise (Floating Point) -defm FMINPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01111, "fminp", 0>; - -// Scalar Reduce maxNum Pairwise (Floating Point) -defm FMAXNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01100, "fmaxnmp", 0>; - -// Scalar Reduce minNum Pairwise (Floating Point) -defm FMINNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01100, "fminnmp", 0>; - -multiclass Neon_ScalarPair_SD_size_patterns<SDPatternOperator opnode, - Instruction INSTS, - Instruction INSTD> { - def : Pat<(f32 (opnode (v2f32 VPR64:$Rn))), - (INSTS VPR64:$Rn)>; - def : Pat<(f64 (opnode (v2f64 VPR128:$Rn))), - (INSTD VPR128:$Rn)>; -} - -// Patterns to match llvm.aarch64.* intrinsic for -// Scalar Reduce Add, 
Max, Min, MaxiNum, MinNum Pairwise (Floating Point) -defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfadd, - FADDPvv_S_2S, FADDPvv_D_2D>; - -defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpmax, - FMAXPvv_S_2S, FMAXPvv_D_2D>; - -defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpmin, - FMINPvv_S_2S, FMINPvv_D_2D>; - -defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfmaxnm, - FMAXNMPvv_S_2S, FMAXNMPvv_D_2D>; - -defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfminnm, - FMINNMPvv_S_2S, FMINNMPvv_D_2D>; - -def : Pat<(f32 (int_aarch64_neon_vpfadd (v4f32 VPR128:$Rn))), - (FADDPvv_S_2S (v2f32 - (EXTRACT_SUBREG - (v4f32 (FADDP_4S (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rn))), - sub_64)))>; - -// Scalar by element Arithmetic - -class NeonI_ScalarXIndexedElemArith<string asmop, bits<4> opcode, - string rmlane, bit u, bit szhi, bit szlo, - RegisterClass ResFPR, RegisterClass OpFPR, - RegisterOperand OpVPR, Operand OpImm> - : NeonI_ScalarXIndexedElem<u, szhi, szlo, opcode, - (outs ResFPR:$Rd), - (ins OpFPR:$Rn, OpVPR:$MRm, OpImm:$Imm), - asmop # "\t$Rd, $Rn, $MRm" # rmlane # "[$Imm]", - [], - NoItinerary>, - Sched<[WriteFPMul, ReadFPMul, ReadFPMul]> { - bits<3> Imm; - bits<5> MRm; -} - -class NeonI_ScalarXIndexedElemArith_Constraint_Impl<string asmop, bits<4> opcode, - string rmlane, - bit u, bit szhi, bit szlo, - RegisterClass ResFPR, - RegisterClass OpFPR, - RegisterOperand OpVPR, - Operand OpImm> - : NeonI_ScalarXIndexedElem<u, szhi, szlo, opcode, - (outs ResFPR:$Rd), - (ins ResFPR:$src, OpFPR:$Rn, OpVPR:$MRm, OpImm:$Imm), - asmop # "\t$Rd, $Rn, $MRm" # rmlane # "[$Imm]", - [], - NoItinerary>, - Sched<[WriteFPMAC, ReadFPMAC, ReadFPMAC, ReadFPMAC]> { - let Constraints = "$src = $Rd"; - bits<3> Imm; - bits<5> MRm; -} - -// Scalar Floating Point multiply (scalar, by element) -def FMULssv_4S : NeonI_ScalarXIndexedElemArith<"fmul", - 0b1001, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> { - let Inst{11} = Imm{1}; // h - let Inst{21} = Imm{0}; // l - let Inst{20-16} = MRm; -} -def FMULddv_2D : NeonI_ScalarXIndexedElemArith<"fmul", - 0b1001, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> { - let Inst{11} = Imm{0}; // h - let Inst{21} = 0b0; // l - let Inst{20-16} = MRm; -} - -// Scalar Floating Point multiply extended (scalar, by element) -def FMULXssv_4S : NeonI_ScalarXIndexedElemArith<"fmulx", - 0b1001, ".s", 0b1, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> { - let Inst{11} = Imm{1}; // h - let Inst{21} = Imm{0}; // l - let Inst{20-16} = MRm; -} -def FMULXddv_2D : NeonI_ScalarXIndexedElemArith<"fmulx", - 0b1001, ".d", 0b1, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> { - let Inst{11} = Imm{0}; // h - let Inst{21} = 0b0; // l - let Inst{20-16} = MRm; -} - -multiclass Neon_ScalarXIndexedElem_MUL_MULX_Patterns< - SDPatternOperator opnode, - Instruction INST, - ValueType ResTy, RegisterClass FPRC, ValueType OpTy, Operand OpImm, - ValueType OpNTy, ValueType ExTy, Operand OpNImm> { - - def : Pat<(ResTy (opnode (ResTy FPRC:$Rn), - (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)))), - (ResTy (INST (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>; - - def : Pat<(ResTy (opnode (ResTy FPRC:$Rn), - (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)))), - (ResTy (INST (ResTy FPRC:$Rn), - (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)), - OpNImm:$Imm))>; - - // swapped operands - def : Pat<(ResTy (opnode - (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)), - (ResTy FPRC:$Rn))), - (ResTy (INST (ResTy FPRC:$Rn), 
(OpTy VPR128:$MRm), OpImm:$Imm))>; - - def : Pat<(ResTy (opnode - (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)), - (ResTy FPRC:$Rn))), - (ResTy (INST (ResTy FPRC:$Rn), - (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)), - OpNImm:$Imm))>; -} - -// Patterns for Scalar Floating Point multiply (scalar, by element) -defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns<fmul, FMULssv_4S, - f32, FPR32, v4f32, neon_uimm2_bare, v2f32, v4f32, neon_uimm1_bare>; -defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns<fmul, FMULddv_2D, - f64, FPR64, v2f64, neon_uimm1_bare, v1f64, v2f64, neon_uimm0_bare>; - -// Patterns for Scalar Floating Point multiply extended (scalar, by element) -defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns<int_aarch64_neon_vmulx, - FMULXssv_4S, f32, FPR32, v4f32, neon_uimm2_bare, - v2f32, v4f32, neon_uimm1_bare>; -defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns<int_aarch64_neon_vmulx, - FMULXddv_2D, f64, FPR64, v2f64, neon_uimm1_bare, - v1f64, v2f64, neon_uimm0_bare>; - -// Scalar Floating Point fused multiply-add (scalar, by element) -def FMLAssv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmla", - 0b0001, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> { - let Inst{11} = Imm{1}; // h - let Inst{21} = Imm{0}; // l - let Inst{20-16} = MRm; -} -def FMLAddv_2D : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmla", - 0b0001, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> { - let Inst{11} = Imm{0}; // h - let Inst{21} = 0b0; // l - let Inst{20-16} = MRm; -} - -// Scalar Floating Point fused multiply-subtract (scalar, by element) -def FMLSssv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmls", - 0b0101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> { - let Inst{11} = Imm{1}; // h - let Inst{21} = Imm{0}; // l - let Inst{20-16} = MRm; -} -def FMLSddv_2D : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmls", - 0b0101, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> { - let Inst{11} = Imm{0}; // h - let Inst{21} = 0b0; // l - let Inst{20-16} = MRm; -} -// We are allowed to match the fma instruction regardless of compile options. 
-multiclass Neon_ScalarXIndexedElem_FMA_Patterns< - Instruction FMLAI, Instruction FMLSI, - ValueType ResTy, RegisterClass FPRC, ValueType OpTy, Operand OpImm, - ValueType OpNTy, ValueType ExTy, Operand OpNImm> { - // fmla - def : Pat<(ResTy (fma (ResTy FPRC:$Rn), - (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)), - (ResTy FPRC:$Ra))), - (ResTy (FMLAI (ResTy FPRC:$Ra), - (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>; - - def : Pat<(ResTy (fma (ResTy FPRC:$Rn), - (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)), - (ResTy FPRC:$Ra))), - (ResTy (FMLAI (ResTy FPRC:$Ra), - (ResTy FPRC:$Rn), - (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)), - OpNImm:$Imm))>; - - // swapped fmla operands - def : Pat<(ResTy (fma - (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)), - (ResTy FPRC:$Rn), - (ResTy FPRC:$Ra))), - (ResTy (FMLAI (ResTy FPRC:$Ra), - (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>; - - def : Pat<(ResTy (fma - (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)), - (ResTy FPRC:$Rn), - (ResTy FPRC:$Ra))), - (ResTy (FMLAI (ResTy FPRC:$Ra), - (ResTy FPRC:$Rn), - (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)), - OpNImm:$Imm))>; - - // fmls - def : Pat<(ResTy (fma (ResTy FPRC:$Rn), - (fneg (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm))), - (ResTy FPRC:$Ra))), - (ResTy (FMLSI (ResTy FPRC:$Ra), - (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>; - - def : Pat<(ResTy (fma (ResTy FPRC:$Rn), - (fneg (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm))), - (ResTy FPRC:$Ra))), - (ResTy (FMLSI (ResTy FPRC:$Ra), - (ResTy FPRC:$Rn), - (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)), - OpNImm:$Imm))>; - - // swapped fmls operands - def : Pat<(ResTy (fma - (fneg (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm))), - (ResTy FPRC:$Rn), - (ResTy FPRC:$Ra))), - (ResTy (FMLSI (ResTy FPRC:$Ra), - (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>; - - def : Pat<(ResTy (fma - (fneg (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm))), - (ResTy FPRC:$Rn), - (ResTy FPRC:$Ra))), - (ResTy (FMLSI (ResTy FPRC:$Ra), - (ResTy FPRC:$Rn), - (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)), - OpNImm:$Imm))>; -} - -// Scalar Floating Point fused multiply-add and -// multiply-subtract (scalar, by element) -defm : Neon_ScalarXIndexedElem_FMA_Patterns<FMLAssv_4S, FMLSssv_4S, - f32, FPR32, v4f32, neon_uimm2_bare, v2f32, v4f32, neon_uimm1_bare>; -defm : Neon_ScalarXIndexedElem_FMA_Patterns<FMLAddv_2D, FMLSddv_2D, - f64, FPR64, v2f64, neon_uimm1_bare, v1f64, v2f64, neon_uimm0_bare>; -defm : Neon_ScalarXIndexedElem_FMA_Patterns<FMLAddv_2D, FMLSddv_2D, - f64, FPR64, v2f64, neon_uimm1_bare, v1f64, v2f64, neon_uimm0_bare>; - -// Scalar Signed saturating doubling multiply long (scalar, by element) -def SQDMULLshv_4H : NeonI_ScalarXIndexedElemArith<"sqdmull", - 0b1011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> { - let Inst{11} = 0b0; // h - let Inst{21} = Imm{1}; // l - let Inst{20} = Imm{0}; // m - let Inst{19-16} = MRm{3-0}; -} -def SQDMULLshv_8H : NeonI_ScalarXIndexedElemArith<"sqdmull", - 0b1011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> { - let Inst{11} = Imm{2}; // h - let Inst{21} = Imm{1}; // l - let Inst{20} = Imm{0}; // m - let Inst{19-16} = MRm{3-0}; -} -def SQDMULLdsv_2S : NeonI_ScalarXIndexedElemArith<"sqdmull", - 0b1011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> { - let Inst{11} = 0b0; // h - let Inst{21} = Imm{0}; // l - let Inst{20-16} = MRm; -} -def SQDMULLdsv_4S : 
NeonI_ScalarXIndexedElemArith<"sqdmull", - 0b1011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> { - let Inst{11} = Imm{1}; // h - let Inst{21} = Imm{0}; // l - let Inst{20-16} = MRm; -} - -multiclass Neon_ScalarXIndexedElem_MUL_Patterns< - SDPatternOperator opnode, - Instruction INST, - ValueType ResTy, RegisterClass FPRC, - ValueType OpVTy, ValueType OpTy, - ValueType VecOpTy, ValueType ExTy, RegisterOperand VPRC, Operand OpImm> { - - def : Pat<(ResTy (opnode (OpVTy FPRC:$Rn), - (OpVTy (scalar_to_vector - (ExTy (vector_extract (VecOpTy VPRC:$MRm), OpImm:$Imm)))))), - (ResTy (INST (OpVTy FPRC:$Rn), (VecOpTy VPRC:$MRm), OpImm:$Imm))>; - - def : Pat<(ResTy (opnode (OpVTy FPRC:$Rn), - (OpVTy (extract_subvector (VecOpTy VPRC:$MRm), OpImm:$Imm)))), - (ResTy (INST (OpVTy FPRC:$Rn), (VecOpTy VPRC:$MRm), OpImm:$Imm))>; - - //swapped operands - def : Pat<(ResTy (opnode - (OpVTy (scalar_to_vector - (ExTy (vector_extract (VecOpTy VPRC:$MRm), OpImm:$Imm)))), - (OpVTy FPRC:$Rn))), - (ResTy (INST (OpVTy FPRC:$Rn), (VecOpTy VPRC:$MRm), OpImm:$Imm))>; - - def : Pat<(ResTy (opnode - (OpVTy (extract_subvector (VecOpTy VPRC:$MRm), OpImm:$Imm)), - (OpVTy FPRC:$Rn))), - (ResTy (INST (OpVTy FPRC:$Rn), (VecOpTy VPRC:$MRm), OpImm:$Imm))>; -} - - -// Patterns for Scalar Signed saturating doubling -// multiply long (scalar, by element) -defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmull, - SQDMULLshv_4H, v1i32, FPR16, v1i16, i16, v4i16, - i32, VPR64Lo, neon_uimm2_bare>; -defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmull, - SQDMULLshv_8H, v1i32, FPR16, v1i16, i16, v8i16, - i32, VPR128Lo, neon_uimm3_bare>; -defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmull, - SQDMULLdsv_2S, v1i64, FPR32, v1i32, i32, v2i32, - i32, VPR64Lo, neon_uimm1_bare>; -defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmull, - SQDMULLdsv_4S, v1i64, FPR32, v1i32, i32, v4i32, - i32, VPR128Lo, neon_uimm2_bare>; - -// Scalar Signed saturating doubling multiply-add long (scalar, by element) -def SQDMLALshv_4H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal", - 0b0011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> { - let Inst{11} = 0b0; // h - let Inst{21} = Imm{1}; // l - let Inst{20} = Imm{0}; // m - let Inst{19-16} = MRm{3-0}; -} -def SQDMLALshv_8H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal", - 0b0011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> { - let Inst{11} = Imm{2}; // h - let Inst{21} = Imm{1}; // l - let Inst{20} = Imm{0}; // m - let Inst{19-16} = MRm{3-0}; -} -def SQDMLALdsv_2S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal", - 0b0011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> { - let Inst{11} = 0b0; // h - let Inst{21} = Imm{0}; // l - let Inst{20-16} = MRm; -} -def SQDMLALdsv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal", - 0b0011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> { - let Inst{11} = Imm{1}; // h - let Inst{21} = Imm{0}; // l - let Inst{20-16} = MRm; -} - -// Scalar Signed saturating doubling -// multiply-subtract long (scalar, by element) -def SQDMLSLshv_4H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl", - 0b0111, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> { - let Inst{11} = 0b0; // h - let Inst{21} = Imm{1}; // l - let Inst{20} = Imm{0}; // m - let Inst{19-16} = MRm{3-0}; -} -def SQDMLSLshv_8H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl", - 0b0111, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, 
neon_uimm3_bare> { - let Inst{11} = Imm{2}; // h - let Inst{21} = Imm{1}; // l - let Inst{20} = Imm{0}; // m - let Inst{19-16} = MRm{3-0}; -} -def SQDMLSLdsv_2S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl", - 0b0111, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> { - let Inst{11} = 0b0; // h - let Inst{21} = Imm{0}; // l - let Inst{20-16} = MRm; -} -def SQDMLSLdsv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl", - 0b0111, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> { - let Inst{11} = Imm{1}; // h - let Inst{21} = Imm{0}; // l - let Inst{20-16} = MRm; -} - -multiclass Neon_ScalarXIndexedElem_MLAL_Patterns< - SDPatternOperator opnode, - SDPatternOperator coreopnode, - Instruction INST, - ValueType ResTy, RegisterClass ResFPRC, RegisterClass FPRC, - ValueType OpTy, - ValueType OpVTy, ValueType ExTy, RegisterOperand VPRC, Operand OpImm> { - - def : Pat<(ResTy (opnode - (ResTy ResFPRC:$Ra), - (ResTy (coreopnode (OpTy FPRC:$Rn), - (OpTy (scalar_to_vector - (ExTy (vector_extract (OpVTy VPRC:$MRm), OpImm:$Imm)))))))), - (ResTy (INST (ResTy ResFPRC:$Ra), - (OpTy FPRC:$Rn), (OpVTy VPRC:$MRm), OpImm:$Imm))>; - - def : Pat<(ResTy (opnode - (ResTy ResFPRC:$Ra), - (ResTy (coreopnode (OpTy FPRC:$Rn), - (OpTy (extract_subvector (OpVTy VPRC:$MRm), OpImm:$Imm)))))), - (ResTy (INST (ResTy ResFPRC:$Ra), - (OpTy FPRC:$Rn), (OpVTy VPRC:$MRm), OpImm:$Imm))>; - - // swapped operands - def : Pat<(ResTy (opnode - (ResTy ResFPRC:$Ra), - (ResTy (coreopnode - (OpTy (scalar_to_vector - (ExTy (vector_extract (OpVTy VPRC:$MRm), OpImm:$Imm)))), - (OpTy FPRC:$Rn))))), - (ResTy (INST (ResTy ResFPRC:$Ra), - (OpTy FPRC:$Rn), (OpVTy VPRC:$MRm), OpImm:$Imm))>; - - def : Pat<(ResTy (opnode - (ResTy ResFPRC:$Ra), - (ResTy (coreopnode - (OpTy (extract_subvector (OpVTy VPRC:$MRm), OpImm:$Imm)), - (OpTy FPRC:$Rn))))), - (ResTy (INST (ResTy ResFPRC:$Ra), - (OpTy FPRC:$Rn), (OpVTy VPRC:$MRm), OpImm:$Imm))>; -} - -// Patterns for Scalar Signed saturating -// doubling multiply-add long (scalar, by element) -defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqadds, - int_arm_neon_vqdmull, SQDMLALshv_4H, v1i32, FPR32, FPR16, v1i16, v4i16, - i32, VPR64Lo, neon_uimm2_bare>; -defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqadds, - int_arm_neon_vqdmull, SQDMLALshv_8H, v1i32, FPR32, FPR16, v1i16, v8i16, - i32, VPR128Lo, neon_uimm3_bare>; -defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqadds, - int_arm_neon_vqdmull, SQDMLALdsv_2S, v1i64, FPR64, FPR32, v1i32, v2i32, - i32, VPR64Lo, neon_uimm1_bare>; -defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqadds, - int_arm_neon_vqdmull, SQDMLALdsv_4S, v1i64, FPR64, FPR32, v1i32, v4i32, - i32, VPR128Lo, neon_uimm2_bare>; - -// Patterns for Scalar Signed saturating -// doubling multiply-sub long (scalar, by element) -defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqsubs, - int_arm_neon_vqdmull, SQDMLSLshv_4H, v1i32, FPR32, FPR16, v1i16, v4i16, - i32, VPR64Lo, neon_uimm2_bare>; -defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqsubs, - int_arm_neon_vqdmull, SQDMLSLshv_8H, v1i32, FPR32, FPR16, v1i16, v8i16, - i32, VPR128Lo, neon_uimm3_bare>; -defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqsubs, - int_arm_neon_vqdmull, SQDMLSLdsv_2S, v1i64, FPR64, FPR32, v1i32, v2i32, - i32, VPR64Lo, neon_uimm1_bare>; -defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqsubs, - int_arm_neon_vqdmull, SQDMLSLdsv_4S, v1i64, FPR64, FPR32, v1i32, v4i32, - i32, VPR128Lo, neon_uimm2_bare>; - -// 
Scalar Signed saturating doubling multiply returning -// high half (scalar, by element) -def SQDMULHhhv_4H : NeonI_ScalarXIndexedElemArith<"sqdmulh", - 0b1100, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR64Lo, neon_uimm2_bare> { - let Inst{11} = 0b0; // h - let Inst{21} = Imm{1}; // l - let Inst{20} = Imm{0}; // m - let Inst{19-16} = MRm{3-0}; -} -def SQDMULHhhv_8H : NeonI_ScalarXIndexedElemArith<"sqdmulh", - 0b1100, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR128Lo, neon_uimm3_bare> { - let Inst{11} = Imm{2}; // h - let Inst{21} = Imm{1}; // l - let Inst{20} = Imm{0}; // m - let Inst{19-16} = MRm{3-0}; -} -def SQDMULHssv_2S : NeonI_ScalarXIndexedElemArith<"sqdmulh", - 0b1100, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR64, neon_uimm1_bare> { - let Inst{11} = 0b0; // h - let Inst{21} = Imm{0}; // l - let Inst{20-16} = MRm; -} -def SQDMULHssv_4S : NeonI_ScalarXIndexedElemArith<"sqdmulh", - 0b1100, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> { - let Inst{11} = Imm{1}; // h - let Inst{21} = Imm{0}; // l - let Inst{20-16} = MRm; -} - -// Patterns for Scalar Signed saturating doubling multiply returning -// high half (scalar, by element) -defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmulh, - SQDMULHhhv_4H, v1i16, FPR16, v1i16, i16, v4i16, - i32, VPR64Lo, neon_uimm2_bare>; -defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmulh, - SQDMULHhhv_8H, v1i16, FPR16, v1i16, i16, v8i16, - i32, VPR128Lo, neon_uimm3_bare>; -defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmulh, - SQDMULHssv_2S, v1i32, FPR32, v1i32, i32, v2i32, - i32, VPR64Lo, neon_uimm1_bare>; -defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmulh, - SQDMULHssv_4S, v1i32, FPR32, v1i32, i32, v4i32, - i32, VPR128Lo, neon_uimm2_bare>; - -// Scalar Signed saturating rounding doubling multiply -// returning high half (scalar, by element) -def SQRDMULHhhv_4H : NeonI_ScalarXIndexedElemArith<"sqrdmulh", - 0b1101, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR64Lo, neon_uimm2_bare> { - let Inst{11} = 0b0; // h - let Inst{21} = Imm{1}; // l - let Inst{20} = Imm{0}; // m - let Inst{19-16} = MRm{3-0}; -} -def SQRDMULHhhv_8H : NeonI_ScalarXIndexedElemArith<"sqrdmulh", - 0b1101, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR128Lo, neon_uimm3_bare> { - let Inst{11} = Imm{2}; // h - let Inst{21} = Imm{1}; // l - let Inst{20} = Imm{0}; // m - let Inst{19-16} = MRm{3-0}; -} -def SQRDMULHssv_2S : NeonI_ScalarXIndexedElemArith<"sqrdmulh", - 0b1101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR64, neon_uimm1_bare> { - let Inst{11} = 0b0; // h - let Inst{21} = Imm{0}; // l - let Inst{20-16} = MRm; -} -def SQRDMULHssv_4S : NeonI_ScalarXIndexedElemArith<"sqrdmulh", - 0b1101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> { - let Inst{11} = Imm{1}; // h - let Inst{21} = Imm{0}; // l - let Inst{20-16} = MRm; -} - -defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqrdmulh, - SQRDMULHhhv_4H, v1i16, FPR16, v1i16, i16, v4i16, i32, - VPR64Lo, neon_uimm2_bare>; -defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqrdmulh, - SQRDMULHhhv_8H, v1i16, FPR16, v1i16, i16, v8i16, i32, - VPR128Lo, neon_uimm3_bare>; -defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqrdmulh, - SQRDMULHssv_2S, v1i32, FPR32, v1i32, i32, v2i32, i32, - VPR64Lo, neon_uimm1_bare>; -defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqrdmulh, - SQRDMULHssv_4S, v1i32, FPR32, v1i32, i32, v4i32, i32, - VPR128Lo, neon_uimm2_bare>; - -// Scalar general arithmetic operation -class Neon_Scalar_GeneralMath2D_pattern<SDPatternOperator opnode, - 
Instruction INST> - : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))), (INST FPR64:$Rn)>; - -class Neon_Scalar_GeneralMath3D_pattern<SDPatternOperator opnode, - Instruction INST> - : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), - (INST FPR64:$Rn, FPR64:$Rm)>; - -class Neon_Scalar_GeneralMath4D_pattern<SDPatternOperator opnode, - Instruction INST> - : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm), - (v1f64 FPR64:$Ra))), - (INST FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; - -def : Neon_Scalar_GeneralMath3D_pattern<fadd, FADDddd>; -def : Neon_Scalar_GeneralMath3D_pattern<fmul, FMULddd>; -def : Neon_Scalar_GeneralMath3D_pattern<fsub, FSUBddd>; -def : Neon_Scalar_GeneralMath3D_pattern<fdiv, FDIVddd>; -def : Neon_Scalar_GeneralMath3D_pattern<int_arm_neon_vabds, FABDddd>; -def : Neon_Scalar_GeneralMath3D_pattern<int_arm_neon_vmaxs, FMAXddd>; -def : Neon_Scalar_GeneralMath3D_pattern<int_arm_neon_vmins, FMINddd>; -def : Neon_Scalar_GeneralMath3D_pattern<int_aarch64_neon_vmaxnm, FMAXNMddd>; -def : Neon_Scalar_GeneralMath3D_pattern<int_aarch64_neon_vminnm, FMINNMddd>; - -def : Neon_Scalar_GeneralMath2D_pattern<fabs, FABSdd>; -def : Neon_Scalar_GeneralMath2D_pattern<fneg, FNEGdd>; - -def : Neon_Scalar_GeneralMath4D_pattern<fma, FMADDdddd>; -def : Neon_Scalar_GeneralMath4D_pattern<fmsub, FMSUBdddd>; - -// Scalar Copy - DUP element to scalar -class NeonI_Scalar_DUP<string asmop, string asmlane, - RegisterClass ResRC, RegisterOperand VPRC, - Operand OpImm> - : NeonI_ScalarCopy<(outs ResRC:$Rd), (ins VPRC:$Rn, OpImm:$Imm), - asmop # "\t$Rd, $Rn." # asmlane # "[$Imm]", - [], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]> { - bits<4> Imm; -} - -def DUPbv_B : NeonI_Scalar_DUP<"dup", "b", FPR8, VPR128, neon_uimm4_bare> { - let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1}; -} -def DUPhv_H : NeonI_Scalar_DUP<"dup", "h", FPR16, VPR128, neon_uimm3_bare> { - let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0}; -} -def DUPsv_S : NeonI_Scalar_DUP<"dup", "s", FPR32, VPR128, neon_uimm2_bare> { - let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0}; -} -def DUPdv_D : NeonI_Scalar_DUP<"dup", "d", FPR64, VPR128, neon_uimm1_bare> { - let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0}; -} - -def : Pat<(f32 (vector_extract (v4f32 VPR128:$Rn), 0)), - (f32 (EXTRACT_SUBREG (v4f32 VPR128:$Rn), sub_32))>; -def : Pat<(f32 (vector_extract (v4f32 VPR128:$Rn), 1)), - (f32 (DUPsv_S (v4f32 VPR128:$Rn), 1))>; -def : Pat<(f32 (vector_extract (v4f32 VPR128:$Rn), 2)), - (f32 (DUPsv_S (v4f32 VPR128:$Rn), 2))>; -def : Pat<(f32 (vector_extract (v4f32 VPR128:$Rn), 3)), - (f32 (DUPsv_S (v4f32 VPR128:$Rn), 3))>; - -def : Pat<(f64 (vector_extract (v2f64 VPR128:$Rn), 0)), - (f64 (EXTRACT_SUBREG (v2f64 VPR128:$Rn), sub_64))>; -def : Pat<(f64 (vector_extract (v2f64 VPR128:$Rn), 1)), - (f64 (DUPdv_D (v2f64 VPR128:$Rn), 1))>; - -def : Pat<(f32 (vector_extract (v2f32 VPR64:$Rn), 0)), - (f32 (EXTRACT_SUBREG (v2f32 VPR64:$Rn), sub_32))>; -def : Pat<(f32 (vector_extract (v2f32 VPR64:$Rn), 1)), - (f32 (DUPsv_S (v4f32 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), - 1))>; - -def : Pat<(f64 (vector_extract (v1f64 VPR64:$Rn), 0)), - (f64 (EXTRACT_SUBREG (v1f64 VPR64:$Rn), sub_64))>; - -multiclass NeonI_Scalar_DUP_Ext_Vec_pattern<Instruction DUPI, - ValueType ResTy, ValueType OpTy,Operand OpLImm, - ValueType NOpTy, ValueType ExTy, Operand OpNImm> { - - def : Pat<(ResTy (extract_subvector (OpTy VPR128:$Rn), OpLImm:$Imm)), - (ResTy (DUPI VPR128:$Rn, OpLImm:$Imm))>; - - def : Pat<(ResTy (extract_subvector (NOpTy VPR64:$Rn), OpNImm:$Imm)), - (ResTy (DUPI - 
(ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), - OpNImm:$Imm))>; -} - -// Patterns for extract subvectors of v1ix data using scalar DUP instructions. -defm : NeonI_Scalar_DUP_Ext_Vec_pattern<DUPbv_B, v1i8, v16i8, neon_uimm4_bare, - v8i8, v16i8, neon_uimm3_bare>; -defm : NeonI_Scalar_DUP_Ext_Vec_pattern<DUPhv_H, v1i16, v8i16, neon_uimm3_bare, - v4i16, v8i16, neon_uimm2_bare>; -defm : NeonI_Scalar_DUP_Ext_Vec_pattern<DUPsv_S, v1i32, v4i32, neon_uimm2_bare, - v2i32, v4i32, neon_uimm1_bare>; - -multiclass NeonI_Scalar_DUP_Copy_pattern1<Instruction DUPI, ValueType ResTy, - ValueType OpTy, ValueType ElemTy, - Operand OpImm, ValueType OpNTy, - ValueType ExTy, Operand OpNImm> { - - def : Pat<(ResTy (vector_insert (ResTy undef), - (ElemTy (vector_extract (OpTy VPR128:$Rn), OpImm:$Imm)), - (neon_uimm0_bare:$Imm))), - (ResTy (DUPI (OpTy VPR128:$Rn), OpImm:$Imm))>; - - def : Pat<(ResTy (vector_insert (ResTy undef), - (ElemTy (vector_extract (OpNTy VPR64:$Rn), OpNImm:$Imm)), - (OpNImm:$Imm))), - (ResTy (DUPI - (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), - OpNImm:$Imm))>; -} - -multiclass NeonI_Scalar_DUP_Copy_pattern2<Instruction DUPI, ValueType ResTy, - ValueType OpTy, ValueType ElemTy, - Operand OpImm, ValueType OpNTy, - ValueType ExTy, Operand OpNImm> { - - def : Pat<(ResTy (scalar_to_vector - (ElemTy (vector_extract (OpTy VPR128:$Rn), OpImm:$Imm)))), - (ResTy (DUPI (OpTy VPR128:$Rn), OpImm:$Imm))>; - - def : Pat<(ResTy (scalar_to_vector - (ElemTy (vector_extract (OpNTy VPR64:$Rn), OpNImm:$Imm)))), - (ResTy (DUPI - (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), - OpNImm:$Imm))>; -} - -// Patterns for vector copy to v1ix and v1fx vectors using scalar DUP -// instructions. -defm : NeonI_Scalar_DUP_Copy_pattern1<DUPdv_D, - v1i64, v2i64, i64, neon_uimm1_bare, - v1i64, v2i64, neon_uimm0_bare>; -defm : NeonI_Scalar_DUP_Copy_pattern1<DUPsv_S, - v1i32, v4i32, i32, neon_uimm2_bare, - v2i32, v4i32, neon_uimm1_bare>; -defm : NeonI_Scalar_DUP_Copy_pattern1<DUPhv_H, - v1i16, v8i16, i32, neon_uimm3_bare, - v4i16, v8i16, neon_uimm2_bare>; -defm : NeonI_Scalar_DUP_Copy_pattern1<DUPbv_B, - v1i8, v16i8, i32, neon_uimm4_bare, - v8i8, v16i8, neon_uimm3_bare>; -defm : NeonI_Scalar_DUP_Copy_pattern2<DUPdv_D, - v1i64, v2i64, i64, neon_uimm1_bare, - v1i64, v2i64, neon_uimm0_bare>; -defm : NeonI_Scalar_DUP_Copy_pattern2<DUPsv_S, - v1i32, v4i32, i32, neon_uimm2_bare, - v2i32, v4i32, neon_uimm1_bare>; -defm : NeonI_Scalar_DUP_Copy_pattern2<DUPhv_H, - v1i16, v8i16, i32, neon_uimm3_bare, - v4i16, v8i16, neon_uimm2_bare>; -defm : NeonI_Scalar_DUP_Copy_pattern2<DUPbv_B, - v1i8, v16i8, i32, neon_uimm4_bare, - v8i8, v16i8, neon_uimm3_bare>; - -multiclass NeonI_Scalar_DUP_alias<string asmop, string asmlane, - Instruction DUPI, Operand OpImm, - RegisterClass ResRC> { - def : NeonInstAlias<!strconcat(asmop, "$Rd, $Rn" # asmlane # "[$Imm]"), - (DUPI ResRC:$Rd, VPR128:$Rn, OpImm:$Imm), 0b0>; -} - -// Aliases for Scalar copy - DUP element (scalar) -// FIXME: This is actually the preferred syntax but TableGen can't deal with -// custom printing of aliases. 
-defm : NeonI_Scalar_DUP_alias<"mov", ".b", DUPbv_B, neon_uimm4_bare, FPR8>; -defm : NeonI_Scalar_DUP_alias<"mov", ".h", DUPhv_H, neon_uimm3_bare, FPR16>; -defm : NeonI_Scalar_DUP_alias<"mov", ".s", DUPsv_S, neon_uimm2_bare, FPR32>; -defm : NeonI_Scalar_DUP_alias<"mov", ".d", DUPdv_D, neon_uimm1_bare, FPR64>; - -multiclass NeonI_SDUP<PatFrag GetLow, PatFrag GetHigh, ValueType ResTy, - ValueType OpTy> { - def : Pat<(ResTy (GetLow VPR128:$Rn)), - (ResTy (EXTRACT_SUBREG (OpTy VPR128:$Rn), sub_64))>; - def : Pat<(ResTy (GetHigh VPR128:$Rn)), - (ResTy (DUPdv_D (OpTy VPR128:$Rn), 1))>; -} - -defm : NeonI_SDUP<Neon_Low16B, Neon_High16B, v8i8, v16i8>; -defm : NeonI_SDUP<Neon_Low8H, Neon_High8H, v4i16, v8i16>; -defm : NeonI_SDUP<Neon_Low4S, Neon_High4S, v2i32, v4i32>; -defm : NeonI_SDUP<Neon_Low2D, Neon_High2D, v1i64, v2i64>; -defm : NeonI_SDUP<Neon_Low4float, Neon_High4float, v2f32, v4f32>; -defm : NeonI_SDUP<Neon_Low2double, Neon_High2double, v1f64, v2f64>; - -// The following is for sext/zext from v1xx to v1xx -multiclass NeonI_ext<string prefix, SDNode ExtOp> { - // v1i32 -> v1i64 - def : Pat<(v1i64 (ExtOp (v1i32 FPR32:$Rn))), - (EXTRACT_SUBREG - (v2i64 (!cast<Instruction>(prefix # "_2S") - (v2i32 (SUBREG_TO_REG (i64 0), $Rn, sub_32)), 0)), - sub_64)>; - - // v1i16 -> v1i32 - def : Pat<(v1i32 (ExtOp (v1i16 FPR16:$Rn))), - (EXTRACT_SUBREG - (v4i32 (!cast<Instruction>(prefix # "_4H") - (v4i16 (SUBREG_TO_REG (i64 0), $Rn, sub_16)), 0)), - sub_32)>; - - // v1i8 -> v1i16 - def : Pat<(v1i16 (ExtOp (v1i8 FPR8:$Rn))), - (EXTRACT_SUBREG - (v8i16 (!cast<Instruction>(prefix # "_8B") - (v8i8 (SUBREG_TO_REG (i64 0), $Rn, sub_8)), 0)), - sub_16)>; -} - -defm NeonI_zext : NeonI_ext<"USHLLvvi", zext>; -defm NeonI_sext : NeonI_ext<"SSHLLvvi", sext>; - -// zext v1i8 -> v1i32 -def : Pat<(v1i32 (zext (v1i8 FPR8:$Rn))), - (v1i32 (EXTRACT_SUBREG - (v1i64 (SUBREG_TO_REG (i64 0), - (v1i8 (DUPbv_B - (v16i8 (SUBREG_TO_REG (i64 0), $Rn, sub_8)), - 0)), - sub_8)), - sub_32))>; - -// zext v1i8 -> v1i64 -def : Pat<(v1i64 (zext (v1i8 FPR8:$Rn))), - (v1i64 (SUBREG_TO_REG (i64 0), - (v1i8 (DUPbv_B - (v16i8 (SUBREG_TO_REG (i64 0), $Rn, sub_8)), - 0)), - sub_8))>; - -// zext v1i16 -> v1i64 -def : Pat<(v1i64 (zext (v1i16 FPR16:$Rn))), - (v1i64 (SUBREG_TO_REG (i64 0), - (v1i16 (DUPhv_H - (v8i16 (SUBREG_TO_REG (i64 0), $Rn, sub_16)), - 0)), - sub_16))>; - -// sext v1i8 -> v1i32 -def : Pat<(v1i32 (sext (v1i8 FPR8:$Rn))), - (EXTRACT_SUBREG - (v4i32 (SSHLLvvi_4H - (v4i16 (SUBREG_TO_REG (i64 0), - (v1i16 (EXTRACT_SUBREG - (v8i16 (SSHLLvvi_8B - (v8i8 (SUBREG_TO_REG (i64 0), $Rn, sub_8)), 0)), - sub_16)), - sub_16)), 0)), - sub_32)>; - -// sext v1i8 -> v1i64 -def : Pat<(v1i64 (sext (v1i8 FPR8:$Rn))), - (EXTRACT_SUBREG - (v2i64 (SSHLLvvi_2S - (v2i32 (SUBREG_TO_REG (i64 0), - (v1i32 (EXTRACT_SUBREG - (v4i32 (SSHLLvvi_4H - (v4i16 (SUBREG_TO_REG (i64 0), - (v1i16 (EXTRACT_SUBREG - (v8i16 (SSHLLvvi_8B - (v8i8 (SUBREG_TO_REG (i64 0), $Rn, sub_8)), 0)), - sub_16)), - sub_16)), 0)), - sub_32)), - sub_32)), 0)), - sub_64)>; - - -// sext v1i16 -> v1i64 -def : Pat<(v1i64 (sext (v1i16 FPR16:$Rn))), - (EXTRACT_SUBREG - (v2i64 (SSHLLvvi_2S - (v2i32 (SUBREG_TO_REG (i64 0), - (v1i32 (EXTRACT_SUBREG - (v4i32 (SSHLLvvi_4H - (v4i16 (SUBREG_TO_REG (i64 0), $Rn, sub_16)), 0)), - sub_32)), - sub_32)), 0)), - sub_64)>; - -//===----------------------------------------------------------------------===// -// Non-Instruction Patterns -//===----------------------------------------------------------------------===// - -// 64-bit vector bitcasts... 
- -def : Pat<(v1i64 (bitconvert (v8i8 VPR64:$src))), (v1i64 VPR64:$src)>; -def : Pat<(v2f32 (bitconvert (v8i8 VPR64:$src))), (v2f32 VPR64:$src)>; -def : Pat<(v2i32 (bitconvert (v8i8 VPR64:$src))), (v2i32 VPR64:$src)>; -def : Pat<(v4i16 (bitconvert (v8i8 VPR64:$src))), (v4i16 VPR64:$src)>; - -def : Pat<(v1i64 (bitconvert (v4i16 VPR64:$src))), (v1i64 VPR64:$src)>; -def : Pat<(v2i32 (bitconvert (v4i16 VPR64:$src))), (v2i32 VPR64:$src)>; -def : Pat<(v2f32 (bitconvert (v4i16 VPR64:$src))), (v2f32 VPR64:$src)>; -def : Pat<(v8i8 (bitconvert (v4i16 VPR64:$src))), (v8i8 VPR64:$src)>; - -def : Pat<(v1i64 (bitconvert (v2i32 VPR64:$src))), (v1i64 VPR64:$src)>; -def : Pat<(v2f32 (bitconvert (v2i32 VPR64:$src))), (v2f32 VPR64:$src)>; -def : Pat<(v4i16 (bitconvert (v2i32 VPR64:$src))), (v4i16 VPR64:$src)>; -def : Pat<(v8i8 (bitconvert (v2i32 VPR64:$src))), (v8i8 VPR64:$src)>; - -def : Pat<(v1i64 (bitconvert (v2f32 VPR64:$src))), (v1i64 VPR64:$src)>; -def : Pat<(v2i32 (bitconvert (v2f32 VPR64:$src))), (v2i32 VPR64:$src)>; -def : Pat<(v4i16 (bitconvert (v2f32 VPR64:$src))), (v4i16 VPR64:$src)>; -def : Pat<(v8i8 (bitconvert (v2f32 VPR64:$src))), (v8i8 VPR64:$src)>; - -def : Pat<(v2f32 (bitconvert (v1i64 VPR64:$src))), (v2f32 VPR64:$src)>; -def : Pat<(v2i32 (bitconvert (v1i64 VPR64:$src))), (v2i32 VPR64:$src)>; -def : Pat<(v4i16 (bitconvert (v1i64 VPR64:$src))), (v4i16 VPR64:$src)>; -def : Pat<(v8i8 (bitconvert (v1i64 VPR64:$src))), (v8i8 VPR64:$src)>; - -def : Pat<(v1i64 (bitconvert (v1f64 VPR64:$src))), (v1i64 VPR64:$src)>; -def : Pat<(v2f32 (bitconvert (v1f64 VPR64:$src))), (v2f32 VPR64:$src)>; -def : Pat<(v2i32 (bitconvert (v1f64 VPR64:$src))), (v2i32 VPR64:$src)>; -def : Pat<(v4i16 (bitconvert (v1f64 VPR64:$src))), (v4i16 VPR64:$src)>; -def : Pat<(v8i8 (bitconvert (v1f64 VPR64:$src))), (v8i8 VPR64:$src)>; -def : Pat<(f64 (bitconvert (v1f64 VPR64:$src))), (f64 VPR64:$src)>; - -def : Pat<(v1f64 (bitconvert (v1i64 VPR64:$src))), (v1f64 VPR64:$src)>; -def : Pat<(v1f64 (bitconvert (v2f32 VPR64:$src))), (v1f64 VPR64:$src)>; -def : Pat<(v1f64 (bitconvert (v2i32 VPR64:$src))), (v1f64 VPR64:$src)>; -def : Pat<(v1f64 (bitconvert (v4i16 VPR64:$src))), (v1f64 VPR64:$src)>; -def : Pat<(v1f64 (bitconvert (v8i8 VPR64:$src))), (v1f64 VPR64:$src)>; -def : Pat<(v1f64 (bitconvert (f64 VPR64:$src))), (v1f64 VPR64:$src)>; - -// ..and 128-bit vector bitcasts... 
- -def : Pat<(v2f64 (bitconvert (v16i8 VPR128:$src))), (v2f64 VPR128:$src)>; -def : Pat<(v2i64 (bitconvert (v16i8 VPR128:$src))), (v2i64 VPR128:$src)>; -def : Pat<(v4f32 (bitconvert (v16i8 VPR128:$src))), (v4f32 VPR128:$src)>; -def : Pat<(v4i32 (bitconvert (v16i8 VPR128:$src))), (v4i32 VPR128:$src)>; -def : Pat<(v8i16 (bitconvert (v16i8 VPR128:$src))), (v8i16 VPR128:$src)>; - -def : Pat<(v2f64 (bitconvert (v8i16 VPR128:$src))), (v2f64 VPR128:$src)>; -def : Pat<(v2i64 (bitconvert (v8i16 VPR128:$src))), (v2i64 VPR128:$src)>; -def : Pat<(v4i32 (bitconvert (v8i16 VPR128:$src))), (v4i32 VPR128:$src)>; -def : Pat<(v4f32 (bitconvert (v8i16 VPR128:$src))), (v4f32 VPR128:$src)>; -def : Pat<(v16i8 (bitconvert (v8i16 VPR128:$src))), (v16i8 VPR128:$src)>; - -def : Pat<(v2f64 (bitconvert (v4i32 VPR128:$src))), (v2f64 VPR128:$src)>; -def : Pat<(v2i64 (bitconvert (v4i32 VPR128:$src))), (v2i64 VPR128:$src)>; -def : Pat<(v4f32 (bitconvert (v4i32 VPR128:$src))), (v4f32 VPR128:$src)>; -def : Pat<(v8i16 (bitconvert (v4i32 VPR128:$src))), (v8i16 VPR128:$src)>; -def : Pat<(v16i8 (bitconvert (v4i32 VPR128:$src))), (v16i8 VPR128:$src)>; - -def : Pat<(v2f64 (bitconvert (v4f32 VPR128:$src))), (v2f64 VPR128:$src)>; -def : Pat<(v2i64 (bitconvert (v4f32 VPR128:$src))), (v2i64 VPR128:$src)>; -def : Pat<(v4i32 (bitconvert (v4f32 VPR128:$src))), (v4i32 VPR128:$src)>; -def : Pat<(v8i16 (bitconvert (v4f32 VPR128:$src))), (v8i16 VPR128:$src)>; -def : Pat<(v16i8 (bitconvert (v4f32 VPR128:$src))), (v16i8 VPR128:$src)>; - -def : Pat<(v2f64 (bitconvert (v2i64 VPR128:$src))), (v2f64 VPR128:$src)>; -def : Pat<(v4f32 (bitconvert (v2i64 VPR128:$src))), (v4f32 VPR128:$src)>; -def : Pat<(v4i32 (bitconvert (v2i64 VPR128:$src))), (v4i32 VPR128:$src)>; -def : Pat<(v8i16 (bitconvert (v2i64 VPR128:$src))), (v8i16 VPR128:$src)>; -def : Pat<(v16i8 (bitconvert (v2i64 VPR128:$src))), (v16i8 VPR128:$src)>; - -def : Pat<(v2i64 (bitconvert (v2f64 VPR128:$src))), (v2i64 VPR128:$src)>; -def : Pat<(v4f32 (bitconvert (v2f64 VPR128:$src))), (v4f32 VPR128:$src)>; -def : Pat<(v4i32 (bitconvert (v2f64 VPR128:$src))), (v4i32 VPR128:$src)>; -def : Pat<(v8i16 (bitconvert (v2f64 VPR128:$src))), (v8i16 VPR128:$src)>; -def : Pat<(v16i8 (bitconvert (v2f64 VPR128:$src))), (v16i8 VPR128:$src)>; - -// ...and scalar bitcasts... 
-def : Pat<(f16 (bitconvert (v1i16 FPR16:$src))), (f16 FPR16:$src)>; -def : Pat<(f32 (bitconvert (v1i32 FPR32:$src))), (f32 FPR32:$src)>; -def : Pat<(f64 (bitconvert (v1i64 FPR64:$src))), (f64 FPR64:$src)>; -def : Pat<(f64 (bitconvert (v1f64 FPR64:$src))), (f64 FPR64:$src)>; - -def : Pat<(i64 (bitconvert (v1i64 FPR64:$src))), (FMOVxd $src)>; -def : Pat<(i64 (bitconvert (v1f64 FPR64:$src))), (FMOVxd $src)>; -def : Pat<(i64 (bitconvert (v2i32 FPR64:$src))), (FMOVxd $src)>; -def : Pat<(i64 (bitconvert (v2f32 FPR64:$src))), (FMOVxd $src)>; -def : Pat<(i64 (bitconvert (v4i16 FPR64:$src))), (FMOVxd $src)>; -def : Pat<(i64 (bitconvert (v8i8 FPR64:$src))), (FMOVxd $src)>; - -def : Pat<(i32 (bitconvert (v1i32 FPR32:$src))), (FMOVws $src)>; - -def : Pat<(v8i8 (bitconvert (v1i64 VPR64:$src))), (v8i8 VPR64:$src)>; -def : Pat<(v4i16 (bitconvert (v1i64 VPR64:$src))), (v4i16 VPR64:$src)>; -def : Pat<(v2i32 (bitconvert (v1i64 VPR64:$src))), (v2i32 VPR64:$src)>; - -def : Pat<(f64 (bitconvert (v8i8 VPR64:$src))), (f64 VPR64:$src)>; -def : Pat<(f64 (bitconvert (v4i16 VPR64:$src))), (f64 VPR64:$src)>; -def : Pat<(f64 (bitconvert (v2i32 VPR64:$src))), (f64 VPR64:$src)>; -def : Pat<(f64 (bitconvert (v2f32 VPR64:$src))), (f64 VPR64:$src)>; -def : Pat<(f64 (bitconvert (v1i64 VPR64:$src))), (f64 VPR64:$src)>; - -def : Pat<(f128 (bitconvert (v16i8 VPR128:$src))), (f128 VPR128:$src)>; -def : Pat<(f128 (bitconvert (v8i16 VPR128:$src))), (f128 VPR128:$src)>; -def : Pat<(f128 (bitconvert (v4i32 VPR128:$src))), (f128 VPR128:$src)>; -def : Pat<(f128 (bitconvert (v2i64 VPR128:$src))), (f128 VPR128:$src)>; -def : Pat<(f128 (bitconvert (v4f32 VPR128:$src))), (f128 VPR128:$src)>; -def : Pat<(f128 (bitconvert (v2f64 VPR128:$src))), (f128 VPR128:$src)>; - -def : Pat<(v1i16 (bitconvert (f16 FPR16:$src))), (v1i16 FPR16:$src)>; -def : Pat<(v1i32 (bitconvert (f32 FPR32:$src))), (v1i32 FPR32:$src)>; -def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>; -def : Pat<(v1f64 (bitconvert (f64 FPR64:$src))), (v1f64 FPR64:$src)>; - -def : Pat<(v1i64 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>; -def : Pat<(v1f64 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>; -def : Pat<(v2i32 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>; -def : Pat<(v2f32 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>; -def : Pat<(v4i16 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>; -def : Pat<(v8i8 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>; - -def : Pat<(v1i32 (bitconvert (i32 GPR32:$src))), (FMOVsw $src)>; - -def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), (v8i8 FPR64:$src)>; -def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), (v4i16 FPR64:$src)>; -def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), (v2i32 FPR64:$src)>; -def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), (v2f32 FPR64:$src)>; -def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>; - -def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), (v16i8 FPR128:$src)>; -def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))), (v8i16 FPR128:$src)>; -def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), (v4i32 FPR128:$src)>; -def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), (v2i64 FPR128:$src)>; -def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), (v4f32 FPR128:$src)>; -def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>; - -// Scalar Three Same - -def neon_uimm3 : Operand<i64>, - ImmLeaf<i64, [{return Imm < 8;}]> { - let ParserMatchClass = uimm3_asmoperand; - let PrintMethod = "printUImmHexOperand"; -} - -def neon_uimm4 : Operand<i64>, - ImmLeaf<i64, [{return Imm < 16;}]> 
{ - let ParserMatchClass = uimm4_asmoperand; - let PrintMethod = "printUImmHexOperand"; -} - -// Bitwise Extract -class NeonI_Extract<bit q, bits<2> op2, string asmop, - string OpS, RegisterOperand OpVPR, Operand OpImm> - : NeonI_BitExtract<q, op2, (outs OpVPR:$Rd), - (ins OpVPR:$Rn, OpVPR:$Rm, OpImm:$Index), - asmop # "\t$Rd." # OpS # ", $Rn." # OpS # - ", $Rm." # OpS # ", $Index", - [], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>{ - bits<4> Index; -} - -def EXTvvvi_8b : NeonI_Extract<0b0, 0b00, "ext", "8b", - VPR64, neon_uimm3> { - let Inst{14-11} = {0b0, Index{2}, Index{1}, Index{0}}; -} - -def EXTvvvi_16b: NeonI_Extract<0b1, 0b00, "ext", "16b", - VPR128, neon_uimm4> { - let Inst{14-11} = Index; -} - -class NI_Extract<ValueType OpTy, RegisterOperand OpVPR, Instruction INST, - Operand OpImm> - : Pat<(OpTy (Neon_vextract (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm), - (i64 OpImm:$Imm))), - (INST OpVPR:$Rn, OpVPR:$Rm, OpImm:$Imm)>; - -def : NI_Extract<v8i8, VPR64, EXTvvvi_8b, neon_uimm3>; -def : NI_Extract<v4i16, VPR64, EXTvvvi_8b, neon_uimm3>; -def : NI_Extract<v2i32, VPR64, EXTvvvi_8b, neon_uimm3>; -def : NI_Extract<v1i64, VPR64, EXTvvvi_8b, neon_uimm3>; -def : NI_Extract<v2f32, VPR64, EXTvvvi_8b, neon_uimm3>; -def : NI_Extract<v1f64, VPR64, EXTvvvi_8b, neon_uimm3>; -def : NI_Extract<v16i8, VPR128, EXTvvvi_16b, neon_uimm4>; -def : NI_Extract<v8i16, VPR128, EXTvvvi_16b, neon_uimm4>; -def : NI_Extract<v4i32, VPR128, EXTvvvi_16b, neon_uimm4>; -def : NI_Extract<v2i64, VPR128, EXTvvvi_16b, neon_uimm4>; -def : NI_Extract<v4f32, VPR128, EXTvvvi_16b, neon_uimm4>; -def : NI_Extract<v2f64, VPR128, EXTvvvi_16b, neon_uimm4>; - -// Table lookup -class NI_TBL<bit q, bits<2> op2, bits<2> len, bit op, - string asmop, string OpS, RegisterOperand OpVPR, - RegisterOperand VecList> - : NeonI_TBL<q, op2, len, op, - (outs OpVPR:$Rd), (ins VecList:$Rn, OpVPR:$Rm), - asmop # "\t$Rd." # OpS # ", $Rn, $Rm." # OpS, - [], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - -// The vectors in look up table are always 16b -multiclass NI_TBL_pat<bits<2> len, bit op, string asmop, string List> { - def _8b : NI_TBL<0, 0b00, len, op, asmop, "8b", VPR64, - !cast<RegisterOperand>(List # "16B_operand")>; - - def _16b : NI_TBL<1, 0b00, len, op, asmop, "16b", VPR128, - !cast<RegisterOperand>(List # "16B_operand")>; -} - -defm TBL1 : NI_TBL_pat<0b00, 0b0, "tbl", "VOne">; -defm TBL2 : NI_TBL_pat<0b01, 0b0, "tbl", "VPair">; -defm TBL3 : NI_TBL_pat<0b10, 0b0, "tbl", "VTriple">; -defm TBL4 : NI_TBL_pat<0b11, 0b0, "tbl", "VQuad">; - -// Table lookup extension -class NI_TBX<bit q, bits<2> op2, bits<2> len, bit op, - string asmop, string OpS, RegisterOperand OpVPR, - RegisterOperand VecList> - : NeonI_TBL<q, op2, len, op, - (outs OpVPR:$Rd), (ins OpVPR:$src, VecList:$Rn, OpVPR:$Rm), - asmop # "\t$Rd." # OpS # ", $Rn, $Rm." 
# OpS, - [], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]> { - let Constraints = "$src = $Rd"; -} - -// The vectors in look up table are always 16b -multiclass NI_TBX_pat<bits<2> len, bit op, string asmop, string List> { - def _8b : NI_TBX<0, 0b00, len, op, asmop, "8b", VPR64, - !cast<RegisterOperand>(List # "16B_operand")>; - - def _16b : NI_TBX<1, 0b00, len, op, asmop, "16b", VPR128, - !cast<RegisterOperand>(List # "16B_operand")>; -} - -defm TBX1 : NI_TBX_pat<0b00, 0b1, "tbx", "VOne">; -defm TBX2 : NI_TBX_pat<0b01, 0b1, "tbx", "VPair">; -defm TBX3 : NI_TBX_pat<0b10, 0b1, "tbx", "VTriple">; -defm TBX4 : NI_TBX_pat<0b11, 0b1, "tbx", "VQuad">; - -class NeonI_INS_main<string asmop, string Res, ValueType ResTy, - RegisterClass OpGPR, ValueType OpTy, Operand OpImm> - : NeonI_copy<0b1, 0b0, 0b0011, - (outs VPR128:$Rd), (ins VPR128:$src, OpGPR:$Rn, OpImm:$Imm), - asmop # "\t$Rd." # Res # "[$Imm], $Rn", - [(set (ResTy VPR128:$Rd), - (ResTy (vector_insert - (ResTy VPR128:$src), - (OpTy OpGPR:$Rn), - (OpImm:$Imm))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { - bits<4> Imm; - let Constraints = "$src = $Rd"; -} - -//Insert element (vector, from main) -def INSbw : NeonI_INS_main<"ins", "b", v16i8, GPR32, i32, - neon_uimm4_bare> { - let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1}; -} -def INShw : NeonI_INS_main<"ins", "h", v8i16, GPR32, i32, - neon_uimm3_bare> { - let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0}; -} -def INSsw : NeonI_INS_main<"ins", "s", v4i32, GPR32, i32, - neon_uimm2_bare> { - let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0}; -} -def INSdx : NeonI_INS_main<"ins", "d", v2i64, GPR64, i64, - neon_uimm1_bare> { - let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0}; -} - -def : NeonInstAlias<"mov $Rd.b[$Imm], $Rn", - (INSbw VPR128:$Rd, GPR32:$Rn, neon_uimm4_bare:$Imm), 0>; -def : NeonInstAlias<"mov $Rd.h[$Imm], $Rn", - (INShw VPR128:$Rd, GPR32:$Rn, neon_uimm3_bare:$Imm), 0>; -def : NeonInstAlias<"mov $Rd.s[$Imm], $Rn", - (INSsw VPR128:$Rd, GPR32:$Rn, neon_uimm2_bare:$Imm), 0>; -def : NeonInstAlias<"mov $Rd.d[$Imm], $Rn", - (INSdx VPR128:$Rd, GPR64:$Rn, neon_uimm1_bare:$Imm), 0>; - -class Neon_INS_main_pattern <ValueType ResTy,ValueType ExtResTy, - RegisterClass OpGPR, ValueType OpTy, - Operand OpImm, Instruction INS> - : Pat<(ResTy (vector_insert - (ResTy VPR64:$src), - (OpTy OpGPR:$Rn), - (OpImm:$Imm))), - (ResTy (EXTRACT_SUBREG - (ExtResTy (INS (ExtResTy (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)), - OpGPR:$Rn, OpImm:$Imm)), sub_64))>; - -def INSbw_pattern : Neon_INS_main_pattern<v8i8, v16i8, GPR32, i32, - neon_uimm3_bare, INSbw>; -def INShw_pattern : Neon_INS_main_pattern<v4i16, v8i16, GPR32, i32, - neon_uimm2_bare, INShw>; -def INSsw_pattern : Neon_INS_main_pattern<v2i32, v4i32, GPR32, i32, - neon_uimm1_bare, INSsw>; -def INSdx_pattern : Neon_INS_main_pattern<v1i64, v2i64, GPR64, i64, - neon_uimm0_bare, INSdx>; - -class NeonI_INS_element<string asmop, string Res, Operand ResImm> - : NeonI_insert<0b1, 0b1, - (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn, - ResImm:$Immd, ResImm:$Immn), - asmop # "\t$Rd." # Res # "[$Immd], $Rn." 
# Res # "[$Immn]", - [], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { - let Constraints = "$src = $Rd"; - bits<4> Immd; - bits<4> Immn; -} - -//Insert element (vector, from element) -def INSELb : NeonI_INS_element<"ins", "b", neon_uimm4_bare> { - let Inst{20-16} = {Immd{3}, Immd{2}, Immd{1}, Immd{0}, 0b1}; - let Inst{14-11} = {Immn{3}, Immn{2}, Immn{1}, Immn{0}}; -} -def INSELh : NeonI_INS_element<"ins", "h", neon_uimm3_bare> { - let Inst{20-16} = {Immd{2}, Immd{1}, Immd{0}, 0b1, 0b0}; - let Inst{14-11} = {Immn{2}, Immn{1}, Immn{0}, 0b0}; - // bit 11 is unspecified, but should be set to zero. -} -def INSELs : NeonI_INS_element<"ins", "s", neon_uimm2_bare> { - let Inst{20-16} = {Immd{1}, Immd{0}, 0b1, 0b0, 0b0}; - let Inst{14-11} = {Immn{1}, Immn{0}, 0b0, 0b0}; - // bits 11-12 are unspecified, but should be set to zero. -} -def INSELd : NeonI_INS_element<"ins", "d", neon_uimm1_bare> { - let Inst{20-16} = {Immd, 0b1, 0b0, 0b0, 0b0}; - let Inst{14-11} = {Immn{0}, 0b0, 0b0, 0b0}; - // bits 11-13 are unspecified, but should be set to zero. -} - -def : NeonInstAlias<"mov $Rd.b[$Immd], $Rn.b[$Immn]", - (INSELb VPR128:$Rd, VPR128:$Rn, - neon_uimm4_bare:$Immd, neon_uimm4_bare:$Immn), 0>; -def : NeonInstAlias<"mov $Rd.h[$Immd], $Rn.h[$Immn]", - (INSELh VPR128:$Rd, VPR128:$Rn, - neon_uimm3_bare:$Immd, neon_uimm3_bare:$Immn), 0>; -def : NeonInstAlias<"mov $Rd.s[$Immd], $Rn.s[$Immn]", - (INSELs VPR128:$Rd, VPR128:$Rn, - neon_uimm2_bare:$Immd, neon_uimm2_bare:$Immn), 0>; -def : NeonInstAlias<"mov $Rd.d[$Immd], $Rn.d[$Immn]", - (INSELd VPR128:$Rd, VPR128:$Rn, - neon_uimm1_bare:$Immd, neon_uimm1_bare:$Immn), 0>; - -multiclass Neon_INS_elt_pattern<ValueType ResTy, ValueType NaTy, - ValueType MidTy, Operand StImm, Operand NaImm, - Instruction INS> { -def : Pat<(ResTy (vector_insert - (ResTy VPR128:$src), - (MidTy (vector_extract - (ResTy VPR128:$Rn), - (StImm:$Immn))), - (StImm:$Immd))), - (INS (ResTy VPR128:$src), (ResTy VPR128:$Rn), - StImm:$Immd, StImm:$Immn)>; - -def : Pat <(ResTy (vector_insert - (ResTy VPR128:$src), - (MidTy (vector_extract - (NaTy VPR64:$Rn), - (NaImm:$Immn))), - (StImm:$Immd))), - (INS (ResTy VPR128:$src), - (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$Rn), sub_64)), - StImm:$Immd, NaImm:$Immn)>; - -def : Pat <(NaTy (vector_insert - (NaTy VPR64:$src), - (MidTy (vector_extract - (ResTy VPR128:$Rn), - (StImm:$Immn))), - (NaImm:$Immd))), - (NaTy (EXTRACT_SUBREG - (ResTy (INS - (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)), - (ResTy VPR128:$Rn), - NaImm:$Immd, StImm:$Immn)), - sub_64))>; - -def : Pat <(NaTy (vector_insert - (NaTy VPR64:$src), - (MidTy (vector_extract - (NaTy VPR64:$Rn), - (NaImm:$Immn))), - (NaImm:$Immd))), - (NaTy (EXTRACT_SUBREG - (ResTy (INS - (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)), - (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$Rn), sub_64)), - NaImm:$Immd, NaImm:$Immn)), - sub_64))>; -} - -defm : Neon_INS_elt_pattern<v4f32, v2f32, f32, neon_uimm2_bare, - neon_uimm1_bare, INSELs>; -defm : Neon_INS_elt_pattern<v2f64, v1f64, f64, neon_uimm1_bare, - neon_uimm0_bare, INSELd>; -defm : Neon_INS_elt_pattern<v16i8, v8i8, i32, neon_uimm4_bare, - neon_uimm3_bare, INSELb>; -defm : Neon_INS_elt_pattern<v8i16, v4i16, i32, neon_uimm3_bare, - neon_uimm2_bare, INSELh>; -defm : Neon_INS_elt_pattern<v4i32, v2i32, i32, neon_uimm2_bare, - neon_uimm1_bare, INSELs>; -defm : Neon_INS_elt_pattern<v2i64, v1i64, i64, neon_uimm1_bare, - neon_uimm0_bare, INSELd>; - -multiclass Neon_INS_elt_float_pattern<ValueType ResTy, ValueType NaTy, - 
ValueType MidTy, - RegisterClass OpFPR, Operand ResImm, - SubRegIndex SubIndex, Instruction INS> { -def : Pat <(ResTy (vector_insert - (ResTy VPR128:$src), - (MidTy OpFPR:$Rn), - (ResImm:$Imm))), - (INS (ResTy VPR128:$src), - (ResTy (SUBREG_TO_REG (i64 0), OpFPR:$Rn, SubIndex)), - ResImm:$Imm, - (i64 0))>; - -def : Pat <(NaTy (vector_insert - (NaTy VPR64:$src), - (MidTy OpFPR:$Rn), - (ResImm:$Imm))), - (NaTy (EXTRACT_SUBREG - (ResTy (INS - (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)), - (ResTy (SUBREG_TO_REG (i64 0), (MidTy OpFPR:$Rn), SubIndex)), - ResImm:$Imm, - (i64 0))), - sub_64))>; -} - -defm : Neon_INS_elt_float_pattern<v4f32, v2f32, f32, FPR32, neon_uimm2_bare, - sub_32, INSELs>; -defm : Neon_INS_elt_float_pattern<v2f64, v1f64, f64, FPR64, neon_uimm1_bare, - sub_64, INSELd>; - -class NeonI_SMOV<string asmop, string Res, bit Q, - ValueType OpTy, ValueType eleTy, - Operand OpImm, RegisterClass ResGPR, ValueType ResTy> - : NeonI_copy<Q, 0b0, 0b0101, - (outs ResGPR:$Rd), (ins VPR128:$Rn, OpImm:$Imm), - asmop # "\t$Rd, $Rn." # Res # "[$Imm]", - [(set (ResTy ResGPR:$Rd), - (ResTy (sext_inreg - (ResTy (vector_extract - (OpTy VPR128:$Rn), (OpImm:$Imm))), - eleTy)))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]> { - bits<4> Imm; -} - -//Signed integer move (main, from element) -def SMOVwb : NeonI_SMOV<"smov", "b", 0b0, v16i8, i8, neon_uimm4_bare, - GPR32, i32> { - let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1}; -} -def SMOVwh : NeonI_SMOV<"smov", "h", 0b0, v8i16, i16, neon_uimm3_bare, - GPR32, i32> { - let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0}; -} -def SMOVxb : NeonI_SMOV<"smov", "b", 0b1, v16i8, i8, neon_uimm4_bare, - GPR64, i64> { - let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1}; -} -def SMOVxh : NeonI_SMOV<"smov", "h", 0b1, v8i16, i16, neon_uimm3_bare, - GPR64, i64> { - let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0}; -} -def SMOVxs : NeonI_SMOV<"smov", "s", 0b1, v4i32, i32, neon_uimm2_bare, - GPR64, i64> { - let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0}; -} - -multiclass Neon_SMOVx_pattern <ValueType StTy, ValueType NaTy, - ValueType eleTy, Operand StImm, Operand NaImm, - Instruction SMOVI> { - def : Pat<(i64 (sext_inreg - (i64 (anyext - (i32 (vector_extract - (StTy VPR128:$Rn), (StImm:$Imm))))), - eleTy)), - (SMOVI VPR128:$Rn, StImm:$Imm)>; - - def : Pat<(i64 (sext - (i32 (vector_extract - (StTy VPR128:$Rn), (StImm:$Imm))))), - (SMOVI VPR128:$Rn, StImm:$Imm)>; - - def : Pat<(i64 (sext_inreg - (i64 (vector_extract - (NaTy VPR64:$Rn), (NaImm:$Imm))), - eleTy)), - (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), - NaImm:$Imm)>; - - def : Pat<(i64 (sext_inreg - (i64 (anyext - (i32 (vector_extract - (NaTy VPR64:$Rn), (NaImm:$Imm))))), - eleTy)), - (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), - NaImm:$Imm)>; - - def : Pat<(i64 (sext - (i32 (vector_extract - (NaTy VPR64:$Rn), (NaImm:$Imm))))), - (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), - NaImm:$Imm)>; -} - -defm : Neon_SMOVx_pattern<v16i8, v8i8, i8, neon_uimm4_bare, - neon_uimm3_bare, SMOVxb>; -defm : Neon_SMOVx_pattern<v8i16, v4i16, i16, neon_uimm3_bare, - neon_uimm2_bare, SMOVxh>; -defm : Neon_SMOVx_pattern<v4i32, v2i32, i32, neon_uimm2_bare, - neon_uimm1_bare, SMOVxs>; - -class Neon_SMOVw_pattern <ValueType StTy, ValueType NaTy, - ValueType eleTy, Operand StImm, Operand NaImm, - Instruction SMOVI> - : Pat<(i32 (sext_inreg - (i32 (vector_extract - (NaTy VPR64:$Rn), (NaImm:$Imm))), - eleTy)), - (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, 
sub_64)), - NaImm:$Imm)>; - -def : Neon_SMOVw_pattern<v16i8, v8i8, i8, neon_uimm4_bare, - neon_uimm3_bare, SMOVwb>; -def : Neon_SMOVw_pattern<v8i16, v4i16, i16, neon_uimm3_bare, - neon_uimm2_bare, SMOVwh>; - -class NeonI_UMOV<string asmop, string Res, bit Q, - ValueType OpTy, Operand OpImm, - RegisterClass ResGPR, ValueType ResTy> - : NeonI_copy<Q, 0b0, 0b0111, - (outs ResGPR:$Rd), (ins VPR128:$Rn, OpImm:$Imm), - asmop # "\t$Rd, $Rn." # Res # "[$Imm]", - [(set (ResTy ResGPR:$Rd), - (ResTy (vector_extract - (OpTy VPR128:$Rn), (OpImm:$Imm))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]> { - bits<4> Imm; -} - -//Unsigned integer move (main, from element) -def UMOVwb : NeonI_UMOV<"umov", "b", 0b0, v16i8, neon_uimm4_bare, - GPR32, i32> { - let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1}; -} -def UMOVwh : NeonI_UMOV<"umov", "h", 0b0, v8i16, neon_uimm3_bare, - GPR32, i32> { - let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0}; -} -def UMOVws : NeonI_UMOV<"umov", "s", 0b0, v4i32, neon_uimm2_bare, - GPR32, i32> { - let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0}; -} -def UMOVxd : NeonI_UMOV<"umov", "d", 0b1, v2i64, neon_uimm1_bare, - GPR64, i64> { - let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0}; -} - -def : NeonInstAlias<"mov $Rd, $Rn.s[$Imm]", - (UMOVws GPR32:$Rd, VPR128:$Rn, neon_uimm2_bare:$Imm), 0>; -def : NeonInstAlias<"mov $Rd, $Rn.d[$Imm]", - (UMOVxd GPR64:$Rd, VPR128:$Rn, neon_uimm1_bare:$Imm), 0>; - -class Neon_UMOV_pattern <ValueType StTy, ValueType NaTy, ValueType ResTy, - Operand StImm, Operand NaImm, - Instruction SMOVI> - : Pat<(ResTy (vector_extract - (NaTy VPR64:$Rn), NaImm:$Imm)), - (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), - NaImm:$Imm)>; - -def : Neon_UMOV_pattern<v16i8, v8i8, i32, neon_uimm4_bare, - neon_uimm3_bare, UMOVwb>; -def : Neon_UMOV_pattern<v8i16, v4i16, i32, neon_uimm3_bare, - neon_uimm2_bare, UMOVwh>; -def : Neon_UMOV_pattern<v4i32, v2i32, i32, neon_uimm2_bare, - neon_uimm1_bare, UMOVws>; - -def : Pat<(i32 (and - (i32 (vector_extract - (v16i8 VPR128:$Rn), (neon_uimm4_bare:$Imm))), - 255)), - (UMOVwb VPR128:$Rn, neon_uimm4_bare:$Imm)>; - -def : Pat<(i32 (and - (i32 (vector_extract - (v8i16 VPR128:$Rn), (neon_uimm3_bare:$Imm))), - 65535)), - (UMOVwh VPR128:$Rn, neon_uimm3_bare:$Imm)>; - -def : Pat<(i64 (zext - (i32 (vector_extract - (v2i64 VPR128:$Rn), (neon_uimm1_bare:$Imm))))), - (UMOVxd VPR128:$Rn, neon_uimm1_bare:$Imm)>; - -def : Pat<(i32 (and - (i32 (vector_extract - (v8i8 VPR64:$Rn), (neon_uimm3_bare:$Imm))), - 255)), - (UMOVwb (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64), - neon_uimm3_bare:$Imm)>; - -def : Pat<(i32 (and - (i32 (vector_extract - (v4i16 VPR64:$Rn), (neon_uimm2_bare:$Imm))), - 65535)), - (UMOVwh (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64), - neon_uimm2_bare:$Imm)>; - -def : Pat<(i64 (zext - (i32 (vector_extract - (v1i64 VPR64:$Rn), (neon_uimm0_bare:$Imm))))), - (UMOVxd (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64), - neon_uimm0_bare:$Imm)>; - -// Additional copy patterns for scalar types -def : Pat<(i32 (vector_extract (v1i8 FPR8:$Rn), (i64 0))), - (UMOVwb (v16i8 - (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8)), (i64 0))>; - -def : Pat<(i32 (vector_extract (v1i16 FPR16:$Rn), (i64 0))), - (UMOVwh (v8i16 - (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16)), (i64 0))>; - -def : Pat<(i32 (vector_extract (v1i32 FPR32:$Rn), (i64 0))), - (FMOVws FPR32:$Rn)>; - -def : Pat<(i64 (vector_extract (v1i64 FPR64:$Rn), (i64 0))), - (FMOVxd FPR64:$Rn)>; - -def : Pat<(f64 (vector_extract (v1f64 FPR64:$Rn), (i64 0))), - (f64 FPR64:$Rn)>; - -def : 
Pat<(v1i8 (scalar_to_vector GPR32:$Rn)), - (v1i8 (EXTRACT_SUBREG (v16i8 - (INSbw (v16i8 (IMPLICIT_DEF)), $Rn, (i64 0))), - sub_8))>; - -def : Pat<(v1i16 (scalar_to_vector GPR32:$Rn)), - (v1i16 (EXTRACT_SUBREG (v8i16 - (INShw (v8i16 (IMPLICIT_DEF)), $Rn, (i64 0))), - sub_16))>; - -def : Pat<(v1i32 (scalar_to_vector GPR32:$src)), - (FMOVsw $src)>; - -def : Pat<(v1i64 (scalar_to_vector GPR64:$src)), - (FMOVdx $src)>; - -def : Pat<(v8i8 (scalar_to_vector GPR32:$Rn)), - (v8i8 (EXTRACT_SUBREG (v16i8 - (INSbw (v16i8 (IMPLICIT_DEF)), $Rn, (i64 0))), - sub_64))>; - -def : Pat<(v4i16 (scalar_to_vector GPR32:$Rn)), - (v4i16 (EXTRACT_SUBREG (v8i16 - (INShw (v8i16 (IMPLICIT_DEF)), $Rn, (i64 0))), - sub_64))>; - -def : Pat<(v2i32 (scalar_to_vector GPR32:$Rn)), - (v2i32 (EXTRACT_SUBREG (v16i8 - (INSsw (v4i32 (IMPLICIT_DEF)), $Rn, (i64 0))), - sub_64))>; - -def : Pat<(v16i8 (scalar_to_vector GPR32:$Rn)), - (INSbw (v16i8 (IMPLICIT_DEF)), $Rn, (i64 0))>; - -def : Pat<(v8i16 (scalar_to_vector GPR32:$Rn)), - (INShw (v8i16 (IMPLICIT_DEF)), $Rn, (i64 0))>; - -def : Pat<(v4i32 (scalar_to_vector GPR32:$Rn)), - (INSsw (v4i32 (IMPLICIT_DEF)), $Rn, (i64 0))>; - -def : Pat<(v2i64 (scalar_to_vector GPR64:$Rn)), - (INSdx (v2i64 (IMPLICIT_DEF)), $Rn, (i64 0))>; - -def : Pat<(v2f32 (scalar_to_vector (f32 FPR32:$Rn))), - (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32)>; -def : Pat<(v4f32 (scalar_to_vector (f32 FPR32:$Rn))), - (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32)>; - -def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Rn))), - (v1f64 FPR64:$Rn)>; - -def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$src))), - (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), - (f64 FPR64:$src), sub_64)>; - -class NeonI_DUP_Elt<bit Q, string asmop, string rdlane, string rnlane, - RegisterOperand ResVPR, Operand OpImm> - : NeonI_copy<Q, 0b0, 0b0000, (outs ResVPR:$Rd), - (ins VPR128:$Rn, OpImm:$Imm), - asmop # "\t$Rd" # rdlane # ", $Rn" # rnlane # "[$Imm]", - [], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]> { - bits<4> Imm; -} - -def DUPELT16b : NeonI_DUP_Elt<0b1, "dup", ".16b", ".b", VPR128, - neon_uimm4_bare> { - let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1}; -} - -def DUPELT8h : NeonI_DUP_Elt<0b1, "dup", ".8h", ".h", VPR128, - neon_uimm3_bare> { - let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0}; -} - -def DUPELT4s : NeonI_DUP_Elt<0b1, "dup", ".4s", ".s", VPR128, - neon_uimm2_bare> { - let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0}; -} - -def DUPELT2d : NeonI_DUP_Elt<0b1, "dup", ".2d", ".d", VPR128, - neon_uimm1_bare> { - let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0}; -} - -def DUPELT8b : NeonI_DUP_Elt<0b0, "dup", ".8b", ".b", VPR64, - neon_uimm4_bare> { - let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1}; -} - -def DUPELT4h : NeonI_DUP_Elt<0b0, "dup", ".4h", ".h", VPR64, - neon_uimm3_bare> { - let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0}; -} - -def DUPELT2s : NeonI_DUP_Elt<0b0, "dup", ".2s", ".s", VPR64, - neon_uimm2_bare> { - let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0}; -} - -multiclass NeonI_DUP_Elt_pattern<Instruction DUPELT, ValueType ResTy, - ValueType OpTy,ValueType NaTy, - ValueType ExTy, Operand OpLImm, - Operand OpNImm> { -def : Pat<(ResTy (Neon_vduplane (OpTy VPR128:$Rn), OpLImm:$Imm)), - (ResTy (DUPELT (OpTy VPR128:$Rn), OpLImm:$Imm))>; - -def : Pat<(ResTy (Neon_vduplane - (NaTy VPR64:$Rn), OpNImm:$Imm)), - (ResTy (DUPELT - (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), OpNImm:$Imm))>; -} -defm : NeonI_DUP_Elt_pattern<DUPELT16b, v16i8, v16i8, v8i8, v16i8, - neon_uimm4_bare, neon_uimm3_bare>; -defm : 
NeonI_DUP_Elt_pattern<DUPELT8b, v8i8, v16i8, v8i8, v16i8, - neon_uimm4_bare, neon_uimm3_bare>; -defm : NeonI_DUP_Elt_pattern<DUPELT8h, v8i16, v8i16, v4i16, v8i16, - neon_uimm3_bare, neon_uimm2_bare>; -defm : NeonI_DUP_Elt_pattern<DUPELT4h, v4i16, v8i16, v4i16, v8i16, - neon_uimm3_bare, neon_uimm2_bare>; -defm : NeonI_DUP_Elt_pattern<DUPELT4s, v4i32, v4i32, v2i32, v4i32, - neon_uimm2_bare, neon_uimm1_bare>; -defm : NeonI_DUP_Elt_pattern<DUPELT2s, v2i32, v4i32, v2i32, v4i32, - neon_uimm2_bare, neon_uimm1_bare>; -defm : NeonI_DUP_Elt_pattern<DUPELT2d, v2i64, v2i64, v1i64, v2i64, - neon_uimm1_bare, neon_uimm0_bare>; -defm : NeonI_DUP_Elt_pattern<DUPELT4s, v4f32, v4f32, v2f32, v4f32, - neon_uimm2_bare, neon_uimm1_bare>; -defm : NeonI_DUP_Elt_pattern<DUPELT2s, v2f32, v4f32, v2f32, v4f32, - neon_uimm2_bare, neon_uimm1_bare>; -defm : NeonI_DUP_Elt_pattern<DUPELT2d, v2f64, v2f64, v1f64, v2f64, - neon_uimm1_bare, neon_uimm0_bare>; - -def : Pat<(v2f32 (Neon_vdup (f32 FPR32:$Rn))), - (v2f32 (DUPELT2s - (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32), - (i64 0)))>; -def : Pat<(v4f32 (Neon_vdup (f32 FPR32:$Rn))), - (v4f32 (DUPELT4s - (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32), - (i64 0)))>; -def : Pat<(v2f64 (Neon_vdup (f64 FPR64:$Rn))), - (v2f64 (DUPELT2d - (SUBREG_TO_REG (i64 0), FPR64:$Rn, sub_64), - (i64 0)))>; - -multiclass NeonI_DUP_pattern<Instruction DUPELT, ValueType ResTy, - ValueType OpTy, RegisterClass OpRC, - Operand OpNImm, SubRegIndex SubIndex> { -def : Pat<(ResTy (Neon_vduplane (OpTy OpRC:$Rn), OpNImm:$Imm)), - (ResTy (DUPELT - (SUBREG_TO_REG (i64 0), OpRC:$Rn, SubIndex), OpNImm:$Imm))>; -} - -defm : NeonI_DUP_pattern<DUPELT4h, v4i16, v1i16, FPR16, neon_uimm2_bare,sub_16>; -defm : NeonI_DUP_pattern<DUPELT4s, v4i32, v1i32, FPR32, neon_uimm2_bare,sub_32>; -defm : NeonI_DUP_pattern<DUPELT8b, v8i8, v1i8, FPR8, neon_uimm3_bare, sub_8>; -defm : NeonI_DUP_pattern<DUPELT8h, v8i16, v1i16, FPR16, neon_uimm3_bare,sub_16>; -defm : NeonI_DUP_pattern<DUPELT16b, v16i8, v1i8, FPR8, neon_uimm4_bare, sub_8>; - -class NeonI_DUP<bit Q, string asmop, string rdlane, - RegisterOperand ResVPR, ValueType ResTy, - RegisterClass OpGPR, ValueType OpTy> - : NeonI_copy<Q, 0b0, 0b0001, (outs ResVPR:$Rd), (ins OpGPR:$Rn), - asmop # "\t$Rd" # rdlane # ", $Rn", - [(set (ResTy ResVPR:$Rd), - (ResTy (Neon_vdup (OpTy OpGPR:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - -def DUP16b : NeonI_DUP<0b1, "dup", ".16b", VPR128, v16i8, GPR32, i32> { - let Inst{20-16} = 0b00001; - // bits 17-20 are unspecified, but should be set to zero. -} - -def DUP8h : NeonI_DUP<0b1, "dup", ".8h", VPR128, v8i16, GPR32, i32> { - let Inst{20-16} = 0b00010; - // bits 18-20 are unspecified, but should be set to zero. -} - -def DUP4s : NeonI_DUP<0b1, "dup", ".4s", VPR128, v4i32, GPR32, i32> { - let Inst{20-16} = 0b00100; - // bits 19-20 are unspecified, but should be set to zero. -} - -def DUP2d : NeonI_DUP<0b1, "dup", ".2d", VPR128, v2i64, GPR64, i64> { - let Inst{20-16} = 0b01000; - // bit 20 is unspecified, but should be set to zero. -} - -def DUP8b : NeonI_DUP<0b0, "dup", ".8b", VPR64, v8i8, GPR32, i32> { - let Inst{20-16} = 0b00001; - // bits 17-20 are unspecified, but should be set to zero. -} - -def DUP4h : NeonI_DUP<0b0, "dup", ".4h", VPR64, v4i16, GPR32, i32> { - let Inst{20-16} = 0b00010; - // bits 18-20 are unspecified, but should be set to zero. -} - -def DUP2s : NeonI_DUP<0b0, "dup", ".2s", VPR64, v2i32, GPR32, i32> { - let Inst{20-16} = 0b00100; - // bits 19-20 are unspecified, but should be set to zero. 
-} - -// patterns for CONCAT_VECTORS -multiclass Concat_Vector_Pattern<ValueType ResTy, ValueType OpTy> { -def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), undef)), - (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)>; -def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), (OpTy VPR64:$Rm))), - (INSELd - (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), - (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rm, sub_64)), - (i64 1), - (i64 0))>; -def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), (OpTy VPR64:$Rn))), - (DUPELT2d - (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), - (i64 0))> ; -} - -defm : Concat_Vector_Pattern<v16i8, v8i8>; -defm : Concat_Vector_Pattern<v8i16, v4i16>; -defm : Concat_Vector_Pattern<v4i32, v2i32>; -defm : Concat_Vector_Pattern<v2i64, v1i64>; -defm : Concat_Vector_Pattern<v4f32, v2f32>; -defm : Concat_Vector_Pattern<v2f64, v1f64>; - -def : Pat<(v2i32 (concat_vectors (v1i32 FPR32:$Rn), undef)), - (v2i32 (SUBREG_TO_REG(i64 0), $Rn, sub_32))>; -def : Pat<(v2i32 (concat_vectors (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))), - (EXTRACT_SUBREG - (v4i32 (INSELs - (v4i32 (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32)), - (v4i32 (SUBREG_TO_REG (i64 0), FPR32:$Rm, sub_32)), - (i64 1), - (i64 0))), - sub_64)>; -def : Pat<(v2i32 (concat_vectors (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rn))), - (DUPELT2s (v4i32 (SUBREG_TO_REG(i64 0), $Rn, sub_32)), 0)>; - -//patterns for EXTRACT_SUBVECTOR -def : Pat<(v8i8 (extract_subvector (v16i8 VPR128:$Rn), (i64 0))), - (v8i8 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>; -def : Pat<(v4i16 (extract_subvector (v8i16 VPR128:$Rn), (i64 0))), - (v4i16 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>; -def : Pat<(v2i32 (extract_subvector (v4i32 VPR128:$Rn), (i64 0))), - (v2i32 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>; -def : Pat<(v1i64 (extract_subvector (v2i64 VPR128:$Rn), (i64 0))), - (v1i64 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>; -def : Pat<(v2f32 (extract_subvector (v4f32 VPR128:$Rn), (i64 0))), - (v2f32 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>; -def : Pat<(v1f64 (extract_subvector (v2f64 VPR128:$Rn), (i64 0))), - (v1f64 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>; - -// The followings are for instruction class (3V Elem) - -// Variant 1 - -class NI_2VE<bit q, bit u, bits<2> size, bits<4> opcode, - string asmop, string ResS, string OpS, string EleOpS, - Operand OpImm, RegisterOperand ResVPR, - RegisterOperand OpVPR, RegisterOperand EleOpVPR> - : NeonI_2VElem<q, u, size, opcode, - (outs ResVPR:$Rd), (ins ResVPR:$src, OpVPR:$Rn, - EleOpVPR:$Re, OpImm:$Index), - asmop # "\t$Rd." # ResS # ", $Rn." # OpS # - ", $Re." # EleOpS # "[$Index]", - [], - NoItinerary>, - Sched<[WriteFPMAC, ReadFPMAC, ReadFPMAC, ReadFPMAC]> { - bits<3> Index; - bits<5> Re; - - let Constraints = "$src = $Rd"; -} - -multiclass NI_2VE_v1<bit u, bits<4> opcode, string asmop> { - // vector register class for element is always 128-bit to cover the max index - def _2s4s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s", - neon_uimm2_bare, VPR64, VPR64, VPR128> { - let Inst{11} = {Index{1}}; - let Inst{21} = {Index{0}}; - let Inst{20-16} = Re; - } - - def _4s4s : NI_2VE<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s", - neon_uimm2_bare, VPR128, VPR128, VPR128> { - let Inst{11} = {Index{1}}; - let Inst{21} = {Index{0}}; - let Inst{20-16} = Re; - } - - // Index operations on 16-bit(H) elements are restricted to using v0-v15. 
- def _4h8h : NI_2VE<0b0, u, 0b01, opcode, asmop, "4h", "4h", "h", - neon_uimm3_bare, VPR64, VPR64, VPR128Lo> { - let Inst{11} = {Index{2}}; - let Inst{21} = {Index{1}}; - let Inst{20} = {Index{0}}; - let Inst{19-16} = Re{3-0}; - } - - def _8h8h : NI_2VE<0b1, u, 0b01, opcode, asmop, "8h", "8h", "h", - neon_uimm3_bare, VPR128, VPR128, VPR128Lo> { - let Inst{11} = {Index{2}}; - let Inst{21} = {Index{1}}; - let Inst{20} = {Index{0}}; - let Inst{19-16} = Re{3-0}; - } -} - -defm MLAvve : NI_2VE_v1<0b1, 0b0000, "mla">; -defm MLSvve : NI_2VE_v1<0b1, 0b0100, "mls">; - -// Pattern for lane in 128-bit vector -class NI_2VE_laneq<Instruction INST, Operand OpImm, SDPatternOperator op, - RegisterOperand ResVPR, RegisterOperand OpVPR, - RegisterOperand EleOpVPR, ValueType ResTy, ValueType OpTy, - ValueType EleOpTy> - : Pat<(ResTy (op (ResTy ResVPR:$src), (OpTy OpVPR:$Rn), - (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))), - (INST ResVPR:$src, OpVPR:$Rn, EleOpVPR:$Re, OpImm:$Index)>; - -// Pattern for lane in 64-bit vector -class NI_2VE_lane<Instruction INST, Operand OpImm, SDPatternOperator op, - RegisterOperand ResVPR, RegisterOperand OpVPR, - RegisterOperand EleOpVPR, ValueType ResTy, ValueType OpTy, - ValueType EleOpTy> - : Pat<(ResTy (op (ResTy ResVPR:$src), (OpTy OpVPR:$Rn), - (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))), - (INST ResVPR:$src, OpVPR:$Rn, - (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>; - -multiclass NI_2VE_v1_pat<string subop, SDPatternOperator op> -{ - def : NI_2VE_laneq<!cast<Instruction>(subop # "_2s4s"), neon_uimm2_bare, - op, VPR64, VPR64, VPR128, v2i32, v2i32, v4i32>; - - def : NI_2VE_laneq<!cast<Instruction>(subop # "_4s4s"), neon_uimm2_bare, - op, VPR128, VPR128, VPR128, v4i32, v4i32, v4i32>; - - def : NI_2VE_laneq<!cast<Instruction>(subop # "_4h8h"), neon_uimm3_bare, - op, VPR64, VPR64, VPR128Lo, v4i16, v4i16, v8i16>; - - def : NI_2VE_laneq<!cast<Instruction>(subop # "_8h8h"), neon_uimm3_bare, - op, VPR128, VPR128, VPR128Lo, v8i16, v8i16, v8i16>; - - // Index can only be half of the max value for lane in 64-bit vector - - def : NI_2VE_lane<!cast<Instruction>(subop # "_2s4s"), neon_uimm1_bare, - op, VPR64, VPR64, VPR64, v2i32, v2i32, v2i32>; - - def : NI_2VE_lane<!cast<Instruction>(subop # "_4h8h"), neon_uimm2_bare, - op, VPR64, VPR64, VPR64Lo, v4i16, v4i16, v4i16>; -} - -defm MLA_lane_v1 : NI_2VE_v1_pat<"MLAvve", Neon_mla>; -defm MLS_lane_v1 : NI_2VE_v1_pat<"MLSvve", Neon_mls>; - -class NI_2VE_2op<bit q, bit u, bits<2> size, bits<4> opcode, - string asmop, string ResS, string OpS, string EleOpS, - Operand OpImm, RegisterOperand ResVPR, - RegisterOperand OpVPR, RegisterOperand EleOpVPR> - : NeonI_2VElem<q, u, size, opcode, - (outs ResVPR:$Rd), (ins OpVPR:$Rn, - EleOpVPR:$Re, OpImm:$Index), - asmop # "\t$Rd." # ResS # ", $Rn." # OpS # - ", $Re." 
# EleOpS # "[$Index]", - [], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { - bits<3> Index; - bits<5> Re; -} - -multiclass NI_2VE_v1_2op<bit u, bits<4> opcode, string asmop> { - // vector register class for element is always 128-bit to cover the max index - def _2s4s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s", - neon_uimm2_bare, VPR64, VPR64, VPR128> { - let Inst{11} = {Index{1}}; - let Inst{21} = {Index{0}}; - let Inst{20-16} = Re; - } - - def _4s4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s", - neon_uimm2_bare, VPR128, VPR128, VPR128> { - let Inst{11} = {Index{1}}; - let Inst{21} = {Index{0}}; - let Inst{20-16} = Re; - } - - // Index operations on 16-bit(H) elements are restricted to using v0-v15. - def _4h8h : NI_2VE_2op<0b0, u, 0b01, opcode, asmop, "4h", "4h", "h", - neon_uimm3_bare, VPR64, VPR64, VPR128Lo> { - let Inst{11} = {Index{2}}; - let Inst{21} = {Index{1}}; - let Inst{20} = {Index{0}}; - let Inst{19-16} = Re{3-0}; - } - - def _8h8h : NI_2VE_2op<0b1, u, 0b01, opcode, asmop, "8h", "8h", "h", - neon_uimm3_bare, VPR128, VPR128, VPR128Lo> { - let Inst{11} = {Index{2}}; - let Inst{21} = {Index{1}}; - let Inst{20} = {Index{0}}; - let Inst{19-16} = Re{3-0}; - } -} - -let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in { -defm MULve : NI_2VE_v1_2op<0b0, 0b1000, "mul">; -defm SQDMULHve : NI_2VE_v1_2op<0b0, 0b1100, "sqdmulh">; -defm SQRDMULHve : NI_2VE_v1_2op<0b0, 0b1101, "sqrdmulh">; -} - -// Pattern for lane in 128-bit vector -class NI_2VE_mul_laneq<Instruction INST, Operand OpImm, SDPatternOperator op, - RegisterOperand OpVPR, RegisterOperand EleOpVPR, - ValueType ResTy, ValueType OpTy, ValueType EleOpTy> - : Pat<(ResTy (op (OpTy OpVPR:$Rn), - (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))), - (INST OpVPR:$Rn, EleOpVPR:$Re, OpImm:$Index)>; - -// Pattern for lane in 64-bit vector -class NI_2VE_mul_lane<Instruction INST, Operand OpImm, SDPatternOperator op, - RegisterOperand OpVPR, RegisterOperand EleOpVPR, - ValueType ResTy, ValueType OpTy, ValueType EleOpTy> - : Pat<(ResTy (op (OpTy OpVPR:$Rn), - (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))), - (INST OpVPR:$Rn, - (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>; - -multiclass NI_2VE_mul_v1_pat<string subop, SDPatternOperator op> { - def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2s4s"), neon_uimm2_bare, - op, VPR64, VPR128, v2i32, v2i32, v4i32>; - - def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4s4s"), neon_uimm2_bare, - op, VPR128, VPR128, v4i32, v4i32, v4i32>; - - def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4h8h"), neon_uimm3_bare, - op, VPR64, VPR128Lo, v4i16, v4i16, v8i16>; - - def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_8h8h"), neon_uimm3_bare, - op, VPR128, VPR128Lo, v8i16, v8i16, v8i16>; - - // Index can only be half of the max value for lane in 64-bit vector - - def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_2s4s"), neon_uimm1_bare, - op, VPR64, VPR64, v2i32, v2i32, v2i32>; - - def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_4h8h"), neon_uimm2_bare, - op, VPR64, VPR64Lo, v4i16, v4i16, v4i16>; -} - -defm MUL_lane_v1 : NI_2VE_mul_v1_pat<"MULve", mul>; -defm SQDMULH_lane_v1 : NI_2VE_mul_v1_pat<"SQDMULHve", int_arm_neon_vqdmulh>; -defm SQRDMULH_lane_v1 : NI_2VE_mul_v1_pat<"SQRDMULHve", int_arm_neon_vqrdmulh>; - -// Variant 2 - -multiclass NI_2VE_v2_2op<bit u, bits<4> opcode, string asmop> { - // vector register class for element is always 128-bit to cover the max index - def _2s4s : NI_2VE_2op<0b0, 
u, 0b10, opcode, asmop, "2s", "2s", "s", - neon_uimm2_bare, VPR64, VPR64, VPR128> { - let Inst{11} = {Index{1}}; - let Inst{21} = {Index{0}}; - let Inst{20-16} = Re; - } - - def _4s4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s", - neon_uimm2_bare, VPR128, VPR128, VPR128> { - let Inst{11} = {Index{1}}; - let Inst{21} = {Index{0}}; - let Inst{20-16} = Re; - } - - // _1d2d doesn't exist! - - def _2d2d : NI_2VE_2op<0b1, u, 0b11, opcode, asmop, "2d", "2d", "d", - neon_uimm1_bare, VPR128, VPR128, VPR128> { - let Inst{11} = {Index{0}}; - let Inst{21} = 0b0; - let Inst{20-16} = Re; - } -} - -let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in { -defm FMULve : NI_2VE_v2_2op<0b0, 0b1001, "fmul">; -defm FMULXve : NI_2VE_v2_2op<0b1, 0b1001, "fmulx">; -} - -class NI_2VE_mul_lane_2d<Instruction INST, Operand OpImm, SDPatternOperator op, - RegisterOperand OpVPR, RegisterOperand EleOpVPR, - ValueType ResTy, ValueType OpTy, ValueType EleOpTy, - SDPatternOperator coreop> - : Pat<(ResTy (op (OpTy OpVPR:$Rn), - (OpTy (coreop (EleOpTy EleOpVPR:$Re), (EleOpTy EleOpVPR:$Re))))), - (INST OpVPR:$Rn, - (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), 0)>; - -multiclass NI_2VE_mul_v2_pat<string subop, SDPatternOperator op> { - def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2s4s"), neon_uimm2_bare, - op, VPR64, VPR128, v2f32, v2f32, v4f32>; - - def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4s4s"), neon_uimm2_bare, - op, VPR128, VPR128, v4f32, v4f32, v4f32>; - - def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2d2d"), neon_uimm1_bare, - op, VPR128, VPR128, v2f64, v2f64, v2f64>; - - // Index can only be half of the max value for lane in 64-bit vector - - def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_2s4s"), neon_uimm1_bare, - op, VPR64, VPR64, v2f32, v2f32, v2f32>; - - def : NI_2VE_mul_lane_2d<!cast<Instruction>(subop # "_2d2d"), neon_uimm1_bare, - op, VPR128, VPR64, v2f64, v2f64, v1f64, - BinOpFrag<(Neon_combine_2d node:$LHS, node:$RHS)>>; -} - -defm FMUL_lane_v2 : NI_2VE_mul_v2_pat<"FMULve", fmul>; -defm FMULX_lane_v2 : NI_2VE_mul_v2_pat<"FMULXve", int_aarch64_neon_vmulx>; - -def : Pat<(v2f32 (fmul (v2f32 (Neon_vdup (f32 FPR32:$Re))), - (v2f32 VPR64:$Rn))), - (FMULve_2s4s VPR64:$Rn, (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>; - -def : Pat<(v4f32 (fmul (v4f32 (Neon_vdup (f32 FPR32:$Re))), - (v4f32 VPR128:$Rn))), - (FMULve_4s4s VPR128:$Rn, (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>; - -def : Pat<(v2f64 (fmul (v2f64 (Neon_vdup (f64 FPR64:$Re))), - (v2f64 VPR128:$Rn))), - (FMULve_2d2d VPR128:$Rn, (SUBREG_TO_REG (i64 0), $Re, sub_64), 0)>; - -// The followings are patterns using fma -// -ffp-contract=fast generates fma - -multiclass NI_2VE_v2<bit u, bits<4> opcode, string asmop> { - // vector register class for element is always 128-bit to cover the max index - def _2s4s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s", - neon_uimm2_bare, VPR64, VPR64, VPR128> { - let Inst{11} = {Index{1}}; - let Inst{21} = {Index{0}}; - let Inst{20-16} = Re; - } - - def _4s4s : NI_2VE<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s", - neon_uimm2_bare, VPR128, VPR128, VPR128> { - let Inst{11} = {Index{1}}; - let Inst{21} = {Index{0}}; - let Inst{20-16} = Re; - } - - // _1d2d doesn't exist! 
- - def _2d2d : NI_2VE<0b1, u, 0b11, opcode, asmop, "2d", "2d", "d", - neon_uimm1_bare, VPR128, VPR128, VPR128> { - let Inst{11} = {Index{0}}; - let Inst{21} = 0b0; - let Inst{20-16} = Re; - } -} - -defm FMLAvve : NI_2VE_v2<0b0, 0b0001, "fmla">; -defm FMLSvve : NI_2VE_v2<0b0, 0b0101, "fmls">; - -// Pattern for lane in 128-bit vector -class NI_2VEswap_laneq<Instruction INST, Operand OpImm, SDPatternOperator op, - RegisterOperand ResVPR, RegisterOperand OpVPR, - ValueType ResTy, ValueType OpTy, - SDPatternOperator coreop> - : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (i64 OpImm:$Index))), - (ResTy ResVPR:$Rn), (ResTy ResVPR:$src))), - (INST ResVPR:$src, ResVPR:$Rn, OpVPR:$Re, OpImm:$Index)>; - -// Pattern for lane 0 -class NI_2VEfma_lane0<Instruction INST, SDPatternOperator op, - RegisterOperand ResVPR, ValueType ResTy> - : Pat<(ResTy (op (ResTy ResVPR:$Rn), - (ResTy (Neon_vdup (f32 FPR32:$Re))), - (ResTy ResVPR:$src))), - (INST ResVPR:$src, ResVPR:$Rn, - (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>; - -// Pattern for lane in 64-bit vector -class NI_2VEswap_lane<Instruction INST, Operand OpImm, SDPatternOperator op, - RegisterOperand ResVPR, RegisterOperand OpVPR, - ValueType ResTy, ValueType OpTy, - SDPatternOperator coreop> - : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (i64 OpImm:$Index))), - (ResTy ResVPR:$Rn), (ResTy ResVPR:$src))), - (INST ResVPR:$src, ResVPR:$Rn, - (SUBREG_TO_REG (i64 0), OpVPR:$Re, sub_64), OpImm:$Index)>; - -// Pattern for lane in 64-bit vector -class NI_2VEswap_lane_2d2d<Instruction INST, Operand OpImm, - SDPatternOperator op, - RegisterOperand ResVPR, RegisterOperand OpVPR, - ValueType ResTy, ValueType OpTy, - SDPatternOperator coreop> - : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (OpTy OpVPR:$Re))), - (ResTy ResVPR:$Rn), (ResTy ResVPR:$src))), - (INST ResVPR:$src, ResVPR:$Rn, - (SUBREG_TO_REG (i64 0), OpVPR:$Re, sub_64), 0)>; - - -multiclass NI_2VE_fma_v2_pat<string subop, SDPatternOperator op> { - def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2s4s"), - neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32, - BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>; - - def : NI_2VEfma_lane0<!cast<Instruction>(subop # "_2s4s"), - op, VPR64, v2f32>; - - def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_4s4s"), - neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32, - BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>; - - def : NI_2VEfma_lane0<!cast<Instruction>(subop # "_4s4s"), - op, VPR128, v4f32>; - - def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2d2d"), - neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64, - BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>; - - // Index can only be half of the max value for lane in 64-bit vector - - def : NI_2VEswap_lane<!cast<Instruction>(subop # "_2s4s"), - neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32, - BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>; - - def : NI_2VEswap_lane_2d2d<!cast<Instruction>(subop # "_2d2d"), - neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64, - BinOpFrag<(Neon_combine_2d node:$LHS, node:$RHS)>>; -} - -defm FMLA_lane_v2_s : NI_2VE_fma_v2_pat<"FMLAvve", fma>; - -// Pattern for lane 0 -class NI_2VEfms_lane0<Instruction INST, SDPatternOperator op, - RegisterOperand ResVPR, ValueType ResTy> - : Pat<(ResTy (op (ResTy (fneg ResVPR:$Rn)), - (ResTy (Neon_vdup (f32 FPR32:$Re))), - (ResTy ResVPR:$src))), - (INST ResVPR:$src, ResVPR:$Rn, - (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>; - -multiclass NI_2VE_fms_v2_pat<string subop, SDPatternOperator op> -{ - def : 
NI_2VEswap_laneq<!cast<Instruction>(subop # "_2s4s"), - neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32, - BinOpFrag<(fneg (Neon_vduplane node:$LHS, node:$RHS))>>; - - def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2s4s"), - neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32, - BinOpFrag<(Neon_vduplane - (fneg node:$LHS), node:$RHS)>>; - - def : NI_2VEfms_lane0<!cast<Instruction>(subop # "_2s4s"), - op, VPR64, v2f32>; - - def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_4s4s"), - neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32, - BinOpFrag<(fneg (Neon_vduplane - node:$LHS, node:$RHS))>>; - - def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_4s4s"), - neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32, - BinOpFrag<(Neon_vduplane - (fneg node:$LHS), node:$RHS)>>; - - def : NI_2VEfms_lane0<!cast<Instruction>(subop # "_4s4s"), - op, VPR128, v4f32>; - - def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2d2d"), - neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64, - BinOpFrag<(fneg (Neon_vduplane - node:$LHS, node:$RHS))>>; - - def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2d2d"), - neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64, - BinOpFrag<(Neon_vduplane - (fneg node:$LHS), node:$RHS)>>; - - // Index can only be half of the max value for lane in 64-bit vector - - def : NI_2VEswap_lane<!cast<Instruction>(subop # "_2s4s"), - neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32, - BinOpFrag<(fneg (Neon_vduplane - node:$LHS, node:$RHS))>>; - - def : NI_2VEswap_lane<!cast<Instruction>(subop # "_2s4s"), - neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32, - BinOpFrag<(Neon_vduplane - (fneg node:$LHS), node:$RHS)>>; - - def : NI_2VEswap_lane<!cast<Instruction>(subop # "_4s4s"), - neon_uimm1_bare, op, VPR128, VPR64, v4f32, v2f32, - BinOpFrag<(fneg (Neon_vduplane node:$LHS, node:$RHS))>>; - - def : NI_2VEswap_lane<!cast<Instruction>(subop # "_4s4s"), - neon_uimm1_bare, op, VPR128, VPR64, v4f32, v2f32, - BinOpFrag<(Neon_vduplane (fneg node:$LHS), node:$RHS)>>; - - def : NI_2VEswap_lane_2d2d<!cast<Instruction>(subop # "_2d2d"), - neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64, - BinOpFrag<(fneg (Neon_combine_2d - node:$LHS, node:$RHS))>>; - - def : NI_2VEswap_lane_2d2d<!cast<Instruction>(subop # "_2d2d"), - neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64, - BinOpFrag<(Neon_combine_2d - (fneg node:$LHS), (fneg node:$RHS))>>; -} - -defm FMLS_lane_v2_s : NI_2VE_fms_v2_pat<"FMLSvve", fma>; - -// Variant 3: Long type -// E.g. SMLAL : 4S/4H/H (v0-v15), 2D/2S/S -// SMLAL2: 4S/8H/H (v0-v15), 2D/4S/S - -multiclass NI_2VE_v3<bit u, bits<4> opcode, string asmop> { - // vector register class for element is always 128-bit to cover the max index - def _2d2s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2d", "2s", "s", - neon_uimm2_bare, VPR128, VPR64, VPR128> { - let Inst{11} = {Index{1}}; - let Inst{21} = {Index{0}}; - let Inst{20-16} = Re; - } - - def _2d4s : NI_2VE<0b1, u, 0b10, opcode, asmop # "2", "2d", "4s", "s", - neon_uimm2_bare, VPR128, VPR128, VPR128> { - let Inst{11} = {Index{1}}; - let Inst{21} = {Index{0}}; - let Inst{20-16} = Re; - } - - // Index operations on 16-bit(H) elements are restricted to using v0-v15. 
- def _4s8h : NI_2VE<0b1, u, 0b01, opcode, asmop # "2", "4s", "8h", "h", - neon_uimm3_bare, VPR128, VPR128, VPR128Lo> { - let Inst{11} = {Index{2}}; - let Inst{21} = {Index{1}}; - let Inst{20} = {Index{0}}; - let Inst{19-16} = Re{3-0}; - } - - def _4s4h : NI_2VE<0b0, u, 0b01, opcode, asmop, "4s", "4h", "h", - neon_uimm3_bare, VPR128, VPR64, VPR128Lo> { - let Inst{11} = {Index{2}}; - let Inst{21} = {Index{1}}; - let Inst{20} = {Index{0}}; - let Inst{19-16} = Re{3-0}; - } -} - -defm SMLALvve : NI_2VE_v3<0b0, 0b0010, "smlal">; -defm UMLALvve : NI_2VE_v3<0b1, 0b0010, "umlal">; -defm SMLSLvve : NI_2VE_v3<0b0, 0b0110, "smlsl">; -defm UMLSLvve : NI_2VE_v3<0b1, 0b0110, "umlsl">; -defm SQDMLALvve : NI_2VE_v3<0b0, 0b0011, "sqdmlal">; -defm SQDMLSLvve : NI_2VE_v3<0b0, 0b0111, "sqdmlsl">; - -multiclass NI_2VE_v3_2op<bit u, bits<4> opcode, string asmop> { - // vector register class for element is always 128-bit to cover the max index - def _2d2s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2d", "2s", "s", - neon_uimm2_bare, VPR128, VPR64, VPR128> { - let Inst{11} = {Index{1}}; - let Inst{21} = {Index{0}}; - let Inst{20-16} = Re; - } - - def _2d4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop # "2", "2d", "4s", "s", - neon_uimm2_bare, VPR128, VPR128, VPR128> { - let Inst{11} = {Index{1}}; - let Inst{21} = {Index{0}}; - let Inst{20-16} = Re; - } - - // Index operations on 16-bit(H) elements are restricted to using v0-v15. - def _4s8h : NI_2VE_2op<0b1, u, 0b01, opcode, asmop # "2", "4s", "8h", "h", - neon_uimm3_bare, VPR128, VPR128, VPR128Lo> { - let Inst{11} = {Index{2}}; - let Inst{21} = {Index{1}}; - let Inst{20} = {Index{0}}; - let Inst{19-16} = Re{3-0}; - } - - def _4s4h : NI_2VE_2op<0b0, u, 0b01, opcode, asmop, "4s", "4h", "h", - neon_uimm3_bare, VPR128, VPR64, VPR128Lo> { - let Inst{11} = {Index{2}}; - let Inst{21} = {Index{1}}; - let Inst{20} = {Index{0}}; - let Inst{19-16} = Re{3-0}; - } -} - -let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in { -defm SMULLve : NI_2VE_v3_2op<0b0, 0b1010, "smull">; -defm UMULLve : NI_2VE_v3_2op<0b1, 0b1010, "umull">; -defm SQDMULLve : NI_2VE_v3_2op<0b0, 0b1011, "sqdmull">; -} - -def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$src))), - (FMOVdd $src)>; - -// Pattern for lane in 128-bit vector -class NI_2VEL2_laneq<Instruction INST, Operand OpImm, SDPatternOperator op, - RegisterOperand EleOpVPR, ValueType ResTy, - ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy, - SDPatternOperator hiop> - : Pat<(ResTy (op (ResTy VPR128:$src), - (HalfOpTy (hiop (OpTy VPR128:$Rn))), - (HalfOpTy (Neon_vduplane - (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))), - (INST VPR128:$src, VPR128:$Rn, EleOpVPR:$Re, OpImm:$Index)>; - -// Pattern for lane in 64-bit vector -class NI_2VEL2_lane<Instruction INST, Operand OpImm, SDPatternOperator op, - RegisterOperand EleOpVPR, ValueType ResTy, - ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy, - SDPatternOperator hiop> - : Pat<(ResTy (op (ResTy VPR128:$src), - (HalfOpTy (hiop (OpTy VPR128:$Rn))), - (HalfOpTy (Neon_vduplane - (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))), - (INST VPR128:$src, VPR128:$Rn, - (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>; - -class NI_2VEL2_lane0<Instruction INST, SDPatternOperator op, - ValueType ResTy, ValueType OpTy, ValueType HalfOpTy, - SDPatternOperator hiop, Instruction DupInst> - : Pat<(ResTy (op (ResTy VPR128:$src), - (HalfOpTy (hiop (OpTy VPR128:$Rn))), - (HalfOpTy (Neon_vdup (i32 GPR32:$Re))))), - (INST VPR128:$src, VPR128:$Rn, (DupInst $Re), 0)>; - -multiclass NI_2VEL_v3_pat<string 
subop, SDPatternOperator op> { - def : NI_2VE_laneq<!cast<Instruction>(subop # "_4s4h"), neon_uimm3_bare, - op, VPR128, VPR64, VPR128Lo, v4i32, v4i16, v8i16>; - - def : NI_2VE_laneq<!cast<Instruction>(subop # "_2d2s"), neon_uimm2_bare, - op, VPR128, VPR64, VPR128, v2i64, v2i32, v4i32>; - - def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_4s8h"), neon_uimm3_bare, - op, VPR128Lo, v4i32, v8i16, v8i16, v4i16, Neon_High8H>; - - def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_2d4s"), neon_uimm2_bare, - op, VPR128, v2i64, v4i32, v4i32, v2i32, Neon_High4S>; - - def : NI_2VEL2_lane0<!cast<Instruction>(subop # "_4s8h"), - op, v4i32, v8i16, v4i16, Neon_High8H, DUP8h>; - - def : NI_2VEL2_lane0<!cast<Instruction>(subop # "_2d4s"), - op, v2i64, v4i32, v2i32, Neon_High4S, DUP4s>; - - // Index can only be half of the max value for lane in 64-bit vector - - def : NI_2VE_lane<!cast<Instruction>(subop # "_4s4h"), neon_uimm2_bare, - op, VPR128, VPR64, VPR64Lo, v4i32, v4i16, v4i16>; - - def : NI_2VE_lane<!cast<Instruction>(subop # "_2d2s"), neon_uimm1_bare, - op, VPR128, VPR64, VPR64, v2i64, v2i32, v2i32>; - - def : NI_2VEL2_lane<!cast<Instruction>(subop # "_4s8h"), neon_uimm2_bare, - op, VPR64Lo, v4i32, v8i16, v4i16, v4i16, Neon_High8H>; - - def : NI_2VEL2_lane<!cast<Instruction>(subop # "_2d4s"), neon_uimm1_bare, - op, VPR64, v2i64, v4i32, v2i32, v2i32, Neon_High4S>; -} - -defm SMLAL_lane_v3 : NI_2VEL_v3_pat<"SMLALvve", Neon_smlal>; -defm UMLAL_lane_v3 : NI_2VEL_v3_pat<"UMLALvve", Neon_umlal>; -defm SMLSL_lane_v3 : NI_2VEL_v3_pat<"SMLSLvve", Neon_smlsl>; -defm UMLSL_lane_v3 : NI_2VEL_v3_pat<"UMLSLvve", Neon_umlsl>; - -// Pattern for lane in 128-bit vector -class NI_2VEL2_mul_laneq<Instruction INST, Operand OpImm, SDPatternOperator op, - RegisterOperand EleOpVPR, ValueType ResTy, - ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy, - SDPatternOperator hiop> - : Pat<(ResTy (op - (HalfOpTy (hiop (OpTy VPR128:$Rn))), - (HalfOpTy (Neon_vduplane - (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))), - (INST VPR128:$Rn, EleOpVPR:$Re, OpImm:$Index)>; - -// Pattern for lane in 64-bit vector -class NI_2VEL2_mul_lane<Instruction INST, Operand OpImm, SDPatternOperator op, - RegisterOperand EleOpVPR, ValueType ResTy, - ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy, - SDPatternOperator hiop> - : Pat<(ResTy (op - (HalfOpTy (hiop (OpTy VPR128:$Rn))), - (HalfOpTy (Neon_vduplane - (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))), - (INST VPR128:$Rn, - (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>; - -// Pattern for fixed lane 0 -class NI_2VEL2_mul_lane0<Instruction INST, SDPatternOperator op, - ValueType ResTy, ValueType OpTy, ValueType HalfOpTy, - SDPatternOperator hiop, Instruction DupInst> - : Pat<(ResTy (op - (HalfOpTy (hiop (OpTy VPR128:$Rn))), - (HalfOpTy (Neon_vdup (i32 GPR32:$Re))))), - (INST VPR128:$Rn, (DupInst $Re), 0)>; - -multiclass NI_2VEL_mul_v3_pat<string subop, SDPatternOperator op> { - def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4s4h"), neon_uimm3_bare, - op, VPR64, VPR128Lo, v4i32, v4i16, v8i16>; - - def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2d2s"), neon_uimm2_bare, - op, VPR64, VPR128, v2i64, v2i32, v4i32>; - - def : NI_2VEL2_mul_laneq<!cast<Instruction>(subop # "_4s8h"), neon_uimm3_bare, - op, VPR128Lo, v4i32, v8i16, v8i16, v4i16, Neon_High8H>; - - def : NI_2VEL2_mul_laneq<!cast<Instruction>(subop # "_2d4s"), neon_uimm2_bare, - op, VPR128, v2i64, v4i32, v4i32, v2i32, Neon_High4S>; - - def : NI_2VEL2_mul_lane0<!cast<Instruction>(subop # "_4s8h"), - op, v4i32, 
v8i16, v4i16, Neon_High8H, DUP8h>; - - def : NI_2VEL2_mul_lane0<!cast<Instruction>(subop # "_2d4s"), - op, v2i64, v4i32, v2i32, Neon_High4S, DUP4s>; - - // Index can only be half of the max value for lane in 64-bit vector - - def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_4s4h"), neon_uimm2_bare, - op, VPR64, VPR64Lo, v4i32, v4i16, v4i16>; - - def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_2d2s"), neon_uimm1_bare, - op, VPR64, VPR64, v2i64, v2i32, v2i32>; - - def : NI_2VEL2_mul_lane<!cast<Instruction>(subop # "_4s8h"), neon_uimm2_bare, - op, VPR64Lo, v4i32, v8i16, v4i16, v4i16, Neon_High8H>; - - def : NI_2VEL2_mul_lane<!cast<Instruction>(subop # "_2d4s"), neon_uimm1_bare, - op, VPR64, v2i64, v4i32, v2i32, v2i32, Neon_High4S>; -} - -defm SMULL_lane_v3 : NI_2VEL_mul_v3_pat<"SMULLve", int_arm_neon_vmulls>; -defm UMULL_lane_v3 : NI_2VEL_mul_v3_pat<"UMULLve", int_arm_neon_vmullu>; -defm SQDMULL_lane_v3 : NI_2VEL_mul_v3_pat<"SQDMULLve", int_arm_neon_vqdmull>; - -multiclass NI_qdma<SDPatternOperator op> { - def _4s : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm), - (op node:$Ra, - (v4i32 (int_arm_neon_vqdmull node:$Rn, node:$Rm)))>; - - def _2d : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm), - (op node:$Ra, - (v2i64 (int_arm_neon_vqdmull node:$Rn, node:$Rm)))>; -} - -defm Neon_qdmlal : NI_qdma<int_arm_neon_vqadds>; -defm Neon_qdmlsl : NI_qdma<int_arm_neon_vqsubs>; - -multiclass NI_2VEL_v3_qdma_pat<string subop, string op> { - def : NI_2VE_laneq<!cast<Instruction>(subop # "_4s4h"), neon_uimm3_bare, - !cast<PatFrag>(op # "_4s"), VPR128, VPR64, VPR128Lo, - v4i32, v4i16, v8i16>; - - def : NI_2VE_laneq<!cast<Instruction>(subop # "_2d2s"), neon_uimm2_bare, - !cast<PatFrag>(op # "_2d"), VPR128, VPR64, VPR128, - v2i64, v2i32, v4i32>; - - def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_4s8h"), neon_uimm3_bare, - !cast<PatFrag>(op # "_4s"), VPR128Lo, - v4i32, v8i16, v8i16, v4i16, Neon_High8H>; - - def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_2d4s"), neon_uimm2_bare, - !cast<PatFrag>(op # "_2d"), VPR128, - v2i64, v4i32, v4i32, v2i32, Neon_High4S>; - - def : NI_2VEL2_lane0<!cast<Instruction>(subop # "_4s8h"), - !cast<PatFrag>(op # "_4s"), - v4i32, v8i16, v4i16, Neon_High8H, DUP8h>; - - def : NI_2VEL2_lane0<!cast<Instruction>(subop # "_2d4s"), - !cast<PatFrag>(op # "_2d"), - v2i64, v4i32, v2i32, Neon_High4S, DUP4s>; - - // Index can only be half of the max value for lane in 64-bit vector - - def : NI_2VE_lane<!cast<Instruction>(subop # "_4s4h"), neon_uimm2_bare, - !cast<PatFrag>(op # "_4s"), VPR128, VPR64, VPR64Lo, - v4i32, v4i16, v4i16>; - - def : NI_2VE_lane<!cast<Instruction>(subop # "_2d2s"), neon_uimm1_bare, - !cast<PatFrag>(op # "_2d"), VPR128, VPR64, VPR64, - v2i64, v2i32, v2i32>; - - def : NI_2VEL2_lane<!cast<Instruction>(subop # "_4s8h"), neon_uimm2_bare, - !cast<PatFrag>(op # "_4s"), VPR64Lo, - v4i32, v8i16, v4i16, v4i16, Neon_High8H>; - - def : NI_2VEL2_lane<!cast<Instruction>(subop # "_2d4s"), neon_uimm1_bare, - !cast<PatFrag>(op # "_2d"), VPR64, - v2i64, v4i32, v2i32, v2i32, Neon_High4S>; -} - -defm SQDMLAL_lane_v3 : NI_2VEL_v3_qdma_pat<"SQDMLALvve", "Neon_qdmlal">; -defm SQDMLSL_lane_v3 : NI_2VEL_v3_qdma_pat<"SQDMLSLvve", "Neon_qdmlsl">; - -// End of implementation for instruction class (3V Elem) - -class NeonI_REV<string asmop, string Res, bits<2> size, bit Q, bit U, - bits<5> opcode, RegisterOperand ResVPR, ValueType ResTy, - SDPatternOperator Neon_Rev> - : NeonI_2VMisc<Q, U, size, opcode, - (outs ResVPR:$Rd), (ins ResVPR:$Rn), - asmop # "\t$Rd." # Res # ", $Rn." 
# Res, - [(set (ResTy ResVPR:$Rd), - (ResTy (Neon_Rev (ResTy ResVPR:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - -def REV64_16b : NeonI_REV<"rev64", "16b", 0b00, 0b1, 0b0, 0b00000, VPR128, - v16i8, Neon_rev64>; -def REV64_8h : NeonI_REV<"rev64", "8h", 0b01, 0b1, 0b0, 0b00000, VPR128, - v8i16, Neon_rev64>; -def REV64_4s : NeonI_REV<"rev64", "4s", 0b10, 0b1, 0b0, 0b00000, VPR128, - v4i32, Neon_rev64>; -def REV64_8b : NeonI_REV<"rev64", "8b", 0b00, 0b0, 0b0, 0b00000, VPR64, - v8i8, Neon_rev64>; -def REV64_4h : NeonI_REV<"rev64", "4h", 0b01, 0b0, 0b0, 0b00000, VPR64, - v4i16, Neon_rev64>; -def REV64_2s : NeonI_REV<"rev64", "2s", 0b10, 0b0, 0b0, 0b00000, VPR64, - v2i32, Neon_rev64>; - -def : Pat<(v4f32 (Neon_rev64 (v4f32 VPR128:$Rn))), (REV64_4s VPR128:$Rn)>; -def : Pat<(v2f32 (Neon_rev64 (v2f32 VPR64:$Rn))), (REV64_2s VPR64:$Rn)>; - -def REV32_16b : NeonI_REV<"rev32", "16b", 0b00, 0b1, 0b1, 0b00000, VPR128, - v16i8, Neon_rev32>; -def REV32_8h : NeonI_REV<"rev32", "8h", 0b01, 0b1, 0b1, 0b00000, VPR128, - v8i16, Neon_rev32>; -def REV32_8b : NeonI_REV<"rev32", "8b", 0b00, 0b0, 0b1, 0b00000, VPR64, - v8i8, Neon_rev32>; -def REV32_4h : NeonI_REV<"rev32", "4h", 0b01, 0b0, 0b1, 0b00000, VPR64, - v4i16, Neon_rev32>; - -def REV16_16b : NeonI_REV<"rev16", "16b", 0b00, 0b1, 0b0, 0b00001, VPR128, - v16i8, Neon_rev16>; -def REV16_8b : NeonI_REV<"rev16", "8b", 0b00, 0b0, 0b0, 0b00001, VPR64, - v8i8, Neon_rev16>; - -multiclass NeonI_PairwiseAdd<string asmop, bit U, bits<5> opcode, - SDPatternOperator Neon_Padd> { - def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.8h, $Rn.16b", - [(set (v8i16 VPR128:$Rd), - (v8i16 (Neon_Padd (v16i8 VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 8b4h : NeonI_2VMisc<0b0, U, 0b00, opcode, - (outs VPR64:$Rd), (ins VPR64:$Rn), - asmop # "\t$Rd.4h, $Rn.8b", - [(set (v4i16 VPR64:$Rd), - (v4i16 (Neon_Padd (v8i8 VPR64:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.4s, $Rn.8h", - [(set (v4i32 VPR128:$Rd), - (v4i32 (Neon_Padd (v8i16 VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 4h2s : NeonI_2VMisc<0b0, U, 0b01, opcode, - (outs VPR64:$Rd), (ins VPR64:$Rn), - asmop # "\t$Rd.2s, $Rn.4h", - [(set (v2i32 VPR64:$Rd), - (v2i32 (Neon_Padd (v4i16 VPR64:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.2d, $Rn.4s", - [(set (v2i64 VPR128:$Rd), - (v2i64 (Neon_Padd (v4i32 VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 2s1d : NeonI_2VMisc<0b0, U, 0b10, opcode, - (outs VPR64:$Rd), (ins VPR64:$Rn), - asmop # "\t$Rd.1d, $Rn.2s", - [(set (v1i64 VPR64:$Rd), - (v1i64 (Neon_Padd (v2i32 VPR64:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; -} - -defm SADDLP : NeonI_PairwiseAdd<"saddlp", 0b0, 0b00010, - int_arm_neon_vpaddls>; -defm UADDLP : NeonI_PairwiseAdd<"uaddlp", 0b1, 0b00010, - int_arm_neon_vpaddlu>; - -def : Pat<(v1i64 (int_aarch64_neon_saddlv (v2i32 VPR64:$Rn))), - (SADDLP2s1d $Rn)>; -def : Pat<(v1i64 (int_aarch64_neon_uaddlv (v2i32 VPR64:$Rn))), - (UADDLP2s1d $Rn)>; - -multiclass NeonI_PairwiseAddAcc<string asmop, bit U, bits<5> opcode, - SDPatternOperator Neon_Padd> { - let Constraints = "$src = $Rd" in { - def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode, - (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), 
- asmop # "\t$Rd.8h, $Rn.16b", - [(set (v8i16 VPR128:$Rd), - (v8i16 (Neon_Padd - (v8i16 VPR128:$src), (v16i8 VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def 8b4h : NeonI_2VMisc<0b0, U, 0b00, opcode, - (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn), - asmop # "\t$Rd.4h, $Rn.8b", - [(set (v4i16 VPR64:$Rd), - (v4i16 (Neon_Padd - (v4i16 VPR64:$src), (v8i8 VPR64:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode, - (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), - asmop # "\t$Rd.4s, $Rn.8h", - [(set (v4i32 VPR128:$Rd), - (v4i32 (Neon_Padd - (v4i32 VPR128:$src), (v8i16 VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def 4h2s : NeonI_2VMisc<0b0, U, 0b01, opcode, - (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn), - asmop # "\t$Rd.2s, $Rn.4h", - [(set (v2i32 VPR64:$Rd), - (v2i32 (Neon_Padd - (v2i32 VPR64:$src), (v4i16 VPR64:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode, - (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), - asmop # "\t$Rd.2d, $Rn.4s", - [(set (v2i64 VPR128:$Rd), - (v2i64 (Neon_Padd - (v2i64 VPR128:$src), (v4i32 VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def 2s1d : NeonI_2VMisc<0b0, U, 0b10, opcode, - (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn), - asmop # "\t$Rd.1d, $Rn.2s", - [(set (v1i64 VPR64:$Rd), - (v1i64 (Neon_Padd - (v1i64 VPR64:$src), (v2i32 VPR64:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - } -} - -defm SADALP : NeonI_PairwiseAddAcc<"sadalp", 0b0, 0b00110, - int_arm_neon_vpadals>; -defm UADALP : NeonI_PairwiseAddAcc<"uadalp", 0b1, 0b00110, - int_arm_neon_vpadalu>; - -multiclass NeonI_2VMisc_BHSDsize_1Arg<string asmop, bit U, bits<5> opcode> { - def 16b : NeonI_2VMisc<0b1, U, 0b00, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.16b, $Rn.16b", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 8h : NeonI_2VMisc<0b1, U, 0b01, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.8h, $Rn.8h", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.4s, $Rn.4s", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.2d, $Rn.2d", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 8b : NeonI_2VMisc<0b0, U, 0b00, opcode, - (outs VPR64:$Rd), (ins VPR64:$Rn), - asmop # "\t$Rd.8b, $Rn.8b", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 4h : NeonI_2VMisc<0b0, U, 0b01, opcode, - (outs VPR64:$Rd), (ins VPR64:$Rn), - asmop # "\t$Rd.4h, $Rn.4h", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode, - (outs VPR64:$Rd), (ins VPR64:$Rn), - asmop # "\t$Rd.2s, $Rn.2s", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; -} - -defm SQABS : NeonI_2VMisc_BHSDsize_1Arg<"sqabs", 0b0, 0b00111>; -defm SQNEG : NeonI_2VMisc_BHSDsize_1Arg<"sqneg", 0b1, 0b00111>; -defm ABS : NeonI_2VMisc_BHSDsize_1Arg<"abs", 0b0, 0b01011>; -defm NEG : NeonI_2VMisc_BHSDsize_1Arg<"neg", 0b1, 0b01011>; - -multiclass NeonI_2VMisc_BHSD_1Arg_Pattern<string Prefix, - SDPatternOperator Neon_Op> { - def : Pat<(v16i8 (Neon_Op (v16i8 VPR128:$Rn))), - (v16i8 (!cast<Instruction>(Prefix # 16b) (v16i8 VPR128:$Rn)))>; - - def : Pat<(v8i16 (Neon_Op 
(v8i16 VPR128:$Rn))), - (v8i16 (!cast<Instruction>(Prefix # 8h) (v8i16 VPR128:$Rn)))>; - - def : Pat<(v4i32 (Neon_Op (v4i32 VPR128:$Rn))), - (v4i32 (!cast<Instruction>(Prefix # 4s) (v4i32 VPR128:$Rn)))>; - - def : Pat<(v2i64 (Neon_Op (v2i64 VPR128:$Rn))), - (v2i64 (!cast<Instruction>(Prefix # 2d) (v2i64 VPR128:$Rn)))>; - - def : Pat<(v8i8 (Neon_Op (v8i8 VPR64:$Rn))), - (v8i8 (!cast<Instruction>(Prefix # 8b) (v8i8 VPR64:$Rn)))>; - - def : Pat<(v4i16 (Neon_Op (v4i16 VPR64:$Rn))), - (v4i16 (!cast<Instruction>(Prefix # 4h) (v4i16 VPR64:$Rn)))>; - - def : Pat<(v2i32 (Neon_Op (v2i32 VPR64:$Rn))), - (v2i32 (!cast<Instruction>(Prefix # 2s) (v2i32 VPR64:$Rn)))>; -} - -defm : NeonI_2VMisc_BHSD_1Arg_Pattern<"SQABS", int_arm_neon_vqabs>; -defm : NeonI_2VMisc_BHSD_1Arg_Pattern<"SQNEG", int_arm_neon_vqneg>; -defm : NeonI_2VMisc_BHSD_1Arg_Pattern<"ABS", int_arm_neon_vabs>; - -def : Pat<(v16i8 (sub - (v16i8 Neon_AllZero), - (v16i8 VPR128:$Rn))), - (v16i8 (NEG16b (v16i8 VPR128:$Rn)))>; -def : Pat<(v8i8 (sub - (v8i8 Neon_AllZero), - (v8i8 VPR64:$Rn))), - (v8i8 (NEG8b (v8i8 VPR64:$Rn)))>; -def : Pat<(v8i16 (sub - (v8i16 (bitconvert (v16i8 Neon_AllZero))), - (v8i16 VPR128:$Rn))), - (v8i16 (NEG8h (v8i16 VPR128:$Rn)))>; -def : Pat<(v4i16 (sub - (v4i16 (bitconvert (v8i8 Neon_AllZero))), - (v4i16 VPR64:$Rn))), - (v4i16 (NEG4h (v4i16 VPR64:$Rn)))>; -def : Pat<(v4i32 (sub - (v4i32 (bitconvert (v16i8 Neon_AllZero))), - (v4i32 VPR128:$Rn))), - (v4i32 (NEG4s (v4i32 VPR128:$Rn)))>; -def : Pat<(v2i32 (sub - (v2i32 (bitconvert (v8i8 Neon_AllZero))), - (v2i32 VPR64:$Rn))), - (v2i32 (NEG2s (v2i32 VPR64:$Rn)))>; -def : Pat<(v2i64 (sub - (v2i64 (bitconvert (v16i8 Neon_AllZero))), - (v2i64 VPR128:$Rn))), - (v2i64 (NEG2d (v2i64 VPR128:$Rn)))>; - -multiclass NeonI_2VMisc_BHSDsize_2Args<string asmop, bit U, bits<5> opcode> { - let Constraints = "$src = $Rd" in { - def 16b : NeonI_2VMisc<0b1, U, 0b00, opcode, - (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), - asmop # "\t$Rd.16b, $Rn.16b", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def 8h : NeonI_2VMisc<0b1, U, 0b01, opcode, - (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), - asmop # "\t$Rd.8h, $Rn.8h", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode, - (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), - asmop # "\t$Rd.4s, $Rn.4s", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode, - (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), - asmop # "\t$Rd.2d, $Rn.2d", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def 8b : NeonI_2VMisc<0b0, U, 0b00, opcode, - (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn), - asmop # "\t$Rd.8b, $Rn.8b", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def 4h : NeonI_2VMisc<0b0, U, 0b01, opcode, - (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn), - asmop # "\t$Rd.4h, $Rn.4h", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode, - (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn), - asmop # "\t$Rd.2s, $Rn.2s", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - } -} - -defm SUQADD : NeonI_2VMisc_BHSDsize_2Args<"suqadd", 0b0, 0b00011>; -defm USQADD : NeonI_2VMisc_BHSDsize_2Args<"usqadd", 0b1, 0b00011>; - -multiclass NeonI_2VMisc_BHSD_2Args_Pattern<string Prefix, - SDPatternOperator Neon_Op> { - def : Pat<(v16i8 (Neon_Op (v16i8 VPR128:$src), (v16i8 VPR128:$Rn))), - (v16i8 
(!cast<Instruction>(Prefix # 16b) - (v16i8 VPR128:$src), (v16i8 VPR128:$Rn)))>; - - def : Pat<(v8i16 (Neon_Op (v8i16 VPR128:$src), (v8i16 VPR128:$Rn))), - (v8i16 (!cast<Instruction>(Prefix # 8h) - (v8i16 VPR128:$src), (v8i16 VPR128:$Rn)))>; - - def : Pat<(v4i32 (Neon_Op (v4i32 VPR128:$src), (v4i32 VPR128:$Rn))), - (v4i32 (!cast<Instruction>(Prefix # 4s) - (v4i32 VPR128:$src), (v4i32 VPR128:$Rn)))>; - - def : Pat<(v2i64 (Neon_Op (v2i64 VPR128:$src), (v2i64 VPR128:$Rn))), - (v2i64 (!cast<Instruction>(Prefix # 2d) - (v2i64 VPR128:$src), (v2i64 VPR128:$Rn)))>; - - def : Pat<(v8i8 (Neon_Op (v8i8 VPR64:$src), (v8i8 VPR64:$Rn))), - (v8i8 (!cast<Instruction>(Prefix # 8b) - (v8i8 VPR64:$src), (v8i8 VPR64:$Rn)))>; - - def : Pat<(v4i16 (Neon_Op (v4i16 VPR64:$src), (v4i16 VPR64:$Rn))), - (v4i16 (!cast<Instruction>(Prefix # 4h) - (v4i16 VPR64:$src), (v4i16 VPR64:$Rn)))>; - - def : Pat<(v2i32 (Neon_Op (v2i32 VPR64:$src), (v2i32 VPR64:$Rn))), - (v2i32 (!cast<Instruction>(Prefix # 2s) - (v2i32 VPR64:$src), (v2i32 VPR64:$Rn)))>; -} - -defm : NeonI_2VMisc_BHSD_2Args_Pattern<"SUQADD", int_aarch64_neon_suqadd>; -defm : NeonI_2VMisc_BHSD_2Args_Pattern<"USQADD", int_aarch64_neon_usqadd>; - -multiclass NeonI_2VMisc_BHSsizes<string asmop, bit U, - SDPatternOperator Neon_Op> { - def 16b : NeonI_2VMisc<0b1, U, 0b00, 0b00100, - (outs VPR128:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.16b, $Rn.16b", - [(set (v16i8 VPR128:$Rd), - (v16i8 (Neon_Op (v16i8 VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 8h : NeonI_2VMisc<0b1, U, 0b01, 0b00100, - (outs VPR128:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.8h, $Rn.8h", - [(set (v8i16 VPR128:$Rd), - (v8i16 (Neon_Op (v8i16 VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 4s : NeonI_2VMisc<0b1, U, 0b10, 0b00100, - (outs VPR128:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.4s, $Rn.4s", - [(set (v4i32 VPR128:$Rd), - (v4i32 (Neon_Op (v4i32 VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 8b : NeonI_2VMisc<0b0, U, 0b00, 0b00100, - (outs VPR64:$Rd), (ins VPR64:$Rn), - asmop # "\t$Rd.8b, $Rn.8b", - [(set (v8i8 VPR64:$Rd), - (v8i8 (Neon_Op (v8i8 VPR64:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 4h : NeonI_2VMisc<0b0, U, 0b01, 0b00100, - (outs VPR64:$Rd), (ins VPR64:$Rn), - asmop # "\t$Rd.4h, $Rn.4h", - [(set (v4i16 VPR64:$Rd), - (v4i16 (Neon_Op (v4i16 VPR64:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 2s : NeonI_2VMisc<0b0, U, 0b10, 0b00100, - (outs VPR64:$Rd), (ins VPR64:$Rn), - asmop # "\t$Rd.2s, $Rn.2s", - [(set (v2i32 VPR64:$Rd), - (v2i32 (Neon_Op (v2i32 VPR64:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; -} - -defm CLS : NeonI_2VMisc_BHSsizes<"cls", 0b0, int_arm_neon_vcls>; -defm CLZ : NeonI_2VMisc_BHSsizes<"clz", 0b1, ctlz>; - -multiclass NeonI_2VMisc_Bsize<string asmop, bit U, bits<2> size, - bits<5> Opcode> { - def 16b : NeonI_2VMisc<0b1, U, size, Opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.16b, $Rn.16b", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 8b : NeonI_2VMisc<0b0, U, size, Opcode, - (outs VPR64:$Rd), (ins VPR64:$Rn), - asmop # "\t$Rd.8b, $Rn.8b", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; -} - -defm CNT : NeonI_2VMisc_Bsize<"cnt", 0b0, 0b00, 0b00101>; -defm NOT : NeonI_2VMisc_Bsize<"not", 0b1, 0b00, 0b00101>; -defm RBIT : NeonI_2VMisc_Bsize<"rbit", 0b1, 0b01, 0b00101>; - -def : NeonInstAlias<"mvn $Rd.16b, $Rn.16b", - (NOT16b VPR128:$Rd, VPR128:$Rn), 0>; -def : NeonInstAlias<"mvn $Rd.8b, $Rn.8b", - 
(NOT8b VPR64:$Rd, VPR64:$Rn), 0>; - -def : Pat<(v16i8 (ctpop (v16i8 VPR128:$Rn))), - (v16i8 (CNT16b (v16i8 VPR128:$Rn)))>; -def : Pat<(v8i8 (ctpop (v8i8 VPR64:$Rn))), - (v8i8 (CNT8b (v8i8 VPR64:$Rn)))>; - -def : Pat<(v16i8 (xor - (v16i8 VPR128:$Rn), - (v16i8 Neon_AllOne))), - (v16i8 (NOT16b (v16i8 VPR128:$Rn)))>; -def : Pat<(v8i8 (xor - (v8i8 VPR64:$Rn), - (v8i8 Neon_AllOne))), - (v8i8 (NOT8b (v8i8 VPR64:$Rn)))>; -def : Pat<(v8i16 (xor - (v8i16 VPR128:$Rn), - (v8i16 (bitconvert (v16i8 Neon_AllOne))))), - (NOT16b VPR128:$Rn)>; -def : Pat<(v4i16 (xor - (v4i16 VPR64:$Rn), - (v4i16 (bitconvert (v8i8 Neon_AllOne))))), - (NOT8b VPR64:$Rn)>; -def : Pat<(v4i32 (xor - (v4i32 VPR128:$Rn), - (v4i32 (bitconvert (v16i8 Neon_AllOne))))), - (NOT16b VPR128:$Rn)>; -def : Pat<(v2i32 (xor - (v2i32 VPR64:$Rn), - (v2i32 (bitconvert (v8i8 Neon_AllOne))))), - (NOT8b VPR64:$Rn)>; -def : Pat<(v2i64 (xor - (v2i64 VPR128:$Rn), - (v2i64 (bitconvert (v16i8 Neon_AllOne))))), - (NOT16b VPR128:$Rn)>; - -def : Pat<(v16i8 (int_aarch64_neon_rbit (v16i8 VPR128:$Rn))), - (v16i8 (RBIT16b (v16i8 VPR128:$Rn)))>; -def : Pat<(v8i8 (int_aarch64_neon_rbit (v8i8 VPR64:$Rn))), - (v8i8 (RBIT8b (v8i8 VPR64:$Rn)))>; - -multiclass NeonI_2VMisc_SDsizes<string asmop, bit U, bits<5> opcode, - SDPatternOperator Neon_Op> { - def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.4s, $Rn.4s", - [(set (v4f32 VPR128:$Rd), - (v4f32 (Neon_Op (v4f32 VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.2d, $Rn.2d", - [(set (v2f64 VPR128:$Rd), - (v2f64 (Neon_Op (v2f64 VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode, - (outs VPR64:$Rd), (ins VPR64:$Rn), - asmop # "\t$Rd.2s, $Rn.2s", - [(set (v2f32 VPR64:$Rd), - (v2f32 (Neon_Op (v2f32 VPR64:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; -} - -defm FABS : NeonI_2VMisc_SDsizes<"fabs", 0b0, 0b01111, fabs>; -defm FNEG : NeonI_2VMisc_SDsizes<"fneg", 0b1, 0b01111, fneg>; - -multiclass NeonI_2VMisc_HSD_Narrow<string asmop, bit U, bits<5> opcode> { - def 8h8b : NeonI_2VMisc<0b0, U, 0b00, opcode, - (outs VPR64:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.8b, $Rn.8h", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 4s4h : NeonI_2VMisc<0b0, U, 0b01, opcode, - (outs VPR64:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.4h, $Rn.4s", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 2d2s : NeonI_2VMisc<0b0, U, 0b10, opcode, - (outs VPR64:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.2s, $Rn.2d", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - let Constraints = "$Rd = $src" in { - def 8h16b : NeonI_2VMisc<0b1, U, 0b00, opcode, - (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), - asmop # "2\t$Rd.16b, $Rn.8h", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def 4s8h : NeonI_2VMisc<0b1, U, 0b01, opcode, - (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), - asmop # "2\t$Rd.8h, $Rn.4s", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def 2d4s : NeonI_2VMisc<0b1, U, 0b10, opcode, - (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), - asmop # "2\t$Rd.4s, $Rn.2d", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - } -} - -defm XTN : NeonI_2VMisc_HSD_Narrow<"xtn", 0b0, 0b10010>; -defm SQXTUN : NeonI_2VMisc_HSD_Narrow<"sqxtun", 0b1, 0b10010>; -defm SQXTN : NeonI_2VMisc_HSD_Narrow<"sqxtn", 0b0, 0b10100>; -defm 
UQXTN : NeonI_2VMisc_HSD_Narrow<"uqxtn", 0b1, 0b10100>; - -multiclass NeonI_2VMisc_Narrow_Patterns<string Prefix, - SDPatternOperator Neon_Op> { - def : Pat<(v8i8 (Neon_Op (v8i16 VPR128:$Rn))), - (v8i8 (!cast<Instruction>(Prefix # 8h8b) (v8i16 VPR128:$Rn)))>; - - def : Pat<(v4i16 (Neon_Op (v4i32 VPR128:$Rn))), - (v4i16 (!cast<Instruction>(Prefix # 4s4h) (v4i32 VPR128:$Rn)))>; - - def : Pat<(v2i32 (Neon_Op (v2i64 VPR128:$Rn))), - (v2i32 (!cast<Instruction>(Prefix # 2d2s) (v2i64 VPR128:$Rn)))>; - - def : Pat<(v16i8 (concat_vectors - (v8i8 VPR64:$src), - (v8i8 (Neon_Op (v8i16 VPR128:$Rn))))), - (!cast<Instruction>(Prefix # 8h16b) - (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64), - VPR128:$Rn)>; - - def : Pat<(v8i16 (concat_vectors - (v4i16 VPR64:$src), - (v4i16 (Neon_Op (v4i32 VPR128:$Rn))))), - (!cast<Instruction>(Prefix # 4s8h) - (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64), - VPR128:$Rn)>; - - def : Pat<(v4i32 (concat_vectors - (v2i32 VPR64:$src), - (v2i32 (Neon_Op (v2i64 VPR128:$Rn))))), - (!cast<Instruction>(Prefix # 2d4s) - (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64), - VPR128:$Rn)>; -} - -defm : NeonI_2VMisc_Narrow_Patterns<"XTN", trunc>; -defm : NeonI_2VMisc_Narrow_Patterns<"SQXTUN", int_arm_neon_vqmovnsu>; -defm : NeonI_2VMisc_Narrow_Patterns<"SQXTN", int_arm_neon_vqmovns>; -defm : NeonI_2VMisc_Narrow_Patterns<"UQXTN", int_arm_neon_vqmovnu>; - -multiclass NeonI_2VMisc_SHIFT<string asmop, bit U, bits<5> opcode> { - let DecoderMethod = "DecodeSHLLInstruction" in { - def 8b8h : NeonI_2VMisc<0b0, U, 0b00, opcode, - (outs VPR128:$Rd), - (ins VPR64:$Rn, uimm_exact8:$Imm), - asmop # "\t$Rd.8h, $Rn.8b, $Imm", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 4h4s : NeonI_2VMisc<0b0, U, 0b01, opcode, - (outs VPR128:$Rd), - (ins VPR64:$Rn, uimm_exact16:$Imm), - asmop # "\t$Rd.4s, $Rn.4h, $Imm", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 2s2d : NeonI_2VMisc<0b0, U, 0b10, opcode, - (outs VPR128:$Rd), - (ins VPR64:$Rn, uimm_exact32:$Imm), - asmop # "\t$Rd.2d, $Rn.2s, $Imm", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode, - (outs VPR128:$Rd), - (ins VPR128:$Rn, uimm_exact8:$Imm), - asmop # "2\t$Rd.8h, $Rn.16b, $Imm", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode, - (outs VPR128:$Rd), - (ins VPR128:$Rn, uimm_exact16:$Imm), - asmop # "2\t$Rd.4s, $Rn.8h, $Imm", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode, - (outs VPR128:$Rd), - (ins VPR128:$Rn, uimm_exact32:$Imm), - asmop # "2\t$Rd.2d, $Rn.4s, $Imm", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - } -} - -defm SHLL : NeonI_2VMisc_SHIFT<"shll", 0b1, 0b10011>; - -class NeonI_SHLL_Patterns<ValueType OpTy, ValueType DesTy, - SDPatternOperator ExtOp, Operand Neon_Imm, - string suffix> - : Pat<(DesTy (shl - (DesTy (ExtOp (OpTy VPR64:$Rn))), - (DesTy (Neon_vdup - (i32 Neon_Imm:$Imm))))), - (!cast<Instruction>("SHLL" # suffix) VPR64:$Rn, Neon_Imm:$Imm)>; - -class NeonI_SHLL_High_Patterns<ValueType OpTy, ValueType DesTy, - SDPatternOperator ExtOp, Operand Neon_Imm, - string suffix, PatFrag GetHigh> - : Pat<(DesTy (shl - (DesTy (ExtOp - (OpTy (GetHigh VPR128:$Rn)))), - (DesTy (Neon_vdup - (i32 Neon_Imm:$Imm))))), - (!cast<Instruction>("SHLL" # suffix) VPR128:$Rn, Neon_Imm:$Imm)>; - -def : NeonI_SHLL_Patterns<v8i8, v8i16, zext, uimm_exact8, "8b8h">; -def : NeonI_SHLL_Patterns<v8i8, v8i16, sext, uimm_exact8, "8b8h">; -def : NeonI_SHLL_Patterns<v4i16, v4i32, 
zext, uimm_exact16, "4h4s">; -def : NeonI_SHLL_Patterns<v4i16, v4i32, sext, uimm_exact16, "4h4s">; -def : NeonI_SHLL_Patterns<v2i32, v2i64, zext, uimm_exact32, "2s2d">; -def : NeonI_SHLL_Patterns<v2i32, v2i64, sext, uimm_exact32, "2s2d">; -def : NeonI_SHLL_High_Patterns<v8i8, v8i16, zext, uimm_exact8, "16b8h", - Neon_High16B>; -def : NeonI_SHLL_High_Patterns<v8i8, v8i16, sext, uimm_exact8, "16b8h", - Neon_High16B>; -def : NeonI_SHLL_High_Patterns<v4i16, v4i32, zext, uimm_exact16, "8h4s", - Neon_High8H>; -def : NeonI_SHLL_High_Patterns<v4i16, v4i32, sext, uimm_exact16, "8h4s", - Neon_High8H>; -def : NeonI_SHLL_High_Patterns<v2i32, v2i64, zext, uimm_exact32, "4s2d", - Neon_High4S>; -def : NeonI_SHLL_High_Patterns<v2i32, v2i64, sext, uimm_exact32, "4s2d", - Neon_High4S>; - -multiclass NeonI_2VMisc_SD_Narrow<string asmop, bit U, bits<5> opcode> { - def 4s4h : NeonI_2VMisc<0b0, U, 0b00, opcode, - (outs VPR64:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.4h, $Rn.4s", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 2d2s : NeonI_2VMisc<0b0, U, 0b01, opcode, - (outs VPR64:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.2s, $Rn.2d", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - let Constraints = "$src = $Rd" in { - def 4s8h : NeonI_2VMisc<0b1, U, 0b00, opcode, - (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), - asmop # "2\t$Rd.8h, $Rn.4s", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def 2d4s : NeonI_2VMisc<0b1, U, 0b01, opcode, - (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), - asmop # "2\t$Rd.4s, $Rn.2d", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - } -} - -defm FCVTN : NeonI_2VMisc_SD_Narrow<"fcvtn", 0b0, 0b10110>; - -multiclass NeonI_2VMisc_Narrow_Pattern<string prefix, - SDPatternOperator f32_to_f16_Op, - SDPatternOperator f64_to_f32_Op> { - - def : Pat<(v4i16 (f32_to_f16_Op (v4f32 VPR128:$Rn))), - (!cast<Instruction>(prefix # "4s4h") (v4f32 VPR128:$Rn))>; - - def : Pat<(v8i16 (concat_vectors - (v4i16 VPR64:$src), - (v4i16 (f32_to_f16_Op (v4f32 VPR128:$Rn))))), - (!cast<Instruction>(prefix # "4s8h") - (v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)), - (v4f32 VPR128:$Rn))>; - - def : Pat<(v2f32 (f64_to_f32_Op (v2f64 VPR128:$Rn))), - (!cast<Instruction>(prefix # "2d2s") (v2f64 VPR128:$Rn))>; - - def : Pat<(v4f32 (concat_vectors - (v2f32 VPR64:$src), - (v2f32 (f64_to_f32_Op (v2f64 VPR128:$Rn))))), - (!cast<Instruction>(prefix # "2d4s") - (v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)), - (v2f64 VPR128:$Rn))>; -} - -defm : NeonI_2VMisc_Narrow_Pattern<"FCVTN", int_arm_neon_vcvtfp2hf, fround>; - -multiclass NeonI_2VMisc_D_Narrow<string asmop, string prefix, bit U, - bits<5> opcode> { - def 2d2s : NeonI_2VMisc<0b0, U, 0b01, opcode, - (outs VPR64:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.2s, $Rn.2d", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 2d4s : NeonI_2VMisc<0b1, U, 0b01, opcode, - (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), - asmop # "2\t$Rd.4s, $Rn.2d", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { - let Constraints = "$src = $Rd"; - } - - def : Pat<(v2f32 (int_aarch64_neon_vcvtxn (v2f64 VPR128:$Rn))), - (!cast<Instruction>(prefix # "2d2s") VPR128:$Rn)>; - - def : Pat<(v4f32 (concat_vectors - (v2f32 VPR64:$src), - (v2f32 (int_aarch64_neon_vcvtxn (v2f64 VPR128:$Rn))))), - (!cast<Instruction>(prefix # "2d4s") - (v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)), - VPR128:$Rn)>; -} - -defm FCVTXN : NeonI_2VMisc_D_Narrow<"fcvtxn","FCVTXN", 0b1, 0b10110>; - -def Neon_High4Float : 
PatFrag<(ops node:$in), - (extract_subvector (v4f32 node:$in), (iPTR 2))>; - -multiclass NeonI_2VMisc_HS_Extend<string asmop, bit U, bits<5> opcode> { - def 4h4s : NeonI_2VMisc<0b0, U, 0b00, opcode, - (outs VPR128:$Rd), (ins VPR64:$Rn), - asmop # "\t$Rd.4s, $Rn.4h", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 2s2d : NeonI_2VMisc<0b0, U, 0b01, opcode, - (outs VPR128:$Rd), (ins VPR64:$Rn), - asmop # "\t$Rd.2d, $Rn.2s", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 8h4s : NeonI_2VMisc<0b1, U, 0b00, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn), - asmop # "2\t$Rd.4s, $Rn.8h", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 4s2d : NeonI_2VMisc<0b1, U, 0b01, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn), - asmop # "2\t$Rd.2d, $Rn.4s", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; -} - -defm FCVTL : NeonI_2VMisc_HS_Extend<"fcvtl", 0b0, 0b10111>; - -multiclass NeonI_2VMisc_Extend_Pattern<string prefix> { - def : Pat<(v4f32 (int_arm_neon_vcvthf2fp (v4i16 VPR64:$Rn))), - (!cast<Instruction>(prefix # "4h4s") VPR64:$Rn)>; - - def : Pat<(v4f32 (int_arm_neon_vcvthf2fp - (v4i16 (Neon_High8H - (v8i16 VPR128:$Rn))))), - (!cast<Instruction>(prefix # "8h4s") VPR128:$Rn)>; - - def : Pat<(v2f64 (fextend (v2f32 VPR64:$Rn))), - (!cast<Instruction>(prefix # "2s2d") VPR64:$Rn)>; - - def : Pat<(v2f64 (fextend - (v2f32 (Neon_High4Float - (v4f32 VPR128:$Rn))))), - (!cast<Instruction>(prefix # "4s2d") VPR128:$Rn)>; -} - -defm : NeonI_2VMisc_Extend_Pattern<"FCVTL">; - -multiclass NeonI_2VMisc_SD_Conv<string asmop, bit Size, bit U, bits<5> opcode, - ValueType ResTy4s, ValueType OpTy4s, - ValueType ResTy2d, ValueType OpTy2d, - ValueType ResTy2s, ValueType OpTy2s, - SDPatternOperator Neon_Op> { - - def 4s : NeonI_2VMisc<0b1, U, {Size, 0b0}, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.4s, $Rn.4s", - [(set (ResTy4s VPR128:$Rd), - (ResTy4s (Neon_Op (OpTy4s VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 2d : NeonI_2VMisc<0b1, U, {Size, 0b1}, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.2d, $Rn.2d", - [(set (ResTy2d VPR128:$Rd), - (ResTy2d (Neon_Op (OpTy2d VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 2s : NeonI_2VMisc<0b0, U, {Size, 0b0}, opcode, - (outs VPR64:$Rd), (ins VPR64:$Rn), - asmop # "\t$Rd.2s, $Rn.2s", - [(set (ResTy2s VPR64:$Rd), - (ResTy2s (Neon_Op (OpTy2s VPR64:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; -} - -multiclass NeonI_2VMisc_fp_to_int<string asmop, bit Size, bit U, - bits<5> opcode, SDPatternOperator Neon_Op> { - defm _ : NeonI_2VMisc_SD_Conv<asmop, Size, U, opcode, v4i32, v4f32, v2i64, - v2f64, v2i32, v2f32, Neon_Op>; -} - -defm FCVTNS : NeonI_2VMisc_fp_to_int<"fcvtns", 0b0, 0b0, 0b11010, - int_arm_neon_vcvtns>; -defm FCVTNU : NeonI_2VMisc_fp_to_int<"fcvtnu", 0b0, 0b1, 0b11010, - int_arm_neon_vcvtnu>; -defm FCVTPS : NeonI_2VMisc_fp_to_int<"fcvtps", 0b1, 0b0, 0b11010, - int_arm_neon_vcvtps>; -defm FCVTPU : NeonI_2VMisc_fp_to_int<"fcvtpu", 0b1, 0b1, 0b11010, - int_arm_neon_vcvtpu>; -defm FCVTMS : NeonI_2VMisc_fp_to_int<"fcvtms", 0b0, 0b0, 0b11011, - int_arm_neon_vcvtms>; -defm FCVTMU : NeonI_2VMisc_fp_to_int<"fcvtmu", 0b0, 0b1, 0b11011, - int_arm_neon_vcvtmu>; -defm FCVTZS : NeonI_2VMisc_fp_to_int<"fcvtzs", 0b1, 0b0, 0b11011, fp_to_sint>; -defm FCVTZU : NeonI_2VMisc_fp_to_int<"fcvtzu", 0b1, 0b1, 0b11011, fp_to_uint>; -defm FCVTAS : NeonI_2VMisc_fp_to_int<"fcvtas", 0b0, 0b0, 0b11100, - int_arm_neon_vcvtas>; -defm FCVTAU : 
NeonI_2VMisc_fp_to_int<"fcvtau", 0b0, 0b1, 0b11100, - int_arm_neon_vcvtau>; - -multiclass NeonI_2VMisc_int_to_fp<string asmop, bit Size, bit U, - bits<5> opcode, SDPatternOperator Neon_Op> { - defm _ : NeonI_2VMisc_SD_Conv<asmop, Size, U, opcode, v4f32, v4i32, v2f64, - v2i64, v2f32, v2i32, Neon_Op>; -} - -defm SCVTF : NeonI_2VMisc_int_to_fp<"scvtf", 0b0, 0b0, 0b11101, sint_to_fp>; -defm UCVTF : NeonI_2VMisc_int_to_fp<"ucvtf", 0b0, 0b1, 0b11101, uint_to_fp>; - -multiclass NeonI_2VMisc_fp_to_fp<string asmop, bit Size, bit U, - bits<5> opcode, SDPatternOperator Neon_Op> { - defm _ : NeonI_2VMisc_SD_Conv<asmop, Size, U, opcode, v4f32, v4f32, v2f64, - v2f64, v2f32, v2f32, Neon_Op>; -} - -defm FRINTN : NeonI_2VMisc_fp_to_fp<"frintn", 0b0, 0b0, 0b11000, - int_aarch64_neon_frintn>; -defm FRINTA : NeonI_2VMisc_fp_to_fp<"frinta", 0b0, 0b1, 0b11000, frnd>; -defm FRINTP : NeonI_2VMisc_fp_to_fp<"frintp", 0b1, 0b0, 0b11000, fceil>; -defm FRINTM : NeonI_2VMisc_fp_to_fp<"frintm", 0b0, 0b0, 0b11001, ffloor>; -defm FRINTX : NeonI_2VMisc_fp_to_fp<"frintx", 0b0, 0b1, 0b11001, frint>; -defm FRINTZ : NeonI_2VMisc_fp_to_fp<"frintz", 0b1, 0b0, 0b11001, ftrunc>; -defm FRINTI : NeonI_2VMisc_fp_to_fp<"frinti", 0b1, 0b1, 0b11001, fnearbyint>; -defm FRECPE : NeonI_2VMisc_fp_to_fp<"frecpe", 0b1, 0b0, 0b11101, - int_arm_neon_vrecpe>; -defm FRSQRTE : NeonI_2VMisc_fp_to_fp<"frsqrte", 0b1, 0b1, 0b11101, - int_arm_neon_vrsqrte>; -let SchedRW = [WriteFPSqrt, ReadFPSqrt] in { -defm FSQRT : NeonI_2VMisc_fp_to_fp<"fsqrt", 0b1, 0b1, 0b11111, fsqrt>; -} - -multiclass NeonI_2VMisc_S_Conv<string asmop, bit Size, bit U, - bits<5> opcode, SDPatternOperator Neon_Op> { - def 4s : NeonI_2VMisc<0b1, U, {Size, 0b0}, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.4s, $Rn.4s", - [(set (v4i32 VPR128:$Rd), - (v4i32 (Neon_Op (v4i32 VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 2s : NeonI_2VMisc<0b0, U, {Size, 0b0}, opcode, - (outs VPR64:$Rd), (ins VPR64:$Rn), - asmop # "\t$Rd.2s, $Rn.2s", - [(set (v2i32 VPR64:$Rd), - (v2i32 (Neon_Op (v2i32 VPR64:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; -} - -defm URECPE : NeonI_2VMisc_S_Conv<"urecpe", 0b1, 0b0, 0b11100, - int_arm_neon_vrecpe>; -defm URSQRTE : NeonI_2VMisc_S_Conv<"ursqrte", 0b1, 0b1, 0b11100, - int_arm_neon_vrsqrte>; - -// Crypto Class -class NeonI_Cryptoaes_2v<bits<2> size, bits<5> opcode, - string asmop, SDPatternOperator opnode> - : NeonI_Crypto_AES<size, opcode, - (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), - asmop # "\t$Rd.16b, $Rn.16b", - [(set (v16i8 VPR128:$Rd), - (v16i8 (opnode (v16i8 VPR128:$src), - (v16i8 VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { - let Constraints = "$src = $Rd"; - let Predicates = [HasNEON, HasCrypto]; -} - -def AESE : NeonI_Cryptoaes_2v<0b00, 0b00100, "aese", int_arm_neon_aese>; -def AESD : NeonI_Cryptoaes_2v<0b00, 0b00101, "aesd", int_arm_neon_aesd>; - -class NeonI_Cryptoaes<bits<2> size, bits<5> opcode, - string asmop, SDPatternOperator opnode> - : NeonI_Crypto_AES<size, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.16b, $Rn.16b", - [(set (v16i8 VPR128:$Rd), - (v16i8 (opnode (v16i8 VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - -def AESMC : NeonI_Cryptoaes<0b00, 0b00110, "aesmc", int_arm_neon_aesmc>; -def AESIMC : NeonI_Cryptoaes<0b00, 0b00111, "aesimc", int_arm_neon_aesimc>; - -class NeonI_Cryptosha_vv<bits<2> size, bits<5> opcode, - string asmop, SDPatternOperator opnode> - : NeonI_Crypto_SHA<size, opcode, - (outs 
VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), - asmop # "\t$Rd.4s, $Rn.4s", - [(set (v4i32 VPR128:$Rd), - (v4i32 (opnode (v4i32 VPR128:$src), - (v4i32 VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { - let Constraints = "$src = $Rd"; - let Predicates = [HasNEON, HasCrypto]; -} - -def SHA1SU1 : NeonI_Cryptosha_vv<0b00, 0b00001, "sha1su1", - int_arm_neon_sha1su1>; -def SHA256SU0 : NeonI_Cryptosha_vv<0b00, 0b00010, "sha256su0", - int_arm_neon_sha256su0>; - -class NeonI_Cryptosha_ss<bits<2> size, bits<5> opcode, - string asmop, SDPatternOperator opnode> - : NeonI_Crypto_SHA<size, opcode, - (outs FPR32:$Rd), (ins FPR32:$Rn), - asmop # "\t$Rd, $Rn", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]> { - let Predicates = [HasNEON, HasCrypto]; - let hasSideEffects = 0; -} - -def SHA1H : NeonI_Cryptosha_ss<0b00, 0b00000, "sha1h", int_arm_neon_sha1h>; -def : Pat<(i32 (int_arm_neon_sha1h i32:$Rn)), - (COPY_TO_REGCLASS (SHA1H (COPY_TO_REGCLASS i32:$Rn, FPR32)), GPR32)>; - - -class NeonI_Cryptosha3_vvv<bits<2> size, bits<3> opcode, string asmop, - SDPatternOperator opnode> - : NeonI_Crypto_3VSHA<size, opcode, - (outs VPR128:$Rd), - (ins VPR128:$src, VPR128:$Rn, VPR128:$Rm), - asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s", - [(set (v4i32 VPR128:$Rd), - (v4i32 (opnode (v4i32 VPR128:$src), - (v4i32 VPR128:$Rn), - (v4i32 VPR128:$Rm))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]> { - let Constraints = "$src = $Rd"; - let Predicates = [HasNEON, HasCrypto]; -} - -def SHA1SU0 : NeonI_Cryptosha3_vvv<0b00, 0b011, "sha1su0", - int_arm_neon_sha1su0>; -def SHA256SU1 : NeonI_Cryptosha3_vvv<0b00, 0b110, "sha256su1", - int_arm_neon_sha256su1>; - -class NeonI_Cryptosha3_qqv<bits<2> size, bits<3> opcode, string asmop, - SDPatternOperator opnode> - : NeonI_Crypto_3VSHA<size, opcode, - (outs FPR128:$Rd), - (ins FPR128:$src, FPR128:$Rn, VPR128:$Rm), - asmop # "\t$Rd, $Rn, $Rm.4s", - [(set (v4i32 FPR128:$Rd), - (v4i32 (opnode (v4i32 FPR128:$src), - (v4i32 FPR128:$Rn), - (v4i32 VPR128:$Rm))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]> { - let Constraints = "$src = $Rd"; - let Predicates = [HasNEON, HasCrypto]; -} - -def SHA256H : NeonI_Cryptosha3_qqv<0b00, 0b100, "sha256h", - int_arm_neon_sha256h>; -def SHA256H2 : NeonI_Cryptosha3_qqv<0b00, 0b101, "sha256h2", - int_arm_neon_sha256h2>; - -class NeonI_Cryptosha3_qsv<bits<2> size, bits<3> opcode, string asmop> - : NeonI_Crypto_3VSHA<size, opcode, - (outs FPR128:$Rd), - (ins FPR128:$src, FPR32:$Rn, VPR128:$Rm), - asmop # "\t$Rd, $Rn, $Rm.4s", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]> { - let Constraints = "$src = $Rd"; - let hasSideEffects = 0; - let Predicates = [HasNEON, HasCrypto]; -} - -def SHA1C : NeonI_Cryptosha3_qsv<0b00, 0b000, "sha1c">; -def SHA1P : NeonI_Cryptosha3_qsv<0b00, 0b001, "sha1p">; -def SHA1M : NeonI_Cryptosha3_qsv<0b00, 0b010, "sha1m">; - -def : Pat<(int_arm_neon_sha1c v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk), - (SHA1C v4i32:$hash_abcd, - (COPY_TO_REGCLASS i32:$hash_e, FPR32), v4i32:$wk)>; -def : Pat<(int_arm_neon_sha1m v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk), - (SHA1M v4i32:$hash_abcd, - (COPY_TO_REGCLASS i32:$hash_e, FPR32), v4i32:$wk)>; -def : Pat<(int_arm_neon_sha1p v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk), - (SHA1P v4i32:$hash_abcd, - (COPY_TO_REGCLASS i32:$hash_e, FPR32), v4i32:$wk)>; - -// Additional patterns to match shl to USHL. 
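The shl/srl/sra-by-vector patterns that follow all select to the USHL/SSHL register-shift instructions. As a point of orientation, here is a minimal per-lane reference model of the USHL behaviour those patterns rely on (an illustrative sketch, not part of the backend): a non-negative lane amount shifts left and a negative one shifts right, which is why the right-shift patterns further down negate the shift vector first.

#include <cstdint>

// Rough per-lane model of USHL on 8-bit lanes: left shift for non-negative
// amounts, logical right shift for negative ones (SSHL would shift
// arithmetically instead); amounts at or beyond the lane width give zero.
static void ushl8b_ref(uint8_t Rd[8], const uint8_t Rn[8], const int8_t Rm[8]) {
  for (int i = 0; i != 8; ++i) {
    int Amt = Rm[i];
    if (Amt >= 8 || Amt <= -8)
      Rd[i] = 0;
    else
      Rd[i] = Amt >= 0 ? (uint8_t)(Rn[i] << Amt) : (uint8_t)(Rn[i] >> -Amt);
  }
}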
-def : Pat<(v8i8 (shl (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))), - (USHLvvv_8B $Rn, $Rm)>; -def : Pat<(v4i16 (shl (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))), - (USHLvvv_4H $Rn, $Rm)>; -def : Pat<(v2i32 (shl (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))), - (USHLvvv_2S $Rn, $Rm)>; -def : Pat<(v1i64 (shl (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))), - (USHLddd $Rn, $Rm)>; -def : Pat<(v16i8 (shl (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))), - (USHLvvv_16B $Rn, $Rm)>; -def : Pat<(v8i16 (shl (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))), - (USHLvvv_8H $Rn, $Rm)>; -def : Pat<(v4i32 (shl (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))), - (USHLvvv_4S $Rn, $Rm)>; -def : Pat<(v2i64 (shl (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))), - (USHLvvv_2D $Rn, $Rm)>; - -def : Pat<(v1i8 (shl (v1i8 FPR8:$Rn), (v1i8 FPR8:$Rm))), - (EXTRACT_SUBREG - (USHLvvv_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8), - (SUBREG_TO_REG (i64 0), FPR8:$Rm, sub_8)), - sub_8)>; -def : Pat<(v1i16 (shl (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))), - (EXTRACT_SUBREG - (USHLvvv_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16), - (SUBREG_TO_REG (i64 0), FPR16:$Rm, sub_16)), - sub_16)>; -def : Pat<(v1i32 (shl (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))), - (EXTRACT_SUBREG - (USHLvvv_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32), - (SUBREG_TO_REG (i64 0), FPR32:$Rm, sub_32)), - sub_32)>; - -// Additional patterns to match sra, srl. -// For a vector right shift by vector, the shift amounts of SSHL/USHL are -// negative. Negate the vector of shift amount first. -def : Pat<(v8i8 (srl (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))), - (USHLvvv_8B $Rn, (NEG8b $Rm))>; -def : Pat<(v4i16 (srl (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))), - (USHLvvv_4H $Rn, (NEG4h $Rm))>; -def : Pat<(v2i32 (srl (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))), - (USHLvvv_2S $Rn, (NEG2s $Rm))>; -def : Pat<(v1i64 (srl (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))), - (USHLddd $Rn, (NEGdd $Rm))>; -def : Pat<(v16i8 (srl (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))), - (USHLvvv_16B $Rn, (NEG16b $Rm))>; -def : Pat<(v8i16 (srl (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))), - (USHLvvv_8H $Rn, (NEG8h $Rm))>; -def : Pat<(v4i32 (srl (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))), - (USHLvvv_4S $Rn, (NEG4s $Rm))>; -def : Pat<(v2i64 (srl (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))), - (USHLvvv_2D $Rn, (NEG2d $Rm))>; - -def : Pat<(v1i8 (srl (v1i8 FPR8:$Rn), (v1i8 FPR8:$Rm))), - (EXTRACT_SUBREG - (USHLvvv_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8), - (NEG8b (SUBREG_TO_REG (i64 0), FPR8:$Rm, sub_8))), - sub_8)>; -def : Pat<(v1i16 (srl (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))), - (EXTRACT_SUBREG - (USHLvvv_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16), - (NEG4h (SUBREG_TO_REG (i64 0), FPR16:$Rm, sub_16))), - sub_16)>; -def : Pat<(v1i32 (srl (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))), - (EXTRACT_SUBREG - (USHLvvv_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32), - (NEG2s (SUBREG_TO_REG (i64 0), FPR32:$Rm, sub_32))), - sub_32)>; - -def : Pat<(v8i8 (sra (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))), - (SSHLvvv_8B $Rn, (NEG8b $Rm))>; -def : Pat<(v4i16 (sra (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))), - (SSHLvvv_4H $Rn, (NEG4h $Rm))>; -def : Pat<(v2i32 (sra (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))), - (SSHLvvv_2S $Rn, (NEG2s $Rm))>; -def : Pat<(v1i64 (sra (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))), - (SSHLddd $Rn, (NEGdd $Rm))>; -def : Pat<(v16i8 (sra (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))), - (SSHLvvv_16B $Rn, (NEG16b $Rm))>; -def : Pat<(v8i16 (sra (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))), - (SSHLvvv_8H $Rn, (NEG8h $Rm))>; -def : Pat<(v4i32 (sra (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))), - (SSHLvvv_4S $Rn, (NEG4s $Rm))>; -def 
: Pat<(v2i64 (sra (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))), - (SSHLvvv_2D $Rn, (NEG2d $Rm))>; - -def : Pat<(v1i8 (sra (v1i8 FPR8:$Rn), (v1i8 FPR8:$Rm))), - (EXTRACT_SUBREG - (SSHLvvv_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8), - (NEG8b (SUBREG_TO_REG (i64 0), FPR8:$Rm, sub_8))), - sub_8)>; -def : Pat<(v1i16 (sra (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))), - (EXTRACT_SUBREG - (SSHLvvv_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16), - (NEG4h (SUBREG_TO_REG (i64 0), FPR16:$Rm, sub_16))), - sub_16)>; -def : Pat<(v1i32 (sra (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))), - (EXTRACT_SUBREG - (SSHLvvv_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32), - (NEG2s (SUBREG_TO_REG (i64 0), FPR32:$Rm, sub_32))), - sub_32)>; - -// -// Patterns for handling half-precision values -// - -// Convert between f16 value and f32 value -def : Pat<(f32 (f16_to_f32 (i32 GPR32:$Rn))), - (FCVTsh (EXTRACT_SUBREG (FMOVsw $Rn), sub_16))>; -def : Pat<(i32 (f32_to_f16 (f32 FPR32:$Rn))), - (FMOVws (SUBREG_TO_REG (i64 0), (f16 (FCVThs $Rn)), sub_16))>; - -// Convert f16 value coming in as i16 value to f32 -def : Pat<(f32 (f16_to_f32 (i32 (and (i32 GPR32:$Rn), 65535)))), - (FCVTsh (EXTRACT_SUBREG (FMOVsw GPR32:$Rn), sub_16))>; -def : Pat<(f32 (f16_to_f32 (i32 (assertzext GPR32:$Rn)))), - (FCVTsh (EXTRACT_SUBREG (FMOVsw GPR32:$Rn), sub_16))>; - -def : Pat<(f32 (f16_to_f32 (i32 (assertzext (i32 ( - f32_to_f16 (f32 FPR32:$Rn))))))), - (f32 FPR32:$Rn)>; - -// Patterns for vector extract of half-precision FP value in i16 storage type -def : Pat<(f32 (f16_to_f32 ( i32 (and (i32 (vector_extract - (v4i16 VPR64:$Rn), neon_uimm2_bare:$Imm)), 65535)))), - (FCVTsh (f16 (DUPhv_H - (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), - neon_uimm2_bare:$Imm)))>; - -def : Pat<(f32 (f16_to_f32 ( i32 (and (i32 (vector_extract - (v8i16 VPR128:$Rn), neon_uimm3_bare:$Imm)), 65535)))), - (FCVTsh (f16 (DUPhv_H (v8i16 VPR128:$Rn), neon_uimm3_bare:$Imm)))>; - -// Patterns for vector insert of half-precision FP value 0 in i16 storage type -def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn), - (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 0))))))), - (neon_uimm3_bare:$Imm))), - (v8i16 (INSELh (v8i16 VPR128:$Rn), - (v8i16 (SUBREG_TO_REG (i64 0), - (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 WZR))), sub_16)), - sub_16)), - neon_uimm3_bare:$Imm, 0))>; - -def : Pat<(v4i16 (vector_insert (v4i16 VPR64:$Rn), - (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 0))))))), - (neon_uimm2_bare:$Imm))), - (v4i16 (EXTRACT_SUBREG - (v8i16 (INSELh - (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), - (v8i16 (SUBREG_TO_REG (i64 0), - (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 WZR))), sub_16)), - sub_16)), - neon_uimm2_bare:$Imm, 0)), - sub_64))>; - -// Patterns for vector insert of half-precision FP value in i16 storage type -def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn), - (i32 (assertsext (i32 (fp_to_sint - (f32 (f16_to_f32 (i32 (and (i32 GPR32:$src), 65535)))))))), - (neon_uimm3_bare:$Imm))), - (v8i16 (INSELh (v8i16 VPR128:$Rn), - (v8i16 (SUBREG_TO_REG (i64 0), - (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 GPR32:$src))), sub_16)), - sub_16)), - neon_uimm3_bare:$Imm, 0))>; - -def : Pat<(v4i16 (vector_insert (v4i16 VPR64:$Rn), - (i32 (assertsext (i32 (fp_to_sint - (f32 (f16_to_f32 (i32 (and (i32 GPR32:$src), 65535)))))))), - (neon_uimm2_bare:$Imm))), - (v4i16 (EXTRACT_SUBREG - (v8i16 (INSELh - (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), - (v8i16 (SUBREG_TO_REG (i64 0), - (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 GPR32:$src))), sub_16)), - sub_16)), - neon_uimm2_bare:$Imm, 
0)), - sub_64))>; - -def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn), - (i32 (vector_extract (v8i16 VPR128:$src), neon_uimm3_bare:$Imm2)), - (neon_uimm3_bare:$Imm1))), - (v8i16 (INSELh (v8i16 VPR128:$Rn), (v8i16 VPR128:$src), - neon_uimm3_bare:$Imm1, neon_uimm3_bare:$Imm2))>; - -// Patterns for vector copy of half-precision FP value in i16 storage type -def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn), - (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 (and (i32 - (vector_extract (v8i16 VPR128:$src), neon_uimm3_bare:$Imm2)), - 65535)))))))), - (neon_uimm3_bare:$Imm1))), - (v8i16 (INSELh (v8i16 VPR128:$Rn), (v8i16 VPR128:$src), - neon_uimm3_bare:$Imm1, neon_uimm3_bare:$Imm2))>; - -def : Pat<(v4i16 (vector_insert (v4i16 VPR64:$Rn), - (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 (and (i32 - (vector_extract (v4i16 VPR64:$src), neon_uimm3_bare:$Imm2)), - 65535)))))))), - (neon_uimm3_bare:$Imm1))), - (v4i16 (EXTRACT_SUBREG - (v8i16 (INSELh - (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), - (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)), - neon_uimm3_bare:$Imm1, neon_uimm3_bare:$Imm2)), - sub_64))>; - - diff --git a/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp b/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp deleted file mode 100644 index 103aeb00d87..00000000000 --- a/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp +++ /dev/null @@ -1,157 +0,0 @@ -//===-- AArch64MCInstLower.cpp - Convert AArch64 MachineInstr to an MCInst -==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains code to lower AArch64 MachineInstrs to their corresponding -// MCInst records. 
-// -//===----------------------------------------------------------------------===// - -#include "AArch64AsmPrinter.h" -#include "AArch64TargetMachine.h" -#include "MCTargetDesc/AArch64MCExpr.h" -#include "Utils/AArch64BaseInfo.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/IR/Mangler.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCInst.h" - -using namespace llvm; - -MCOperand -AArch64AsmPrinter::lowerSymbolOperand(const MachineOperand &MO, - const MCSymbol *Sym) const { - const MCExpr *Expr = nullptr; - - Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, OutContext); - - switch (MO.getTargetFlags()) { - case AArch64II::MO_GOT: - Expr = AArch64MCExpr::CreateGOT(Expr, OutContext); - break; - case AArch64II::MO_GOT_LO12: - Expr = AArch64MCExpr::CreateGOTLo12(Expr, OutContext); - break; - case AArch64II::MO_LO12: - Expr = AArch64MCExpr::CreateLo12(Expr, OutContext); - break; - case AArch64II::MO_DTPREL_G1: - Expr = AArch64MCExpr::CreateDTPREL_G1(Expr, OutContext); - break; - case AArch64II::MO_DTPREL_G0_NC: - Expr = AArch64MCExpr::CreateDTPREL_G0_NC(Expr, OutContext); - break; - case AArch64II::MO_GOTTPREL: - Expr = AArch64MCExpr::CreateGOTTPREL(Expr, OutContext); - break; - case AArch64II::MO_GOTTPREL_LO12: - Expr = AArch64MCExpr::CreateGOTTPRELLo12(Expr, OutContext); - break; - case AArch64II::MO_TLSDESC: - Expr = AArch64MCExpr::CreateTLSDesc(Expr, OutContext); - break; - case AArch64II::MO_TLSDESC_LO12: - Expr = AArch64MCExpr::CreateTLSDescLo12(Expr, OutContext); - break; - case AArch64II::MO_TPREL_G1: - Expr = AArch64MCExpr::CreateTPREL_G1(Expr, OutContext); - break; - case AArch64II::MO_TPREL_G0_NC: - Expr = AArch64MCExpr::CreateTPREL_G0_NC(Expr, OutContext); - break; - case AArch64II::MO_ABS_G3: - Expr = AArch64MCExpr::CreateABS_G3(Expr, OutContext); - break; - case AArch64II::MO_ABS_G2_NC: - Expr = AArch64MCExpr::CreateABS_G2_NC(Expr, OutContext); - break; - case AArch64II::MO_ABS_G1_NC: - Expr = AArch64MCExpr::CreateABS_G1_NC(Expr, OutContext); - break; - case AArch64II::MO_ABS_G0_NC: - Expr = AArch64MCExpr::CreateABS_G0_NC(Expr, OutContext); - break; - case AArch64II::MO_NO_FLAG: - // Expr is already correct - break; - default: - llvm_unreachable("Unexpected MachineOperand flag"); - } - - if (!MO.isJTI() && MO.getOffset()) - Expr = MCBinaryExpr::CreateAdd(Expr, - MCConstantExpr::Create(MO.getOffset(), - OutContext), - OutContext); - - return MCOperand::CreateExpr(Expr); -} - -bool AArch64AsmPrinter::lowerOperand(const MachineOperand &MO, - MCOperand &MCOp) const { - switch (MO.getType()) { - default: llvm_unreachable("unknown operand type"); - case MachineOperand::MO_Register: - if (MO.isImplicit()) - return false; - assert(!MO.getSubReg() && "Subregs should be eliminated!"); - MCOp = MCOperand::CreateReg(MO.getReg()); - break; - case MachineOperand::MO_Immediate: - MCOp = MCOperand::CreateImm(MO.getImm()); - break; - case MachineOperand::MO_FPImmediate: { - assert(MO.getFPImm()->isZero() && "Only fp imm 0.0 is supported"); - MCOp = MCOperand::CreateFPImm(0.0); - break; - } - case MachineOperand::MO_BlockAddress: - MCOp = lowerSymbolOperand(MO, GetBlockAddressSymbol(MO.getBlockAddress())); - break; - case MachineOperand::MO_ExternalSymbol: - MCOp = lowerSymbolOperand(MO, GetExternalSymbolSymbol(MO.getSymbolName())); - break; - case MachineOperand::MO_GlobalAddress: - MCOp = lowerSymbolOperand(MO, 
getSymbol(MO.getGlobal())); - break; - case MachineOperand::MO_MachineBasicBlock: - MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create( - MO.getMBB()->getSymbol(), OutContext)); - break; - case MachineOperand::MO_JumpTableIndex: - MCOp = lowerSymbolOperand(MO, GetJTISymbol(MO.getIndex())); - break; - case MachineOperand::MO_ConstantPoolIndex: - MCOp = lowerSymbolOperand(MO, GetCPISymbol(MO.getIndex())); - break; - case MachineOperand::MO_RegisterMask: - // Ignore call clobbers - return false; - - } - - return true; -} - -void llvm::LowerAArch64MachineInstrToMCInst(const MachineInstr *MI, - MCInst &OutMI, - AArch64AsmPrinter &AP) { - OutMI.setOpcode(MI->getOpcode()); - - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - - MCOperand MCOp; - if (AP.lowerOperand(MO, MCOp)) - OutMI.addOperand(MCOp); - } -} diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp deleted file mode 100644 index f45d8f784f4..00000000000 --- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp +++ /dev/null @@ -1,18 +0,0 @@ -//===-- AArch64MachineFuctionInfo.cpp - AArch64 machine function info -----===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file just contains the anchor for the AArch64MachineFunctionInfo to -// force vtable emission. -// -//===----------------------------------------------------------------------===// -#include "AArch64MachineFunctionInfo.h" - -using namespace llvm; - -void AArch64MachineFunctionInfo::anchor() { } diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h deleted file mode 100644 index 33da54f97fd..00000000000 --- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h +++ /dev/null @@ -1,149 +0,0 @@ -//=- AArch64MachineFuctionInfo.h - AArch64 machine function info -*- C++ -*-==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file declares AArch64-specific per-machine-function information. -// -//===----------------------------------------------------------------------===// - -#ifndef AARCH64MACHINEFUNCTIONINFO_H -#define AARCH64MACHINEFUNCTIONINFO_H - -#include "llvm/CodeGen/MachineFunction.h" - -namespace llvm { - -/// This class is derived from MachineFunctionInfo and contains private AArch64 -/// target-specific information for each MachineFunction. -class AArch64MachineFunctionInfo : public MachineFunctionInfo { - virtual void anchor(); - - /// Number of bytes of arguments this function has on the stack. If the callee - /// is expected to restore the argument stack this should be a multiple of 16, - /// all usable during a tail call. - /// - /// The alternative would forbid tail call optimisation in some cases: if we - /// want to transfer control from a function with 8-bytes of stack-argument - /// space to a function with 16-bytes then misalignment of this value would - /// make a stack adjustment necessary, which could not be undone by the - /// callee. 
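A worked illustration of the rounding rationale just described (numbers and helper are illustrative only, not the backend's code): keeping the argument area a multiple of 16 means a function entered with 8 bytes of stack arguments still owns a 16-byte slot, so a tail call that needs 16 bytes of outgoing arguments can reuse it without an SP adjustment the callee would be unable to undo.

#include <cassert>

// AAPCS64 keeps SP 16-byte aligned, so argument areas are compared after
// rounding up to 16.
static unsigned roundUpTo16(unsigned Bytes) { return (Bytes + 15u) & ~15u; }

static bool tailCallFitsInCallerArgArea(unsigned CallerBytes, unsigned CalleeBytes) {
  return roundUpTo16(CalleeBytes) <= roundUpTo16(CallerBytes);
}

int main() {
  assert(tailCallFitsInCallerArgArea(8, 16));   // the 8-byte vs 16-byte case above
  assert(!tailCallFitsInCallerArgArea(16, 32)); // would need a real SP adjustment
}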
- unsigned BytesInStackArgArea; - - /// The number of bytes to restore to deallocate space for incoming - /// arguments. Canonically 0 in the C calling convention, but non-zero when - /// callee is expected to pop the args. - unsigned ArgumentStackToRestore; - - /// If the stack needs to be adjusted on frame entry in two stages, this - /// records the size of the first adjustment just prior to storing - /// callee-saved registers. The callee-saved slots are addressed assuming - /// SP == <incoming-SP> - InitialStackAdjust. - unsigned InitialStackAdjust; - - /// Number of local-dynamic TLS accesses. - unsigned NumLocalDynamics; - - /// @see AArch64 Procedure Call Standard, B.3 - /// - /// The Frame index of the area where LowerFormalArguments puts the - /// general-purpose registers that might contain variadic parameters. - int VariadicGPRIdx; - - /// @see AArch64 Procedure Call Standard, B.3 - /// - /// The size of the frame object used to store the general-purpose registers - /// which might contain variadic arguments. This is the offset from - /// VariadicGPRIdx to what's stored in __gr_top. - unsigned VariadicGPRSize; - - /// @see AArch64 Procedure Call Standard, B.3 - /// - /// The Frame index of the area where LowerFormalArguments puts the - /// floating-point registers that might contain variadic parameters. - int VariadicFPRIdx; - - /// @see AArch64 Procedure Call Standard, B.3 - /// - /// The size of the frame object used to store the floating-point registers - /// which might contain variadic arguments. This is the offset from - /// VariadicFPRIdx to what's stored in __vr_top. - unsigned VariadicFPRSize; - - /// @see AArch64 Procedure Call Standard, B.3 - /// - /// The Frame index of an object pointing just past the last known stacked - /// argument on entry to a variadic function. This goes into the __stack field - /// of the va_list type. - int VariadicStackIdx; - - /// The offset of the frame pointer from the stack pointer on function - /// entry. This is expected to be negative. 
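For context on the __stack, __gr_top and __vr_top names used in the comments above: they are the fields of the AAPCS64 va_list that these save areas back. A sketch of that layout (taken from the AArch64 Procedure Call Standard, B.3, not from this header):

// Layout of the va_list defined by the AArch64 PCS; the frame objects tracked
// by the fields above provide the addresses stored into it by va_start.
struct aapcs64_va_list {
  void *__stack;    // next stacked (memory) argument
  void *__gr_top;   // one past the end of the general-purpose register save area
  void *__vr_top;   // one past the end of the FP/SIMD register save area
  int   __gr_offs;  // negative offset from __gr_top to the next GP argument
  int   __vr_offs;  // negative offset from __vr_top to the next FP/SIMD argument
};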
- int FramePointerOffset; - -public: - AArch64MachineFunctionInfo() - : BytesInStackArgArea(0), - ArgumentStackToRestore(0), - InitialStackAdjust(0), - NumLocalDynamics(0), - VariadicGPRIdx(0), - VariadicGPRSize(0), - VariadicFPRIdx(0), - VariadicFPRSize(0), - VariadicStackIdx(0), - FramePointerOffset(0) {} - - explicit AArch64MachineFunctionInfo(MachineFunction &MF) - : BytesInStackArgArea(0), - ArgumentStackToRestore(0), - InitialStackAdjust(0), - NumLocalDynamics(0), - VariadicGPRIdx(0), - VariadicGPRSize(0), - VariadicFPRIdx(0), - VariadicFPRSize(0), - VariadicStackIdx(0), - FramePointerOffset(0) {} - - unsigned getBytesInStackArgArea() const { return BytesInStackArgArea; } - void setBytesInStackArgArea (unsigned bytes) { BytesInStackArgArea = bytes;} - - unsigned getArgumentStackToRestore() const { return ArgumentStackToRestore; } - void setArgumentStackToRestore(unsigned bytes) { - ArgumentStackToRestore = bytes; - } - - unsigned getInitialStackAdjust() const { return InitialStackAdjust; } - void setInitialStackAdjust(unsigned bytes) { InitialStackAdjust = bytes; } - - unsigned getNumLocalDynamicTLSAccesses() const { return NumLocalDynamics; } - void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamics; } - - int getVariadicGPRIdx() const { return VariadicGPRIdx; } - void setVariadicGPRIdx(int Idx) { VariadicGPRIdx = Idx; } - - unsigned getVariadicGPRSize() const { return VariadicGPRSize; } - void setVariadicGPRSize(unsigned Size) { VariadicGPRSize = Size; } - - int getVariadicFPRIdx() const { return VariadicFPRIdx; } - void setVariadicFPRIdx(int Idx) { VariadicFPRIdx = Idx; } - - unsigned getVariadicFPRSize() const { return VariadicFPRSize; } - void setVariadicFPRSize(unsigned Size) { VariadicFPRSize = Size; } - - int getVariadicStackIdx() const { return VariadicStackIdx; } - void setVariadicStackIdx(int Idx) { VariadicStackIdx = Idx; } - - int getFramePointerOffset() const { return FramePointerOffset; } - void setFramePointerOffset(int Idx) { FramePointerOffset = Idx; } - -}; - -} // End llvm namespace - -#endif diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp deleted file mode 100644 index 5382effd7bb..00000000000 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ /dev/null @@ -1,186 +0,0 @@ -//===- AArch64RegisterInfo.cpp - AArch64 Register Information -------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the AArch64 implementation of the TargetRegisterInfo -// class. 
-// -//===----------------------------------------------------------------------===// - - -#include "AArch64RegisterInfo.h" -#include "AArch64FrameLowering.h" -#include "AArch64MachineFunctionInfo.h" -#include "AArch64TargetMachine.h" -#include "MCTargetDesc/AArch64MCTargetDesc.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/RegisterScavenging.h" - -using namespace llvm; - -#define GET_REGINFO_TARGET_DESC -#include "AArch64GenRegisterInfo.inc" - -AArch64RegisterInfo::AArch64RegisterInfo() - : AArch64GenRegisterInfo(AArch64::X30) { -} - -const MCPhysReg * -AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { - return CSR_PCS_SaveList; -} - -const uint32_t* -AArch64RegisterInfo::getCallPreservedMask(CallingConv::ID) const { - return CSR_PCS_RegMask; -} - -const uint32_t *AArch64RegisterInfo::getTLSDescCallPreservedMask() const { - return TLSDesc_RegMask; -} - -const TargetRegisterClass * -AArch64RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const { - if (RC == &AArch64::FlagClassRegClass) - return &AArch64::GPR64RegClass; - - return RC; -} - - - -BitVector -AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const { - BitVector Reserved(getNumRegs()); - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - - Reserved.set(AArch64::XSP); - Reserved.set(AArch64::WSP); - - Reserved.set(AArch64::XZR); - Reserved.set(AArch64::WZR); - - if (TFI->hasFP(MF)) { - Reserved.set(AArch64::X29); - Reserved.set(AArch64::W29); - } - - return Reserved; -} - -static bool hasFrameOffset(int opcode) { - return opcode != AArch64::LD1x2_8B && opcode != AArch64::LD1x3_8B && - opcode != AArch64::LD1x4_8B && opcode != AArch64::ST1x2_8B && - opcode != AArch64::ST1x3_8B && opcode != AArch64::ST1x4_8B && - opcode != AArch64::LD1x2_16B && opcode != AArch64::LD1x3_16B && - opcode != AArch64::LD1x4_16B && opcode != AArch64::ST1x2_16B && - opcode != AArch64::ST1x3_16B && opcode != AArch64::ST1x4_16B; -} - -void -AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MBBI, - int SPAdj, - unsigned FIOperandNum, - RegScavenger *RS) const { - assert(SPAdj == 0 && "Cannot deal with nonzero SPAdj yet"); - MachineInstr &MI = *MBBI; - MachineBasicBlock &MBB = *MI.getParent(); - MachineFunction &MF = *MBB.getParent(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - const AArch64FrameLowering *TFI = - static_cast<const AArch64FrameLowering *>(MF.getTarget().getFrameLowering()); - - // In order to work out the base and offset for addressing, the FrameLowering - // code needs to know (sometimes) whether the instruction is storing/loading a - // callee-saved register, or whether it's a more generic - // operation. Fortunately the frame indices are used *only* for that purpose - // and are contiguous, so we can check here. - const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); - int MinCSFI = 0; - int MaxCSFI = -1; - - if (CSI.size()) { - MinCSFI = CSI[0].getFrameIdx(); - MaxCSFI = CSI[CSI.size() - 1].getFrameIdx(); - } - - int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); - bool IsCalleeSaveOp = FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI; - - unsigned FrameReg; - int64_t Offset; - Offset = TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg, SPAdj, - IsCalleeSaveOp); - // A vector load/store instruction doesn't have an offset operand. 
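As a condensed restatement of the decision made just below (a simplified sketch, not the exact backend logic): the frame offset stays in the instruction only when it is correctly scaled and within the encodable range; otherwise a scratch base register is built with emitRegUpdate, and the LD1x*/ST1x* multi-register forms listed above, having no offset operand at all, take that path for any non-zero offset.

#include <cstdint>

// Simplified model of the "can the offset be encoded directly?" check below;
// false means a base register must be materialised first.
static bool offsetFitsInInstruction(int64_t Offset, int Scale,
                                    int MinOffset, int MaxOffset,
                                    bool HasOffsetOp) {
  if (!HasOffsetOp)                 // LD1x2/ST1x2-style: no immediate field
    return Offset == 0;
  return Offset % Scale == 0 && Offset >= MinOffset && Offset <= MaxOffset;
}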
- bool HasOffsetOp = hasFrameOffset(MI.getOpcode()); - if (HasOffsetOp) - Offset += MI.getOperand(FIOperandNum + 1).getImm(); - - // DBG_VALUE instructions have no real restrictions so they can be handled - // easily. - if (MI.isDebugValue()) { - MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, /*isDef=*/ false); - MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset); - return; - } - - const AArch64InstrInfo &TII = - *static_cast<const AArch64InstrInfo*>(MF.getTarget().getInstrInfo()); - int MinOffset, MaxOffset, OffsetScale; - if (MI.getOpcode() == AArch64::ADDxxi_lsl0_s || !HasOffsetOp) { - MinOffset = 0; - MaxOffset = 0xfff; - OffsetScale = 1; - } else { - // Load/store of a stack object - TII.getAddressConstraints(MI, OffsetScale, MinOffset, MaxOffset); - } - - // There are two situations we don't use frame + offset directly in the - // instruction: - // (1) The offset can't really be scaled - // (2) Can't encode offset as it doesn't have an offset operand - if ((Offset % OffsetScale != 0 || Offset < MinOffset || Offset > MaxOffset) || - (!HasOffsetOp && Offset != 0)) { - unsigned BaseReg = - MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass); - emitRegUpdate(MBB, MBBI, MBBI->getDebugLoc(), TII, - BaseReg, FrameReg, BaseReg, Offset); - FrameReg = BaseReg; - Offset = 0; - } - - // Negative offsets are expected if we address from FP, but for - // now this checks nothing has gone horribly wrong. - assert(Offset >= 0 && "Unexpected negative offset from SP"); - - MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false, false, true); - if (HasOffsetOp) - MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset / OffsetScale); -} - -unsigned -AArch64RegisterInfo::getFrameRegister(const MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - - if (TFI->hasFP(MF)) - return AArch64::X29; - else - return AArch64::XSP; -} - -bool -AArch64RegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - const AArch64FrameLowering *AFI - = static_cast<const AArch64FrameLowering*>(TFI); - return AFI->useFPForAddressing(MF); -} diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h deleted file mode 100644 index 5b501f9cc16..00000000000 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h +++ /dev/null @@ -1,79 +0,0 @@ -//==- AArch64RegisterInfo.h - AArch64 Register Information Impl -*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the AArch64 implementation of the MCRegisterInfo class. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TARGET_AARCH64REGISTERINFO_H -#define LLVM_TARGET_AARCH64REGISTERINFO_H - -#include "llvm/Target/TargetRegisterInfo.h" - -#define GET_REGINFO_HEADER -#include "AArch64GenRegisterInfo.inc" - -namespace llvm { - -class AArch64InstrInfo; -class AArch64Subtarget; - -struct AArch64RegisterInfo : public AArch64GenRegisterInfo { - AArch64RegisterInfo(); - - const MCPhysReg * - getCalleeSavedRegs(const MachineFunction *MF =nullptr) const override; - const uint32_t *getCallPreservedMask(CallingConv::ID) const override; - - unsigned getCSRFirstUseCost() const override { - // The cost will be compared against BlockFrequency where entry has the - // value of 1 << 14. A value of 5 will choose to spill or split really - // cold path instead of using a callee-saved register. - return 5; - } - - const uint32_t *getTLSDescCallPreservedMask() const; - - BitVector getReservedRegs(const MachineFunction &MF) const override; - unsigned getFrameRegister(const MachineFunction &MF) const override; - - void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, - unsigned FIOperandNum, - RegScavenger *Rs = nullptr) const override; - - /// getCrossCopyRegClass - Returns a legal register class to copy a register - /// in the specified class to or from. Returns original class if it is - /// possible to copy between a two registers of the specified class. - const TargetRegisterClass * - getCrossCopyRegClass(const TargetRegisterClass *RC) const override; - - /// getLargestLegalSuperClass - Returns the largest super class of RC that is - /// legal to use in the current sub-target and has the same spill size. - const TargetRegisterClass* - getLargestLegalSuperClass(const TargetRegisterClass *RC) const override { - if (RC == &AArch64::tcGPR64RegClass) - return &AArch64::GPR64RegClass; - - return RC; - } - - bool requiresRegisterScavenging(const MachineFunction &MF) const override { - return true; - } - - bool requiresFrameIndexScavenging(const MachineFunction &MF) const override { - return true; - } - - bool useFPForScavengingIndex(const MachineFunction &MF) const override; -}; - -} // end namespace llvm - -#endif // LLVM_TARGET_AARCH64REGISTERINFO_H diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td deleted file mode 100644 index 9de7abdf5ff..00000000000 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td +++ /dev/null @@ -1,290 +0,0 @@ -//===- AArch64RegisterInfo.td - ARM Register defs ----------*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains declarations that describe the AArch64 register file -// -//===----------------------------------------------------------------------===// - -let Namespace = "AArch64" in { -def sub_128 : SubRegIndex<128>; -def sub_64 : SubRegIndex<64>; -def sub_32 : SubRegIndex<32>; -def sub_16 : SubRegIndex<16>; -def sub_8 : SubRegIndex<8>; - -// Note: Code depends on these having consecutive numbers. 
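The qqsub/qsub/dsub indices defined immediately after this note name slices of the consecutive-register tuples declared later in this file; composition adds offsets, so dsub_2 and dsub_3 reach the halves of the second Q register in a pair. A small orientation table worked out from those definitions (illustrative only, not generated from TableGen):

// Bit offset and width encoded by each index within a 2 x 128-bit Q pair;
// dsub_2/dsub_3 compose qsub_1 with dsub_0/dsub_1.
struct SubIdxSlice { const char *Name; unsigned BitOffset, BitWidth; };
constexpr SubIdxSlice QPairSlices[] = {
    {"qsub_0",   0, 128}, {"qsub_1", 128, 128},
    {"dsub_0",   0,  64}, {"dsub_1",  64,  64},
    {"dsub_2", 128,  64}, {"dsub_3", 192,  64},
};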
-def qqsub : SubRegIndex<256, 256>; - -def qsub_0 : SubRegIndex<128>; -def qsub_1 : SubRegIndex<128, 128>; -def qsub_2 : ComposedSubRegIndex<qqsub, qsub_0>; -def qsub_3 : ComposedSubRegIndex<qqsub, qsub_1>; - -def dsub_0 : SubRegIndex<64>; -def dsub_1 : SubRegIndex<64, 64>; -def dsub_2 : ComposedSubRegIndex<qsub_1, dsub_0>; -def dsub_3 : ComposedSubRegIndex<qsub_1, dsub_1>; -} - -// Registers are identified with 5-bit ID numbers. -class AArch64Reg<bits<16> enc, string n> : Register<n> { - let HWEncoding = enc; - let Namespace = "AArch64"; -} - -class AArch64RegWithSubs<bits<16> enc, string n, list<Register> subregs = [], - list<SubRegIndex> inds = []> - : AArch64Reg<enc, n> { - let SubRegs = subregs; - let SubRegIndices = inds; -} - -//===----------------------------------------------------------------------===// -// Integer registers: w0-w30, wzr, wsp, x0-x30, xzr, sp -//===----------------------------------------------------------------------===// - -foreach Index = 0-30 in { - def W#Index : AArch64Reg< Index, "w"#Index>, DwarfRegNum<[Index]>; -} - -def WSP : AArch64Reg<31, "wsp">, DwarfRegNum<[31]>; -def WZR : AArch64Reg<31, "wzr">; - -// Could be combined with previous loop, but this way leaves w and x registers -// consecutive as LLVM register numbers, which makes for easier debugging. -foreach Index = 0-30 in { - def X#Index : AArch64RegWithSubs<Index, "x"#Index, - [!cast<Register>("W"#Index)], [sub_32]>, - DwarfRegNum<[Index]>; -} - -def XSP : AArch64RegWithSubs<31, "sp", [WSP], [sub_32]>, DwarfRegNum<[31]>; -def XZR : AArch64RegWithSubs<31, "xzr", [WZR], [sub_32]>; - -// Most instructions treat register 31 as zero for reads and a black-hole for -// writes. - -// Note that the order of registers is important for the Disassembler here: -// tablegen uses it to form MCRegisterClass::getRegister, which we assume can -// take an encoding value. -def GPR32 : RegisterClass<"AArch64", [i32], 32, - (add (sequence "W%u", 0, 30), WZR)> { -} - -def GPR64 : RegisterClass<"AArch64", [i64], 64, - (add (sequence "X%u", 0, 30), XZR)> { -} - -def GPR32nowzr : RegisterClass<"AArch64", [i32], 32, - (sequence "W%u", 0, 30)> { -} - -def GPR64noxzr : RegisterClass<"AArch64", [i64], 64, - (sequence "X%u", 0, 30)> { -} - -// For tail calls, we can't use callee-saved registers or the structure-return -// register, as they are supposed to be live across function calls and may be -// clobbered by the epilogue. -def tcGPR64 : RegisterClass<"AArch64", [i64], 64, - (add (sequence "X%u", 0, 7), - (sequence "X%u", 9, 18))> { -} - - -// Certain addressing-useful instructions accept sp directly. Again the order of -// registers is important to the Disassembler. -def GPR32wsp : RegisterClass<"AArch64", [i32], 32, - (add (sequence "W%u", 0, 30), WSP)> { -} - -def GPR64xsp : RegisterClass<"AArch64", [i64], 64, - (add (sequence "X%u", 0, 30), XSP)> { -} - -// Some aliases *only* apply to SP (e.g. MOV uses different encoding for SP and -// non-SP variants). 
We can't use a bare register in those patterns because -// TableGen doesn't like it, so we need a class containing just stack registers -def Rxsp : RegisterClass<"AArch64", [i64], 64, - (add XSP)> { -} - -def Rwsp : RegisterClass<"AArch64", [i32], 32, - (add WSP)> { -} - -//===----------------------------------------------------------------------===// -// Scalar registers in the vector unit: -// b0-b31, h0-h31, s0-s31, d0-d31, q0-q31 -//===----------------------------------------------------------------------===// - -foreach Index = 0-31 in { - def B # Index : AArch64Reg< Index, "b" # Index>, - DwarfRegNum<[!add(Index, 64)]>; - - def H # Index : AArch64RegWithSubs<Index, "h" # Index, - [!cast<Register>("B" # Index)], [sub_8]>, - DwarfRegNum<[!add(Index, 64)]>; - - def S # Index : AArch64RegWithSubs<Index, "s" # Index, - [!cast<Register>("H" # Index)], [sub_16]>, - DwarfRegNum<[!add(Index, 64)]>; - - def D # Index : AArch64RegWithSubs<Index, "d" # Index, - [!cast<Register>("S" # Index)], [sub_32]>, - DwarfRegNum<[!add(Index, 64)]>; - - def Q # Index : AArch64RegWithSubs<Index, "q" # Index, - [!cast<Register>("D" # Index)], [sub_64]>, - DwarfRegNum<[!add(Index, 64)]>; -} - - -def FPR8 : RegisterClass<"AArch64", [v1i8], 8, - (sequence "B%u", 0, 31)> { -} - -def FPR16 : RegisterClass<"AArch64", [f16, v1i16], 16, - (sequence "H%u", 0, 31)> { -} - -def FPR32 : RegisterClass<"AArch64", [f32, v1i32], 32, - (sequence "S%u", 0, 31)> { -} - -def FPR64 : RegisterClass<"AArch64", - [f64, v2f32, v2i32, v4i16, v8i8, v1i64, v1f64], - 64, (sequence "D%u", 0, 31)>; - -def FPR128 : RegisterClass<"AArch64", - [f128, v2f64, v2i64, v4f32, v4i32, v8i16, v16i8], - 128, (sequence "Q%u", 0, 31)>; - -def FPR64Lo : RegisterClass<"AArch64", - [f64, v2f32, v2i32, v4i16, v8i8, v1i64, v1f64], - 64, (sequence "D%u", 0, 15)>; - -def FPR128Lo : RegisterClass<"AArch64", - [f128, v2f64, v2i64, v4f32, v4i32, v8i16, v16i8], - 128, (sequence "Q%u", 0, 15)>; - -//===----------------------------------------------------------------------===// -// Vector registers: -//===----------------------------------------------------------------------===// - -def VPR64AsmOperand : AsmOperandClass { - let Name = "VPR"; - let PredicateMethod = "isReg"; - let RenderMethod = "addRegOperands"; -} - -def VPR64 : RegisterOperand<FPR64, "printVPRRegister">; - -def VPR128 : RegisterOperand<FPR128, "printVPRRegister">; - -def VPR64Lo : RegisterOperand<FPR64Lo, "printVPRRegister">; - -def VPR128Lo : RegisterOperand<FPR128Lo, "printVPRRegister">; - -// Flags register -def NZCV : Register<"nzcv"> { - let Namespace = "AArch64"; -} - -def FlagClass : RegisterClass<"AArch64", [i32], 32, (add NZCV)> { - let CopyCost = -1; - let isAllocatable = 0; -} - -//===----------------------------------------------------------------------===// -// Consecutive vector registers -//===----------------------------------------------------------------------===// -// 2 Consecutive 64-bit registers: D0_D1, D1_D2, ..., D31_D0 -def Tuples2D : RegisterTuples<[dsub_0, dsub_1], - [(rotl FPR64, 0), (rotl FPR64, 1)]>; - -// 3 Consecutive 64-bit registers: D0_D1_D2, ..., D31_D0_D1 -def Tuples3D : RegisterTuples<[dsub_0, dsub_1, dsub_2], - [(rotl FPR64, 0), (rotl FPR64, 1), - (rotl FPR64, 2)]>; - -// 4 Consecutive 64-bit registers: D0_D1_D2_D3, ..., D31_D0_D1_D2 -def Tuples4D : RegisterTuples<[dsub_0, dsub_1, dsub_2, dsub_3], - [(rotl FPR64, 0), (rotl FPR64, 1), - (rotl FPR64, 2), (rotl FPR64, 3)]>; - -// 2 Consecutive 128-bit registers: Q0_Q1, Q1_Q2, ..., Q30_Q31 -def Tuples2Q : 
RegisterTuples<[qsub_0, qsub_1], - [(rotl FPR128, 0), (rotl FPR128, 1)]>; - -// 3 Consecutive 128-bit registers: Q0_Q1_Q2, ..., Q31_Q0_Q1 -def Tuples3Q : RegisterTuples<[qsub_0, qsub_1, qsub_2], - [(rotl FPR128, 0), (rotl FPR128, 1), - (rotl FPR128, 2)]>; - -// 4 Consecutive 128-bit registers: Q0_Q1_Q2_Q3, ..., Q31_Q0_Q1_Q2 -def Tuples4Q : RegisterTuples<[qsub_0, qsub_1, qsub_2, qsub_3], - [(rotl FPR128, 0), (rotl FPR128, 1), - (rotl FPR128, 2), (rotl FPR128, 3)]>; - -// The followings are super register classes to model 2/3/4 consecutive -// 64-bit/128-bit registers. - -def DPair : RegisterClass<"AArch64", [v2i64], 64, (add Tuples2D)>; - -def DTriple : RegisterClass<"AArch64", [untyped], 64, (add Tuples3D)> { - let Size = 192; // 3 x 64 bits, we have no predefined type of that size. -} - -def DQuad : RegisterClass<"AArch64", [v4i64], 64, (add Tuples4D)>; - -def QPair : RegisterClass<"AArch64", [v4i64], 128, (add Tuples2Q)>; - -def QTriple : RegisterClass<"AArch64", [untyped], 128, (add Tuples3Q)> { - let Size = 384; // 3 x 128 bits, we have no predefined type of that size. -} - -def QQuad : RegisterClass<"AArch64", [v8i64], 128, (add Tuples4Q)>; - - -// The followings are vector list operands -multiclass VectorList_operands<string PREFIX, string LAYOUT, int Count, - RegisterClass RegList> { - def _asmoperand : AsmOperandClass { - let Name = PREFIX # LAYOUT # Count; - let RenderMethod = "addVectorListOperands"; - let PredicateMethod = - "isVectorList<A64Layout::VL_" # LAYOUT # ", " # Count # ">"; - let ParserMethod = "ParseVectorList"; - } - - def _operand : RegisterOperand<RegList, - "printVectorList<A64Layout::VL_" # LAYOUT # ", " # Count # ">"> { - let ParserMatchClass = - !cast<AsmOperandClass>(PREFIX # LAYOUT # "_asmoperand"); - } -} - -multiclass VectorList_BHSD<string PREFIX, int Count, RegisterClass DRegList, - RegisterClass QRegList> { - defm 8B : VectorList_operands<PREFIX, "8B", Count, DRegList>; - defm 4H : VectorList_operands<PREFIX, "4H", Count, DRegList>; - defm 2S : VectorList_operands<PREFIX, "2S", Count, DRegList>; - defm 1D : VectorList_operands<PREFIX, "1D", Count, DRegList>; - defm 16B : VectorList_operands<PREFIX, "16B", Count, QRegList>; - defm 8H : VectorList_operands<PREFIX, "8H", Count, QRegList>; - defm 4S : VectorList_operands<PREFIX, "4S", Count, QRegList>; - defm 2D : VectorList_operands<PREFIX, "2D", Count, QRegList>; -} - -// Vector list operand with 1/2/3/4 registers: VOne8B_operand,..., VQuad2D_operand -defm VOne : VectorList_BHSD<"VOne", 1, FPR64, FPR128>; -defm VPair : VectorList_BHSD<"VPair", 2, DPair, QPair>; -defm VTriple : VectorList_BHSD<"VTriple", 3, DTriple, QTriple>; -defm VQuad : VectorList_BHSD<"VQuad", 4, DQuad, QQuad>; diff --git a/llvm/lib/Target/AArch64/AArch64Schedule.td b/llvm/lib/Target/AArch64/AArch64Schedule.td deleted file mode 100644 index 6ec47dbaa58..00000000000 --- a/llvm/lib/Target/AArch64/AArch64Schedule.td +++ /dev/null @@ -1,80 +0,0 @@ -//===- AArch64Schedule.td - AArch64 Scheduling Definitions -*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Generic processor itineraries for legacy compatibility. 
- -def GenericItineraries : ProcessorItineraries<[], [], []>; - - -//===----------------------------------------------------------------------===// -// Base SchedReadWrite types - -// Basic ALU -def WriteALU : SchedWrite; // Generic: may contain shift and/or ALU operation -def WriteALUs : SchedWrite; // Shift only with no ALU operation -def ReadALU : SchedRead; // Operand not needed for shifting -def ReadALUs : SchedRead; // Operand needed for shifting - -// Multiply with optional accumulate -def WriteMAC : SchedWrite; -def ReadMAC : SchedRead; - -// Compares -def WriteCMP : SchedWrite; -def ReadCMP : SchedRead; - -// Division -def WriteDiv : SchedWrite; -def ReadDiv : SchedRead; - -// Loads -def WriteLd : SchedWrite; -def WritePreLd : SchedWrite; -def WriteVecLd : SchedWrite; -def ReadLd : SchedRead; -def ReadPreLd : SchedRead; -def ReadVecLd : SchedRead; - -// Stores -def WriteSt : SchedWrite; -def WriteVecSt : SchedWrite; -def ReadSt : SchedRead; -def ReadVecSt : SchedRead; - -// Branches -def WriteBr : SchedWrite; -def WriteBrL : SchedWrite; -def ReadBr : SchedRead; - -// Floating Point ALU -def WriteFPALU : SchedWrite; -def ReadFPALU : SchedRead; - -// Floating Point MAC, Mul, Div, Sqrt -// Most processors will simply send all of these down a dedicated pipe, but -// they're explicitly separated here for flexibility of modeling later. May -// consider consolidating them into a single WriteFPXXXX type in the future. -def WriteFPMAC : SchedWrite; -def WriteFPMul : SchedWrite; -def WriteFPDiv : SchedWrite; -def WriteFPSqrt : SchedWrite; -def ReadFPMAC : SchedRead; -def ReadFPMul : SchedRead; -def ReadFPDiv : SchedRead; -def ReadFPSqrt : SchedRead; - -// Noop -def WriteNoop : SchedWrite; - - -//===----------------------------------------------------------------------===// -// Subtarget specific Machine Models. - -include "AArch64ScheduleA53.td" diff --git a/llvm/lib/Target/AArch64/AArch64ScheduleA53.td b/llvm/lib/Target/AArch64/AArch64ScheduleA53.td deleted file mode 100644 index 20a14e79228..00000000000 --- a/llvm/lib/Target/AArch64/AArch64ScheduleA53.td +++ /dev/null @@ -1,144 +0,0 @@ -//=- AArch64ScheduleA53.td - ARM Cortex-A53 Scheduling Definitions -*- tablegen -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the itinerary class data for the ARM Cortex A53 processors. -// -//===----------------------------------------------------------------------===// - -// ===---------------------------------------------------------------------===// -// The following definitions describe the simpler per-operand machine model. -// This works with MachineScheduler. See MCSchedModel.h for details. - -// Cortex-A53 machine model for scheduling and other instruction cost heuristics. -def CortexA53Model : SchedMachineModel { - let IssueWidth = 2; // 2 micro-ops are dispatched per cycle. - let MinLatency = 1 ; // OperandCycles are interpreted as MinLatency. - let LoadLatency = 2; // Optimistic load latency assuming bypass. - // This is overriden by OperandCycles if the - // Itineraries are queried instead. 
- let MispredictPenalty = 9; // Based on "Cortex-A53 Software Optimisation - // Specification - Instruction Timings" - // v 1.0 Spreadsheet -} - - -//===----------------------------------------------------------------------===// -// Define each kind of processor resource and number available. - -// Modeling each pipeline as a ProcResource using the default BufferSize = -1. -// Cortex-A53 is in-order and therefore should be using BufferSize = 0. The -// current configuration performs better with the basic latencies provided so -// far. Will revisit BufferSize once the latency information is more accurate. - -let SchedModel = CortexA53Model in { - -def A53UnitALU : ProcResource<2>; // Int ALU -def A53UnitMAC : ProcResource<1>; // Int MAC -def A53UnitDiv : ProcResource<1>; // Int Division -def A53UnitLdSt : ProcResource<1>; // Load/Store -def A53UnitB : ProcResource<1>; // Branch -def A53UnitFPALU : ProcResource<1>; // FP ALU -def A53UnitFPMDS : ProcResource<1>; // FP Mult/Div/Sqrt - - -//===----------------------------------------------------------------------===// -// Subtarget-specific SchedWrite types which both map the ProcResources and -// set the latency. - -// Issue - Every instruction must consume an A53WriteIssue. Optionally, -// instructions that cannot be dual-issued will also include the -// A53WriteIssue2nd in their SchedRW list. That second WriteRes will -// ensure that a second issue slot is consumed. -def A53WriteIssue : SchedWriteRes<[]>; -def A53WriteIssue2nd : SchedWriteRes<[]> { let Latency = 0; } - -// ALU - These are reduced to 1 despite a true latency of 4 in order to easily -// model forwarding logic. Once forwarding is properly modelled, then -// they'll be corrected. -def : WriteRes<WriteALU, [A53UnitALU]> { let Latency = 1; } -def : WriteRes<WriteALUs, [A53UnitALU]> { let Latency = 1; } -def : WriteRes<WriteCMP, [A53UnitALU]> { let Latency = 1; } - -// MAC -def : WriteRes<WriteMAC, [A53UnitMAC]> { let Latency = 4; } - -// Div -def : WriteRes<WriteDiv, [A53UnitDiv]> { let Latency = 4; } - -// Load - Note: Vector loads take 1-5 cycles to issue. For the WriteVecLd below, -// choosing the median of 3 which makes the latency 6. May model this more -// carefully in the future. -def : WriteRes<WriteLd, [A53UnitLdSt]> { let Latency = 4; } -def : WriteRes<WritePreLd, [A53UnitLdSt]> { let Latency = 4; } -def : WriteRes<WriteVecLd, [A53UnitLdSt]> { let Latency = 6; } - -// Store - Note: Vector stores take 1-3 cycles to issue. For the ReadVecSt below, -// choosing the median of 2 which makes the latency 5. May model this more -// carefully in the future. -def : WriteRes<WriteSt, [A53UnitLdSt]> { let Latency = 4; } -def : WriteRes<WriteVecSt, [A53UnitLdSt]> { let Latency = 5; } - -// Branch -def : WriteRes<WriteBr, [A53UnitB]>; -def : WriteRes<WriteBrL, [A53UnitB]>; - -// FP ALU -def : WriteRes<WriteFPALU, [A53UnitFPALU]> {let Latency = 6; } - -// FP MAC, Mul, Div, Sqrt -// Using Double Precision numbers for now as a worst case. Additionally, not -// modeling the exact hazard but instead treating the whole pipe as a hazard. -// As an example VMUL, VMLA, and others are actually pipelined. VDIV and VSQRT -// have a total latency of 33 and 32 respectively but only a hazard of 29 and -// 28 (double-prescion example). 
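// (Illustrative reading of the entries below, not text from the original
// file: a WriteRes with Latency = 33 and ResourceCycles = [29] means the
// divide's result becomes available 33 cycles after issue, but A53UnitFPMDS
// is only reserved for 29 cycles, so an independent FP operation may be
// scheduled onto the unit before the divide's result is ready.)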
-def : WriteRes<WriteFPMAC, [A53UnitFPMDS]> { let Latency = 10; } -def : WriteRes<WriteFPMul, [A53UnitFPMDS]> { let Latency = 6; } -def : WriteRes<WriteFPDiv, [A53UnitFPMDS]> { let Latency = 33; - let ResourceCycles = [29]; } -def : WriteRes<WriteFPSqrt, [A53UnitFPMDS]> { let Latency = 32; - let ResourceCycles = [28]; } - - -//===----------------------------------------------------------------------===// -// Subtarget-specific SchedRead types. - -// No forwarding defined for ReadALU yet. -def : ReadAdvance<ReadALU, 0>; - -// No forwarding defined for ReadCMP yet. -def : ReadAdvance<ReadCMP, 0>; - -// No forwarding defined for ReadBr yet. -def : ReadAdvance<ReadBr, 0>; - -// No forwarding defined for ReadMAC yet. -def : ReadAdvance<ReadMAC, 0>; - -// No forwarding defined for ReadDiv yet. -def : ReadAdvance<ReadDiv, 0>; - -// No forwarding defined for ReadLd, ReadPreLd, ReadVecLd yet. -def : ReadAdvance<ReadLd, 0>; -def : ReadAdvance<ReadPreLd, 0>; -def : ReadAdvance<ReadVecLd, 0>; - -// No forwarding defined for ReadSt and ReadVecSt yet. -def : ReadAdvance<ReadSt, 0>; -def : ReadAdvance<ReadVecSt, 0>; - -// No forwarding defined for ReadFPALU yet. -def : ReadAdvance<ReadFPALU, 0>; - -// No forwarding defined for ReadFPMAC/Mul/Div/Sqrt yet. -def : ReadAdvance<ReadFPMAC, 0>; -def : ReadAdvance<ReadFPMul, 0>; -def : ReadAdvance<ReadFPDiv, 0>; -def : ReadAdvance<ReadFPSqrt, 0>; - -} diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp deleted file mode 100644 index 17010d41ed4..00000000000 --- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp +++ /dev/null @@ -1,26 +0,0 @@ -//===-- AArch64SelectionDAGInfo.cpp - AArch64 SelectionDAG Info -----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the AArch64SelectionDAGInfo class. -// -//===----------------------------------------------------------------------===// - -#include "AArch64TargetMachine.h" -#include "llvm/CodeGen/SelectionDAG.h" -using namespace llvm; - -#define DEBUG_TYPE "arm-selectiondag-info" - -AArch64SelectionDAGInfo::AArch64SelectionDAGInfo(const AArch64TargetMachine &TM) - : TargetSelectionDAGInfo(TM), - Subtarget(&TM.getSubtarget<AArch64Subtarget>()) { -} - -AArch64SelectionDAGInfo::~AArch64SelectionDAGInfo() { -} diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h deleted file mode 100644 index d412ed2be18..00000000000 --- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h +++ /dev/null @@ -1,32 +0,0 @@ -//===-- AArch64SelectionDAGInfo.h - AArch64 SelectionDAG Info ---*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the AArch64 subclass for TargetSelectionDAGInfo. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_AARCH64SELECTIONDAGINFO_H -#define LLVM_AARCH64SELECTIONDAGINFO_H - -#include "llvm/Target/TargetSelectionDAGInfo.h" - -namespace llvm { - -class AArch64TargetMachine; - -class AArch64SelectionDAGInfo : public TargetSelectionDAGInfo { - const AArch64Subtarget *Subtarget; -public: - explicit AArch64SelectionDAGInfo(const AArch64TargetMachine &TM); - ~AArch64SelectionDAGInfo(); -}; - -} - -#endif diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp deleted file mode 100644 index f88c899cc9c..00000000000 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp +++ /dev/null @@ -1,99 +0,0 @@ -//===-- AArch64Subtarget.cpp - AArch64 Subtarget Information --------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the AArch64 specific subclass of TargetSubtargetInfo. -// -//===----------------------------------------------------------------------===// - -#include "AArch64Subtarget.h" -#include "AArch64RegisterInfo.h" -#include "MCTargetDesc/AArch64MCTargetDesc.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/IR/GlobalValue.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Target/TargetSubtargetInfo.h" - -using namespace llvm; - -#define DEBUG_TYPE "aarch64-subtarget" - -#define GET_SUBTARGETINFO_TARGET_DESC -#define GET_SUBTARGETINFO_CTOR -#include "AArch64GenSubtargetInfo.inc" - -enum AlignMode { - DefaultAlign, - StrictAlign, - NoStrictAlign -}; - -static cl::opt<AlignMode> -Align(cl::desc("Load/store alignment support"), - cl::Hidden, cl::init(DefaultAlign), - cl::values( - clEnumValN(DefaultAlign, "aarch64-default-align", - "Generate unaligned accesses only on hardware/OS " - "combinations that are known to support them"), - clEnumValN(StrictAlign, "aarch64-strict-align", - "Disallow all unaligned memory accesses"), - clEnumValN(NoStrictAlign, "aarch64-no-strict-align", - "Allow unaligned memory accesses"), - clEnumValEnd)); - -// Pin the vtable to this file. -void AArch64Subtarget::anchor() {} - -AArch64Subtarget::AArch64Subtarget(StringRef TT, StringRef CPU, StringRef FS, - bool LittleEndian) - : AArch64GenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others), - HasFPARMv8(false), HasNEON(false), HasCrypto(false), TargetTriple(TT), - CPUString(CPU), IsLittleEndian(LittleEndian) { - - initializeSubtargetFeatures(CPU, FS); -} - -void AArch64Subtarget::initializeSubtargetFeatures(StringRef CPU, - StringRef FS) { - AllowsUnalignedMem = false; - - if (CPU.empty()) - CPUString = "generic"; - - std::string FullFS = FS; - if (CPUString == "generic") { - // Enable FP by default. 
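    // For example (illustrative values, not from the original source): with
    // CPU = "generic" and FS = "+neon,+crypto", FullFS below becomes
    // "+fp-armv8,+neon,+crypto", so the FP feature is always present for the
    // generic CPU even when the user supplies their own feature string.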
- if (FullFS.empty()) - FullFS = "+fp-armv8"; - else - FullFS = "+fp-armv8," + FullFS; - } - - ParseSubtargetFeatures(CPU, FullFS); - - switch (Align) { - case DefaultAlign: - // Linux targets support unaligned accesses on AARCH64 - AllowsUnalignedMem = isTargetLinux(); - break; - case StrictAlign: - AllowsUnalignedMem = false; - break; - case NoStrictAlign: - AllowsUnalignedMem = true; - break; - } -} - -bool AArch64Subtarget::GVIsIndirectSymbol(const GlobalValue *GV, - Reloc::Model RelocM) const { - if (RelocM == Reloc::Static) - return false; - - return !GV->hasLocalLinkage() && !GV->hasHiddenVisibility(); -} diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h deleted file mode 100644 index dd2b4d211f2..00000000000 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ /dev/null @@ -1,89 +0,0 @@ -//==-- AArch64Subtarget.h - Define Subtarget for the AArch64 ---*- C++ -*--===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file declares the AArch64 specific subclass of TargetSubtargetInfo. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TARGET_AARCH64_SUBTARGET_H -#define LLVM_TARGET_AARCH64_SUBTARGET_H - -#include "llvm/ADT/Triple.h" -#include "llvm/Target/TargetSubtargetInfo.h" -#include <string> - -#define GET_SUBTARGETINFO_HEADER -#include "AArch64GenSubtargetInfo.inc" - -namespace llvm { -class StringRef; -class GlobalValue; - -class AArch64Subtarget : public AArch64GenSubtargetInfo { - virtual void anchor(); -protected: - enum ARMProcFamilyEnum {Others, CortexA53, CortexA57}; - - /// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others. - ARMProcFamilyEnum ARMProcFamily; - - bool HasFPARMv8; - bool HasNEON; - bool HasCrypto; - - /// AllowsUnalignedMem - If true, the subtarget allows unaligned memory - /// accesses for some types. For details, see - /// AArch64TargetLowering::allowsUnalignedMemoryAccesses(). - bool AllowsUnalignedMem; - - /// TargetTriple - What processor and OS we're targeting. - Triple TargetTriple; - - /// CPUString - String name of used CPU. - std::string CPUString; - - /// IsLittleEndian - The target is Little Endian - bool IsLittleEndian; - -private: - void initializeSubtargetFeatures(StringRef CPU, StringRef FS); - -public: - /// This constructor initializes the data members to match that - /// of the specified triple. - /// - AArch64Subtarget(StringRef TT, StringRef CPU, StringRef FS, - bool LittleEndian); - - bool enableMachineScheduler() const override { - return true; - } - - /// ParseSubtargetFeatures - Parses features string setting specified - /// subtarget options. Definition of function is auto generated by tblgen. 
- void ParseSubtargetFeatures(StringRef CPU, StringRef FS); - - bool GVIsIndirectSymbol(const GlobalValue *GV, Reloc::Model RelocM) const; - - bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); } - bool isTargetLinux() const { return TargetTriple.isOSLinux(); } - - bool hasFPARMv8() const { return HasFPARMv8; } - bool hasNEON() const { return HasNEON; } - bool hasCrypto() const { return HasCrypto; } - - bool allowsUnalignedMem() const { return AllowsUnalignedMem; } - - bool isLittle() const { return IsLittleEndian; } - - const std::string & getCPUString() const { return CPUString; } -}; -} // End llvm namespace - -#endif // LLVM_TARGET_AARCH64_SUBTARGET_H diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp deleted file mode 100644 index 6bd6f5912d7..00000000000 --- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ /dev/null @@ -1,121 +0,0 @@ -//===-- AArch64TargetMachine.cpp - Define TargetMachine for AArch64 -------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the implementation of the AArch64TargetMachine -// methods. Principally just setting up the passes needed to generate correct -// code on this architecture. -// -//===----------------------------------------------------------------------===// - -#include "AArch64.h" -#include "AArch64TargetMachine.h" -#include "MCTargetDesc/AArch64MCTargetDesc.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/PassManager.h" -#include "llvm/Support/TargetRegistry.h" -#include "llvm/Transforms/Scalar.h" - -using namespace llvm; - -extern "C" void LLVMInitializeAArch64Target() { - RegisterTargetMachine<AArch64leTargetMachine> X(TheAArch64leTarget); - RegisterTargetMachine<AArch64beTargetMachine> Y(TheAArch64beTarget); -} - -AArch64TargetMachine::AArch64TargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, - const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL, - bool LittleEndian) - : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), - Subtarget(TT, CPU, FS, LittleEndian), - InstrInfo(Subtarget), - DL(LittleEndian ? - "e-m:e-i64:64-i128:128-n32:64-S128" : - "E-m:e-i64:64-i128:128-n32:64-S128"), - TLInfo(*this), - TSInfo(*this), - FrameLowering(Subtarget) { - initAsmInfo(); -} - -void AArch64leTargetMachine::anchor() { } - -AArch64leTargetMachine:: -AArch64leTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL) - : AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} - -void AArch64beTargetMachine::anchor() { } - -AArch64beTargetMachine:: -AArch64beTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL) - : AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} - -void AArch64TargetMachine::addAnalysisPasses(PassManagerBase &PM) { - // Add first the target-independent BasicTTI pass, then our AArch64 pass. This - // allows the AArch64 pass to delegate to the target independent layer when - // appropriate. 
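  // (Clarifying note based on the legacy TTI analysis-group behaviour rather
  // than this file's text: the most recently added TTI pass sits on top of
  // the delegation stack and answers queries first, falling back to passes
  // added earlier, which is why BasicTTI must be added before the
  // AArch64-specific pass.)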
- PM.add(createBasicTargetTransformInfoPass(this)); - PM.add(createAArch64TargetTransformInfoPass(this)); -} - -namespace { -/// AArch64 Code Generator Pass Configuration Options. -class AArch64PassConfig : public TargetPassConfig { -public: - AArch64PassConfig(AArch64TargetMachine *TM, PassManagerBase &PM) - : TargetPassConfig(TM, PM) {} - - AArch64TargetMachine &getAArch64TargetMachine() const { - return getTM<AArch64TargetMachine>(); - } - - const AArch64Subtarget &getAArch64Subtarget() const { - return *getAArch64TargetMachine().getSubtargetImpl(); - } - - bool addPreISel() override; - bool addInstSelector() override; - bool addPreEmitPass() override; -}; -} // namespace - -bool AArch64PassConfig::addPreISel() { - if (TM->getOptLevel() != CodeGenOpt::None) - addPass(createGlobalMergePass(TM)); - - return false; -} - -TargetPassConfig *AArch64TargetMachine::createPassConfig(PassManagerBase &PM) { - return new AArch64PassConfig(this, PM); -} - -bool AArch64PassConfig::addPreEmitPass() { - addPass(&UnpackMachineBundlesID); - addPass(createAArch64BranchFixupPass()); - return true; -} - -bool AArch64PassConfig::addInstSelector() { - addPass(createAArch64ISelDAG(getAArch64TargetMachine(), getOptLevel())); - - // For ELF, cleanup any local-dynamic TLS accesses. - if (getAArch64Subtarget().isTargetELF() && getOptLevel() != CodeGenOpt::None) - addPass(createAArch64CleanupLocalDynamicTLSPass()); - - return false; -} diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.h b/llvm/lib/Target/AArch64/AArch64TargetMachine.h deleted file mode 100644 index 3800635e0fa..00000000000 --- a/llvm/lib/Target/AArch64/AArch64TargetMachine.h +++ /dev/null @@ -1,94 +0,0 @@ -//=== AArch64TargetMachine.h - Define TargetMachine for AArch64 -*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file declares the AArch64 specific subclass of TargetMachine. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_AARCH64TARGETMACHINE_H -#define LLVM_AARCH64TARGETMACHINE_H - -#include "AArch64FrameLowering.h" -#include "AArch64ISelLowering.h" -#include "AArch64InstrInfo.h" -#include "AArch64SelectionDAGInfo.h" -#include "AArch64Subtarget.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/Target/TargetMachine.h" - -namespace llvm { - -class AArch64TargetMachine : public LLVMTargetMachine { - AArch64Subtarget Subtarget; - AArch64InstrInfo InstrInfo; - const DataLayout DL; - AArch64TargetLowering TLInfo; - AArch64SelectionDAGInfo TSInfo; - AArch64FrameLowering FrameLowering; - -public: - AArch64TargetMachine(const Target &T, StringRef TT, StringRef CPU, - StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL, - bool LittleEndian); - - const AArch64InstrInfo *getInstrInfo() const override { - return &InstrInfo; - } - - const AArch64FrameLowering *getFrameLowering() const override { - return &FrameLowering; - } - - const AArch64TargetLowering *getTargetLowering() const override { - return &TLInfo; - } - - const AArch64SelectionDAGInfo *getSelectionDAGInfo() const override { - return &TSInfo; - } - - const AArch64Subtarget *getSubtargetImpl() const override { return &Subtarget; } - - const DataLayout *getDataLayout() const override { return &DL; } - - const TargetRegisterInfo *getRegisterInfo() const override { - return &InstrInfo.getRegisterInfo(); - } - TargetPassConfig *createPassConfig(PassManagerBase &PM) override; - - void addAnalysisPasses(PassManagerBase &PM) override; -}; - -// AArch64leTargetMachine - AArch64 little endian target machine. -// -class AArch64leTargetMachine : public AArch64TargetMachine { - virtual void anchor(); -public: - AArch64leTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL); -}; - -// AArch64beTargetMachine - AArch64 big endian target machine. -// -class AArch64beTargetMachine : public AArch64TargetMachine { - virtual void anchor(); -public: - AArch64beTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL); -}; - -} // End llvm namespace - -#endif diff --git a/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp b/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp deleted file mode 100644 index 663d61944a8..00000000000 --- a/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp +++ /dev/null @@ -1,24 +0,0 @@ -//===-- AArch64TargetObjectFile.cpp - AArch64 Object Info -----------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file deals with any AArch64 specific requirements on object files. 
-// -//===----------------------------------------------------------------------===// - - -#include "AArch64TargetObjectFile.h" - -using namespace llvm; - -void -AArch64ElfTargetObjectFile::Initialize(MCContext &Ctx, - const TargetMachine &TM) { - TargetLoweringObjectFileELF::Initialize(Ctx, TM); - InitializeELF(TM.Options.UseInitArray); -} diff --git a/llvm/lib/Target/AArch64/AArch64TargetObjectFile.h b/llvm/lib/Target/AArch64/AArch64TargetObjectFile.h deleted file mode 100644 index 6e57103a426..00000000000 --- a/llvm/lib/Target/AArch64/AArch64TargetObjectFile.h +++ /dev/null @@ -1,31 +0,0 @@ -//===-- AArch64TargetObjectFile.h - AArch64 Object Info ---------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file deals with any AArch64 specific requirements on object files. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TARGET_AARCH64_TARGETOBJECTFILE_H -#define LLVM_TARGET_AARCH64_TARGETOBJECTFILE_H - -#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" -#include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetMachine.h" - -namespace llvm { - - /// AArch64ElfTargetObjectFile - This implementation is used for ELF - /// AArch64 targets. - class AArch64ElfTargetObjectFile : public TargetLoweringObjectFileELF { - void Initialize(MCContext &Ctx, const TargetMachine &TM) override; - }; - -} // end namespace llvm - -#endif diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp deleted file mode 100644 index 0228d123bc6..00000000000 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ /dev/null @@ -1,109 +0,0 @@ -//===- AArch64TargetTransformInfo.cpp - AArch64 specific TTI pass ---------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// This file implements a TargetTransformInfo analysis pass specific to the -/// AArch64 target machine. It uses the target's detailed information to provide -/// more precise answers to certain TTI queries, while letting the target -/// independent and default TTI implementations handle the rest. -/// -//===----------------------------------------------------------------------===// - -#include "AArch64.h" -#include "AArch64TargetMachine.h" -#include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/Support/Debug.h" -#include "llvm/Target/CostTable.h" -#include "llvm/Target/TargetLowering.h" -using namespace llvm; - -#define DEBUG_TYPE "aarch64tti" - -// Declare the pass initialization routine locally as target-specific passes -// don't have a target-wide initialization entry point, and so we rely on the -// pass constructor initialization. 
-namespace llvm { -void initializeAArch64TTIPass(PassRegistry &); -} - -namespace { - -class AArch64TTI final : public ImmutablePass, public TargetTransformInfo { - const AArch64Subtarget *ST; - const AArch64TargetLowering *TLI; - -public: - AArch64TTI() : ImmutablePass(ID), ST(nullptr), TLI(nullptr) { - llvm_unreachable("This pass cannot be directly constructed"); - } - - AArch64TTI(const AArch64TargetMachine *TM) - : ImmutablePass(ID), ST(TM->getSubtargetImpl()), - TLI(TM->getTargetLowering()) { - initializeAArch64TTIPass(*PassRegistry::getPassRegistry()); - } - - virtual void initializePass() override { - pushTTIStack(this); - } - - virtual void getAnalysisUsage(AnalysisUsage &AU) const override { - TargetTransformInfo::getAnalysisUsage(AU); - } - - /// Pass identification. - static char ID; - - /// Provide necessary pointer adjustments for the two base classes. - virtual void *getAdjustedAnalysisPointer(const void *ID) override { - if (ID == &TargetTransformInfo::ID) - return (TargetTransformInfo*)this; - return this; - } - - /// \name Scalar TTI Implementations - /// @{ - - /// @} - - - /// \name Vector TTI Implementations - /// @{ - - unsigned getNumberOfRegisters(bool Vector) const override { - if (Vector) { - if (ST->hasNEON()) - return 32; - return 0; - } - return 32; - } - - unsigned getRegisterBitWidth(bool Vector) const override { - if (Vector) { - if (ST->hasNEON()) - return 128; - return 0; - } - return 64; - } - - unsigned getMaximumUnrollFactor() const override { return 2; } - /// @} -}; - -} // end anonymous namespace - -INITIALIZE_AG_PASS(AArch64TTI, TargetTransformInfo, "aarch64tti", - "AArch64 Target Transform Info", true, true, false) -char AArch64TTI::ID = 0; - -ImmutablePass * -llvm::createAArch64TargetTransformInfoPass(const AArch64TargetMachine *TM) { - return new AArch64TTI(TM); -} diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp deleted file mode 100644 index 9fe3497c6a1..00000000000 --- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ /dev/null @@ -1,2677 +0,0 @@ -//==- AArch64AsmParser.cpp - Parse AArch64 assembly to MCInst instructions -==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the (GNU-style) assembly parser for the AArch64 -// architecture. 
-// -//===----------------------------------------------------------------------===// - - -#include "MCTargetDesc/AArch64MCTargetDesc.h" -#include "MCTargetDesc/AArch64MCExpr.h" -#include "Utils/AArch64BaseInfo.h" -#include "llvm/ADT/APFloat.h" -#include "llvm/ADT/APInt.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCParser/MCAsmLexer.h" -#include "llvm/MC/MCParser/MCAsmParser.h" -#include "llvm/MC/MCParser/MCParsedAsmOperand.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/MC/MCTargetAsmParser.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/TargetRegistry.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -namespace { - -class AArch64Operand; - -class AArch64AsmParser : public MCTargetAsmParser { - MCSubtargetInfo &STI; - MCAsmParser &Parser; - -#define GET_ASSEMBLER_HEADER -#include "AArch64GenAsmMatcher.inc" - -public: - enum AArch64MatchResultTy { - Match_FirstAArch64 = FIRST_TARGET_MATCH_RESULT_TY, -#define GET_OPERAND_DIAGNOSTIC_TYPES -#include "AArch64GenAsmMatcher.inc" - }; - - AArch64AsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser, - const MCInstrInfo &MII, - const MCTargetOptions &Options) - : MCTargetAsmParser(), STI(_STI), Parser(_Parser) { - MCAsmParserExtension::Initialize(_Parser); - - // Initialize the set of available features. - setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); - } - - // These are the public interface of the MCTargetAsmParser - bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; - bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, - SMLoc NameLoc, - SmallVectorImpl<MCParsedAsmOperand*> &Operands) override; - - bool ParseDirective(AsmToken DirectiveID) override; - bool ParseDirectiveTLSDescCall(SMLoc L); - bool ParseDirectiveWord(unsigned Size, SMLoc L); - - bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, - SmallVectorImpl<MCParsedAsmOperand*> &Operands, - MCStreamer&Out, unsigned &ErrorInfo, - bool MatchingInlineAsm) override; - - // The rest of the sub-parsers have more freedom over interface: they return - // an OperandMatchResultTy because it's less ambiguous than true/false or - // -1/0/1 even if it is more verbose - OperandMatchResultTy - ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, - StringRef Mnemonic); - - OperandMatchResultTy ParseImmediate(const MCExpr *&ExprVal); - - OperandMatchResultTy ParseRelocPrefix(AArch64MCExpr::VariantKind &RefKind); - - OperandMatchResultTy - ParseNEONLane(SmallVectorImpl<MCParsedAsmOperand*> &Operands, - uint32_t NumLanes); - - OperandMatchResultTy - ParseRegister(SmallVectorImpl<MCParsedAsmOperand*> &Operands, - uint32_t &NumLanes); - - OperandMatchResultTy - ParseImmWithLSLOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands); - - OperandMatchResultTy - ParseCondCodeOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands); - - OperandMatchResultTy - ParseCRxOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands); - - OperandMatchResultTy - ParseFPImmOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands); - - OperandMatchResultTy - ParseFPImm0AndImm0Operand( SmallVectorImpl<MCParsedAsmOperand*> &Operands); - - template<typename SomeNamedImmMapper> OperandMatchResultTy - ParseNamedImmOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - return 
ParseNamedImmOperand(SomeNamedImmMapper(), Operands); - } - - OperandMatchResultTy - ParseNamedImmOperand(const NamedImmMapper &Mapper, - SmallVectorImpl<MCParsedAsmOperand*> &Operands); - - OperandMatchResultTy - ParseLSXAddressOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands); - - OperandMatchResultTy - ParseShiftExtend(SmallVectorImpl<MCParsedAsmOperand*> &Operands); - - OperandMatchResultTy - ParseSysRegOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands); - - bool TryParseVector(uint32_t &RegNum, SMLoc &RegEndLoc, StringRef &Layout, - SMLoc &LayoutLoc); - - OperandMatchResultTy ParseVectorList(SmallVectorImpl<MCParsedAsmOperand *> &); - - bool validateInstruction(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands); - - /// Scan the next token (which had better be an identifier) and determine - /// whether it represents a general-purpose or vector register. It returns - /// true if an identifier was found and populates its reference arguments. It - /// does not consume the token. - bool - IdentifyRegister(unsigned &RegNum, SMLoc &RegEndLoc, StringRef &LayoutSpec, - SMLoc &LayoutLoc) const; - -}; - -} - -namespace { - -/// Instances of this class represent a parsed AArch64 machine instruction. -class AArch64Operand : public MCParsedAsmOperand { -private: - enum KindTy { - k_ImmWithLSL, // #uimm {, LSL #amt } - k_CondCode, // eq/ne/... - k_FPImmediate, // Limited-precision floating-point imm - k_Immediate, // Including expressions referencing symbols - k_Register, - k_ShiftExtend, - k_VectorList, // A sequential list of 1 to 4 registers. - k_SysReg, // The register operand of MRS and MSR instructions - k_Token, // The mnemonic; other raw tokens the auto-generated - k_WrappedRegister // Load/store exclusive permit a wrapped register. - } Kind; - - SMLoc StartLoc, EndLoc; - - struct ImmWithLSLOp { - const MCExpr *Val; - unsigned ShiftAmount; - bool ImplicitAmount; - }; - - struct CondCodeOp { - A64CC::CondCodes Code; - }; - - struct FPImmOp { - double Val; - }; - - struct ImmOp { - const MCExpr *Val; - }; - - struct RegOp { - unsigned RegNum; - }; - - struct ShiftExtendOp { - A64SE::ShiftExtSpecifiers ShiftType; - unsigned Amount; - bool ImplicitAmount; - }; - - // A vector register list is a sequential list of 1 to 4 registers. 
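  // (Hypothetical example for illustration: an assembly operand such as
  // "{ v0.4s, v1.4s }" corresponds to Count == 2 and
  // Layout == A64Layout::VL_4S in the struct below.)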
- struct VectorListOp { - unsigned RegNum; - unsigned Count; - A64Layout::VectorLayout Layout; - }; - - struct SysRegOp { - const char *Data; - unsigned Length; - }; - - struct TokOp { - const char *Data; - unsigned Length; - }; - - union { - struct ImmWithLSLOp ImmWithLSL; - struct CondCodeOp CondCode; - struct FPImmOp FPImm; - struct ImmOp Imm; - struct RegOp Reg; - struct ShiftExtendOp ShiftExtend; - struct VectorListOp VectorList; - struct SysRegOp SysReg; - struct TokOp Tok; - }; - - AArch64Operand(KindTy K, SMLoc S, SMLoc E) - : MCParsedAsmOperand(), Kind(K), StartLoc(S), EndLoc(E) {} - -public: - AArch64Operand(const AArch64Operand &o) : MCParsedAsmOperand() { - } - - SMLoc getStartLoc() const override { return StartLoc; } - SMLoc getEndLoc() const override { return EndLoc; } - void print(raw_ostream&) const override; - void dump() const override; - - StringRef getToken() const { - assert(Kind == k_Token && "Invalid access!"); - return StringRef(Tok.Data, Tok.Length); - } - - unsigned getReg() const override { - assert((Kind == k_Register || Kind == k_WrappedRegister) - && "Invalid access!"); - return Reg.RegNum; - } - - const MCExpr *getImm() const { - assert(Kind == k_Immediate && "Invalid access!"); - return Imm.Val; - } - - A64CC::CondCodes getCondCode() const { - assert(Kind == k_CondCode && "Invalid access!"); - return CondCode.Code; - } - - static bool isNonConstantExpr(const MCExpr *E, - AArch64MCExpr::VariantKind &Variant) { - if (const AArch64MCExpr *A64E = dyn_cast<AArch64MCExpr>(E)) { - Variant = A64E->getKind(); - return true; - } else if (!isa<MCConstantExpr>(E)) { - Variant = AArch64MCExpr::VK_AARCH64_None; - return true; - } - - return false; - } - - bool isCondCode() const { return Kind == k_CondCode; } - bool isToken() const override { return Kind == k_Token; } - bool isReg() const override { return Kind == k_Register; } - bool isImm() const override { return Kind == k_Immediate; } - bool isMem() const override { return false; } - bool isFPImm() const { return Kind == k_FPImmediate; } - bool isShiftOrExtend() const { return Kind == k_ShiftExtend; } - bool isSysReg() const { return Kind == k_SysReg; } - bool isImmWithLSL() const { return Kind == k_ImmWithLSL; } - bool isWrappedReg() const { return Kind == k_WrappedRegister; } - - bool isAddSubImmLSL0() const { - if (!isImmWithLSL()) return false; - if (ImmWithLSL.ShiftAmount != 0) return false; - - AArch64MCExpr::VariantKind Variant; - if (isNonConstantExpr(ImmWithLSL.Val, Variant)) { - return Variant == AArch64MCExpr::VK_AARCH64_LO12 - || Variant == AArch64MCExpr::VK_AARCH64_DTPREL_LO12 - || Variant == AArch64MCExpr::VK_AARCH64_DTPREL_LO12_NC - || Variant == AArch64MCExpr::VK_AARCH64_TPREL_LO12 - || Variant == AArch64MCExpr::VK_AARCH64_TPREL_LO12_NC - || Variant == AArch64MCExpr::VK_AARCH64_TLSDESC_LO12; - } - - // Otherwise it should be a real immediate in range: - const MCConstantExpr *CE = cast<MCConstantExpr>(ImmWithLSL.Val); - return CE->getValue() >= 0 && CE->getValue() <= 0xfff; - } - - bool isAddSubImmLSL12() const { - if (!isImmWithLSL()) return false; - if (ImmWithLSL.ShiftAmount != 12) return false; - - AArch64MCExpr::VariantKind Variant; - if (isNonConstantExpr(ImmWithLSL.Val, Variant)) { - return Variant == AArch64MCExpr::VK_AARCH64_DTPREL_HI12 - || Variant == AArch64MCExpr::VK_AARCH64_TPREL_HI12; - } - - // Otherwise it should be a real immediate in range: - const MCConstantExpr *CE = cast<MCConstantExpr>(ImmWithLSL.Val); - return CE->getValue() >= 0 && CE->getValue() <= 0xfff; - } - - 
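  // (Illustrative note, not from the original source: isAddSubImmLSL0 and
  // isAddSubImmLSL12 above correspond to the two forms of the add/sub
  // immediate, e.g. "add x0, x1, #0x123" and "add x0, x1, #0x123, lsl #12",
  // where the 12-bit immediate is optionally shifted left by 12.)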
template<unsigned MemSize, unsigned RmSize> bool isAddrRegExtend() const { - if (!isShiftOrExtend()) return false; - - A64SE::ShiftExtSpecifiers Ext = ShiftExtend.ShiftType; - if (RmSize == 32 && !(Ext == A64SE::UXTW || Ext == A64SE::SXTW)) - return false; - - if (RmSize == 64 && !(Ext == A64SE::LSL || Ext == A64SE::SXTX)) - return false; - - return ShiftExtend.Amount == Log2_32(MemSize) || ShiftExtend.Amount == 0; - } - - bool isAdrpLabel() const { - if (!isImm()) return false; - - AArch64MCExpr::VariantKind Variant; - if (isNonConstantExpr(getImm(), Variant)) { - return Variant == AArch64MCExpr::VK_AARCH64_None - || Variant == AArch64MCExpr::VK_AARCH64_GOT - || Variant == AArch64MCExpr::VK_AARCH64_GOTTPREL - || Variant == AArch64MCExpr::VK_AARCH64_TLSDESC; - } - - return isLabel<21, 4096>(); - } - - template<unsigned RegWidth> bool isBitfieldWidth() const { - if (!isImm()) return false; - - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - - return CE->getValue() >= 1 && CE->getValue() <= RegWidth; - } - - template<int RegWidth> - bool isCVTFixedPos() const { - if (!isImm()) return false; - - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - - return CE->getValue() >= 1 && CE->getValue() <= RegWidth; - } - - bool isFMOVImm() const { - if (!isFPImm()) return false; - - APFloat RealVal(FPImm.Val); - uint32_t ImmVal; - return A64Imms::isFPImm(RealVal, ImmVal); - } - - bool isFPZero() const { - if (!isFPImm()) return false; - - APFloat RealVal(FPImm.Val); - return RealVal.isPosZero(); - } - - template<unsigned field_width, unsigned scale> - bool isLabel() const { - if (!isImm()) return false; - - if (dyn_cast<MCSymbolRefExpr>(Imm.Val)) { - return true; - } else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Imm.Val)) { - int64_t Val = CE->getValue(); - int64_t Min = - (scale * (1LL << (field_width - 1))); - int64_t Max = scale * ((1LL << (field_width - 1)) - 1); - return (Val % scale) == 0 && Val >= Min && Val <= Max; - } - - // N.b. this disallows explicit relocation specifications via an - // AArch64MCExpr. Users needing that behaviour - return false; - } - - bool isLane1() const { - if (!isImm()) return false; - - // Because it's come through custom assembly parsing, it must always be a - // constant expression. - return cast<MCConstantExpr>(getImm())->getValue() == 1; - } - - bool isLoadLitLabel() const { - if (!isImm()) return false; - - AArch64MCExpr::VariantKind Variant; - if (isNonConstantExpr(getImm(), Variant)) { - return Variant == AArch64MCExpr::VK_AARCH64_None - || Variant == AArch64MCExpr::VK_AARCH64_GOTTPREL; - } - - return isLabel<19, 4>(); - } - - template<unsigned RegWidth> bool isLogicalImm() const { - if (!isImm()) return false; - - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Imm.Val); - if (!CE) return false; - - uint32_t Bits; - return A64Imms::isLogicalImm(RegWidth, CE->getValue(), Bits); - } - - template<unsigned RegWidth> bool isLogicalImmMOV() const { - if (!isLogicalImm<RegWidth>()) return false; - - const MCConstantExpr *CE = cast<MCConstantExpr>(Imm.Val); - - // The move alias for ORR is only valid if the immediate cannot be - // represented with a move (immediate) instruction; they take priority. 
- int UImm16, Shift; - return !A64Imms::isMOVZImm(RegWidth, CE->getValue(), UImm16, Shift) - && !A64Imms::isMOVNImm(RegWidth, CE->getValue(), UImm16, Shift); - } - - template<int MemSize> - bool isOffsetUImm12() const { - if (!isImm()) return false; - - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - - // Assume they know what they're doing for now if they've given us a - // non-constant expression. In principle we could check for ridiculous - // things that can't possibly work or relocations that would almost - // certainly break resulting code. - if (!CE) - return true; - - int64_t Val = CE->getValue(); - - // Must be a multiple of the access size in bytes. - if ((Val & (MemSize - 1)) != 0) return false; - - // Must be 12-bit unsigned - return Val >= 0 && Val <= 0xfff * MemSize; - } - - template<A64SE::ShiftExtSpecifiers SHKind, bool is64Bit> - bool isShift() const { - if (!isShiftOrExtend()) return false; - - if (ShiftExtend.ShiftType != SHKind) - return false; - - return is64Bit ? ShiftExtend.Amount <= 63 : ShiftExtend.Amount <= 31; - } - - bool isMOVN32Imm() const { - static const AArch64MCExpr::VariantKind PermittedModifiers[] = { - AArch64MCExpr::VK_AARCH64_SABS_G0, - AArch64MCExpr::VK_AARCH64_SABS_G1, - AArch64MCExpr::VK_AARCH64_DTPREL_G1, - AArch64MCExpr::VK_AARCH64_DTPREL_G0, - AArch64MCExpr::VK_AARCH64_GOTTPREL_G1, - AArch64MCExpr::VK_AARCH64_TPREL_G1, - AArch64MCExpr::VK_AARCH64_TPREL_G0, - }; - const unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers); - - return isMoveWideImm(32, PermittedModifiers, NumModifiers); - } - - bool isMOVN64Imm() const { - static const AArch64MCExpr::VariantKind PermittedModifiers[] = { - AArch64MCExpr::VK_AARCH64_SABS_G0, - AArch64MCExpr::VK_AARCH64_SABS_G1, - AArch64MCExpr::VK_AARCH64_SABS_G2, - AArch64MCExpr::VK_AARCH64_DTPREL_G2, - AArch64MCExpr::VK_AARCH64_DTPREL_G1, - AArch64MCExpr::VK_AARCH64_DTPREL_G0, - AArch64MCExpr::VK_AARCH64_GOTTPREL_G1, - AArch64MCExpr::VK_AARCH64_TPREL_G2, - AArch64MCExpr::VK_AARCH64_TPREL_G1, - AArch64MCExpr::VK_AARCH64_TPREL_G0, - }; - const unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers); - - return isMoveWideImm(64, PermittedModifiers, NumModifiers); - } - - - bool isMOVZ32Imm() const { - static const AArch64MCExpr::VariantKind PermittedModifiers[] = { - AArch64MCExpr::VK_AARCH64_ABS_G0, - AArch64MCExpr::VK_AARCH64_ABS_G1, - AArch64MCExpr::VK_AARCH64_SABS_G0, - AArch64MCExpr::VK_AARCH64_SABS_G1, - AArch64MCExpr::VK_AARCH64_DTPREL_G1, - AArch64MCExpr::VK_AARCH64_DTPREL_G0, - AArch64MCExpr::VK_AARCH64_GOTTPREL_G1, - AArch64MCExpr::VK_AARCH64_TPREL_G1, - AArch64MCExpr::VK_AARCH64_TPREL_G0, - }; - const unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers); - - return isMoveWideImm(32, PermittedModifiers, NumModifiers); - } - - bool isMOVZ64Imm() const { - static const AArch64MCExpr::VariantKind PermittedModifiers[] = { - AArch64MCExpr::VK_AARCH64_ABS_G0, - AArch64MCExpr::VK_AARCH64_ABS_G1, - AArch64MCExpr::VK_AARCH64_ABS_G2, - AArch64MCExpr::VK_AARCH64_ABS_G3, - AArch64MCExpr::VK_AARCH64_SABS_G0, - AArch64MCExpr::VK_AARCH64_SABS_G1, - AArch64MCExpr::VK_AARCH64_SABS_G2, - AArch64MCExpr::VK_AARCH64_DTPREL_G2, - AArch64MCExpr::VK_AARCH64_DTPREL_G1, - AArch64MCExpr::VK_AARCH64_DTPREL_G0, - AArch64MCExpr::VK_AARCH64_GOTTPREL_G1, - AArch64MCExpr::VK_AARCH64_TPREL_G2, - AArch64MCExpr::VK_AARCH64_TPREL_G1, - AArch64MCExpr::VK_AARCH64_TPREL_G0, - }; - const unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers); - - return isMoveWideImm(64, 
PermittedModifiers, NumModifiers); - } - - bool isMOVK32Imm() const { - static const AArch64MCExpr::VariantKind PermittedModifiers[] = { - AArch64MCExpr::VK_AARCH64_ABS_G0_NC, - AArch64MCExpr::VK_AARCH64_ABS_G1_NC, - AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC, - AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC, - AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC, - AArch64MCExpr::VK_AARCH64_TPREL_G1_NC, - AArch64MCExpr::VK_AARCH64_TPREL_G0_NC, - }; - const unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers); - - return isMoveWideImm(32, PermittedModifiers, NumModifiers); - } - - bool isMOVK64Imm() const { - static const AArch64MCExpr::VariantKind PermittedModifiers[] = { - AArch64MCExpr::VK_AARCH64_ABS_G0_NC, - AArch64MCExpr::VK_AARCH64_ABS_G1_NC, - AArch64MCExpr::VK_AARCH64_ABS_G2_NC, - AArch64MCExpr::VK_AARCH64_ABS_G3, - AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC, - AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC, - AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC, - AArch64MCExpr::VK_AARCH64_TPREL_G1_NC, - AArch64MCExpr::VK_AARCH64_TPREL_G0_NC, - }; - const unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers); - - return isMoveWideImm(64, PermittedModifiers, NumModifiers); - } - - bool isMoveWideImm(unsigned RegWidth, - const AArch64MCExpr::VariantKind *PermittedModifiers, - unsigned NumModifiers) const { - if (!isImmWithLSL()) return false; - - if (ImmWithLSL.ShiftAmount % 16 != 0) return false; - if (ImmWithLSL.ShiftAmount >= RegWidth) return false; - - AArch64MCExpr::VariantKind Modifier; - if (isNonConstantExpr(ImmWithLSL.Val, Modifier)) { - // E.g. "#:abs_g0:sym, lsl #16" makes no sense. - if (!ImmWithLSL.ImplicitAmount) return false; - - for (unsigned i = 0; i < NumModifiers; ++i) - if (PermittedModifiers[i] == Modifier) return true; - - return false; - } - - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ImmWithLSL.Val); - return CE && CE->getValue() >= 0 && CE->getValue() <= 0xffff; - } - - template<int RegWidth, bool (*isValidImm)(int, uint64_t, int&, int&)> - bool isMoveWideMovAlias() const { - if (!isImm()) return false; - - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - - int UImm16, Shift; - uint64_t Value = CE->getValue(); - - // If this is a 32-bit instruction then all bits above 32 should be the - // same: either of these is fine because signed/unsigned values should be - // permitted. 
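    // (Example values for illustration only: 0x00000000ffff0000 and
    // 0xffffffffffff0000 are both accepted for RegWidth == 32 and are masked
    // down to 0xffff0000, whereas 0x00000001ffff0000 is rejected because its
    // upper 32 bits are neither all-zero nor all-one.)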
- if (RegWidth == 32) { - if ((Value >> 32) != 0 && (Value >> 32) != 0xffffffff) - return false; - - Value &= 0xffffffffULL; - } - - return isValidImm(RegWidth, Value, UImm16, Shift); - } - - bool isMSRWithReg() const { - if (!isSysReg()) return false; - - bool IsKnownRegister; - StringRef Name(SysReg.Data, SysReg.Length); - A64SysReg::MSRMapper().fromString(Name, IsKnownRegister); - - return IsKnownRegister; - } - - bool isMSRPState() const { - if (!isSysReg()) return false; - - bool IsKnownRegister; - StringRef Name(SysReg.Data, SysReg.Length); - A64PState::PStateMapper().fromString(Name, IsKnownRegister); - - return IsKnownRegister; - } - - bool isMRS() const { - if (!isSysReg()) return false; - - // First check against specific MSR-only (write-only) registers - bool IsKnownRegister; - StringRef Name(SysReg.Data, SysReg.Length); - A64SysReg::MRSMapper().fromString(Name, IsKnownRegister); - - return IsKnownRegister; - } - - bool isPRFM() const { - if (!isImm()) return false; - - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - - if (!CE) - return false; - - return CE->getValue() >= 0 && CE->getValue() <= 31; - } - - template<A64SE::ShiftExtSpecifiers SHKind> bool isRegExtend() const { - if (!isShiftOrExtend()) return false; - - if (ShiftExtend.ShiftType != SHKind) - return false; - - return ShiftExtend.Amount <= 4; - } - - bool isRegExtendLSL() const { - if (!isShiftOrExtend()) return false; - - if (ShiftExtend.ShiftType != A64SE::LSL) - return false; - - return !ShiftExtend.ImplicitAmount && ShiftExtend.Amount <= 4; - } - - // if 0 < value <= w, return true - bool isShrFixedWidth(int w) const { - if (!isImm()) - return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) - return false; - int64_t Value = CE->getValue(); - return Value > 0 && Value <= w; - } - - bool isShrImm8() const { return isShrFixedWidth(8); } - - bool isShrImm16() const { return isShrFixedWidth(16); } - - bool isShrImm32() const { return isShrFixedWidth(32); } - - bool isShrImm64() const { return isShrFixedWidth(64); } - - // if 0 <= value < w, return true - bool isShlFixedWidth(int w) const { - if (!isImm()) - return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) - return false; - int64_t Value = CE->getValue(); - return Value >= 0 && Value < w; - } - - bool isShlImm8() const { return isShlFixedWidth(8); } - - bool isShlImm16() const { return isShlFixedWidth(16); } - - bool isShlImm32() const { return isShlFixedWidth(32); } - - bool isShlImm64() const { return isShlFixedWidth(64); } - - bool isNeonMovImmShiftLSL() const { - if (!isShiftOrExtend()) - return false; - - if (ShiftExtend.ShiftType != A64SE::LSL) - return false; - - // Valid shift amount is 0, 8, 16 and 24. - return ShiftExtend.Amount % 8 == 0 && ShiftExtend.Amount <= 24; - } - - bool isNeonMovImmShiftLSLH() const { - if (!isShiftOrExtend()) - return false; - - if (ShiftExtend.ShiftType != A64SE::LSL) - return false; - - // Valid shift amount is 0 and 8. - return ShiftExtend.Amount == 0 || ShiftExtend.Amount == 8; - } - - bool isNeonMovImmShiftMSL() const { - if (!isShiftOrExtend()) - return false; - - if (ShiftExtend.ShiftType != A64SE::MSL) - return false; - - // Valid shift amount is 8 and 16. 
- return ShiftExtend.Amount == 8 || ShiftExtend.Amount == 16; - } - - template <A64Layout::VectorLayout Layout, unsigned Count> - bool isVectorList() const { - return Kind == k_VectorList && VectorList.Layout == Layout && - VectorList.Count == Count; - } - - template <int MemSize> bool isSImm7Scaled() const { - if (!isImm()) - return false; - - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - - int64_t Val = CE->getValue(); - if (Val % MemSize != 0) return false; - - Val /= MemSize; - - return Val >= -64 && Val < 64; - } - - template<int BitWidth> - bool isSImm() const { - if (!isImm()) return false; - - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - - return CE->getValue() >= -(1LL << (BitWidth - 1)) - && CE->getValue() < (1LL << (BitWidth - 1)); - } - - template<int bitWidth> - bool isUImm() const { - if (!isImm()) return false; - - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - - return CE->getValue() >= 0 && CE->getValue() < (1LL << bitWidth); - } - - bool isUImm() const { - if (!isImm()) return false; - - return isa<MCConstantExpr>(getImm()); - } - - bool isNeonUImm64Mask() const { - if (!isImm()) - return false; - - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) - return false; - - uint64_t Value = CE->getValue(); - - // i64 value with each byte being either 0x00 or 0xff. - for (unsigned i = 0; i < 8; ++i, Value >>= 8) - if ((Value & 0xff) != 0 && (Value & 0xff) != 0xff) - return false; - return true; - } - - // if value == N, return true - template<int N> - bool isExactImm() const { - if (!isImm()) return false; - - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - - return CE->getValue() == N; - } - - bool isFPZeroIZero() const { - return isFPZero(); - } - - static AArch64Operand *CreateImmWithLSL(const MCExpr *Val, - unsigned ShiftAmount, - bool ImplicitAmount, - SMLoc S,SMLoc E) { - AArch64Operand *Op = new AArch64Operand(k_ImmWithLSL, S, E); - Op->ImmWithLSL.Val = Val; - Op->ImmWithLSL.ShiftAmount = ShiftAmount; - Op->ImmWithLSL.ImplicitAmount = ImplicitAmount; - return Op; - } - - static AArch64Operand *CreateCondCode(A64CC::CondCodes Code, - SMLoc S, SMLoc E) { - AArch64Operand *Op = new AArch64Operand(k_CondCode, S, E); - Op->CondCode.Code = Code; - return Op; - } - - static AArch64Operand *CreateFPImm(double Val, - SMLoc S, SMLoc E) { - AArch64Operand *Op = new AArch64Operand(k_FPImmediate, S, E); - Op->FPImm.Val = Val; - return Op; - } - - static AArch64Operand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) { - AArch64Operand *Op = new AArch64Operand(k_Immediate, S, E); - Op->Imm.Val = Val; - return Op; - } - - static AArch64Operand *CreateReg(unsigned RegNum, SMLoc S, SMLoc E) { - AArch64Operand *Op = new AArch64Operand(k_Register, S, E); - Op->Reg.RegNum = RegNum; - return Op; - } - - static AArch64Operand *CreateWrappedReg(unsigned RegNum, SMLoc S, SMLoc E) { - AArch64Operand *Op = new AArch64Operand(k_WrappedRegister, S, E); - Op->Reg.RegNum = RegNum; - return Op; - } - - static AArch64Operand *CreateShiftExtend(A64SE::ShiftExtSpecifiers ShiftTyp, - unsigned Amount, - bool ImplicitAmount, - SMLoc S, SMLoc E) { - AArch64Operand *Op = new AArch64Operand(k_ShiftExtend, S, E); - Op->ShiftExtend.ShiftType = ShiftTyp; - Op->ShiftExtend.Amount = Amount; - Op->ShiftExtend.ImplicitAmount = ImplicitAmount; - return Op; - } - - static AArch64Operand *CreateSysReg(StringRef Str, SMLoc S) { 
- AArch64Operand *Op = new AArch64Operand(k_SysReg, S, S); - Op->Tok.Data = Str.data(); - Op->Tok.Length = Str.size(); - return Op; - } - - static AArch64Operand *CreateVectorList(unsigned RegNum, unsigned Count, - A64Layout::VectorLayout Layout, - SMLoc S, SMLoc E) { - AArch64Operand *Op = new AArch64Operand(k_VectorList, S, E); - Op->VectorList.RegNum = RegNum; - Op->VectorList.Count = Count; - Op->VectorList.Layout = Layout; - Op->StartLoc = S; - Op->EndLoc = E; - return Op; - } - - static AArch64Operand *CreateToken(StringRef Str, SMLoc S) { - AArch64Operand *Op = new AArch64Operand(k_Token, S, S); - Op->Tok.Data = Str.data(); - Op->Tok.Length = Str.size(); - return Op; - } - - - void addExpr(MCInst &Inst, const MCExpr *Expr) const { - // Add as immediates when possible. - if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr)) - Inst.addOperand(MCOperand::CreateImm(CE->getValue())); - else - Inst.addOperand(MCOperand::CreateExpr(Expr)); - } - - template<unsigned RegWidth> - void addBFILSBOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *CE = cast<MCConstantExpr>(getImm()); - unsigned EncodedVal = (RegWidth - CE->getValue()) % RegWidth; - Inst.addOperand(MCOperand::CreateImm(EncodedVal)); - } - - void addBFIWidthOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *CE = cast<MCConstantExpr>(getImm()); - Inst.addOperand(MCOperand::CreateImm(CE->getValue() - 1)); - } - - void addBFXWidthOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - - uint64_t LSB = Inst.getOperand(Inst.getNumOperands()-1).getImm(); - const MCConstantExpr *CE = cast<MCConstantExpr>(getImm()); - - Inst.addOperand(MCOperand::CreateImm(LSB + CE->getValue() - 1)); - } - - void addCondCodeOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getCondCode())); - } - - void addCVTFixedPosOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - - const MCConstantExpr *CE = cast<MCConstantExpr>(getImm()); - Inst.addOperand(MCOperand::CreateImm(64 - CE->getValue())); - } - - void addFMOVImmOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - - APFloat RealVal(FPImm.Val); - uint32_t ImmVal; - A64Imms::isFPImm(RealVal, ImmVal); - - Inst.addOperand(MCOperand::CreateImm(ImmVal)); - } - - void addFPZeroOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands"); - Inst.addOperand(MCOperand::CreateImm(0)); - } - - void addFPZeroIZeroOperands(MCInst &Inst, unsigned N) const { - addFPZeroOperands(Inst, N); - } - - void addInvCondCodeOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - unsigned Encoded = A64InvertCondCode(getCondCode()); - Inst.addOperand(MCOperand::CreateImm(Encoded)); - } - - void addRegOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateReg(getReg())); - } - - void addImmOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - addExpr(Inst, getImm()); - } - - template<int MemSize> - void addSImm7ScaledOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - - const MCConstantExpr *CE = cast<MCConstantExpr>(getImm()); - uint64_t Val = CE->getValue() / 
MemSize; - Inst.addOperand(MCOperand::CreateImm(Val & 0x7f)); - } - - template<int BitWidth> - void addSImmOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - - const MCConstantExpr *CE = cast<MCConstantExpr>(getImm()); - uint64_t Val = CE->getValue(); - Inst.addOperand(MCOperand::CreateImm(Val & ((1ULL << BitWidth) - 1))); - } - - void addImmWithLSLOperands(MCInst &Inst, unsigned N) const { - assert (N == 1 && "Invalid number of operands!"); - - addExpr(Inst, ImmWithLSL.Val); - } - - template<unsigned field_width, unsigned scale> - void addLabelOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Imm.Val); - - if (!CE) { - addExpr(Inst, Imm.Val); - return; - } - - int64_t Val = CE->getValue(); - assert(Val % scale == 0 && "Unaligned immediate in instruction"); - Val /= scale; - - Inst.addOperand(MCOperand::CreateImm(Val & ((1LL << field_width) - 1))); - } - - template<int MemSize> - void addOffsetUImm12Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - - if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm())) { - Inst.addOperand(MCOperand::CreateImm(CE->getValue() / MemSize)); - } else { - Inst.addOperand(MCOperand::CreateExpr(getImm())); - } - } - - template<unsigned RegWidth> - void addLogicalImmOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands"); - const MCConstantExpr *CE = cast<MCConstantExpr>(Imm.Val); - - uint32_t Bits; - A64Imms::isLogicalImm(RegWidth, CE->getValue(), Bits); - - Inst.addOperand(MCOperand::CreateImm(Bits)); - } - - void addMRSOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - - bool Valid; - StringRef Name(SysReg.Data, SysReg.Length); - uint32_t Bits = A64SysReg::MRSMapper().fromString(Name, Valid); - - Inst.addOperand(MCOperand::CreateImm(Bits)); - } - - void addMSRWithRegOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - - bool Valid; - StringRef Name(SysReg.Data, SysReg.Length); - uint32_t Bits = A64SysReg::MSRMapper().fromString(Name, Valid); - - Inst.addOperand(MCOperand::CreateImm(Bits)); - } - - void addMSRPStateOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - - bool Valid; - StringRef Name(SysReg.Data, SysReg.Length); - uint32_t Bits = A64PState::PStateMapper().fromString(Name, Valid); - - Inst.addOperand(MCOperand::CreateImm(Bits)); - } - - void addMoveWideImmOperands(MCInst &Inst, unsigned N) const { - assert(N == 2 && "Invalid number of operands!"); - - addExpr(Inst, ImmWithLSL.Val); - - AArch64MCExpr::VariantKind Variant; - if (!isNonConstantExpr(ImmWithLSL.Val, Variant)) { - Inst.addOperand(MCOperand::CreateImm(ImmWithLSL.ShiftAmount / 16)); - return; - } - - // We know it's relocated - switch (Variant) { - case AArch64MCExpr::VK_AARCH64_ABS_G0: - case AArch64MCExpr::VK_AARCH64_ABS_G0_NC: - case AArch64MCExpr::VK_AARCH64_SABS_G0: - case AArch64MCExpr::VK_AARCH64_DTPREL_G0: - case AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC: - case AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC: - case AArch64MCExpr::VK_AARCH64_TPREL_G0: - case AArch64MCExpr::VK_AARCH64_TPREL_G0_NC: - Inst.addOperand(MCOperand::CreateImm(0)); - break; - case AArch64MCExpr::VK_AARCH64_ABS_G1: - case AArch64MCExpr::VK_AARCH64_ABS_G1_NC: - case AArch64MCExpr::VK_AARCH64_SABS_G1: - case AArch64MCExpr::VK_AARCH64_DTPREL_G1: - case 
AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC: - case AArch64MCExpr::VK_AARCH64_GOTTPREL_G1: - case AArch64MCExpr::VK_AARCH64_TPREL_G1: - case AArch64MCExpr::VK_AARCH64_TPREL_G1_NC: - Inst.addOperand(MCOperand::CreateImm(1)); - break; - case AArch64MCExpr::VK_AARCH64_ABS_G2: - case AArch64MCExpr::VK_AARCH64_ABS_G2_NC: - case AArch64MCExpr::VK_AARCH64_SABS_G2: - case AArch64MCExpr::VK_AARCH64_DTPREL_G2: - case AArch64MCExpr::VK_AARCH64_TPREL_G2: - Inst.addOperand(MCOperand::CreateImm(2)); - break; - case AArch64MCExpr::VK_AARCH64_ABS_G3: - Inst.addOperand(MCOperand::CreateImm(3)); - break; - default: llvm_unreachable("Inappropriate move wide relocation"); - } - } - - template<int RegWidth, bool isValidImm(int, uint64_t, int&, int&)> - void addMoveWideMovAliasOperands(MCInst &Inst, unsigned N) const { - assert(N == 2 && "Invalid number of operands!"); - int UImm16, Shift; - - const MCConstantExpr *CE = cast<MCConstantExpr>(getImm()); - uint64_t Value = CE->getValue(); - - if (RegWidth == 32) { - Value &= 0xffffffffULL; - } - - bool Valid = isValidImm(RegWidth, Value, UImm16, Shift); - (void)Valid; - assert(Valid && "Invalid immediates should have been weeded out by now"); - - Inst.addOperand(MCOperand::CreateImm(UImm16)); - Inst.addOperand(MCOperand::CreateImm(Shift)); - } - - void addPRFMOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - - const MCConstantExpr *CE = cast<MCConstantExpr>(getImm()); - assert(CE->getValue() >= 0 && CE->getValue() <= 31 - && "PRFM operand should be 5-bits"); - - Inst.addOperand(MCOperand::CreateImm(CE->getValue())); - } - - // For Add-sub (extended register) operands. - void addRegExtendOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - - Inst.addOperand(MCOperand::CreateImm(ShiftExtend.Amount)); - } - - // For Vector Immediates shifted imm operands. - void addNeonMovImmShiftLSLOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - - if (ShiftExtend.Amount % 8 != 0 || ShiftExtend.Amount > 24) - llvm_unreachable("Invalid shift amount for vector immediate inst."); - - // Encode LSL shift amount 0, 8, 16, 24 as 0, 1, 2, 3. - int64_t Imm = ShiftExtend.Amount / 8; - Inst.addOperand(MCOperand::CreateImm(Imm)); - } - - void addNeonMovImmShiftLSLHOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - - if (ShiftExtend.Amount != 0 && ShiftExtend.Amount != 8) - llvm_unreachable("Invalid shift amount for vector immediate inst."); - - // Encode LSLH shift amount 0, 8 as 0, 1. - int64_t Imm = ShiftExtend.Amount / 8; - Inst.addOperand(MCOperand::CreateImm(Imm)); - } - - void addNeonMovImmShiftMSLOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - - if (ShiftExtend.Amount != 8 && ShiftExtend.Amount != 16) - llvm_unreachable("Invalid shift amount for vector immediate inst."); - - // Encode MSL shift amount 8, 16 as 0, 1. - int64_t Imm = ShiftExtend.Amount / 8 - 1; - Inst.addOperand(MCOperand::CreateImm(Imm)); - } - - // For the extend in load-store (register offset) instructions. 
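The method below packs the extend kind and a "shift is meaningful" flag into a single immediate; a sketch of the same packing, assuming consistent inputs (the function name is illustrative):

// UXTW and LSL select the unsigned option (high bits = 1), SXTW and SXTX the
// signed option (high bits = 3). The low bit S records whether the shift
// matters: for single-byte accesses an explicit #0 sets it, for wider
// accesses any non-zero amount sets it.
static unsigned encodeRegOffsetExtend(bool IsSignedExtend, unsigned MemSize,
                                      bool ImplicitAmount, unsigned Amount) {
  unsigned OptionHi = IsSignedExtend ? 3 : 1;
  unsigned S = 0;
  if (MemSize == 1 && !ImplicitAmount)
    S = 1;
  else if (MemSize != 1 && Amount != 0)
    S = 1;
  return (OptionHi << 1) | S;
}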
- template<unsigned MemSize> - void addAddrRegExtendOperands(MCInst &Inst, unsigned N) const { - addAddrRegExtendOperands(Inst, N, MemSize); - } - - void addAddrRegExtendOperands(MCInst &Inst, unsigned N, - unsigned MemSize) const { - assert(N == 1 && "Invalid number of operands!"); - - // First bit of Option is set in instruction classes, the high two bits are - // as follows: - unsigned OptionHi = 0; - switch (ShiftExtend.ShiftType) { - case A64SE::UXTW: - case A64SE::LSL: - OptionHi = 1; - break; - case A64SE::SXTW: - case A64SE::SXTX: - OptionHi = 3; - break; - default: - llvm_unreachable("Invalid extend type for register offset"); - } - - unsigned S = 0; - if (MemSize == 1 && !ShiftExtend.ImplicitAmount) - S = 1; - else if (MemSize != 1 && ShiftExtend.Amount != 0) - S = 1; - - Inst.addOperand(MCOperand::CreateImm((OptionHi << 1) | S)); - } - void addShiftOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - - Inst.addOperand(MCOperand::CreateImm(ShiftExtend.Amount)); - } - - void addNeonUImm64MaskOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - - // A bit from each byte in the constant forms the encoded immediate - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - uint64_t Value = CE->getValue(); - - unsigned Imm = 0; - for (unsigned i = 0; i < 8; ++i, Value >>= 8) { - Imm |= (Value & 1) << i; - } - Inst.addOperand(MCOperand::CreateImm(Imm)); - } - - void addVectorListOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum)); - } -}; - -} // end anonymous namespace. - -AArch64AsmParser::OperandMatchResultTy -AArch64AsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, - StringRef Mnemonic) { - - // See if the operand has a custom parser - OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); - - // It could either succeed, fail or just not care. - if (ResTy != MatchOperand_NoMatch) - return ResTy; - - switch (getLexer().getKind()) { - default: - Error(Parser.getTok().getLoc(), "unexpected token in operand"); - return MatchOperand_ParseFail; - case AsmToken::Identifier: { - // It might be in the LSL/UXTB family ... - OperandMatchResultTy GotShift = ParseShiftExtend(Operands); - - // We can only continue if no tokens were eaten. - if (GotShift != MatchOperand_NoMatch) - return GotShift; - - // ... or it might be a register ... - uint32_t NumLanes = 0; - OperandMatchResultTy GotReg = ParseRegister(Operands, NumLanes); - assert(GotReg != MatchOperand_ParseFail - && "register parsing shouldn't partially succeed"); - - if (GotReg == MatchOperand_Success) { - if (Parser.getTok().is(AsmToken::LBrac)) - return ParseNEONLane(Operands, NumLanes); - else - return MatchOperand_Success; - } - // ... or it might be a symbolish thing - } - // Fall through - case AsmToken::LParen: // E.g. (strcmp-4) - case AsmToken::Integer: // 1f, 2b labels - case AsmToken::String: // quoted labels - case AsmToken::Dot: // . 
is Current location - case AsmToken::Dollar: // $ is PC - case AsmToken::Colon: { - SMLoc StartLoc = Parser.getTok().getLoc(); - SMLoc EndLoc; - const MCExpr *ImmVal = nullptr; - - if (ParseImmediate(ImmVal) != MatchOperand_Success) - return MatchOperand_ParseFail; - - EndLoc = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); - Operands.push_back(AArch64Operand::CreateImm(ImmVal, StartLoc, EndLoc)); - return MatchOperand_Success; - } - case AsmToken::Hash: { // Immediates - SMLoc StartLoc = Parser.getTok().getLoc(); - SMLoc EndLoc; - const MCExpr *ImmVal = nullptr; - Parser.Lex(); - - if (ParseImmediate(ImmVal) != MatchOperand_Success) - return MatchOperand_ParseFail; - - EndLoc = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); - Operands.push_back(AArch64Operand::CreateImm(ImmVal, StartLoc, EndLoc)); - return MatchOperand_Success; - } - case AsmToken::LBrac: { - SMLoc Loc = Parser.getTok().getLoc(); - Operands.push_back(AArch64Operand::CreateToken("[", Loc)); - Parser.Lex(); // Eat '[' - - // There's no comma after a '[', so we can parse the next operand - // immediately. - return ParseOperand(Operands, Mnemonic); - } - // The following will likely be useful later, but not in very early cases - case AsmToken::LCurly: // SIMD vector list is not parsed here - llvm_unreachable("Don't know how to deal with '{' in operand"); - return MatchOperand_ParseFail; - } -} - -AArch64AsmParser::OperandMatchResultTy -AArch64AsmParser::ParseImmediate(const MCExpr *&ExprVal) { - if (getLexer().is(AsmToken::Colon)) { - AArch64MCExpr::VariantKind RefKind; - - OperandMatchResultTy ResTy = ParseRelocPrefix(RefKind); - if (ResTy != MatchOperand_Success) - return ResTy; - - const MCExpr *SubExprVal; - if (getParser().parseExpression(SubExprVal)) - return MatchOperand_ParseFail; - - ExprVal = AArch64MCExpr::Create(RefKind, SubExprVal, getContext()); - return MatchOperand_Success; - } - - // No weird AArch64MCExpr prefix - return getParser().parseExpression(ExprVal) - ? MatchOperand_ParseFail : MatchOperand_Success; -} - -// A lane attached to a NEON register. "[N]", which should yield three tokens: -// '[', N, ']'. A hash is not allowed to precede the immediate here. 
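Before the ']' is consumed, the lane number only has to pass a bounds check against the layout already parsed; a minimal sketch of that check (the helper name is illustrative):

#include <cstdint>

// A lane index is valid when it is non-negative and strictly smaller than the
// number of lanes implied by the layout, e.g. 16 for ".16b" and 2 for ".2d".
static bool isValidLaneIndex(int64_t Lane, uint32_t NumLanes) {
  return Lane >= 0 && static_cast<uint64_t>(Lane) < NumLanes;
}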
-AArch64AsmParser::OperandMatchResultTy -AArch64AsmParser::ParseNEONLane(SmallVectorImpl<MCParsedAsmOperand*> &Operands, - uint32_t NumLanes) { - SMLoc Loc = Parser.getTok().getLoc(); - - assert(Parser.getTok().is(AsmToken::LBrac) && "inappropriate operand"); - Operands.push_back(AArch64Operand::CreateToken("[", Loc)); - Parser.Lex(); // Eat '[' - - if (Parser.getTok().isNot(AsmToken::Integer)) { - Error(Parser.getTok().getLoc(), "expected lane number"); - return MatchOperand_ParseFail; - } - - if (Parser.getTok().getIntVal() >= NumLanes) { - Error(Parser.getTok().getLoc(), "lane number incompatible with layout"); - return MatchOperand_ParseFail; - } - - const MCExpr *Lane = MCConstantExpr::Create(Parser.getTok().getIntVal(), - getContext()); - SMLoc S = Parser.getTok().getLoc(); - Parser.Lex(); // Eat actual lane - SMLoc E = Parser.getTok().getLoc(); - Operands.push_back(AArch64Operand::CreateImm(Lane, S, E)); - - - if (Parser.getTok().isNot(AsmToken::RBrac)) { - Error(Parser.getTok().getLoc(), "expected ']' after lane"); - return MatchOperand_ParseFail; - } - - Operands.push_back(AArch64Operand::CreateToken("]", Loc)); - Parser.Lex(); // Eat ']' - - return MatchOperand_Success; -} - -AArch64AsmParser::OperandMatchResultTy -AArch64AsmParser::ParseRelocPrefix(AArch64MCExpr::VariantKind &RefKind) { - assert(getLexer().is(AsmToken::Colon) && "expected a ':'"); - Parser.Lex(); - - if (getLexer().isNot(AsmToken::Identifier)) { - Error(Parser.getTok().getLoc(), - "expected relocation specifier in operand after ':'"); - return MatchOperand_ParseFail; - } - - std::string LowerCase = Parser.getTok().getIdentifier().lower(); - RefKind = StringSwitch<AArch64MCExpr::VariantKind>(LowerCase) - .Case("got", AArch64MCExpr::VK_AARCH64_GOT) - .Case("got_lo12", AArch64MCExpr::VK_AARCH64_GOT_LO12) - .Case("lo12", AArch64MCExpr::VK_AARCH64_LO12) - .Case("abs_g0", AArch64MCExpr::VK_AARCH64_ABS_G0) - .Case("abs_g0_nc", AArch64MCExpr::VK_AARCH64_ABS_G0_NC) - .Case("abs_g1", AArch64MCExpr::VK_AARCH64_ABS_G1) - .Case("abs_g1_nc", AArch64MCExpr::VK_AARCH64_ABS_G1_NC) - .Case("abs_g2", AArch64MCExpr::VK_AARCH64_ABS_G2) - .Case("abs_g2_nc", AArch64MCExpr::VK_AARCH64_ABS_G2_NC) - .Case("abs_g3", AArch64MCExpr::VK_AARCH64_ABS_G3) - .Case("abs_g0_s", AArch64MCExpr::VK_AARCH64_SABS_G0) - .Case("abs_g1_s", AArch64MCExpr::VK_AARCH64_SABS_G1) - .Case("abs_g2_s", AArch64MCExpr::VK_AARCH64_SABS_G2) - .Case("dtprel_g2", AArch64MCExpr::VK_AARCH64_DTPREL_G2) - .Case("dtprel_g1", AArch64MCExpr::VK_AARCH64_DTPREL_G1) - .Case("dtprel_g1_nc", AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC) - .Case("dtprel_g0", AArch64MCExpr::VK_AARCH64_DTPREL_G0) - .Case("dtprel_g0_nc", AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC) - .Case("dtprel_hi12", AArch64MCExpr::VK_AARCH64_DTPREL_HI12) - .Case("dtprel_lo12", AArch64MCExpr::VK_AARCH64_DTPREL_LO12) - .Case("dtprel_lo12_nc", AArch64MCExpr::VK_AARCH64_DTPREL_LO12_NC) - .Case("gottprel_g1", AArch64MCExpr::VK_AARCH64_GOTTPREL_G1) - .Case("gottprel_g0_nc", AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC) - .Case("gottprel", AArch64MCExpr::VK_AARCH64_GOTTPREL) - .Case("gottprel_lo12", AArch64MCExpr::VK_AARCH64_GOTTPREL_LO12) - .Case("tprel_g2", AArch64MCExpr::VK_AARCH64_TPREL_G2) - .Case("tprel_g1", AArch64MCExpr::VK_AARCH64_TPREL_G1) - .Case("tprel_g1_nc", AArch64MCExpr::VK_AARCH64_TPREL_G1_NC) - .Case("tprel_g0", AArch64MCExpr::VK_AARCH64_TPREL_G0) - .Case("tprel_g0_nc", AArch64MCExpr::VK_AARCH64_TPREL_G0_NC) - .Case("tprel_hi12", AArch64MCExpr::VK_AARCH64_TPREL_HI12) - .Case("tprel_lo12", 
AArch64MCExpr::VK_AARCH64_TPREL_LO12) - .Case("tprel_lo12_nc", AArch64MCExpr::VK_AARCH64_TPREL_LO12_NC) - .Case("tlsdesc", AArch64MCExpr::VK_AARCH64_TLSDESC) - .Case("tlsdesc_lo12", AArch64MCExpr::VK_AARCH64_TLSDESC_LO12) - .Default(AArch64MCExpr::VK_AARCH64_None); - - if (RefKind == AArch64MCExpr::VK_AARCH64_None) { - Error(Parser.getTok().getLoc(), - "expected relocation specifier in operand after ':'"); - return MatchOperand_ParseFail; - } - Parser.Lex(); // Eat identifier - - if (getLexer().isNot(AsmToken::Colon)) { - Error(Parser.getTok().getLoc(), - "expected ':' after relocation specifier"); - return MatchOperand_ParseFail; - } - Parser.Lex(); - return MatchOperand_Success; -} - -AArch64AsmParser::OperandMatchResultTy -AArch64AsmParser::ParseImmWithLSLOperand( - SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - - SMLoc S = Parser.getTok().getLoc(); - - if (Parser.getTok().is(AsmToken::Hash)) - Parser.Lex(); // Eat '#' - else if (Parser.getTok().isNot(AsmToken::Integer)) - // Operand should start from # or should be integer, emit error otherwise. - return MatchOperand_NoMatch; - - const MCExpr *Imm; - if (ParseImmediate(Imm) != MatchOperand_Success) - return MatchOperand_ParseFail; - else if (Parser.getTok().isNot(AsmToken::Comma)) { - SMLoc E = Parser.getTok().getLoc(); - Operands.push_back(AArch64Operand::CreateImmWithLSL(Imm, 0, true, S, E)); - return MatchOperand_Success; - } - - // Eat ',' - Parser.Lex(); - - // The optional operand must be "lsl #N" where N is non-negative. - if (Parser.getTok().is(AsmToken::Identifier) - && Parser.getTok().getIdentifier().equals_lower("lsl")) { - Parser.Lex(); - - if (Parser.getTok().is(AsmToken::Hash)) { - Parser.Lex(); - - if (Parser.getTok().isNot(AsmToken::Integer)) { - Error(Parser.getTok().getLoc(), "only 'lsl #+N' valid after immediate"); - return MatchOperand_ParseFail; - } - } - } - - int64_t ShiftAmount = Parser.getTok().getIntVal(); - - if (ShiftAmount < 0) { - Error(Parser.getTok().getLoc(), "positive shift amount required"); - return MatchOperand_ParseFail; - } - Parser.Lex(); // Eat the number - - SMLoc E = Parser.getTok().getLoc(); - Operands.push_back(AArch64Operand::CreateImmWithLSL(Imm, ShiftAmount, - false, S, E)); - return MatchOperand_Success; -} - - -AArch64AsmParser::OperandMatchResultTy -AArch64AsmParser::ParseCondCodeOperand( - SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - if (Parser.getTok().isNot(AsmToken::Identifier)) - return MatchOperand_NoMatch; - - StringRef Tok = Parser.getTok().getIdentifier(); - A64CC::CondCodes CondCode = A64StringToCondCode(Tok); - - if (CondCode == A64CC::Invalid) - return MatchOperand_NoMatch; - - SMLoc S = Parser.getTok().getLoc(); - Parser.Lex(); // Eat condition code - SMLoc E = Parser.getTok().getLoc(); - - Operands.push_back(AArch64Operand::CreateCondCode(CondCode, S, E)); - return MatchOperand_Success; -} - -AArch64AsmParser::OperandMatchResultTy -AArch64AsmParser::ParseCRxOperand( - SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - SMLoc S = Parser.getTok().getLoc(); - if (Parser.getTok().isNot(AsmToken::Identifier)) { - Error(S, "Expected cN operand where 0 <= N <= 15"); - return MatchOperand_ParseFail; - } - - StringRef Tok = Parser.getTok().getIdentifier(); - if (Tok[0] != 'c' && Tok[0] != 'C') { - Error(S, "Expected cN operand where 0 <= N <= 15"); - return MatchOperand_ParseFail; - } - - uint32_t CRNum; - bool BadNum = Tok.drop_front().getAsInteger(10, CRNum); - if (BadNum || CRNum > 15) { - Error(S, "Expected cN operand where 0 <= N <= 15"); - return 
MatchOperand_ParseFail; - } - - const MCExpr *CRImm = MCConstantExpr::Create(CRNum, getContext()); - - Parser.Lex(); - SMLoc E = Parser.getTok().getLoc(); - - Operands.push_back(AArch64Operand::CreateImm(CRImm, S, E)); - return MatchOperand_Success; -} - -AArch64AsmParser::OperandMatchResultTy -AArch64AsmParser::ParseFPImmOperand( - SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - - SMLoc S = Parser.getTok().getLoc(); - - bool Hash = false; - if (Parser.getTok().is(AsmToken::Hash)) { - Parser.Lex(); // Eat '#' - Hash = true; - } - - bool Negative = false; - if (Parser.getTok().is(AsmToken::Minus)) { - Negative = true; - Parser.Lex(); // Eat '-' - } else if (Parser.getTok().is(AsmToken::Plus)) { - Parser.Lex(); // Eat '+' - } - - if (Parser.getTok().isNot(AsmToken::Real)) { - if (!Hash) - return MatchOperand_NoMatch; - Error(S, "Expected floating-point immediate"); - return MatchOperand_ParseFail; - } - - APFloat RealVal(APFloat::IEEEdouble, Parser.getTok().getString()); - if (Negative) RealVal.changeSign(); - double DblVal = RealVal.convertToDouble(); - - Parser.Lex(); // Eat real number - SMLoc E = Parser.getTok().getLoc(); - - Operands.push_back(AArch64Operand::CreateFPImm(DblVal, S, E)); - return MatchOperand_Success; -} - -AArch64AsmParser::OperandMatchResultTy -AArch64AsmParser::ParseFPImm0AndImm0Operand( - SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - - SMLoc S = Parser.getTok().getLoc(); - - bool Hash = false; - if (Parser.getTok().is(AsmToken::Hash)) { - Parser.Lex(); // Eat '#' - Hash = true; - } - - APFloat RealVal(0.0); - if (Parser.getTok().is(AsmToken::Real)) { - if(Parser.getTok().getString() != "0.0") { - Error(S, "only #0.0 is acceptable as immediate"); - return MatchOperand_ParseFail; - } - } - else if (Parser.getTok().is(AsmToken::Integer)) { - if(Parser.getTok().getIntVal() != 0) { - Error(S, "only #0.0 is acceptable as immediate"); - return MatchOperand_ParseFail; - } - } - else { - if (!Hash) - return MatchOperand_NoMatch; - Error(S, "only #0.0 is acceptable as immediate"); - return MatchOperand_ParseFail; - } - - Parser.Lex(); // Eat real number - SMLoc E = Parser.getTok().getLoc(); - - Operands.push_back(AArch64Operand::CreateFPImm(0.0, S, E)); - return MatchOperand_Success; -} - -// Automatically generated -static unsigned MatchRegisterName(StringRef Name); - -bool -AArch64AsmParser::IdentifyRegister(unsigned &RegNum, SMLoc &RegEndLoc, - StringRef &Layout, - SMLoc &LayoutLoc) const { - const AsmToken &Tok = Parser.getTok(); - - if (Tok.isNot(AsmToken::Identifier)) - return false; - - std::string LowerReg = Tok.getString().lower(); - size_t DotPos = LowerReg.find('.'); - - bool IsVec128 = false; - SMLoc S = Tok.getLoc(); - RegEndLoc = SMLoc::getFromPointer(S.getPointer() + DotPos); - - if (DotPos == std::string::npos) { - Layout = StringRef(); - } else { - // Everything afterwards needs to be a literal token, expected to be - // '.2d','.b' etc for vector registers. - - // This StringSwitch validates the input and (perhaps more importantly) - // gives us a permanent string to use in the token (a pointer into LowerReg - // would go out of scope when we return). - LayoutLoc = SMLoc::getFromPointer(S.getPointer() + DotPos + 1); - StringRef LayoutText = StringRef(LowerReg).substr(DotPos); - - // See if it's a 128-bit layout first. 
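The classification made below depends only on the total register width implied by the suffix; a compact sketch of the same split using standard C++ (the function name is illustrative):

#include <string>

// ".q", ".1q", ".d", ".2d", ".s", ".4s", ".h", ".8h", ".b" and ".16b" name a
// 128-bit register; ".1d", ".2s", ".4h" and ".8b" name a 64-bit one. Any
// other suffix is malformed.
static bool classifyVectorLayout(const std::string &Suffix, bool &IsVec128) {
  static const char *const Vec128[] = {".q", ".1q", ".d", ".2d", ".s",
                                       ".4s", ".h", ".8h", ".b", ".16b"};
  static const char *const Vec64[] = {".1d", ".2s", ".4h", ".8b"};
  for (const char *S : Vec128)
    if (Suffix == S) { IsVec128 = true; return true; }
  for (const char *S : Vec64)
    if (Suffix == S) { IsVec128 = false; return true; }
  return false;
}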
- Layout = StringSwitch<const char *>(LayoutText) - .Case(".q", ".q").Case(".1q", ".1q") - .Case(".d", ".d").Case(".2d", ".2d") - .Case(".s", ".s").Case(".4s", ".4s") - .Case(".h", ".h").Case(".8h", ".8h") - .Case(".b", ".b").Case(".16b", ".16b") - .Default(""); - - if (Layout.size() != 0) - IsVec128 = true; - else { - Layout = StringSwitch<const char *>(LayoutText) - .Case(".1d", ".1d") - .Case(".2s", ".2s") - .Case(".4h", ".4h") - .Case(".8b", ".8b") - .Default(""); - } - - if (Layout.size() == 0) { - // If we've still not pinned it down the register is malformed. - return false; - } - } - - RegNum = MatchRegisterName(LowerReg.substr(0, DotPos)); - if (RegNum == AArch64::NoRegister) { - RegNum = StringSwitch<unsigned>(LowerReg.substr(0, DotPos)) - .Case("ip0", AArch64::X16) - .Case("ip1", AArch64::X17) - .Case("fp", AArch64::X29) - .Case("lr", AArch64::X30) - .Case("v0", IsVec128 ? AArch64::Q0 : AArch64::D0) - .Case("v1", IsVec128 ? AArch64::Q1 : AArch64::D1) - .Case("v2", IsVec128 ? AArch64::Q2 : AArch64::D2) - .Case("v3", IsVec128 ? AArch64::Q3 : AArch64::D3) - .Case("v4", IsVec128 ? AArch64::Q4 : AArch64::D4) - .Case("v5", IsVec128 ? AArch64::Q5 : AArch64::D5) - .Case("v6", IsVec128 ? AArch64::Q6 : AArch64::D6) - .Case("v7", IsVec128 ? AArch64::Q7 : AArch64::D7) - .Case("v8", IsVec128 ? AArch64::Q8 : AArch64::D8) - .Case("v9", IsVec128 ? AArch64::Q9 : AArch64::D9) - .Case("v10", IsVec128 ? AArch64::Q10 : AArch64::D10) - .Case("v11", IsVec128 ? AArch64::Q11 : AArch64::D11) - .Case("v12", IsVec128 ? AArch64::Q12 : AArch64::D12) - .Case("v13", IsVec128 ? AArch64::Q13 : AArch64::D13) - .Case("v14", IsVec128 ? AArch64::Q14 : AArch64::D14) - .Case("v15", IsVec128 ? AArch64::Q15 : AArch64::D15) - .Case("v16", IsVec128 ? AArch64::Q16 : AArch64::D16) - .Case("v17", IsVec128 ? AArch64::Q17 : AArch64::D17) - .Case("v18", IsVec128 ? AArch64::Q18 : AArch64::D18) - .Case("v19", IsVec128 ? AArch64::Q19 : AArch64::D19) - .Case("v20", IsVec128 ? AArch64::Q20 : AArch64::D20) - .Case("v21", IsVec128 ? AArch64::Q21 : AArch64::D21) - .Case("v22", IsVec128 ? AArch64::Q22 : AArch64::D22) - .Case("v23", IsVec128 ? AArch64::Q23 : AArch64::D23) - .Case("v24", IsVec128 ? AArch64::Q24 : AArch64::D24) - .Case("v25", IsVec128 ? AArch64::Q25 : AArch64::D25) - .Case("v26", IsVec128 ? AArch64::Q26 : AArch64::D26) - .Case("v27", IsVec128 ? AArch64::Q27 : AArch64::D27) - .Case("v28", IsVec128 ? AArch64::Q28 : AArch64::D28) - .Case("v29", IsVec128 ? AArch64::Q29 : AArch64::D29) - .Case("v30", IsVec128 ? AArch64::Q30 : AArch64::D30) - .Case("v31", IsVec128 ? AArch64::Q31 : AArch64::D31) - .Default(AArch64::NoRegister); - } - if (RegNum == AArch64::NoRegister) - return false; - - return true; -} - -AArch64AsmParser::OperandMatchResultTy -AArch64AsmParser::ParseRegister(SmallVectorImpl<MCParsedAsmOperand*> &Operands, - uint32_t &NumLanes) { - unsigned RegNum; - StringRef Layout; - SMLoc RegEndLoc, LayoutLoc; - SMLoc S = Parser.getTok().getLoc(); - - if (!IdentifyRegister(RegNum, RegEndLoc, Layout, LayoutLoc)) - return MatchOperand_NoMatch; - - Operands.push_back(AArch64Operand::CreateReg(RegNum, S, RegEndLoc)); - - if (Layout.size() != 0) { - unsigned long long TmpLanes = 0; - llvm::getAsUnsignedInteger(Layout.substr(1), 10, TmpLanes); - if (TmpLanes != 0) { - NumLanes = TmpLanes; - } else { - // If the number of lanes isn't specified explicitly, a valid instruction - // will have an element specifier and be capable of acting on the entire - // vector register. 
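When no count is written in the suffix, the lane count follows from the element width alone; a sketch mirroring the switch that follows (the name is illustrative):

// For a full 128-bit register: bytes give 16 lanes, halfwords 8, words 4,
// doublewords 2 and a quadword 1.
static unsigned lanesForElement(char Element) {
  switch (Element) {
  case 'b': return 16;
  case 'h': return 8;
  case 's': return 4;
  case 'd': return 2;
  case 'q': return 1;
  default:  return 0; // unknown element specifier
  }
}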
- switch (Layout.back()) { - default: llvm_unreachable("Invalid layout specifier"); - case 'b': NumLanes = 16; break; - case 'h': NumLanes = 8; break; - case 's': NumLanes = 4; break; - case 'd': NumLanes = 2; break; - case 'q': NumLanes = 1; break; - } - } - - Operands.push_back(AArch64Operand::CreateToken(Layout, LayoutLoc)); - } - - Parser.Lex(); - return MatchOperand_Success; -} - -bool -AArch64AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, - SMLoc &EndLoc) { - // This callback is used for things like DWARF frame directives in - // assembly. They don't care about things like NEON layouts or lanes, they - // just want to be able to produce the DWARF register number. - StringRef LayoutSpec; - SMLoc RegEndLoc, LayoutLoc; - StartLoc = Parser.getTok().getLoc(); - - if (!IdentifyRegister(RegNo, RegEndLoc, LayoutSpec, LayoutLoc)) - return true; - - Parser.Lex(); - EndLoc = Parser.getTok().getLoc(); - - return false; -} - -AArch64AsmParser::OperandMatchResultTy -AArch64AsmParser::ParseNamedImmOperand(const NamedImmMapper &Mapper, - SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - // Since these operands occur in very limited circumstances, without - // alternatives, we actually signal an error if there is no match. If relaxing - // this, beware of unintended consequences: an immediate will be accepted - // during matching, no matter how it gets into the AArch64Operand. - const AsmToken &Tok = Parser.getTok(); - SMLoc S = Tok.getLoc(); - - if (Tok.is(AsmToken::Identifier)) { - bool ValidName; - uint32_t Code = Mapper.fromString(Tok.getString().lower(), ValidName); - - if (!ValidName) { - Error(S, "operand specifier not recognised"); - return MatchOperand_ParseFail; - } - - Parser.Lex(); // We're done with the identifier. Eat it - - SMLoc E = Parser.getTok().getLoc(); - const MCExpr *Imm = MCConstantExpr::Create(Code, getContext()); - Operands.push_back(AArch64Operand::CreateImm(Imm, S, E)); - return MatchOperand_Success; - } else if (Tok.is(AsmToken::Hash)) { - Parser.Lex(); - - const MCExpr *ImmVal; - if (ParseImmediate(ImmVal) != MatchOperand_Success) - return MatchOperand_ParseFail; - - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ImmVal); - if (!CE || CE->getValue() < 0 || !Mapper.validImm(CE->getValue())) { - Error(S, "Invalid immediate for instruction"); - return MatchOperand_ParseFail; - } - - SMLoc E = Parser.getTok().getLoc(); - Operands.push_back(AArch64Operand::CreateImm(ImmVal, S, E)); - return MatchOperand_Success; - } - - Error(S, "unexpected operand for instruction"); - return MatchOperand_ParseFail; -} - -AArch64AsmParser::OperandMatchResultTy -AArch64AsmParser::ParseSysRegOperand( - SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - const AsmToken &Tok = Parser.getTok(); - - // Any MSR/MRS operand will be an identifier, and we want to store it as some - // kind of string: SPSel is valid for two different forms of MSR with two - // different encodings. There's no collision at the moment, but the potential - // is there. 
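The stored string is later resolved through a name-to-encoding mapper with a validity flag; a standalone sketch of that lookup pattern, with example entries rather than the real tables:

#include <cstdint>
#include <map>
#include <string>

// Illustrative stand-in for the Mapper.fromString(Name, Valid) calls used by
// the MRS/MSR predicates earlier: look up a lower-cased register name and
// report whether it was recognised.
static uint32_t lookupSysRegEncoding(const std::string &Name, bool &Valid) {
  static const std::map<std::string, uint32_t> Table = {
      {"nzcv", 0xda10}, {"fpcr", 0xda20}, {"fpsr", 0xda21}};
  auto It = Table.find(Name);
  Valid = (It != Table.end());
  return Valid ? It->second : 0;
}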
- if (!Tok.is(AsmToken::Identifier)) { - return MatchOperand_NoMatch; - } - - SMLoc S = Tok.getLoc(); - Operands.push_back(AArch64Operand::CreateSysReg(Tok.getString(), S)); - Parser.Lex(); // Eat identifier - - return MatchOperand_Success; -} - -AArch64AsmParser::OperandMatchResultTy -AArch64AsmParser::ParseLSXAddressOperand( - SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - SMLoc S = Parser.getTok().getLoc(); - - unsigned RegNum; - SMLoc RegEndLoc, LayoutLoc; - StringRef Layout; - if(!IdentifyRegister(RegNum, RegEndLoc, Layout, LayoutLoc) - || !AArch64MCRegisterClasses[AArch64::GPR64xspRegClassID].contains(RegNum) - || Layout.size() != 0) { - // Check Layout.size because we don't want to let "x3.4s" or similar - // through. - return MatchOperand_NoMatch; - } - Parser.Lex(); // Eat register - - if (Parser.getTok().is(AsmToken::RBrac)) { - // We're done - SMLoc E = Parser.getTok().getLoc(); - Operands.push_back(AArch64Operand::CreateWrappedReg(RegNum, S, E)); - return MatchOperand_Success; - } - - // Otherwise, only ", #0" is valid - - if (Parser.getTok().isNot(AsmToken::Comma)) { - Error(Parser.getTok().getLoc(), "expected ',' or ']' after register"); - return MatchOperand_ParseFail; - } - Parser.Lex(); // Eat ',' - - if (Parser.getTok().isNot(AsmToken::Hash)) { - Error(Parser.getTok().getLoc(), "expected '#0'"); - return MatchOperand_ParseFail; - } - Parser.Lex(); // Eat '#' - - if (Parser.getTok().isNot(AsmToken::Integer) - || Parser.getTok().getIntVal() != 0 ) { - Error(Parser.getTok().getLoc(), "expected '#0'"); - return MatchOperand_ParseFail; - } - Parser.Lex(); // Eat '0' - - SMLoc E = Parser.getTok().getLoc(); - Operands.push_back(AArch64Operand::CreateWrappedReg(RegNum, S, E)); - return MatchOperand_Success; -} - -AArch64AsmParser::OperandMatchResultTy -AArch64AsmParser::ParseShiftExtend( - SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - StringRef IDVal = Parser.getTok().getIdentifier(); - std::string LowerID = IDVal.lower(); - - A64SE::ShiftExtSpecifiers Spec = - StringSwitch<A64SE::ShiftExtSpecifiers>(LowerID) - .Case("lsl", A64SE::LSL) - .Case("msl", A64SE::MSL) - .Case("lsr", A64SE::LSR) - .Case("asr", A64SE::ASR) - .Case("ror", A64SE::ROR) - .Case("uxtb", A64SE::UXTB) - .Case("uxth", A64SE::UXTH) - .Case("uxtw", A64SE::UXTW) - .Case("uxtx", A64SE::UXTX) - .Case("sxtb", A64SE::SXTB) - .Case("sxth", A64SE::SXTH) - .Case("sxtw", A64SE::SXTW) - .Case("sxtx", A64SE::SXTX) - .Default(A64SE::Invalid); - - if (Spec == A64SE::Invalid) - return MatchOperand_NoMatch; - - // Eat the shift - SMLoc S, E; - S = Parser.getTok().getLoc(); - Parser.Lex(); - - if (Spec != A64SE::LSL && Spec != A64SE::LSR && Spec != A64SE::ASR && - Spec != A64SE::ROR && Spec != A64SE::MSL) { - // The shift amount can be omitted for the extending versions, but not real - // shifts: - // add x0, x0, x0, uxtb - // is valid, and equivalent to - // add x0, x0, x0, uxtb #0 - - if (Parser.getTok().is(AsmToken::Comma) || - Parser.getTok().is(AsmToken::EndOfStatement) || - Parser.getTok().is(AsmToken::RBrac)) { - Operands.push_back(AArch64Operand::CreateShiftExtend(Spec, 0, true, - S, E)); - return MatchOperand_Success; - } - } - - // Eat # at beginning of immediate - if (!Parser.getTok().is(AsmToken::Hash)) { - Error(Parser.getTok().getLoc(), - "expected #imm after shift specifier"); - return MatchOperand_ParseFail; - } - Parser.Lex(); - - // Make sure we do actually have a number - if (!Parser.getTok().is(AsmToken::Integer)) { - Error(Parser.getTok().getLoc(), - "expected integer shift amount"); - return 
MatchOperand_ParseFail; - } - unsigned Amount = Parser.getTok().getIntVal(); - Parser.Lex(); - E = Parser.getTok().getLoc(); - - Operands.push_back(AArch64Operand::CreateShiftExtend(Spec, Amount, false, - S, E)); - - return MatchOperand_Success; -} - -/// Try to parse a vector register token, If it is a vector register, -/// the token is eaten and return true. Otherwise return false. -bool AArch64AsmParser::TryParseVector(uint32_t &RegNum, SMLoc &RegEndLoc, - StringRef &Layout, SMLoc &LayoutLoc) { - bool IsVector = true; - - if (!IdentifyRegister(RegNum, RegEndLoc, Layout, LayoutLoc)) - IsVector = false; - else if (!AArch64MCRegisterClasses[AArch64::FPR64RegClassID] - .contains(RegNum) && - !AArch64MCRegisterClasses[AArch64::FPR128RegClassID] - .contains(RegNum)) - IsVector = false; - else if (Layout.size() == 0) - IsVector = false; - - if (!IsVector) - Error(Parser.getTok().getLoc(), "expected vector type register"); - - Parser.Lex(); // Eat this token. - return IsVector; -} - - -// A vector list contains 1-4 consecutive registers. -// Now there are two kinds of vector list when number of vector > 1: -// (1) {Vn.layout, Vn+1.layout, ... , Vm.layout} -// (2) {Vn.layout - Vm.layout} -// If the layout is like .b/.h/.s/.d, also parse the lane. -AArch64AsmParser::OperandMatchResultTy AArch64AsmParser::ParseVectorList( - SmallVectorImpl<MCParsedAsmOperand *> &Operands) { - if (Parser.getTok().isNot(AsmToken::LCurly)) { - Error(Parser.getTok().getLoc(), "'{' expected"); - return MatchOperand_ParseFail; - } - SMLoc SLoc = Parser.getTok().getLoc(); - Parser.Lex(); // Eat '{' token. - - unsigned Reg, Count = 1; - StringRef LayoutStr; - SMLoc RegEndLoc, LayoutLoc; - if (!TryParseVector(Reg, RegEndLoc, LayoutStr, LayoutLoc)) - return MatchOperand_ParseFail; - - if (Parser.getTok().is(AsmToken::Minus)) { - Parser.Lex(); // Eat the minus. - - unsigned Reg2; - StringRef LayoutStr2; - SMLoc RegEndLoc2, LayoutLoc2; - SMLoc RegLoc2 = Parser.getTok().getLoc(); - - if (!TryParseVector(Reg2, RegEndLoc2, LayoutStr2, LayoutLoc2)) - return MatchOperand_ParseFail; - unsigned Space = (Reg < Reg2) ? (Reg2 - Reg) : (Reg2 + 32 - Reg); - - if (LayoutStr != LayoutStr2) { - Error(LayoutLoc2, "expected the same vector layout"); - return MatchOperand_ParseFail; - } - if (Space == 0 || Space > 3) { - Error(RegLoc2, "invalid number of vectors"); - return MatchOperand_ParseFail; - } - - Count += Space; - } else { - unsigned LastReg = Reg; - while (Parser.getTok().is(AsmToken::Comma)) { - Parser.Lex(); // Eat the comma. - unsigned Reg2; - StringRef LayoutStr2; - SMLoc RegEndLoc2, LayoutLoc2; - SMLoc RegLoc2 = Parser.getTok().getLoc(); - - if (!TryParseVector(Reg2, RegEndLoc2, LayoutStr2, LayoutLoc2)) - return MatchOperand_ParseFail; - unsigned Space = (LastReg < Reg2) ? (Reg2 - LastReg) - : (Reg2 + 32 - LastReg); - Count++; - - // The space between two vectors should be 1. And they should have the same layout. - // Total count shouldn't be great than 4 - if (Space != 1) { - Error(RegLoc2, "invalid space between two vectors"); - return MatchOperand_ParseFail; - } - if (LayoutStr != LayoutStr2) { - Error(LayoutLoc2, "expected the same vector layout"); - return MatchOperand_ParseFail; - } - if (Count > 4) { - Error(RegLoc2, "invalid number of vectors"); - return MatchOperand_ParseFail; - } - - LastReg = Reg2; - } - } - - if (Parser.getTok().isNot(AsmToken::RCurly)) { - Error(Parser.getTok().getLoc(), "'}' expected"); - return MatchOperand_ParseFail; - } - SMLoc ELoc = Parser.getTok().getLoc(); - Parser.Lex(); // Eat '}' token. 
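For the {Vn.layout - Vm.layout} form parsed above, the register count is computed modulo the 32 vector registers, so a list may wrap from V31 back to V0; a sketch of that arithmetic, assuming consecutively numbered registers (the helper name is illustrative):

// Distance from FirstReg to LastReg on a ring of 32 registers, plus one
// because both endpoints belong to the list; only totals of 2 to 4 are
// accepted for the range form.
static unsigned vectorListCount(unsigned FirstReg, unsigned LastReg) {
  unsigned Space = (FirstReg < LastReg) ? (LastReg - FirstReg)
                                        : (LastReg + 32 - FirstReg);
  return Space + 1;
}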
- - A64Layout::VectorLayout Layout = A64StringToVectorLayout(LayoutStr); - if (Count > 1) { // If count > 1, create vector list using super register. - bool IsVec64 = (Layout < A64Layout::VL_16B); - static unsigned SupRegIDs[3][2] = { - { AArch64::QPairRegClassID, AArch64::DPairRegClassID }, - { AArch64::QTripleRegClassID, AArch64::DTripleRegClassID }, - { AArch64::QQuadRegClassID, AArch64::DQuadRegClassID } - }; - unsigned SupRegID = SupRegIDs[Count - 2][static_cast<int>(IsVec64)]; - unsigned Sub0 = IsVec64 ? AArch64::dsub_0 : AArch64::qsub_0; - const MCRegisterInfo *MRI = getContext().getRegisterInfo(); - Reg = MRI->getMatchingSuperReg(Reg, Sub0, - &AArch64MCRegisterClasses[SupRegID]); - } - Operands.push_back( - AArch64Operand::CreateVectorList(Reg, Count, Layout, SLoc, ELoc)); - - if (Parser.getTok().is(AsmToken::LBrac)) { - uint32_t NumLanes = 0; - switch(Layout) { - case A64Layout::VL_B : NumLanes = 16; break; - case A64Layout::VL_H : NumLanes = 8; break; - case A64Layout::VL_S : NumLanes = 4; break; - case A64Layout::VL_D : NumLanes = 2; break; - default: - SMLoc Loc = getLexer().getLoc(); - Error(Loc, "expected comma before next operand"); - return MatchOperand_ParseFail; - } - return ParseNEONLane(Operands, NumLanes); - } else { - return MatchOperand_Success; - } -} - -// FIXME: We would really like to be able to tablegen'erate this. -bool AArch64AsmParser:: -validateInstruction(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - switch (Inst.getOpcode()) { - case AArch64::BFIwwii: - case AArch64::BFIxxii: - case AArch64::SBFIZwwii: - case AArch64::SBFIZxxii: - case AArch64::UBFIZwwii: - case AArch64::UBFIZxxii: { - unsigned ImmOps = Inst.getNumOperands() - 2; - int64_t ImmR = Inst.getOperand(ImmOps).getImm(); - int64_t ImmS = Inst.getOperand(ImmOps+1).getImm(); - - if (ImmR != 0 && ImmS >= ImmR) { - return Error(Operands[4]->getStartLoc(), - "requested insert overflows register"); - } - return false; - } - case AArch64::BFXILwwii: - case AArch64::BFXILxxii: - case AArch64::SBFXwwii: - case AArch64::SBFXxxii: - case AArch64::UBFXwwii: - case AArch64::UBFXxxii: { - unsigned ImmOps = Inst.getNumOperands() - 2; - int64_t ImmR = Inst.getOperand(ImmOps).getImm(); - int64_t ImmS = Inst.getOperand(ImmOps+1).getImm(); - int64_t RegWidth = 0; - switch (Inst.getOpcode()) { - case AArch64::SBFXxxii: case AArch64::UBFXxxii: case AArch64::BFXILxxii: - RegWidth = 64; - break; - case AArch64::SBFXwwii: case AArch64::UBFXwwii: case AArch64::BFXILwwii: - RegWidth = 32; - break; - } - - if (ImmS >= RegWidth || ImmS < ImmR) { - return Error(Operands[4]->getStartLoc(), - "requested extract overflows register"); - } - return false; - } - case AArch64::ICix: { - int64_t ImmVal = Inst.getOperand(0).getImm(); - A64IC::ICValues ICOp = static_cast<A64IC::ICValues>(ImmVal); - if (!A64IC::NeedsRegister(ICOp)) { - return Error(Operands[1]->getStartLoc(), - "specified IC op does not use a register"); - } - return false; - } - case AArch64::ICi: { - int64_t ImmVal = Inst.getOperand(0).getImm(); - A64IC::ICValues ICOp = static_cast<A64IC::ICValues>(ImmVal); - if (A64IC::NeedsRegister(ICOp)) { - return Error(Operands[1]->getStartLoc(), - "specified IC op requires a register"); - } - return false; - } - case AArch64::TLBIix: { - int64_t ImmVal = Inst.getOperand(0).getImm(); - A64TLBI::TLBIValues TLBIOp = static_cast<A64TLBI::TLBIValues>(ImmVal); - if (!A64TLBI::NeedsRegister(TLBIOp)) { - return Error(Operands[1]->getStartLoc(), - "specified TLBI op does not use a register"); - } - return 
false; - } - case AArch64::TLBIi: { - int64_t ImmVal = Inst.getOperand(0).getImm(); - A64TLBI::TLBIValues TLBIOp = static_cast<A64TLBI::TLBIValues>(ImmVal); - if (A64TLBI::NeedsRegister(TLBIOp)) { - return Error(Operands[1]->getStartLoc(), - "specified TLBI op requires a register"); - } - return false; - } - } - - return false; -} - - -// Parses the instruction *together with* all operands, appending each parsed -// operand to the "Operands" list -bool AArch64AsmParser::ParseInstruction(ParseInstructionInfo &Info, - StringRef Name, SMLoc NameLoc, - SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - StringRef PatchedName = StringSwitch<StringRef>(Name.lower()) - .Case("beq", "b.eq") - .Case("bne", "b.ne") - .Case("bhs", "b.hs") - .Case("bcs", "b.cs") - .Case("blo", "b.lo") - .Case("bcc", "b.cc") - .Case("bmi", "b.mi") - .Case("bpl", "b.pl") - .Case("bvs", "b.vs") - .Case("bvc", "b.vc") - .Case("bhi", "b.hi") - .Case("bls", "b.ls") - .Case("bge", "b.ge") - .Case("blt", "b.lt") - .Case("bgt", "b.gt") - .Case("ble", "b.le") - .Case("bal", "b.al") - .Case("bnv", "b.nv") - .Default(Name); - - size_t CondCodePos = PatchedName.find('.'); - - StringRef Mnemonic = PatchedName.substr(0, CondCodePos); - Operands.push_back(AArch64Operand::CreateToken(Mnemonic, NameLoc)); - - if (CondCodePos != StringRef::npos) { - // We have a condition code - SMLoc S = SMLoc::getFromPointer(NameLoc.getPointer() + CondCodePos + 1); - StringRef CondStr = PatchedName.substr(CondCodePos + 1, StringRef::npos); - A64CC::CondCodes Code; - - Code = A64StringToCondCode(CondStr); - - if (Code == A64CC::Invalid) { - Error(S, "invalid condition code"); - Parser.eatToEndOfStatement(); - return true; - } - - SMLoc DotL = SMLoc::getFromPointer(NameLoc.getPointer() + CondCodePos); - - Operands.push_back(AArch64Operand::CreateToken(".", DotL)); - SMLoc E = SMLoc::getFromPointer(NameLoc.getPointer() + CondCodePos + 3); - Operands.push_back(AArch64Operand::CreateCondCode(Code, S, E)); - } - - // Now we parse the operands of this instruction - if (getLexer().isNot(AsmToken::EndOfStatement)) { - // Read the first operand. - if (ParseOperand(Operands, Mnemonic)) { - Parser.eatToEndOfStatement(); - return true; - } - - while (getLexer().is(AsmToken::Comma)) { - Parser.Lex(); // Eat the comma. - - // Parse and remember the operand. - if (ParseOperand(Operands, Mnemonic)) { - Parser.eatToEndOfStatement(); - return true; - } - - - // After successfully parsing some operands there are two special cases to - // consider (i.e. notional operands not separated by commas). Both are due - // to memory specifiers: - // + An RBrac will end an address for load/store/prefetch - // + An '!' will indicate a pre-indexed operation. - // - // It's someone else's responsibility to make sure these tokens are sane - // in the given context! 
- if (Parser.getTok().is(AsmToken::RBrac)) { - SMLoc Loc = Parser.getTok().getLoc(); - Operands.push_back(AArch64Operand::CreateToken("]", Loc)); - Parser.Lex(); - } - - if (Parser.getTok().is(AsmToken::Exclaim)) { - SMLoc Loc = Parser.getTok().getLoc(); - Operands.push_back(AArch64Operand::CreateToken("!", Loc)); - Parser.Lex(); - } - } - } - - if (getLexer().isNot(AsmToken::EndOfStatement)) { - SMLoc Loc = getLexer().getLoc(); - Parser.eatToEndOfStatement(); - return Error(Loc, "expected comma before next operand"); - } - - // Eat the EndOfStatement - Parser.Lex(); - - return false; -} - -bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) { - StringRef IDVal = DirectiveID.getIdentifier(); - if (IDVal == ".hword") - return ParseDirectiveWord(2, DirectiveID.getLoc()); - else if (IDVal == ".word") - return ParseDirectiveWord(4, DirectiveID.getLoc()); - else if (IDVal == ".xword") - return ParseDirectiveWord(8, DirectiveID.getLoc()); - else if (IDVal == ".tlsdesccall") - return ParseDirectiveTLSDescCall(DirectiveID.getLoc()); - - return true; -} - -/// parseDirectiveWord -/// ::= .word [ expression (, expression)* ] -bool AArch64AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { - if (getLexer().isNot(AsmToken::EndOfStatement)) { - for (;;) { - const MCExpr *Value; - if (getParser().parseExpression(Value)) - return false; - - getParser().getStreamer().EmitValue(Value, Size); - - if (getLexer().is(AsmToken::EndOfStatement)) - break; - - // FIXME: Improve diagnostic. - if (getLexer().isNot(AsmToken::Comma)) { - Error(L, "unexpected token in directive"); - return false; - } - Parser.Lex(); - } - } - - Parser.Lex(); - return false; -} - -// parseDirectiveTLSDescCall: -// ::= .tlsdesccall symbol -bool AArch64AsmParser::ParseDirectiveTLSDescCall(SMLoc L) { - StringRef Name; - if (getParser().parseIdentifier(Name)) { - Error(L, "expected symbol after directive"); - return false; - } - - MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); - const MCSymbolRefExpr *Expr = MCSymbolRefExpr::Create(Sym, getContext()); - - MCInst Inst; - Inst.setOpcode(AArch64::TLSDESCCALL); - Inst.addOperand(MCOperand::CreateExpr(Expr)); - - getParser().getStreamer().EmitInstruction(Inst, STI); - return false; -} - - -bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, - SmallVectorImpl<MCParsedAsmOperand*> &Operands, - MCStreamer &Out, unsigned &ErrorInfo, - bool MatchingInlineAsm) { - MCInst Inst; - unsigned MatchResult; - MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo, - MatchingInlineAsm); - - if (ErrorInfo != ~0U && ErrorInfo >= Operands.size()) - return Error(IDLoc, "too few operands for instruction"); - - switch (MatchResult) { - default: break; - case Match_Success: - if (validateInstruction(Inst, Operands)) - return true; - - Out.EmitInstruction(Inst, STI); - return false; - case Match_MissingFeature: - Error(IDLoc, "instruction requires a CPU feature not currently enabled"); - return true; - case Match_InvalidOperand: { - SMLoc ErrorLoc = IDLoc; - if (ErrorInfo != ~0U) { - ErrorLoc = ((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(); - if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; - } - - return Error(ErrorLoc, "invalid operand for instruction"); - } - case Match_MnemonicFail: - return Error(IDLoc, "invalid instruction"); - - case Match_AddSubRegExtendSmall: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected '[su]xt[bhw]' or 'lsl' with optional integer in range [0, 4]"); - case Match_AddSubRegExtendLarge: - return 
Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]"); - case Match_AddSubRegShift32: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 31]"); - case Match_AddSubRegShift64: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 63]"); - case Match_AddSubSecondSource: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected compatible register, symbol or integer in range [0, 4095]"); - case Match_CVTFixedPos32: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [1, 32]"); - case Match_CVTFixedPos64: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [1, 64]"); - case Match_CondCode: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected AArch64 condition code"); - case Match_FPImm: - // Any situation which allows a nontrivial floating-point constant also - // allows a register. - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected compatible register or floating-point constant"); - case Match_FPZero: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected floating-point constant #0.0 or invalid register type"); - case Match_Label: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected label or encodable integer pc offset"); - case Match_Lane1: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected lane specifier '[1]'"); - case Match_LoadStoreExtend32_1: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected 'uxtw' or 'sxtw' with optional shift of #0"); - case Match_LoadStoreExtend32_2: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected 'uxtw' or 'sxtw' with optional shift of #0 or #1"); - case Match_LoadStoreExtend32_4: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected 'uxtw' or 'sxtw' with optional shift of #0 or #2"); - case Match_LoadStoreExtend32_8: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected 'uxtw' or 'sxtw' with optional shift of #0 or #3"); - case Match_LoadStoreExtend32_16: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected 'lsl' or 'sxtw' with optional shift of #0 or #4"); - case Match_LoadStoreExtend64_1: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected 'lsl' or 'sxtx' with optional shift of #0"); - case Match_LoadStoreExtend64_2: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected 'lsl' or 'sxtx' with optional shift of #0 or #1"); - case Match_LoadStoreExtend64_4: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected 'lsl' or 'sxtx' with optional shift of #0 or #2"); - case Match_LoadStoreExtend64_8: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected 'lsl' or 'sxtx' with optional shift of #0 or #3"); - case Match_LoadStoreExtend64_16: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected 'lsl' or 'sxtx' with optional shift of #0 or #4"); - case Match_LoadStoreSImm7_4: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected integer multiple of 4 in 
range [-256, 252]"); - case Match_LoadStoreSImm7_8: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected integer multiple of 8 in range [-512, 504]"); - case Match_LoadStoreSImm7_16: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected integer multiple of 16 in range [-1024, 1008]"); - case Match_LoadStoreSImm9: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [-256, 255]"); - case Match_LoadStoreUImm12_1: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected symbolic reference or integer in range [0, 4095]"); - case Match_LoadStoreUImm12_2: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected symbolic reference or integer in range [0, 8190]"); - case Match_LoadStoreUImm12_4: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected symbolic reference or integer in range [0, 16380]"); - case Match_LoadStoreUImm12_8: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected symbolic reference or integer in range [0, 32760]"); - case Match_LoadStoreUImm12_16: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected symbolic reference or integer in range [0, 65520]"); - case Match_LogicalSecondSource: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected compatible register or logical immediate"); - case Match_MOVWUImm16: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected relocated symbol or integer in range [0, 65535]"); - case Match_MRS: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected readable system register"); - case Match_MSR: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected writable system register or pstate"); - case Match_NamedImm_at: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected symbolic 'at' operand: s1e[0-3][rw] or s12e[01][rw]"); - case Match_NamedImm_dbarrier: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [0, 15] or symbolic barrier operand"); - case Match_NamedImm_dc: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected symbolic 'dc' operand"); - case Match_NamedImm_ic: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected 'ic' operand: 'ialluis', 'iallu' or 'ivau'"); - case Match_NamedImm_isb: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [0, 15] or 'sy'"); - case Match_NamedImm_prefetch: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected prefetch hint: p(ld|st|i)l[123](strm|keep)"); - case Match_NamedImm_tlbi: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected translation buffer invalidation operand"); - case Match_UImm16: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [0, 65535]"); - case Match_UImm3: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [0, 7]"); - case Match_UImm4: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [0, 15]"); - case Match_UImm5: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [0, 31]"); - case Match_UImm6: - return 
Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [0, 63]"); - case Match_UImm7: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [0, 127]"); - case Match_Width32: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [<lsb>, 31]"); - case Match_Width64: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [<lsb>, 63]"); - case Match_ShrImm8: - return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [1, 8]"); - case Match_ShrImm16: - return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [1, 16]"); - case Match_ShrImm32: - return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [1, 32]"); - case Match_ShrImm64: - return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [1, 64]"); - case Match_ShlImm8: - return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [0, 7]"); - case Match_ShlImm16: - return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [0, 15]"); - case Match_ShlImm32: - return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [0, 31]"); - case Match_ShlImm64: - return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [0, 63]"); - } - - llvm_unreachable("Implement any new match types added!"); - return true; -} - -void AArch64Operand::print(raw_ostream &OS) const { - switch (Kind) { - case k_CondCode: - OS << "<CondCode: " << CondCode.Code << ">"; - break; - case k_FPImmediate: - OS << "<fpimm: " << FPImm.Val << ">"; - break; - case k_ImmWithLSL: - OS << "<immwithlsl: imm=" << ImmWithLSL.Val - << ", shift=" << ImmWithLSL.ShiftAmount << ">"; - break; - case k_Immediate: - getImm()->print(OS); - break; - case k_Register: - OS << "<register " << getReg() << '>'; - break; - case k_Token: - OS << '\'' << getToken() << '\''; - break; - case k_ShiftExtend: - OS << "<shift: type=" << ShiftExtend.ShiftType - << ", amount=" << ShiftExtend.Amount << ">"; - break; - case k_SysReg: { - StringRef Name(SysReg.Data, SysReg.Length); - OS << "<sysreg: " << Name << '>'; - break; - } - default: - llvm_unreachable("No idea how to print this kind of operand"); - break; - } -} - -void AArch64Operand::dump() const { - print(errs()); -} - - -/// Force static initialization. 
-extern "C" void LLVMInitializeAArch64AsmParser() { - RegisterMCAsmParser<AArch64AsmParser> X(TheAArch64leTarget); - RegisterMCAsmParser<AArch64AsmParser> Y(TheAArch64beTarget); -} - -#define GET_REGISTER_MATCHER -#define GET_MATCHER_IMPLEMENTATION -#include "AArch64GenAsmMatcher.inc" diff --git a/llvm/lib/Target/AArch64/AsmParser/CMakeLists.txt b/llvm/lib/Target/AArch64/AsmParser/CMakeLists.txt deleted file mode 100644 index e81ec70437a..00000000000 --- a/llvm/lib/Target/AArch64/AsmParser/CMakeLists.txt +++ /dev/null @@ -1,3 +0,0 @@ -add_llvm_library(LLVMAArch64AsmParser - AArch64AsmParser.cpp - ) diff --git a/llvm/lib/Target/AArch64/AsmParser/LLVMBuild.txt b/llvm/lib/Target/AArch64/AsmParser/LLVMBuild.txt deleted file mode 100644 index 2d8f6321237..00000000000 --- a/llvm/lib/Target/AArch64/AsmParser/LLVMBuild.txt +++ /dev/null @@ -1,23 +0,0 @@ -;===- ./lib/Target/AArch64/AsmParser/LLVMBuild.txt -------------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. -; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[component_0] -type = Library -name = AArch64AsmParser -parent = AArch64 -required_libraries = AArch64Desc AArch64Info AArch64Utils MC MCParser Support -add_to_library_groups = AArch64 diff --git a/llvm/lib/Target/AArch64/AsmParser/Makefile b/llvm/lib/Target/AArch64/AsmParser/Makefile deleted file mode 100644 index 56c9ef52ea5..00000000000 --- a/llvm/lib/Target/AArch64/AsmParser/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/AArch64/AsmParser/Makefile ---------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMAArch64AsmParser - -# Hack: we need to include 'main' target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
- -include $(LEVEL)/Makefile.common diff --git a/llvm/lib/Target/AArch64/CMakeLists.txt b/llvm/lib/Target/AArch64/CMakeLists.txt deleted file mode 100644 index dfc10afcdcf..00000000000 --- a/llvm/lib/Target/AArch64/CMakeLists.txt +++ /dev/null @@ -1,37 +0,0 @@ -set(LLVM_TARGET_DEFINITIONS AArch64.td) - -tablegen(LLVM AArch64GenAsmMatcher.inc -gen-asm-matcher) -tablegen(LLVM AArch64GenAsmWriter.inc -gen-asm-writer) -tablegen(LLVM AArch64GenCallingConv.inc -gen-callingconv) -tablegen(LLVM AArch64GenDisassemblerTables.inc -gen-disassembler) -tablegen(LLVM AArch64GenInstrInfo.inc -gen-instr-info) -tablegen(LLVM AArch64GenMCCodeEmitter.inc -gen-emitter -mc-emitter) -tablegen(LLVM AArch64GenMCPseudoLowering.inc -gen-pseudo-lowering) -tablegen(LLVM AArch64GenRegisterInfo.inc -gen-register-info) -tablegen(LLVM AArch64GenDAGISel.inc -gen-dag-isel) -tablegen(LLVM AArch64GenSubtargetInfo.inc -gen-subtarget) -add_public_tablegen_target(AArch64CommonTableGen) - -add_llvm_target(AArch64CodeGen - AArch64AsmPrinter.cpp - AArch64BranchFixupPass.cpp - AArch64FrameLowering.cpp - AArch64ISelDAGToDAG.cpp - AArch64ISelLowering.cpp - AArch64InstrInfo.cpp - AArch64MachineFunctionInfo.cpp - AArch64MCInstLower.cpp - AArch64RegisterInfo.cpp - AArch64SelectionDAGInfo.cpp - AArch64Subtarget.cpp - AArch64TargetMachine.cpp - AArch64TargetObjectFile.cpp - AArch64TargetTransformInfo.cpp - ) - -add_subdirectory(AsmParser) -add_subdirectory(Disassembler) -add_subdirectory(InstPrinter) -add_subdirectory(MCTargetDesc) -add_subdirectory(TargetInfo) -add_subdirectory(Utils) diff --git a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp deleted file mode 100644 index 01f1497dc33..00000000000 --- a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp +++ /dev/null @@ -1,1572 +0,0 @@ -//===- AArch64Disassembler.cpp - Disassembler for AArch64 ISA -------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the functions necessary to decode AArch64 instruction -// bitpatterns into MCInsts (with the help of TableGenerated information from -// the instruction definitions). -// -//===----------------------------------------------------------------------===// - -#include "AArch64.h" -#include "AArch64RegisterInfo.h" -#include "AArch64Subtarget.h" -#include "Utils/AArch64BaseInfo.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCDisassembler.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCFixedLenDisassembler.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCInstrDesc.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MemoryObject.h" -#include "llvm/Support/TargetRegistry.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -#define DEBUG_TYPE "arm-disassembler" - -typedef MCDisassembler::DecodeStatus DecodeStatus; - -namespace { -/// AArch64 disassembler for all AArch64 platforms. -class AArch64Disassembler : public MCDisassembler { -public: - /// Initializes the disassembler. - /// - AArch64Disassembler(const MCSubtargetInfo &STI, MCContext &Ctx) - : MCDisassembler(STI, Ctx) { - } - - ~AArch64Disassembler() {} - - /// See MCDisassembler. 
- DecodeStatus getInstruction(MCInst &instr, - uint64_t &size, - const MemoryObject ®ion, - uint64_t address, - raw_ostream &vStream, - raw_ostream &cStream) const override; -}; - -} - -// Forward-declarations used in the auto-generated files. -static DecodeStatus DecodeGPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); -static DecodeStatus -DecodeGPR64xspRegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); - -static DecodeStatus DecodeGPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); -static DecodeStatus -DecodeGPR32wspRegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); - -static DecodeStatus DecodeFPR8RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeFPR16RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeFPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeFPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeFPR64LoRegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeFPR128RegisterClass(llvm::MCInst &Inst, - unsigned RegNo, uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeFPR128LoRegisterClass(llvm::MCInst &Inst, - unsigned RegNo, uint64_t Address, - const void *Decoder); - -static DecodeStatus DecodeGPR64noxzrRegisterClass(llvm::MCInst &Inst, - unsigned RegNo, - uint64_t Address, - const void *Decoder); - -static DecodeStatus DecodeDPairRegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeQPairRegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeDTripleRegisterClass(llvm::MCInst &Inst, - unsigned RegNo, uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeQTripleRegisterClass(llvm::MCInst &Inst, - unsigned RegNo, uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeDQuadRegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeQQuadRegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder); - -static DecodeStatus DecodeAddrRegExtendOperand(llvm::MCInst &Inst, - unsigned OptionHiS, - uint64_t Address, - const void *Decoder); - - -static DecodeStatus DecodeBitfield32ImmOperand(llvm::MCInst &Inst, - unsigned Imm6Bits, - uint64_t Address, - const void *Decoder); - -static DecodeStatus DecodeCVT32FixedPosOperand(llvm::MCInst &Inst, - unsigned Imm6Bits, - uint64_t Address, - const void *Decoder); - -static DecodeStatus DecodeFPZeroOperand(llvm::MCInst &Inst, - unsigned RmBits, - uint64_t Address, - const void *Decoder); - -static DecodeStatus DecodeShiftRightImm8(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeShiftRightImm16(MCInst &Inst, unsigned Val, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeShiftRightImm32(MCInst &Inst, unsigned Val, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeShiftRightImm64(MCInst &Inst, unsigned Val, - uint64_t Address, - const void *Decoder); - -static DecodeStatus 
DecodeShiftLeftImm8(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeShiftLeftImm16(MCInst &Inst, unsigned Val, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeShiftLeftImm32(MCInst &Inst, unsigned Val, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeShiftLeftImm64(MCInst &Inst, unsigned Val, - uint64_t Address, - const void *Decoder); - -template<int RegWidth> -static DecodeStatus DecodeMoveWideImmOperand(llvm::MCInst &Inst, - unsigned FullImm, - uint64_t Address, - const void *Decoder); - -template<int RegWidth> -static DecodeStatus DecodeLogicalImmOperand(llvm::MCInst &Inst, - unsigned Bits, - uint64_t Address, - const void *Decoder); - -static DecodeStatus DecodeRegExtendOperand(llvm::MCInst &Inst, - unsigned ShiftAmount, - uint64_t Address, - const void *Decoder); -template <A64SE::ShiftExtSpecifiers Ext, bool IsHalf> -static DecodeStatus -DecodeNeonMovImmShiftOperand(llvm::MCInst &Inst, unsigned ShiftAmount, - uint64_t Address, const void *Decoder); - -static DecodeStatus Decode32BitShiftOperand(llvm::MCInst &Inst, - unsigned ShiftAmount, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeBitfieldInstruction(llvm::MCInst &Inst, unsigned Insn, - uint64_t Address, - const void *Decoder); - -static DecodeStatus DecodeFMOVLaneInstruction(llvm::MCInst &Inst, unsigned Insn, - uint64_t Address, - const void *Decoder); - -static DecodeStatus DecodeLDSTPairInstruction(llvm::MCInst &Inst, - unsigned Insn, - uint64_t Address, - const void *Decoder); - -static DecodeStatus DecodeLoadPairExclusiveInstruction(llvm::MCInst &Inst, - unsigned Val, - uint64_t Address, - const void *Decoder); - -template<typename SomeNamedImmMapper> -static DecodeStatus DecodeNamedImmOperand(llvm::MCInst &Inst, - unsigned Val, - uint64_t Address, - const void *Decoder); - -static DecodeStatus -DecodeSysRegOperand(const A64SysReg::SysRegMapper &InstMapper, - llvm::MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); - -static DecodeStatus DecodeMRSOperand(llvm::MCInst &Inst, - unsigned Val, - uint64_t Address, - const void *Decoder); - -static DecodeStatus DecodeMSROperand(llvm::MCInst &Inst, - unsigned Val, - uint64_t Address, - const void *Decoder); - - -static DecodeStatus DecodeSingleIndexedInstruction(llvm::MCInst &Inst, - unsigned Val, - uint64_t Address, - const void *Decoder); - -static DecodeStatus DecodeVLDSTPostInstruction(MCInst &Inst, unsigned Val, - uint64_t Address, - const void *Decoder); - -static DecodeStatus DecodeVLDSTLanePostInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, - const void *Decoder); - -static DecodeStatus DecodeSHLLInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, - const void *Decoder); - -static bool Check(DecodeStatus &Out, DecodeStatus In); - -#include "AArch64GenDisassemblerTables.inc" - -static bool Check(DecodeStatus &Out, DecodeStatus In) { - switch (In) { - case MCDisassembler::Success: - // Out stays the same. - return true; - case MCDisassembler::SoftFail: - Out = In; - return true; - case MCDisassembler::Fail: - Out = In; - return false; - } - llvm_unreachable("Invalid DecodeStatus!"); -} - -DecodeStatus AArch64Disassembler::getInstruction(MCInst &MI, uint64_t &Size, - const MemoryObject &Region, - uint64_t Address, - raw_ostream &os, - raw_ostream &cs) const { - CommentStream = &cs; - - uint8_t bytes[4]; - - // We want to read exactly 4 bytes of data. 
- if (Region.readBytes(Address, 4, bytes) == -1) { - Size = 0; - return MCDisassembler::Fail; - } - - // Encoded as a small-endian 32-bit word in the stream. - uint32_t insn = (bytes[3] << 24) | - (bytes[2] << 16) | - (bytes[1] << 8) | - (bytes[0] << 0); - - // Calling the auto-generated decoder function. - DecodeStatus result = decodeInstruction(DecoderTableA6432, MI, insn, Address, - this, STI); - if (result != MCDisassembler::Fail) { - Size = 4; - return result; - } - - MI.clear(); - Size = 0; - return MCDisassembler::Fail; -} - -static unsigned getReg(const void *D, unsigned RC, unsigned RegNo) { - const AArch64Disassembler *Dis = static_cast<const AArch64Disassembler*>(D); - const MCRegisterInfo *RegInfo = Dis->getContext().getRegisterInfo(); - return RegInfo->getRegClass(RC).getRegister(RegNo); -} - -static DecodeStatus DecodeGPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { - if (RegNo > 31) - return MCDisassembler::Fail; - - uint16_t Register = getReg(Decoder, AArch64::GPR64RegClassID, RegNo); - Inst.addOperand(MCOperand::CreateReg(Register)); - return MCDisassembler::Success; -} - -static DecodeStatus -DecodeGPR64xspRegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { - if (RegNo > 31) - return MCDisassembler::Fail; - - uint16_t Register = getReg(Decoder, AArch64::GPR64xspRegClassID, RegNo); - Inst.addOperand(MCOperand::CreateReg(Register)); - return MCDisassembler::Success; -} - -static DecodeStatus DecodeGPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder) { - if (RegNo > 31) - return MCDisassembler::Fail; - - uint16_t Register = getReg(Decoder, AArch64::GPR32RegClassID, RegNo); - Inst.addOperand(MCOperand::CreateReg(Register)); - return MCDisassembler::Success; -} - -static DecodeStatus -DecodeGPR32wspRegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { - if (RegNo > 31) - return MCDisassembler::Fail; - - uint16_t Register = getReg(Decoder, AArch64::GPR32wspRegClassID, RegNo); - Inst.addOperand(MCOperand::CreateReg(Register)); - return MCDisassembler::Success; -} - -static DecodeStatus -DecodeFPR8RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { - if (RegNo > 31) - return MCDisassembler::Fail; - - uint16_t Register = getReg(Decoder, AArch64::FPR8RegClassID, RegNo); - Inst.addOperand(MCOperand::CreateReg(Register)); - return MCDisassembler::Success; -} - -static DecodeStatus -DecodeFPR16RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { - if (RegNo > 31) - return MCDisassembler::Fail; - - uint16_t Register = getReg(Decoder, AArch64::FPR16RegClassID, RegNo); - Inst.addOperand(MCOperand::CreateReg(Register)); - return MCDisassembler::Success; -} - - -static DecodeStatus -DecodeFPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { - if (RegNo > 31) - return MCDisassembler::Fail; - - uint16_t Register = getReg(Decoder, AArch64::FPR32RegClassID, RegNo); - Inst.addOperand(MCOperand::CreateReg(Register)); - return MCDisassembler::Success; -} - -static DecodeStatus -DecodeFPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { - if (RegNo > 31) - return MCDisassembler::Fail; - - uint16_t Register = getReg(Decoder, AArch64::FPR64RegClassID, RegNo); - Inst.addOperand(MCOperand::CreateReg(Register)); - return MCDisassembler::Success; -} - -static 
DecodeStatus -DecodeFPR64LoRegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { - if (RegNo > 15) - return MCDisassembler::Fail; - - return DecodeFPR64RegisterClass(Inst, RegNo, Address, Decoder); -} - -static DecodeStatus -DecodeFPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { - if (RegNo > 31) - return MCDisassembler::Fail; - - uint16_t Register = getReg(Decoder, AArch64::FPR128RegClassID, RegNo); - Inst.addOperand(MCOperand::CreateReg(Register)); - return MCDisassembler::Success; -} - -static DecodeStatus -DecodeFPR128LoRegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { - if (RegNo > 15) - return MCDisassembler::Fail; - - return DecodeFPR128RegisterClass(Inst, RegNo, Address, Decoder); -} - -static DecodeStatus DecodeGPR64noxzrRegisterClass(llvm::MCInst &Inst, - unsigned RegNo, - uint64_t Address, - const void *Decoder) { - if (RegNo > 30) - return MCDisassembler::Fail; - - uint16_t Register = getReg(Decoder, AArch64::GPR64noxzrRegClassID, RegNo); - Inst.addOperand(MCOperand::CreateReg(Register)); - return MCDisassembler::Success; -} - -static DecodeStatus DecodeRegisterClassByID(llvm::MCInst &Inst, unsigned RegNo, - unsigned RegID, - const void *Decoder) { - if (RegNo > 31) - return MCDisassembler::Fail; - - uint16_t Register = getReg(Decoder, RegID, RegNo); - Inst.addOperand(MCOperand::CreateReg(Register)); - return MCDisassembler::Success; -} - -static DecodeStatus DecodeDPairRegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder) { - return DecodeRegisterClassByID(Inst, RegNo, AArch64::DPairRegClassID, - Decoder); -} - -static DecodeStatus DecodeQPairRegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder) { - return DecodeRegisterClassByID(Inst, RegNo, AArch64::QPairRegClassID, - Decoder); -} - -static DecodeStatus DecodeDTripleRegisterClass(llvm::MCInst &Inst, - unsigned RegNo, uint64_t Address, - const void *Decoder) { - return DecodeRegisterClassByID(Inst, RegNo, AArch64::DTripleRegClassID, - Decoder); -} - -static DecodeStatus DecodeQTripleRegisterClass(llvm::MCInst &Inst, - unsigned RegNo, uint64_t Address, - const void *Decoder) { - return DecodeRegisterClassByID(Inst, RegNo, AArch64::QTripleRegClassID, - Decoder); -} - -static DecodeStatus DecodeDQuadRegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder) { - return DecodeRegisterClassByID(Inst, RegNo, AArch64::DQuadRegClassID, - Decoder); -} - -static DecodeStatus DecodeQQuadRegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder) { - return DecodeRegisterClassByID(Inst, RegNo, AArch64::QQuadRegClassID, - Decoder); -} - -static DecodeStatus DecodeAddrRegExtendOperand(llvm::MCInst &Inst, - unsigned OptionHiS, - uint64_t Address, - const void *Decoder) { - // Option{1} must be 1. OptionHiS is made up of {Option{2}, Option{1}, - // S}. Hence we want to check bit 1. - if (!(OptionHiS & 2)) - return MCDisassembler::Fail; - - Inst.addOperand(MCOperand::CreateImm(OptionHiS)); - return MCDisassembler::Success; -} - -static DecodeStatus DecodeBitfield32ImmOperand(llvm::MCInst &Inst, - unsigned Imm6Bits, - uint64_t Address, - const void *Decoder) { - // In the 32-bit variant, bit 6 must be zero. I.e. the immediate must be - // between 0 and 31. 
- if (Imm6Bits > 31) - return MCDisassembler::Fail; - - Inst.addOperand(MCOperand::CreateImm(Imm6Bits)); - return MCDisassembler::Success; -} - -static DecodeStatus DecodeCVT32FixedPosOperand(llvm::MCInst &Inst, - unsigned Imm6Bits, - uint64_t Address, - const void *Decoder) { - // 1 <= Imm <= 32. Encoded as 64 - Imm so: 63 >= Encoded >= 32. - if (Imm6Bits < 32) - return MCDisassembler::Fail; - - Inst.addOperand(MCOperand::CreateImm(Imm6Bits)); - return MCDisassembler::Success; -} - -static DecodeStatus DecodeFPZeroOperand(llvm::MCInst &Inst, - unsigned RmBits, - uint64_t Address, - const void *Decoder) { - // Any bits are valid in the instruction (they're architecturally ignored), - // but a code generator should insert 0. - Inst.addOperand(MCOperand::CreateImm(0)); - return MCDisassembler::Success; -} - -static DecodeStatus DecodeShiftRightImm8(MCInst &Inst, unsigned Val, - uint64_t Address, - const void *Decoder) { - Inst.addOperand(MCOperand::CreateImm(8 - Val)); - return MCDisassembler::Success; -} - -static DecodeStatus DecodeShiftRightImm16(MCInst &Inst, unsigned Val, - uint64_t Address, - const void *Decoder) { - Inst.addOperand(MCOperand::CreateImm(16 - Val)); - return MCDisassembler::Success; -} - -static DecodeStatus DecodeShiftRightImm32(MCInst &Inst, unsigned Val, - uint64_t Address, - const void *Decoder) { - Inst.addOperand(MCOperand::CreateImm(32 - Val)); - return MCDisassembler::Success; -} - -static DecodeStatus DecodeShiftRightImm64(MCInst &Inst, unsigned Val, - uint64_t Address, - const void *Decoder) { - Inst.addOperand(MCOperand::CreateImm(64 - Val)); - return MCDisassembler::Success; -} - -static DecodeStatus DecodeShiftLeftImm8(MCInst &Inst, unsigned Val, - uint64_t Address, - const void *Decoder) { - if (Val > 7) - return MCDisassembler::Fail; - - Inst.addOperand(MCOperand::CreateImm(Val)); - return MCDisassembler::Success; -} - -static DecodeStatus DecodeShiftLeftImm16(MCInst &Inst, unsigned Val, - uint64_t Address, - const void *Decoder) { - if (Val > 15) - return MCDisassembler::Fail; - - Inst.addOperand(MCOperand::CreateImm(Val)); - return MCDisassembler::Success; -} - -static DecodeStatus DecodeShiftLeftImm32(MCInst &Inst, unsigned Val, - uint64_t Address, - const void *Decoder) { - if (Val > 31) - return MCDisassembler::Fail; - - Inst.addOperand(MCOperand::CreateImm(Val)); - return MCDisassembler::Success; -} - -static DecodeStatus DecodeShiftLeftImm64(MCInst &Inst, unsigned Val, - uint64_t Address, - const void *Decoder) { - if (Val > 63) - return MCDisassembler::Fail; - - Inst.addOperand(MCOperand::CreateImm(Val)); - return MCDisassembler::Success; -} - -template<int RegWidth> -static DecodeStatus DecodeMoveWideImmOperand(llvm::MCInst &Inst, - unsigned FullImm, - uint64_t Address, - const void *Decoder) { - unsigned Imm16 = FullImm & 0xffff; - unsigned Shift = FullImm >> 16; - - if (RegWidth == 32 && Shift > 1) return MCDisassembler::Fail; - - Inst.addOperand(MCOperand::CreateImm(Imm16)); - Inst.addOperand(MCOperand::CreateImm(Shift)); - return MCDisassembler::Success; -} - -template<int RegWidth> -static DecodeStatus DecodeLogicalImmOperand(llvm::MCInst &Inst, - unsigned Bits, - uint64_t Address, - const void *Decoder) { - uint64_t Imm; - if (!A64Imms::isLogicalImmBits(RegWidth, Bits, Imm)) - return MCDisassembler::Fail; - - Inst.addOperand(MCOperand::CreateImm(Bits)); - return MCDisassembler::Success; -} - - -static DecodeStatus DecodeRegExtendOperand(llvm::MCInst &Inst, - unsigned ShiftAmount, - uint64_t Address, - const void *Decoder) { - // Only values 
0-4 are valid for this 3-bit field - if (ShiftAmount > 4) - return MCDisassembler::Fail; - - Inst.addOperand(MCOperand::CreateImm(ShiftAmount)); - return MCDisassembler::Success; -} - -static DecodeStatus Decode32BitShiftOperand(llvm::MCInst &Inst, - unsigned ShiftAmount, - uint64_t Address, - const void *Decoder) { - // Only values below 32 are valid for a 32-bit register - if (ShiftAmount > 31) - return MCDisassembler::Fail; - - Inst.addOperand(MCOperand::CreateImm(ShiftAmount)); - return MCDisassembler::Success; -} - -static DecodeStatus DecodeBitfieldInstruction(llvm::MCInst &Inst, unsigned Insn, - uint64_t Address, - const void *Decoder) { - unsigned Rd = fieldFromInstruction(Insn, 0, 5); - unsigned Rn = fieldFromInstruction(Insn, 5, 5); - unsigned ImmS = fieldFromInstruction(Insn, 10, 6); - unsigned ImmR = fieldFromInstruction(Insn, 16, 6); - unsigned SF = fieldFromInstruction(Insn, 31, 1); - - // Undef for 0b11 just in case it occurs. Don't want the compiler to optimise - // out assertions that it thinks should never be hit. - enum OpcTypes { SBFM = 0, BFM, UBFM, Undef } Opc; - Opc = (OpcTypes)fieldFromInstruction(Insn, 29, 2); - - if (!SF) { - // ImmR and ImmS must be between 0 and 31 for 32-bit instructions. - if (ImmR > 31 || ImmS > 31) - return MCDisassembler::Fail; - } - - if (SF) { - DecodeGPR64RegisterClass(Inst, Rd, Address, Decoder); - // BFM MCInsts use Rd as a source too. - if (Opc == BFM) DecodeGPR64RegisterClass(Inst, Rd, Address, Decoder); - DecodeGPR64RegisterClass(Inst, Rn, Address, Decoder); - } else { - DecodeGPR32RegisterClass(Inst, Rd, Address, Decoder); - // BFM MCInsts use Rd as a source too. - if (Opc == BFM) DecodeGPR32RegisterClass(Inst, Rd, Address, Decoder); - DecodeGPR32RegisterClass(Inst, Rn, Address, Decoder); - } - - // ASR and LSR have more specific patterns so they won't get here: - assert(!(ImmS == 31 && !SF && Opc != BFM) - && "shift should have used auto decode"); - assert(!(ImmS == 63 && SF && Opc != BFM) - && "shift should have used auto decode"); - - // Extension instructions similarly: - if (Opc == SBFM && ImmR == 0) { - assert((ImmS != 7 && ImmS != 15) && "extension got here"); - assert((ImmS != 31 || SF == 0) && "extension got here"); - } else if (Opc == UBFM && ImmR == 0) { - assert((SF != 0 || (ImmS != 7 && ImmS != 15)) && "extension got here"); - } - - if (Opc == UBFM) { - // It might be a LSL instruction, which actually takes the shift amount - // itself as an MCInst operand. - if (SF && (ImmS + 1) % 64 == ImmR) { - Inst.setOpcode(AArch64::LSLxxi); - Inst.addOperand(MCOperand::CreateImm(63 - ImmS)); - return MCDisassembler::Success; - } else if (!SF && (ImmS + 1) % 32 == ImmR) { - Inst.setOpcode(AArch64::LSLwwi); - Inst.addOperand(MCOperand::CreateImm(31 - ImmS)); - return MCDisassembler::Success; - } - } - - // Otherwise it's definitely either an extract or an insert depending on which - // of ImmR or ImmS is larger. - unsigned ExtractOp, InsertOp; - switch (Opc) { - default: llvm_unreachable("unexpected instruction trying to decode bitfield"); - case SBFM: - ExtractOp = SF ? AArch64::SBFXxxii : AArch64::SBFXwwii; - InsertOp = SF ? AArch64::SBFIZxxii : AArch64::SBFIZwwii; - break; - case BFM: - ExtractOp = SF ? AArch64::BFXILxxii : AArch64::BFXILwwii; - InsertOp = SF ? AArch64::BFIxxii : AArch64::BFIwwii; - break; - case UBFM: - ExtractOp = SF ? AArch64::UBFXxxii : AArch64::UBFXwwii; - InsertOp = SF ? 
AArch64::UBFIZxxii : AArch64::UBFIZwwii; - break; - } - - // Otherwise it's a boring insert or extract - Inst.addOperand(MCOperand::CreateImm(ImmR)); - Inst.addOperand(MCOperand::CreateImm(ImmS)); - - - if (ImmS < ImmR) - Inst.setOpcode(InsertOp); - else - Inst.setOpcode(ExtractOp); - - return MCDisassembler::Success; -} - -static DecodeStatus DecodeFMOVLaneInstruction(llvm::MCInst &Inst, unsigned Insn, - uint64_t Address, - const void *Decoder) { - // This decoder exists to add the dummy Lane operand to the MCInst, which must - // be 1 in assembly but has no other real manifestation. - unsigned Rd = fieldFromInstruction(Insn, 0, 5); - unsigned Rn = fieldFromInstruction(Insn, 5, 5); - unsigned IsToVec = fieldFromInstruction(Insn, 16, 1); - - if (IsToVec) { - DecodeFPR128RegisterClass(Inst, Rd, Address, Decoder); - DecodeGPR64RegisterClass(Inst, Rn, Address, Decoder); - } else { - DecodeGPR64RegisterClass(Inst, Rd, Address, Decoder); - DecodeFPR128RegisterClass(Inst, Rn, Address, Decoder); - } - - // Add the lane - Inst.addOperand(MCOperand::CreateImm(1)); - - return MCDisassembler::Success; -} - - -static DecodeStatus DecodeLDSTPairInstruction(llvm::MCInst &Inst, - unsigned Insn, - uint64_t Address, - const void *Decoder) { - DecodeStatus Result = MCDisassembler::Success; - unsigned Rt = fieldFromInstruction(Insn, 0, 5); - unsigned Rn = fieldFromInstruction(Insn, 5, 5); - unsigned Rt2 = fieldFromInstruction(Insn, 10, 5); - unsigned SImm7 = fieldFromInstruction(Insn, 15, 7); - unsigned L = fieldFromInstruction(Insn, 22, 1); - unsigned V = fieldFromInstruction(Insn, 26, 1); - unsigned Opc = fieldFromInstruction(Insn, 30, 2); - - // Not an official name, but it turns out that bit 23 distinguishes indexed - // from non-indexed operations. - unsigned Indexed = fieldFromInstruction(Insn, 23, 1); - - if (Indexed && L == 0) { - // The MCInst for an indexed store has an out operand and 4 ins: - // Rn_wb, Rt, Rt2, Rn, Imm - DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder); - } - - // You shouldn't load to the same register twice in an instruction... - if (L && Rt == Rt2) - Result = MCDisassembler::SoftFail; - - // ... or do any operation that writes-back to a transfer register. But note - // that "stp xzr, xzr, [sp], #4" is fine because xzr and sp are different. - if (Indexed && V == 0 && Rn != 31 && (Rt == Rn || Rt2 == Rn)) - Result = MCDisassembler::SoftFail; - - // Exactly how we decode the MCInst's registers depends on the Opc and V - // fields of the instruction. These also obviously determine the size of the - // operation so we can fill in that information while we're at it. 
- if (V) { - // The instruction operates on the FP/SIMD registers - switch (Opc) { - default: return MCDisassembler::Fail; - case 0: - DecodeFPR32RegisterClass(Inst, Rt, Address, Decoder); - DecodeFPR32RegisterClass(Inst, Rt2, Address, Decoder); - break; - case 1: - DecodeFPR64RegisterClass(Inst, Rt, Address, Decoder); - DecodeFPR64RegisterClass(Inst, Rt2, Address, Decoder); - break; - case 2: - DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder); - DecodeFPR128RegisterClass(Inst, Rt2, Address, Decoder); - break; - } - } else { - switch (Opc) { - default: return MCDisassembler::Fail; - case 0: - DecodeGPR32RegisterClass(Inst, Rt, Address, Decoder); - DecodeGPR32RegisterClass(Inst, Rt2, Address, Decoder); - break; - case 1: - assert(L && "unexpected \"store signed\" attempt"); - DecodeGPR64RegisterClass(Inst, Rt, Address, Decoder); - DecodeGPR64RegisterClass(Inst, Rt2, Address, Decoder); - break; - case 2: - DecodeGPR64RegisterClass(Inst, Rt, Address, Decoder); - DecodeGPR64RegisterClass(Inst, Rt2, Address, Decoder); - break; - } - } - - if (Indexed && L == 1) { - // The MCInst for an indexed load has 3 out operands and an 3 ins: - // Rt, Rt2, Rn_wb, Rt2, Rn, Imm - DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder); - } - - - DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder); - Inst.addOperand(MCOperand::CreateImm(SImm7)); - - return Result; -} - -static DecodeStatus DecodeLoadPairExclusiveInstruction(llvm::MCInst &Inst, - uint32_t Val, - uint64_t Address, - const void *Decoder) { - unsigned Rt = fieldFromInstruction(Val, 0, 5); - unsigned Rn = fieldFromInstruction(Val, 5, 5); - unsigned Rt2 = fieldFromInstruction(Val, 10, 5); - unsigned MemSize = fieldFromInstruction(Val, 30, 2); - - DecodeStatus S = MCDisassembler::Success; - if (Rt == Rt2) S = MCDisassembler::SoftFail; - - switch (MemSize) { - case 2: - if (!Check(S, DecodeGPR32RegisterClass(Inst, Rt, Address, Decoder))) - return MCDisassembler::Fail; - if (!Check(S, DecodeGPR32RegisterClass(Inst, Rt2, Address, Decoder))) - return MCDisassembler::Fail; - break; - case 3: - if (!Check(S, DecodeGPR64RegisterClass(Inst, Rt, Address, Decoder))) - return MCDisassembler::Fail; - if (!Check(S, DecodeGPR64RegisterClass(Inst, Rt2, Address, Decoder))) - return MCDisassembler::Fail; - break; - default: - llvm_unreachable("Invalid MemSize in DecodeLoadPairExclusiveInstruction"); - } - - if (!Check(S, DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder))) - return MCDisassembler::Fail; - - return S; -} - -template<typename SomeNamedImmMapper> -static DecodeStatus DecodeNamedImmOperand(llvm::MCInst &Inst, - unsigned Val, - uint64_t Address, - const void *Decoder) { - SomeNamedImmMapper Mapper; - bool ValidNamed; - Mapper.toString(Val, ValidNamed); - if (ValidNamed || Mapper.validImm(Val)) { - Inst.addOperand(MCOperand::CreateImm(Val)); - return MCDisassembler::Success; - } - - return MCDisassembler::Fail; -} - -static DecodeStatus DecodeSysRegOperand(const A64SysReg::SysRegMapper &Mapper, - llvm::MCInst &Inst, - unsigned Val, - uint64_t Address, - const void *Decoder) { - bool ValidNamed; - Mapper.toString(Val, ValidNamed); - - Inst.addOperand(MCOperand::CreateImm(Val)); - - return ValidNamed ? 
MCDisassembler::Success : MCDisassembler::Fail; -} - -static DecodeStatus DecodeMRSOperand(llvm::MCInst &Inst, - unsigned Val, - uint64_t Address, - const void *Decoder) { - return DecodeSysRegOperand(A64SysReg::MRSMapper(), Inst, Val, Address, - Decoder); -} - -static DecodeStatus DecodeMSROperand(llvm::MCInst &Inst, - unsigned Val, - uint64_t Address, - const void *Decoder) { - return DecodeSysRegOperand(A64SysReg::MSRMapper(), Inst, Val, Address, - Decoder); -} - -static DecodeStatus DecodeSingleIndexedInstruction(llvm::MCInst &Inst, - unsigned Insn, - uint64_t Address, - const void *Decoder) { - unsigned Rt = fieldFromInstruction(Insn, 0, 5); - unsigned Rn = fieldFromInstruction(Insn, 5, 5); - unsigned Imm9 = fieldFromInstruction(Insn, 12, 9); - - unsigned Opc = fieldFromInstruction(Insn, 22, 2); - unsigned V = fieldFromInstruction(Insn, 26, 1); - unsigned Size = fieldFromInstruction(Insn, 30, 2); - - if (Opc == 0 || (V == 1 && Opc == 2)) { - // It's a store, the MCInst gets: Rn_wb, Rt, Rn, Imm - DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder); - } - - if (V == 0 && (Opc == 2 || Size == 3)) { - DecodeGPR64RegisterClass(Inst, Rt, Address, Decoder); - } else if (V == 0) { - DecodeGPR32RegisterClass(Inst, Rt, Address, Decoder); - } else if (V == 1 && (Opc & 2)) { - DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder); - } else { - switch (Size) { - case 0: - DecodeFPR8RegisterClass(Inst, Rt, Address, Decoder); - break; - case 1: - DecodeFPR16RegisterClass(Inst, Rt, Address, Decoder); - break; - case 2: - DecodeFPR32RegisterClass(Inst, Rt, Address, Decoder); - break; - case 3: - DecodeFPR64RegisterClass(Inst, Rt, Address, Decoder); - break; - } - } - - if (Opc != 0 && (V != 1 || Opc != 2)) { - // It's a load, the MCInst gets: Rt, Rn_wb, Rn, Imm - DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder); - } - - DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder); - - Inst.addOperand(MCOperand::CreateImm(Imm9)); - - // N.b. The official documentation says undpredictable if Rt == Rn, but this - // takes place at the architectural rather than encoding level: - // - // "STR xzr, [sp], #4" is perfectly valid. - if (V == 0 && Rt == Rn && Rn != 31) - return MCDisassembler::SoftFail; - else - return MCDisassembler::Success; -} - -static MCDisassembler *createAArch64Disassembler(const Target &T, - const MCSubtargetInfo &STI, - MCContext &Ctx) { - return new AArch64Disassembler(STI, Ctx); -} - -extern "C" void LLVMInitializeAArch64Disassembler() { - TargetRegistry::RegisterMCDisassembler(TheAArch64leTarget, - createAArch64Disassembler); - TargetRegistry::RegisterMCDisassembler(TheAArch64beTarget, - createAArch64Disassembler); -} - -template <A64SE::ShiftExtSpecifiers Ext, bool IsHalf> -static DecodeStatus -DecodeNeonMovImmShiftOperand(llvm::MCInst &Inst, unsigned ShiftAmount, - uint64_t Address, const void *Decoder) { - bool IsLSL = false; - if (Ext == A64SE::LSL) - IsLSL = true; - else if (Ext != A64SE::MSL) - return MCDisassembler::Fail; - - // MSL and LSLH accepts encoded shift amount 0 or 1. - if ((!IsLSL || (IsLSL && IsHalf)) && ShiftAmount != 0 && ShiftAmount != 1) - return MCDisassembler::Fail; - - // LSL accepts encoded shift amount 0, 1, 2 or 3. - if (IsLSL && ShiftAmount > 3) - return MCDisassembler::Fail; - - Inst.addOperand(MCOperand::CreateImm(ShiftAmount)); - return MCDisassembler::Success; -} - -// Decode post-index vector load/store instructions. 
-// This is necessary as we need to decode Rm: if Rm == 0b11111, the last -// operand is an immediate equal the the length of vector list in bytes, -// or Rm is decoded to a GPR64noxzr register. -static DecodeStatus DecodeVLDSTPostInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, - const void *Decoder) { - unsigned Rt = fieldFromInstruction(Insn, 0, 5); - unsigned Rn = fieldFromInstruction(Insn, 5, 5); - unsigned Rm = fieldFromInstruction(Insn, 16, 5); - unsigned Opcode = fieldFromInstruction(Insn, 12, 4); - unsigned IsLoad = fieldFromInstruction(Insn, 22, 1); - // 0 for 64bit vector list, 1 for 128bit vector list - unsigned Is128BitVec = fieldFromInstruction(Insn, 30, 1); - - unsigned NumVecs; - switch (Opcode) { - case 0: // ld4/st4 - case 2: // ld1/st1 with 4 vectors - NumVecs = 4; break; - case 4: // ld3/st3 - case 6: // ld1/st1 with 3 vectors - NumVecs = 3; break; - case 7: // ld1/st1 with 1 vector - NumVecs = 1; break; - case 8: // ld2/st2 - case 10: // ld1/st1 with 2 vectors - NumVecs = 2; break; - default: - llvm_unreachable("Invalid opcode for post-index load/store instructions"); - } - - // Decode vector list of 1/2/3/4 vectors for load instructions. - if (IsLoad) { - switch (NumVecs) { - case 1: - Is128BitVec ? DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder) - : DecodeFPR64RegisterClass(Inst, Rt, Address, Decoder); - break; - case 2: - Is128BitVec ? DecodeQPairRegisterClass(Inst, Rt, Address, Decoder) - : DecodeDPairRegisterClass(Inst, Rt, Address, Decoder); - break; - case 3: - Is128BitVec ? DecodeQTripleRegisterClass(Inst, Rt, Address, Decoder) - : DecodeDTripleRegisterClass(Inst, Rt, Address, Decoder); - break; - case 4: - Is128BitVec ? DecodeQQuadRegisterClass(Inst, Rt, Address, Decoder) - : DecodeDQuadRegisterClass(Inst, Rt, Address, Decoder); - break; - } - } - - // Decode write back register, which is equal to Rn. - DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder); - DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder); - - if (Rm == 31) // If Rm is 0x11111, add the vector list length in byte - Inst.addOperand(MCOperand::CreateImm(NumVecs * (Is128BitVec ? 16 : 8))); - else // Decode Rm - DecodeGPR64noxzrRegisterClass(Inst, Rm, Address, Decoder); - - // Decode vector list of 1/2/3/4 vectors for load instructions. - if (!IsLoad) { - switch (NumVecs) { - case 1: - Is128BitVec ? DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder) - : DecodeFPR64RegisterClass(Inst, Rt, Address, Decoder); - break; - case 2: - Is128BitVec ? DecodeQPairRegisterClass(Inst, Rt, Address, Decoder) - : DecodeDPairRegisterClass(Inst, Rt, Address, Decoder); - break; - case 3: - Is128BitVec ? DecodeQTripleRegisterClass(Inst, Rt, Address, Decoder) - : DecodeDTripleRegisterClass(Inst, Rt, Address, Decoder); - break; - case 4: - Is128BitVec ? DecodeQQuadRegisterClass(Inst, Rt, Address, Decoder) - : DecodeDQuadRegisterClass(Inst, Rt, Address, Decoder); - break; - } - } - - return MCDisassembler::Success; -} - -// Decode post-index vector load/store lane instructions. -// This is necessary as we need to decode Rm: if Rm == 0b11111, the last -// operand is an immediate equal the the length of the changed bytes, -// or Rm is decoded to a GPR64noxzr register. -static DecodeStatus DecodeVLDSTLanePostInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, - const void *Decoder) { - bool Is64bitVec = false; - bool IsLoadDup = false; - bool IsLoad = false; - // The total number of bytes transferred. 
- // TransferBytes = NumVecs * OneLaneBytes - unsigned TransferBytes = 0; - unsigned NumVecs = 0; - unsigned Opc = Inst.getOpcode(); - switch (Opc) { - case AArch64::LD1R_WB_8B_fixed: case AArch64::LD1R_WB_8B_register: - case AArch64::LD1R_WB_4H_fixed: case AArch64::LD1R_WB_4H_register: - case AArch64::LD1R_WB_2S_fixed: case AArch64::LD1R_WB_2S_register: - case AArch64::LD1R_WB_1D_fixed: case AArch64::LD1R_WB_1D_register: { - switch (Opc) { - case AArch64::LD1R_WB_8B_fixed: case AArch64::LD1R_WB_8B_register: - TransferBytes = 1; break; - case AArch64::LD1R_WB_4H_fixed: case AArch64::LD1R_WB_4H_register: - TransferBytes = 2; break; - case AArch64::LD1R_WB_2S_fixed: case AArch64::LD1R_WB_2S_register: - TransferBytes = 4; break; - case AArch64::LD1R_WB_1D_fixed: case AArch64::LD1R_WB_1D_register: - TransferBytes = 8; break; - } - Is64bitVec = true; - IsLoadDup = true; - NumVecs = 1; - break; - } - - case AArch64::LD1R_WB_16B_fixed: case AArch64::LD1R_WB_16B_register: - case AArch64::LD1R_WB_8H_fixed: case AArch64::LD1R_WB_8H_register: - case AArch64::LD1R_WB_4S_fixed: case AArch64::LD1R_WB_4S_register: - case AArch64::LD1R_WB_2D_fixed: case AArch64::LD1R_WB_2D_register: { - switch (Opc) { - case AArch64::LD1R_WB_16B_fixed: case AArch64::LD1R_WB_16B_register: - TransferBytes = 1; break; - case AArch64::LD1R_WB_8H_fixed: case AArch64::LD1R_WB_8H_register: - TransferBytes = 2; break; - case AArch64::LD1R_WB_4S_fixed: case AArch64::LD1R_WB_4S_register: - TransferBytes = 4; break; - case AArch64::LD1R_WB_2D_fixed: case AArch64::LD1R_WB_2D_register: - TransferBytes = 8; break; - } - IsLoadDup = true; - NumVecs = 1; - break; - } - - case AArch64::LD2R_WB_8B_fixed: case AArch64::LD2R_WB_8B_register: - case AArch64::LD2R_WB_4H_fixed: case AArch64::LD2R_WB_4H_register: - case AArch64::LD2R_WB_2S_fixed: case AArch64::LD2R_WB_2S_register: - case AArch64::LD2R_WB_1D_fixed: case AArch64::LD2R_WB_1D_register: { - switch (Opc) { - case AArch64::LD2R_WB_8B_fixed: case AArch64::LD2R_WB_8B_register: - TransferBytes = 2; break; - case AArch64::LD2R_WB_4H_fixed: case AArch64::LD2R_WB_4H_register: - TransferBytes = 4; break; - case AArch64::LD2R_WB_2S_fixed: case AArch64::LD2R_WB_2S_register: - TransferBytes = 8; break; - case AArch64::LD2R_WB_1D_fixed: case AArch64::LD2R_WB_1D_register: - TransferBytes = 16; break; - } - Is64bitVec = true; - IsLoadDup = true; - NumVecs = 2; - break; - } - - case AArch64::LD2R_WB_16B_fixed: case AArch64::LD2R_WB_16B_register: - case AArch64::LD2R_WB_8H_fixed: case AArch64::LD2R_WB_8H_register: - case AArch64::LD2R_WB_4S_fixed: case AArch64::LD2R_WB_4S_register: - case AArch64::LD2R_WB_2D_fixed: case AArch64::LD2R_WB_2D_register: { - switch (Opc) { - case AArch64::LD2R_WB_16B_fixed: case AArch64::LD2R_WB_16B_register: - TransferBytes = 2; break; - case AArch64::LD2R_WB_8H_fixed: case AArch64::LD2R_WB_8H_register: - TransferBytes = 4; break; - case AArch64::LD2R_WB_4S_fixed: case AArch64::LD2R_WB_4S_register: - TransferBytes = 8; break; - case AArch64::LD2R_WB_2D_fixed: case AArch64::LD2R_WB_2D_register: - TransferBytes = 16; break; - } - IsLoadDup = true; - NumVecs = 2; - break; - } - - case AArch64::LD3R_WB_8B_fixed: case AArch64::LD3R_WB_8B_register: - case AArch64::LD3R_WB_4H_fixed: case AArch64::LD3R_WB_4H_register: - case AArch64::LD3R_WB_2S_fixed: case AArch64::LD3R_WB_2S_register: - case AArch64::LD3R_WB_1D_fixed: case AArch64::LD3R_WB_1D_register: { - switch (Opc) { - case AArch64::LD3R_WB_8B_fixed: case AArch64::LD3R_WB_8B_register: - TransferBytes = 3; break; - case 
AArch64::LD3R_WB_4H_fixed: case AArch64::LD3R_WB_4H_register: - TransferBytes = 6; break; - case AArch64::LD3R_WB_2S_fixed: case AArch64::LD3R_WB_2S_register: - TransferBytes = 12; break; - case AArch64::LD3R_WB_1D_fixed: case AArch64::LD3R_WB_1D_register: - TransferBytes = 24; break; - } - Is64bitVec = true; - IsLoadDup = true; - NumVecs = 3; - break; - } - - case AArch64::LD3R_WB_16B_fixed: case AArch64::LD3R_WB_16B_register: - case AArch64::LD3R_WB_4S_fixed: case AArch64::LD3R_WB_8H_register: - case AArch64::LD3R_WB_8H_fixed: case AArch64::LD3R_WB_4S_register: - case AArch64::LD3R_WB_2D_fixed: case AArch64::LD3R_WB_2D_register: { - switch (Opc) { - case AArch64::LD3R_WB_16B_fixed: case AArch64::LD3R_WB_16B_register: - TransferBytes = 3; break; - case AArch64::LD3R_WB_8H_fixed: case AArch64::LD3R_WB_8H_register: - TransferBytes = 6; break; - case AArch64::LD3R_WB_4S_fixed: case AArch64::LD3R_WB_4S_register: - TransferBytes = 12; break; - case AArch64::LD3R_WB_2D_fixed: case AArch64::LD3R_WB_2D_register: - TransferBytes = 24; break; - } - IsLoadDup = true; - NumVecs = 3; - break; - } - - case AArch64::LD4R_WB_8B_fixed: case AArch64::LD4R_WB_8B_register: - case AArch64::LD4R_WB_4H_fixed: case AArch64::LD4R_WB_4H_register: - case AArch64::LD4R_WB_2S_fixed: case AArch64::LD4R_WB_2S_register: - case AArch64::LD4R_WB_1D_fixed: case AArch64::LD4R_WB_1D_register: { - switch (Opc) { - case AArch64::LD4R_WB_8B_fixed: case AArch64::LD4R_WB_8B_register: - TransferBytes = 4; break; - case AArch64::LD4R_WB_4H_fixed: case AArch64::LD4R_WB_4H_register: - TransferBytes = 8; break; - case AArch64::LD4R_WB_2S_fixed: case AArch64::LD4R_WB_2S_register: - TransferBytes = 16; break; - case AArch64::LD4R_WB_1D_fixed: case AArch64::LD4R_WB_1D_register: - TransferBytes = 32; break; - } - Is64bitVec = true; - IsLoadDup = true; - NumVecs = 4; - break; - } - - case AArch64::LD4R_WB_16B_fixed: case AArch64::LD4R_WB_16B_register: - case AArch64::LD4R_WB_4S_fixed: case AArch64::LD4R_WB_8H_register: - case AArch64::LD4R_WB_8H_fixed: case AArch64::LD4R_WB_4S_register: - case AArch64::LD4R_WB_2D_fixed: case AArch64::LD4R_WB_2D_register: { - switch (Opc) { - case AArch64::LD4R_WB_16B_fixed: case AArch64::LD4R_WB_16B_register: - TransferBytes = 4; break; - case AArch64::LD4R_WB_8H_fixed: case AArch64::LD4R_WB_8H_register: - TransferBytes = 8; break; - case AArch64::LD4R_WB_4S_fixed: case AArch64::LD4R_WB_4S_register: - TransferBytes = 16; break; - case AArch64::LD4R_WB_2D_fixed: case AArch64::LD4R_WB_2D_register: - TransferBytes = 32; break; - } - IsLoadDup = true; - NumVecs = 4; - break; - } - - case AArch64::LD1LN_WB_B_fixed: case AArch64::LD1LN_WB_B_register: - case AArch64::LD1LN_WB_H_fixed: case AArch64::LD1LN_WB_H_register: - case AArch64::LD1LN_WB_S_fixed: case AArch64::LD1LN_WB_S_register: - case AArch64::LD1LN_WB_D_fixed: case AArch64::LD1LN_WB_D_register: { - switch (Opc) { - case AArch64::LD1LN_WB_B_fixed: case AArch64::LD1LN_WB_B_register: - TransferBytes = 1; break; - case AArch64::LD1LN_WB_H_fixed: case AArch64::LD1LN_WB_H_register: - TransferBytes = 2; break; - case AArch64::LD1LN_WB_S_fixed: case AArch64::LD1LN_WB_S_register: - TransferBytes = 4; break; - case AArch64::LD1LN_WB_D_fixed: case AArch64::LD1LN_WB_D_register: - TransferBytes = 8; break; - } - IsLoad = true; - NumVecs = 1; - break; - } - - case AArch64::LD2LN_WB_B_fixed: case AArch64::LD2LN_WB_B_register: - case AArch64::LD2LN_WB_H_fixed: case AArch64::LD2LN_WB_H_register: - case AArch64::LD2LN_WB_S_fixed: case AArch64::LD2LN_WB_S_register: - case 
AArch64::LD2LN_WB_D_fixed: case AArch64::LD2LN_WB_D_register: { - switch (Opc) { - case AArch64::LD2LN_WB_B_fixed: case AArch64::LD2LN_WB_B_register: - TransferBytes = 2; break; - case AArch64::LD2LN_WB_H_fixed: case AArch64::LD2LN_WB_H_register: - TransferBytes = 4; break; - case AArch64::LD2LN_WB_S_fixed: case AArch64::LD2LN_WB_S_register: - TransferBytes = 8; break; - case AArch64::LD2LN_WB_D_fixed: case AArch64::LD2LN_WB_D_register: - TransferBytes = 16; break; - } - IsLoad = true; - NumVecs = 2; - break; - } - - case AArch64::LD3LN_WB_B_fixed: case AArch64::LD3LN_WB_B_register: - case AArch64::LD3LN_WB_H_fixed: case AArch64::LD3LN_WB_H_register: - case AArch64::LD3LN_WB_S_fixed: case AArch64::LD3LN_WB_S_register: - case AArch64::LD3LN_WB_D_fixed: case AArch64::LD3LN_WB_D_register: { - switch (Opc) { - case AArch64::LD3LN_WB_B_fixed: case AArch64::LD3LN_WB_B_register: - TransferBytes = 3; break; - case AArch64::LD3LN_WB_H_fixed: case AArch64::LD3LN_WB_H_register: - TransferBytes = 6; break; - case AArch64::LD3LN_WB_S_fixed: case AArch64::LD3LN_WB_S_register: - TransferBytes = 12; break; - case AArch64::LD3LN_WB_D_fixed: case AArch64::LD3LN_WB_D_register: - TransferBytes = 24; break; - } - IsLoad = true; - NumVecs = 3; - break; - } - - case AArch64::LD4LN_WB_B_fixed: case AArch64::LD4LN_WB_B_register: - case AArch64::LD4LN_WB_H_fixed: case AArch64::LD4LN_WB_H_register: - case AArch64::LD4LN_WB_S_fixed: case AArch64::LD4LN_WB_S_register: - case AArch64::LD4LN_WB_D_fixed: case AArch64::LD4LN_WB_D_register: { - switch (Opc) { - case AArch64::LD4LN_WB_B_fixed: case AArch64::LD4LN_WB_B_register: - TransferBytes = 4; break; - case AArch64::LD4LN_WB_H_fixed: case AArch64::LD4LN_WB_H_register: - TransferBytes = 8; break; - case AArch64::LD4LN_WB_S_fixed: case AArch64::LD4LN_WB_S_register: - TransferBytes = 16; break; - case AArch64::LD4LN_WB_D_fixed: case AArch64::LD4LN_WB_D_register: - TransferBytes = 32; break; - } - IsLoad = true; - NumVecs = 4; - break; - } - - case AArch64::ST1LN_WB_B_fixed: case AArch64::ST1LN_WB_B_register: - case AArch64::ST1LN_WB_H_fixed: case AArch64::ST1LN_WB_H_register: - case AArch64::ST1LN_WB_S_fixed: case AArch64::ST1LN_WB_S_register: - case AArch64::ST1LN_WB_D_fixed: case AArch64::ST1LN_WB_D_register: { - switch (Opc) { - case AArch64::ST1LN_WB_B_fixed: case AArch64::ST1LN_WB_B_register: - TransferBytes = 1; break; - case AArch64::ST1LN_WB_H_fixed: case AArch64::ST1LN_WB_H_register: - TransferBytes = 2; break; - case AArch64::ST1LN_WB_S_fixed: case AArch64::ST1LN_WB_S_register: - TransferBytes = 4; break; - case AArch64::ST1LN_WB_D_fixed: case AArch64::ST1LN_WB_D_register: - TransferBytes = 8; break; - } - NumVecs = 1; - break; - } - - case AArch64::ST2LN_WB_B_fixed: case AArch64::ST2LN_WB_B_register: - case AArch64::ST2LN_WB_H_fixed: case AArch64::ST2LN_WB_H_register: - case AArch64::ST2LN_WB_S_fixed: case AArch64::ST2LN_WB_S_register: - case AArch64::ST2LN_WB_D_fixed: case AArch64::ST2LN_WB_D_register: { - switch (Opc) { - case AArch64::ST2LN_WB_B_fixed: case AArch64::ST2LN_WB_B_register: - TransferBytes = 2; break; - case AArch64::ST2LN_WB_H_fixed: case AArch64::ST2LN_WB_H_register: - TransferBytes = 4; break; - case AArch64::ST2LN_WB_S_fixed: case AArch64::ST2LN_WB_S_register: - TransferBytes = 8; break; - case AArch64::ST2LN_WB_D_fixed: case AArch64::ST2LN_WB_D_register: - TransferBytes = 16; break; - } - NumVecs = 2; - break; - } - - case AArch64::ST3LN_WB_B_fixed: case AArch64::ST3LN_WB_B_register: - case AArch64::ST3LN_WB_H_fixed: case 
AArch64::ST3LN_WB_H_register: - case AArch64::ST3LN_WB_S_fixed: case AArch64::ST3LN_WB_S_register: - case AArch64::ST3LN_WB_D_fixed: case AArch64::ST3LN_WB_D_register: { - switch (Opc) { - case AArch64::ST3LN_WB_B_fixed: case AArch64::ST3LN_WB_B_register: - TransferBytes = 3; break; - case AArch64::ST3LN_WB_H_fixed: case AArch64::ST3LN_WB_H_register: - TransferBytes = 6; break; - case AArch64::ST3LN_WB_S_fixed: case AArch64::ST3LN_WB_S_register: - TransferBytes = 12; break; - case AArch64::ST3LN_WB_D_fixed: case AArch64::ST3LN_WB_D_register: - TransferBytes = 24; break; - } - NumVecs = 3; - break; - } - - case AArch64::ST4LN_WB_B_fixed: case AArch64::ST4LN_WB_B_register: - case AArch64::ST4LN_WB_H_fixed: case AArch64::ST4LN_WB_H_register: - case AArch64::ST4LN_WB_S_fixed: case AArch64::ST4LN_WB_S_register: - case AArch64::ST4LN_WB_D_fixed: case AArch64::ST4LN_WB_D_register: { - switch (Opc) { - case AArch64::ST4LN_WB_B_fixed: case AArch64::ST4LN_WB_B_register: - TransferBytes = 4; break; - case AArch64::ST4LN_WB_H_fixed: case AArch64::ST4LN_WB_H_register: - TransferBytes = 8; break; - case AArch64::ST4LN_WB_S_fixed: case AArch64::ST4LN_WB_S_register: - TransferBytes = 16; break; - case AArch64::ST4LN_WB_D_fixed: case AArch64::ST4LN_WB_D_register: - TransferBytes = 32; break; - } - NumVecs = 4; - break; - } - - default: - return MCDisassembler::Fail; - } // End of switch (Opc) - - unsigned Rt = fieldFromInstruction(Insn, 0, 5); - unsigned Rn = fieldFromInstruction(Insn, 5, 5); - unsigned Rm = fieldFromInstruction(Insn, 16, 5); - - // Decode post-index of load duplicate lane - if (IsLoadDup) { - switch (NumVecs) { - case 1: - Is64bitVec ? DecodeFPR64RegisterClass(Inst, Rt, Address, Decoder) - : DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder); - break; - case 2: - Is64bitVec ? DecodeDPairRegisterClass(Inst, Rt, Address, Decoder) - : DecodeQPairRegisterClass(Inst, Rt, Address, Decoder); - break; - case 3: - Is64bitVec ? DecodeDTripleRegisterClass(Inst, Rt, Address, Decoder) - : DecodeQTripleRegisterClass(Inst, Rt, Address, Decoder); - break; - case 4: - Is64bitVec ? DecodeDQuadRegisterClass(Inst, Rt, Address, Decoder) - : DecodeQQuadRegisterClass(Inst, Rt, Address, Decoder); - } - - // Decode write back register, which is equal to Rn. - DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder); - DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder); - - if (Rm == 31) // If Rm is 0x11111, add the number of transferred bytes - Inst.addOperand(MCOperand::CreateImm(TransferBytes)); - else // Decode Rm - DecodeGPR64noxzrRegisterClass(Inst, Rm, Address, Decoder); - - return MCDisassembler::Success; - } - - // Decode post-index of load/store lane - // Loads have a vector list as output. - if (IsLoad) { - switch (NumVecs) { - case 1: - DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder); - break; - case 2: - DecodeQPairRegisterClass(Inst, Rt, Address, Decoder); - break; - case 3: - DecodeQTripleRegisterClass(Inst, Rt, Address, Decoder); - break; - case 4: - DecodeQQuadRegisterClass(Inst, Rt, Address, Decoder); - } - } - - // Decode write back register, which is equal to Rn. - DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder); - DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder); - - if (Rm == 31) // If Rm is 0x11111, add the number of transferred bytes - Inst.addOperand(MCOperand::CreateImm(TransferBytes)); - else // Decode Rm - DecodeGPR64noxzrRegisterClass(Inst, Rm, Address, Decoder); - - // Decode the source vector list. 
- switch (NumVecs) { - case 1: - DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder); - break; - case 2: - DecodeQPairRegisterClass(Inst, Rt, Address, Decoder); - break; - case 3: - DecodeQTripleRegisterClass(Inst, Rt, Address, Decoder); - break; - case 4: - DecodeQQuadRegisterClass(Inst, Rt, Address, Decoder); - } - - // Decode lane - unsigned Q = fieldFromInstruction(Insn, 30, 1); - unsigned S = fieldFromInstruction(Insn, 10, 3); - unsigned lane = 0; - // Calculate the number of lanes by number of vectors and transferred bytes. - // NumLanes = 16 bytes / bytes of each lane - unsigned NumLanes = 16 / (TransferBytes / NumVecs); - switch (NumLanes) { - case 16: // A vector has 16 lanes, each lane is 1 bytes. - lane = (Q << 3) | S; - break; - case 8: - lane = (Q << 2) | (S >> 1); - break; - case 4: - lane = (Q << 1) | (S >> 2); - break; - case 2: - lane = Q; - break; - } - Inst.addOperand(MCOperand::CreateImm(lane)); - - return MCDisassembler::Success; -} - -static DecodeStatus DecodeSHLLInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, - const void *Decoder) { - unsigned Rd = fieldFromInstruction(Insn, 0, 5); - unsigned Rn = fieldFromInstruction(Insn, 5, 5); - unsigned size = fieldFromInstruction(Insn, 22, 2); - unsigned Q = fieldFromInstruction(Insn, 30, 1); - - DecodeFPR128RegisterClass(Inst, Rd, Address, Decoder); - - if(Q) - DecodeFPR128RegisterClass(Inst, Rn, Address, Decoder); - else - DecodeFPR64RegisterClass(Inst, Rn, Address, Decoder); - - switch (size) { - case 0: - Inst.addOperand(MCOperand::CreateImm(8)); - break; - case 1: - Inst.addOperand(MCOperand::CreateImm(16)); - break; - case 2: - Inst.addOperand(MCOperand::CreateImm(32)); - break; - default : - return MCDisassembler::Fail; - } - return MCDisassembler::Success; -} - diff --git a/llvm/lib/Target/AArch64/Disassembler/CMakeLists.txt b/llvm/lib/Target/AArch64/Disassembler/CMakeLists.txt deleted file mode 100644 index 21baf250af8..00000000000 --- a/llvm/lib/Target/AArch64/Disassembler/CMakeLists.txt +++ /dev/null @@ -1,3 +0,0 @@ -add_llvm_library(LLVMAArch64Disassembler - AArch64Disassembler.cpp - ) diff --git a/llvm/lib/Target/AArch64/Disassembler/LLVMBuild.txt b/llvm/lib/Target/AArch64/Disassembler/LLVMBuild.txt deleted file mode 100644 index 05c4ed1646b..00000000000 --- a/llvm/lib/Target/AArch64/Disassembler/LLVMBuild.txt +++ /dev/null @@ -1,23 +0,0 @@ -;===- ./lib/Target/AArch64/Disassembler/LLVMBuild.txt ----------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. 
-; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[component_0] -type = Library -name = AArch64Disassembler -parent = AArch64 -required_libraries = AArch64Info AArch64Utils MC Support -add_to_library_groups = AArch64 diff --git a/llvm/lib/Target/AArch64/Disassembler/Makefile b/llvm/lib/Target/AArch64/Disassembler/Makefile deleted file mode 100644 index 5c861207f83..00000000000 --- a/llvm/lib/Target/AArch64/Disassembler/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/AArch64/Disassembler/Makefile ------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. -LIBRARYNAME = LLVMAArch64Disassembler - -# Hack: we need to include 'main' target directory to grab private headers -CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp deleted file mode 100644 index d9571238a03..00000000000 --- a/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp +++ /dev/null @@ -1,549 +0,0 @@ -//==-- AArch64InstPrinter.cpp - Convert AArch64 MCInst to assembly syntax --==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This class prints an AArch64 MCInst to a .s file. -// -//===----------------------------------------------------------------------===// - -#include "AArch64InstPrinter.h" -#include "MCTargetDesc/AArch64MCTargetDesc.h" -#include "Utils/AArch64BaseInfo.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -#define DEBUG_TYPE "asm-printer" - -#define GET_INSTRUCTION_NAME -#define PRINT_ALIAS_INSTR -#include "AArch64GenAsmWriter.inc" - -static int64_t unpackSignedImm(int BitWidth, uint64_t Value) { - assert(!(Value & ~((1ULL << BitWidth)-1)) && "immediate not n-bit"); - if (Value & (1ULL << (BitWidth - 1))) - return static_cast<int64_t>(Value) - (1LL << BitWidth); - else - return Value; -} - -AArch64InstPrinter::AArch64InstPrinter(const MCAsmInfo &MAI, - const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) : - MCInstPrinter(MAI, MII, MRI) { - // Initialize the set of available features. 
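The unpackSignedImm helper above is a plain two's-complement sign extension of an N-bit field. A self-contained sketch of the same behaviour (signExtend is an illustrative name, not an LLVM API):

    // Standalone sketch; assumes the value already fits in bitWidth bits,
    // mirroring the assert in the original helper.
    #include <cassert>
    #include <cstdint>

    static int64_t signExtend(int bitWidth, uint64_t value) {
      if (value & (1ULL << (bitWidth - 1)))
        return static_cast<int64_t>(value) - (1LL << bitWidth);
      return static_cast<int64_t>(value);
    }

    int main() {
      assert(signExtend(9, 0x0FF) == 255);  // top bit clear: value unchanged
      assert(signExtend(9, 0x1FF) == -1);   // top bit set: wraps negative
      assert(signExtend(7, 0x40) == -64);   // SImm7 minimum
    }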
- setAvailableFeatures(STI.getFeatureBits()); -} - -void AArch64InstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { - OS << getRegisterName(RegNo); -} - -void -AArch64InstPrinter::printOffsetSImm9Operand(const MCInst *MI, - unsigned OpNum, raw_ostream &O) { - const MCOperand &MOImm = MI->getOperand(OpNum); - int32_t Imm = unpackSignedImm(9, MOImm.getImm()); - - O << '#' << Imm; -} - -void -AArch64InstPrinter::printAddrRegExtendOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O, unsigned MemSize, - unsigned RmSize) { - unsigned ExtImm = MI->getOperand(OpNum).getImm(); - unsigned OptionHi = ExtImm >> 1; - unsigned S = ExtImm & 1; - bool IsLSL = OptionHi == 1 && RmSize == 64; - - const char *Ext; - switch (OptionHi) { - case 1: - Ext = (RmSize == 32) ? "uxtw" : "lsl"; - break; - case 3: - Ext = (RmSize == 32) ? "sxtw" : "sxtx"; - break; - default: - llvm_unreachable("Incorrect Option on load/store (reg offset)"); - } - O << Ext; - - if (S) { - unsigned ShiftAmt = Log2_32(MemSize); - O << " #" << ShiftAmt; - } else if (IsLSL) { - O << " #0"; - } -} - -void -AArch64InstPrinter::printAddSubImmLSL0Operand(const MCInst *MI, - unsigned OpNum, raw_ostream &O) { - const MCOperand &Imm12Op = MI->getOperand(OpNum); - - if (Imm12Op.isImm()) { - int64_t Imm12 = Imm12Op.getImm(); - assert(Imm12 >= 0 && "Invalid immediate for add/sub imm"); - O << "#" << Imm12; - } else { - assert(Imm12Op.isExpr() && "Unexpected shift operand type"); - O << "#" << *Imm12Op.getExpr(); - } -} - -void -AArch64InstPrinter::printAddSubImmLSL12Operand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - - printAddSubImmLSL0Operand(MI, OpNum, O); - - O << ", lsl #12"; -} - -void -AArch64InstPrinter::printBareImmOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &MO = MI->getOperand(OpNum); - O << MO.getImm(); -} - -template<unsigned RegWidth> void -AArch64InstPrinter::printBFILSBOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &ImmROp = MI->getOperand(OpNum); - unsigned LSB = ImmROp.getImm() == 0 ? 
0 : RegWidth - ImmROp.getImm(); - - O << '#' << LSB; -} - -void AArch64InstPrinter::printBFIWidthOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &ImmSOp = MI->getOperand(OpNum); - unsigned Width = ImmSOp.getImm() + 1; - - O << '#' << Width; -} - -void -AArch64InstPrinter::printBFXWidthOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &ImmSOp = MI->getOperand(OpNum); - const MCOperand &ImmROp = MI->getOperand(OpNum - 1); - - unsigned ImmR = ImmROp.getImm(); - unsigned ImmS = ImmSOp.getImm(); - - assert(ImmS >= ImmR && "Invalid ImmR, ImmS combination for bitfield extract"); - - O << '#' << (ImmS - ImmR + 1); -} - -void -AArch64InstPrinter::printCRxOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &CRx = MI->getOperand(OpNum); - - O << 'c' << CRx.getImm(); -} - - -void -AArch64InstPrinter::printCVTFixedPosOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &ScaleOp = MI->getOperand(OpNum); - - O << '#' << (64 - ScaleOp.getImm()); -} - - -void AArch64InstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &o) { - const MCOperand &MOImm8 = MI->getOperand(OpNum); - - assert(MOImm8.isImm() - && "Immediate operand required for floating-point immediate inst"); - - uint32_t Imm8 = MOImm8.getImm(); - uint32_t Fraction = Imm8 & 0xf; - uint32_t Exponent = (Imm8 >> 4) & 0x7; - uint32_t Negative = (Imm8 >> 7) & 0x1; - - float Val = 1.0f + Fraction / 16.0f; - - // That is: - // 000 -> 2^1, 001 -> 2^2, 010 -> 2^3, 011 -> 2^4, - // 100 -> 2^-3, 101 -> 2^-2, 110 -> 2^-1, 111 -> 2^0 - if (Exponent & 0x4) { - Val /= 1 << (7 - Exponent); - } else { - Val *= 1 << (Exponent + 1); - } - - Val = Negative ? -Val : Val; - - o << '#' << format("%.8f", Val); -} - -void AArch64InstPrinter::printFPZeroOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &o) { - o << "#0.0"; -} - -void -AArch64InstPrinter::printCondCodeOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &MO = MI->getOperand(OpNum); - - O << A64CondCodeToString(static_cast<A64CC::CondCodes>(MO.getImm())); -} - -void -AArch64InstPrinter::printInverseCondCodeOperand(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { - A64CC::CondCodes CC = - static_cast<A64CC::CondCodes>(MI->getOperand(OpNum).getImm()); - O << A64CondCodeToString(A64InvertCondCode(CC)); -} - -template <unsigned field_width, unsigned scale> void -AArch64InstPrinter::printLabelOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &MO = MI->getOperand(OpNum); - - if (!MO.isImm()) { - printOperand(MI, OpNum, O); - return; - } - - // The immediate of LDR (lit) instructions is a signed 19-bit immediate, which - // is multiplied by 4 (because all A64 instructions are 32-bits wide). 
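The 8-bit floating-point immediate decoded by printFPImmOperand above packs a sign bit, a 3-bit exponent and a 4-bit fraction. The following standalone sketch reproduces the same formula and shows a few decoded values; decodeFPImm8 is an illustrative name, not part of the original file.

    // Standalone sketch of the FP8 immediate expansion above.
    #include <cassert>

    static float decodeFPImm8(unsigned imm8) {
      unsigned fraction = imm8 & 0xf;
      unsigned exponent = (imm8 >> 4) & 0x7;
      unsigned negative = (imm8 >> 7) & 0x1;
      float val = 1.0f + fraction / 16.0f;
      if (exponent & 0x4)
        val /= 1 << (7 - exponent);   // 100..111 -> scale by 2^-3 .. 2^0
      else
        val *= 1 << (exponent + 1);   // 000..011 -> scale by 2^1  .. 2^4
      return negative ? -val : val;
    }

    int main() {
      assert(decodeFPImm8(0x00) == 2.0f);   // +1.0 * 2^1
      assert(decodeFPImm8(0x70) == 1.0f);   // +1.0 * 2^0
      assert(decodeFPImm8(0x60) == 0.5f);   // +1.0 * 2^-1
      assert(decodeFPImm8(0xF0) == -1.0f);  // sign bit set
    }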
- uint64_t UImm = MO.getImm(); - uint64_t Sign = UImm & (1LL << (field_width - 1)); - int64_t SImm = scale * ((UImm & ~Sign) - Sign); - - O << "#" << SImm; -} - -template<unsigned RegWidth> void -AArch64InstPrinter::printLogicalImmOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &MO = MI->getOperand(OpNum); - uint64_t Val; - A64Imms::isLogicalImmBits(RegWidth, MO.getImm(), Val); - O << "#0x"; - O.write_hex(Val); -} - -void -AArch64InstPrinter::printOffsetUImm12Operand(const MCInst *MI, unsigned OpNum, - raw_ostream &O, int MemSize) { - const MCOperand &MOImm = MI->getOperand(OpNum); - - if (MOImm.isImm()) { - uint32_t Imm = MOImm.getImm() * MemSize; - - O << "#" << Imm; - } else { - O << "#" << *MOImm.getExpr(); - } -} - -void -AArch64InstPrinter::printShiftOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O, - A64SE::ShiftExtSpecifiers Shift) { - const MCOperand &MO = MI->getOperand(OpNum); - - // LSL #0 is not printed - if (Shift == A64SE::LSL && MO.isImm() && MO.getImm() == 0) - return; - - switch (Shift) { - case A64SE::LSL: O << "lsl"; break; - case A64SE::LSR: O << "lsr"; break; - case A64SE::ASR: O << "asr"; break; - case A64SE::ROR: O << "ror"; break; - default: llvm_unreachable("Invalid shift specifier in logical instruction"); - } - - O << " #" << MO.getImm(); -} - -void -AArch64InstPrinter::printMoveWideImmOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &UImm16MO = MI->getOperand(OpNum); - const MCOperand &ShiftMO = MI->getOperand(OpNum + 1); - - if (UImm16MO.isImm()) { - O << '#' << UImm16MO.getImm(); - - if (ShiftMO.getImm() != 0) - O << ", lsl #" << (ShiftMO.getImm() * 16); - - return; - } - - O << "#" << *UImm16MO.getExpr(); -} - -void AArch64InstPrinter::printNamedImmOperand(const NamedImmMapper &Mapper, - const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - bool ValidName; - const MCOperand &MO = MI->getOperand(OpNum); - StringRef Name = Mapper.toString(MO.getImm(), ValidName); - - if (ValidName) - O << Name; - else - O << '#' << MO.getImm(); -} - -void -AArch64InstPrinter::printSysRegOperand(const A64SysReg::SysRegMapper &Mapper, - const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &MO = MI->getOperand(OpNum); - - bool ValidName; - std::string Name = Mapper.toString(MO.getImm(), ValidName); - if (ValidName) { - O << Name; - return; - } -} - - -void AArch64InstPrinter::printRegExtendOperand(const MCInst *MI, - unsigned OpNum, - raw_ostream &O, - A64SE::ShiftExtSpecifiers Ext) { - // FIXME: In principle TableGen should be able to detect this itself far more - // easily. We will only accumulate more of these hacks. 
- unsigned Reg0 = MI->getOperand(0).getReg(); - unsigned Reg1 = MI->getOperand(1).getReg(); - - if (isStackReg(Reg0) || isStackReg(Reg1)) { - A64SE::ShiftExtSpecifiers LSLEquiv; - - if (Reg0 == AArch64::XSP || Reg1 == AArch64::XSP) - LSLEquiv = A64SE::UXTX; - else - LSLEquiv = A64SE::UXTW; - - if (Ext == LSLEquiv) { - O << "lsl #" << MI->getOperand(OpNum).getImm(); - return; - } - } - - switch (Ext) { - case A64SE::UXTB: O << "uxtb"; break; - case A64SE::UXTH: O << "uxth"; break; - case A64SE::UXTW: O << "uxtw"; break; - case A64SE::UXTX: O << "uxtx"; break; - case A64SE::SXTB: O << "sxtb"; break; - case A64SE::SXTH: O << "sxth"; break; - case A64SE::SXTW: O << "sxtw"; break; - case A64SE::SXTX: O << "sxtx"; break; - default: llvm_unreachable("Unexpected shift type for printing"); - } - - const MCOperand &MO = MI->getOperand(OpNum); - if (MO.getImm() != 0) - O << " #" << MO.getImm(); -} - -template<int MemScale> void -AArch64InstPrinter::printSImm7ScaledOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &MOImm = MI->getOperand(OpNum); - int32_t Imm = unpackSignedImm(7, MOImm.getImm()); - - O << "#" << (Imm * MemScale); -} - -void AArch64InstPrinter::printVPRRegister(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - unsigned Reg = MI->getOperand(OpNo).getReg(); - std::string Name = getRegisterName(Reg); - Name[0] = 'v'; - O << Name; -} - -void AArch64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - const MCOperand &Op = MI->getOperand(OpNo); - if (Op.isReg()) { - unsigned Reg = Op.getReg(); - O << getRegisterName(Reg); - } else if (Op.isImm()) { - O << '#' << Op.getImm(); - } else { - assert(Op.isExpr() && "unknown operand kind in printOperand"); - // If a symbolic branch target was added as a constant expression then print - // that address in hex. - const MCConstantExpr *BranchTarget = dyn_cast<MCConstantExpr>(Op.getExpr()); - int64_t Address; - if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) { - O << "0x"; - O.write_hex(Address); - } - else { - // Otherwise, just print the expression. - O << *Op.getExpr(); - } - } -} - - -void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O, - StringRef Annot) { - if (MI->getOpcode() == AArch64::TLSDESCCALL) { - // This is a special assembler directive which applies an - // R_AARCH64_TLSDESC_CALL to the following (BLR) instruction. It has a fixed - // form outside the normal TableGenerated scheme. - O << "\t.tlsdesccall " << *MI->getOperand(0).getExpr(); - } else if (!printAliasInstr(MI, O)) - printInstruction(MI, O); - - printAnnotation(O, Annot); -} - -template <A64SE::ShiftExtSpecifiers Ext, bool isHalf> -void AArch64InstPrinter::printNeonMovImmShiftOperand(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { - const MCOperand &MO = MI->getOperand(OpNum); - - assert(MO.isImm() && - "Immediate operand required for Neon vector immediate inst."); - - bool IsLSL = false; - if (Ext == A64SE::LSL) - IsLSL = true; - else if (Ext != A64SE::MSL) - llvm_unreachable("Invalid shift specifier in movi instruction"); - - int64_t Imm = MO.getImm(); - - // MSL and LSLH accepts encoded shift amount 0 or 1. - if ((!IsLSL || (IsLSL && isHalf)) && Imm != 0 && Imm != 1) - llvm_unreachable("Invalid shift amount in movi instruction"); - - // LSH accepts encoded shift amount 0, 1, 2 or 3. 
- if (IsLSL && (Imm < 0 || Imm > 3)) - llvm_unreachable("Invalid shift amount in movi instruction"); - - // Print shift amount as multiple of 8 with MSL encoded shift amount - // 0 and 1 printed as 8 and 16. - if (!IsLSL) - Imm++; - Imm *= 8; - - // LSL #0 is not printed - if (IsLSL) { - if (Imm == 0) - return; - O << ", lsl"; - } else - O << ", msl"; - - O << " #" << Imm; -} - -void AArch64InstPrinter::printNeonUImm0Operand(const MCInst *MI, unsigned OpNum, - raw_ostream &o) { - o << "#0x0"; -} - -void AArch64InstPrinter::printUImmHexOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &MOUImm = MI->getOperand(OpNum); - - assert(MOUImm.isImm() && - "Immediate operand required for Neon vector immediate inst."); - - unsigned Imm = MOUImm.getImm(); - - O << "#0x"; - O.write_hex(Imm); -} - -void AArch64InstPrinter::printUImmBareOperand(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { - const MCOperand &MOUImm = MI->getOperand(OpNum); - - assert(MOUImm.isImm() - && "Immediate operand required for Neon vector immediate inst."); - - unsigned Imm = MOUImm.getImm(); - O << Imm; -} - -void AArch64InstPrinter::printNeonUImm64MaskOperand(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { - const MCOperand &MOUImm8 = MI->getOperand(OpNum); - - assert(MOUImm8.isImm() && - "Immediate operand required for Neon vector immediate bytemask inst."); - - uint32_t UImm8 = MOUImm8.getImm(); - uint64_t Mask = 0; - - // Replicates 0x00 or 0xff byte in a 64-bit vector - for (unsigned ByteNum = 0; ByteNum < 8; ++ByteNum) { - if ((UImm8 >> ByteNum) & 1) - Mask |= (uint64_t)0xff << (8 * ByteNum); - } - - O << "#0x"; - O.write_hex(Mask); -} - -// If Count > 1, there are two valid kinds of vector list: -// (1) {Vn.layout, Vn+1.layout, ... , Vm.layout} -// (2) {Vn.layout - Vm.layout} -// We choose the first kind as output. -template <A64Layout::VectorLayout Layout, unsigned Count> -void AArch64InstPrinter::printVectorList(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - assert(Count >= 1 && Count <= 4 && "Invalid Number of Vectors"); - - unsigned Reg = MI->getOperand(OpNum).getReg(); - std::string LayoutStr = A64VectorLayoutToString(Layout); - O << "{ "; - if (Count > 1) { // Print sub registers separately - bool IsVec64 = (Layout < A64Layout::VL_16B); - unsigned SubRegIdx = IsVec64 ? AArch64::dsub_0 : AArch64::qsub_0; - for (unsigned I = 0; I < Count; I++) { - std::string Name = getRegisterName(MRI.getSubReg(Reg, SubRegIdx++)); - Name[0] = 'v'; - O << Name << LayoutStr; - if (I != Count - 1) - O << ", "; - } - } else { // Print the register directly when NumVecs is 1. - std::string Name = getRegisterName(Reg); - Name[0] = 'v'; - O << Name << LayoutStr; - } - O << " }"; -} diff --git a/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h b/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h deleted file mode 100644 index 7432755dd89..00000000000 --- a/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h +++ /dev/null @@ -1,186 +0,0 @@ -//===-- AArch64InstPrinter.h - Convert AArch64 MCInst to assembly syntax --===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This class prints an AArch64 MCInst to a .s file. 
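printNeonUImm64MaskOperand above expands each set bit of the 8-bit immediate into a 0xff byte of a 64-bit mask. A minimal sketch of that expansion (expandByteMask is an illustrative name):

    // Standalone sketch of the byte-mask replication above.
    #include <cassert>
    #include <cstdint>

    static uint64_t expandByteMask(unsigned imm8) {
      uint64_t mask = 0;
      for (unsigned byte = 0; byte < 8; ++byte)
        if ((imm8 >> byte) & 1)
          mask |= UINT64_C(0xff) << (8 * byte);   // set bit -> 0xff byte
      return mask;
    }

    int main() {
      assert(expandByteMask(0x0F) == UINT64_C(0x00000000FFFFFFFF));
      assert(expandByteMask(0xA5) == UINT64_C(0xFF00FF0000FF00FF));
    }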
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_AARCH64INSTPRINTER_H -#define LLVM_AARCH64INSTPRINTER_H - -#include "MCTargetDesc/AArch64MCTargetDesc.h" -#include "Utils/AArch64BaseInfo.h" -#include "llvm/MC/MCInstPrinter.h" -#include "llvm/MC/MCSubtargetInfo.h" - -namespace llvm { - -class MCOperand; - -class AArch64InstPrinter : public MCInstPrinter { -public: - AArch64InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, const MCSubtargetInfo &STI); - - // Autogenerated by tblgen - void printInstruction(const MCInst *MI, raw_ostream &O); - bool printAliasInstr(const MCInst *MI, raw_ostream &O); - void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx, - unsigned PrintMethodIdx, raw_ostream &O); - static const char *getRegisterName(unsigned RegNo); - static const char *getInstructionName(unsigned Opcode); - - void printRegName(raw_ostream &O, unsigned RegNum) const override; - - template<unsigned MemSize, unsigned RmSize> - void printAddrRegExtendOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - printAddrRegExtendOperand(MI, OpNum, O, MemSize, RmSize); - } - - - void printAddrRegExtendOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O, unsigned MemSize, - unsigned RmSize); - - void printAddSubImmLSL0Operand(const MCInst *MI, - unsigned OpNum, raw_ostream &O); - void printAddSubImmLSL12Operand(const MCInst *MI, - unsigned OpNum, raw_ostream &O); - - void printBareImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - - template<unsigned RegWidth> - void printBFILSBOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printBFIWidthOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printBFXWidthOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - - - void printCondCodeOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - void printInverseCondCodeOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - - void printCRxOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - - void printCVTFixedPosOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - - void printFPImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &o); - - void printFPZeroOperand(const MCInst *MI, unsigned OpNum, raw_ostream &o); - - template<int MemScale> - void printOffsetUImm12Operand(const MCInst *MI, - unsigned OpNum, raw_ostream &o) { - printOffsetUImm12Operand(MI, OpNum, o, MemScale); - } - - void printOffsetUImm12Operand(const MCInst *MI, unsigned OpNum, - raw_ostream &o, int MemScale); - - template<unsigned field_width, unsigned scale> - void printLabelOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - - template<unsigned RegWidth> - void printLogicalImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - - template<typename SomeNamedImmMapper> - void printNamedImmOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - printNamedImmOperand(SomeNamedImmMapper(), MI, OpNum, O); - } - - void printNamedImmOperand(const NamedImmMapper &Mapper, - const MCInst *MI, unsigned OpNum, - raw_ostream &O); - - void printSysRegOperand(const A64SysReg::SysRegMapper &Mapper, - const MCInst *MI, unsigned OpNum, - raw_ostream &O); - - void printMRSOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - printSysRegOperand(A64SysReg::MRSMapper(), MI, OpNum, O); - } - - void printMSROperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - printSysRegOperand(A64SysReg::MSRMapper(), MI, OpNum, O); - } - - 
void printShiftOperand(const char *name, const MCInst *MI, - unsigned OpIdx, raw_ostream &O); - - void printLSLOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - - void printLSROperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { - printShiftOperand("lsr", MI, OpNum, O); - } - void printASROperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { - printShiftOperand("asr", MI, OpNum, O); - } - void printROROperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { - printShiftOperand("ror", MI, OpNum, O); - } - - template<A64SE::ShiftExtSpecifiers Shift> - void printShiftOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { - printShiftOperand(MI, OpNum, O, Shift); - } - - void printShiftOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O, A64SE::ShiftExtSpecifiers Sh); - - - void printMoveWideImmOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - - template<int MemSize> void - printSImm7ScaledOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - - void printOffsetSImm9Operand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - - void printPRFMOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - - template<A64SE::ShiftExtSpecifiers EXT> - void printRegExtendOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - printRegExtendOperand(MI, OpNum, O, EXT); - } - - void printRegExtendOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O, A64SE::ShiftExtSpecifiers Ext); - - void printVPRRegister(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override; - - bool isStackReg(unsigned RegNo) { - return RegNo == AArch64::XSP || RegNo == AArch64::WSP; - } - - template <A64SE::ShiftExtSpecifiers Ext, bool IsHalf> - void printNeonMovImmShiftOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - void printNeonUImm0Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printUImmHexOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printUImmBareOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printNeonUImm64MaskOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - - template <A64Layout::VectorLayout Layout, unsigned Count> - void printVectorList(const MCInst *MI, unsigned OpNum, raw_ostream &O); -}; -} - -#endif diff --git a/llvm/lib/Target/AArch64/InstPrinter/CMakeLists.txt b/llvm/lib/Target/AArch64/InstPrinter/CMakeLists.txt deleted file mode 100644 index 3db56e4733f..00000000000 --- a/llvm/lib/Target/AArch64/InstPrinter/CMakeLists.txt +++ /dev/null @@ -1,3 +0,0 @@ -add_llvm_library(LLVMAArch64AsmPrinter - AArch64InstPrinter.cpp - ) diff --git a/llvm/lib/Target/AArch64/InstPrinter/LLVMBuild.txt b/llvm/lib/Target/AArch64/InstPrinter/LLVMBuild.txt deleted file mode 100644 index 4836c7c45d4..00000000000 --- a/llvm/lib/Target/AArch64/InstPrinter/LLVMBuild.txt +++ /dev/null @@ -1,24 +0,0 @@ -;===- ./lib/Target/AArch64/InstPrinter/LLVMBuild.txt -----------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. 
-; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[component_0] -type = Library -name = AArch64AsmPrinter -parent = AArch64 -required_libraries = AArch64Utils MC Support -add_to_library_groups = AArch64 - diff --git a/llvm/lib/Target/AArch64/InstPrinter/Makefile b/llvm/lib/Target/AArch64/InstPrinter/Makefile deleted file mode 100644 index 1c36a8dea79..00000000000 --- a/llvm/lib/Target/AArch64/InstPrinter/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/AArch64/AsmPrinter/Makefile --------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMAArch64AsmPrinter - -# Hack: we need to include 'main' target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/llvm/lib/Target/AArch64/LLVMBuild.txt b/llvm/lib/Target/AArch64/LLVMBuild.txt deleted file mode 100644 index 1b838282bd4..00000000000 --- a/llvm/lib/Target/AArch64/LLVMBuild.txt +++ /dev/null @@ -1,35 +0,0 @@ -;===- ./lib/Target/AArch64/LLVMBuild.txt -----------------------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. -; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[common] -subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo Utils - -[component_0] -type = TargetGroup -name = AArch64 -parent = Target -has_asmparser = 1 -has_asmprinter = 1 -has_disassembler = 1 -has_jit = 1 - -[component_1] -type = Library -name = AArch64CodeGen -parent = AArch64 -required_libraries = AArch64AsmPrinter AArch64Desc AArch64Info AArch64Utils Analysis AsmPrinter CodeGen Core MC SelectionDAG Support Target -add_to_library_groups = AArch64 diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp deleted file mode 100644 index e0931e42078..00000000000 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp +++ /dev/null @@ -1,593 +0,0 @@ -//===-- AArch64AsmBackend.cpp - AArch64 Assembler Backend -----------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the AArch64 implementation of the MCAsmBackend class, -// which is principally concerned with relaxation of the various fixup kinds. 
-// -//===----------------------------------------------------------------------===// - -#include "MCTargetDesc/AArch64FixupKinds.h" -#include "MCTargetDesc/AArch64MCTargetDesc.h" -#include "llvm/MC/MCAsmBackend.h" -#include "llvm/MC/MCELFObjectWriter.h" -#include "llvm/MC/MCFixupKindInfo.h" -#include "llvm/MC/MCObjectWriter.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Support/ELF.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -namespace { -class AArch64AsmBackend : public MCAsmBackend { - const MCSubtargetInfo* STI; -public: - AArch64AsmBackend(const Target &T, const StringRef TT) - : MCAsmBackend(), - STI(AArch64_MC::createAArch64MCSubtargetInfo(TT, "", "")) - {} - - - ~AArch64AsmBackend() { - delete STI; - } - - bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override; - - virtual void processFixupValue(const MCAssembler &Asm, - const MCAsmLayout &Layout, - const MCFixup &Fixup, const MCFragment *DF, - const MCValue &Target, uint64_t &Value, - bool &IsResolved) override; -}; -} // end anonymous namespace - -void AArch64AsmBackend::processFixupValue(const MCAssembler &Asm, - const MCAsmLayout &Layout, - const MCFixup &Fixup, - const MCFragment *DF, - const MCValue &Target, - uint64_t &Value, bool &IsResolved) { - // The ADRP instruction adds some multiple of 0x1000 to the current PC & - // ~0xfff. This means that the required offset to reach a symbol can vary by - // up to one step depending on where the ADRP is in memory. For example: - // - // ADRP x0, there - // there: - // - // If the ADRP occurs at address 0xffc then "there" will be at 0x1000 and - // we'll need that as an offset. At any other address "there" will be in the - // same page as the ADRP and the instruction should encode 0x0. Assuming the - // section isn't 0x1000-aligned, we therefore need to delegate this decision - // to the linker -- a relocation! - if ((uint32_t)Fixup.getKind() == AArch64::fixup_a64_adr_prel_page || - (uint32_t)Fixup.getKind() == AArch64::fixup_a64_adr_prel_got_page || - (uint32_t)Fixup.getKind() == AArch64::fixup_a64_adr_gottprel_page || - (uint32_t)Fixup.getKind() == AArch64::fixup_a64_tlsdesc_adr_page) - IsResolved = false; -} - - -static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value); - -namespace { - -class ELFAArch64AsmBackend : public AArch64AsmBackend { - uint8_t OSABI; - bool IsLittle; // Big or little endian -public: - ELFAArch64AsmBackend(const Target &T, const StringRef TT, - uint8_t _OSABI, bool isLittle) - : AArch64AsmBackend(T, TT), OSABI(_OSABI), IsLittle(isLittle) { } - - bool fixupNeedsRelaxation(const MCFixup &Fixup, - uint64_t Value, - const MCRelaxableFragment *DF, - const MCAsmLayout &Layout) const override; - - unsigned int getNumFixupKinds() const override { - return AArch64::NumTargetFixupKinds; - } - - const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override { - const static MCFixupKindInfo Infos[AArch64::NumTargetFixupKinds] = { -// This table *must* be in the order that the fixup_* kinds are defined in -// AArch64FixupKinds.h. 
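The processFixupValue comment above hinges on the fact that ADRP encodes a page delta, Page(target) - Page(PC) with Page(x) = x & ~0xfff, so the required immediate depends on which 4KiB page the ADRP itself lands in. A small sketch of that arithmetic (adrpPageDelta is an illustrative name and the addresses are made up):

    // Standalone sketch of why the ADRP offset is left to the linker.
    #include <cassert>
    #include <cstdint>

    static int64_t adrpPageDelta(uint64_t pc, uint64_t target) {
      return (int64_t)((target & ~0xfffULL) - (pc & ~0xfffULL));
    }

    int main() {
      // "there" is 4 bytes after the ADRP in both cases, but the page delta
      // differs depending on where the pair happens to be placed.
      assert(adrpPageDelta(0x0ff8, 0x0ffc) == 0);       // same page
      assert(adrpPageDelta(0x0ffc, 0x1000) == 0x1000);  // ADRP at end of page
    }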
-// -// Name Offset (bits) Size (bits) Flags -{ "fixup_a64_ld_prel", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_a64_adr_prel", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_a64_adr_prel_page", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_a64_add_lo12", 0, 32, 0 }, -{ "fixup_a64_ldst8_lo12", 0, 32, 0 }, -{ "fixup_a64_ldst16_lo12", 0, 32, 0 }, -{ "fixup_a64_ldst32_lo12", 0, 32, 0 }, -{ "fixup_a64_ldst64_lo12", 0, 32, 0 }, -{ "fixup_a64_ldst128_lo12", 0, 32, 0 }, -{ "fixup_a64_tstbr", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_a64_condbr", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_a64_uncondbr", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_a64_call", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_a64_movw_uabs_g0", 0, 32, 0 }, -{ "fixup_a64_movw_uabs_g0_nc", 0, 32, 0 }, -{ "fixup_a64_movw_uabs_g1", 0, 32, 0 }, -{ "fixup_a64_movw_uabs_g1_nc", 0, 32, 0 }, -{ "fixup_a64_movw_uabs_g2", 0, 32, 0 }, -{ "fixup_a64_movw_uabs_g2_nc", 0, 32, 0 }, -{ "fixup_a64_movw_uabs_g3", 0, 32, 0 }, -{ "fixup_a64_movw_sabs_g0", 0, 32, 0 }, -{ "fixup_a64_movw_sabs_g1", 0, 32, 0 }, -{ "fixup_a64_movw_sabs_g2", 0, 32, 0 }, -{ "fixup_a64_adr_prel_got_page", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_a64_ld64_got_lo12_nc", 0, 32, 0 }, -{ "fixup_a64_movw_dtprel_g2", 0, 32, 0 }, -{ "fixup_a64_movw_dtprel_g1", 0, 32, 0 }, -{ "fixup_a64_movw_dtprel_g1_nc", 0, 32, 0 }, -{ "fixup_a64_movw_dtprel_g0", 0, 32, 0 }, -{ "fixup_a64_movw_dtprel_g0_nc", 0, 32, 0 }, -{ "fixup_a64_add_dtprel_hi12", 0, 32, 0 }, -{ "fixup_a64_add_dtprel_lo12", 0, 32, 0 }, -{ "fixup_a64_add_dtprel_lo12_nc", 0, 32, 0 }, -{ "fixup_a64_ldst8_dtprel_lo12", 0, 32, 0 }, -{ "fixup_a64_ldst8_dtprel_lo12_nc", 0, 32, 0 }, -{ "fixup_a64_ldst16_dtprel_lo12", 0, 32, 0 }, -{ "fixup_a64_ldst16_dtprel_lo12_nc", 0, 32, 0 }, -{ "fixup_a64_ldst32_dtprel_lo12", 0, 32, 0 }, -{ "fixup_a64_ldst32_dtprel_lo12_nc", 0, 32, 0 }, -{ "fixup_a64_ldst64_dtprel_lo12", 0, 32, 0 }, -{ "fixup_a64_ldst64_dtprel_lo12_nc", 0, 32, 0 }, -{ "fixup_a64_movw_gottprel_g1", 0, 32, 0 }, -{ "fixup_a64_movw_gottprel_g0_nc", 0, 32, 0 }, -{ "fixup_a64_adr_gottprel_page", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_a64_ld64_gottprel_lo12_nc", 0, 32, 0 }, -{ "fixup_a64_ld_gottprel_prel19", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_a64_movw_tprel_g2", 0, 32, 0 }, -{ "fixup_a64_movw_tprel_g1", 0, 32, 0 }, -{ "fixup_a64_movw_tprel_g1_nc", 0, 32, 0 }, -{ "fixup_a64_movw_tprel_g0", 0, 32, 0 }, -{ "fixup_a64_movw_tprel_g0_nc", 0, 32, 0 }, -{ "fixup_a64_add_tprel_hi12", 0, 32, 0 }, -{ "fixup_a64_add_tprel_lo12", 0, 32, 0 }, -{ "fixup_a64_add_tprel_lo12_nc", 0, 32, 0 }, -{ "fixup_a64_ldst8_tprel_lo12", 0, 32, 0 }, -{ "fixup_a64_ldst8_tprel_lo12_nc", 0, 32, 0 }, -{ "fixup_a64_ldst16_tprel_lo12", 0, 32, 0 }, -{ "fixup_a64_ldst16_tprel_lo12_nc", 0, 32, 0 }, -{ "fixup_a64_ldst32_tprel_lo12", 0, 32, 0 }, -{ "fixup_a64_ldst32_tprel_lo12_nc", 0, 32, 0 }, -{ "fixup_a64_ldst64_tprel_lo12", 0, 32, 0 }, -{ "fixup_a64_ldst64_tprel_lo12_nc", 0, 32, 0 }, -{ "fixup_a64_tlsdesc_adr_page", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_a64_tlsdesc_ld64_lo12_nc", 0, 32, 0 }, -{ "fixup_a64_tlsdesc_add_lo12_nc", 0, 32, 0 }, -{ "fixup_a64_tlsdesc_call", 0, 0, 0 } - }; - if (Kind < FirstTargetFixupKind) - return MCAsmBackend::getFixupKindInfo(Kind); - - assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && - "Invalid kind!"); - return Infos[Kind - FirstTargetFixupKind]; - } - - void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value, bool 
IsPCRel) const override { - unsigned NumBytes = getFixupKindInfo(Fixup.getKind()).TargetSize / 8; - Value = adjustFixupValue(Fixup.getKind(), Value); - if (!Value) return; // Doesn't change encoding. - - unsigned Offset = Fixup.getOffset(); - assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!"); - - // For each byte of the fragment that the fixup touches, mask in the bits - // from the fixup value. - for (unsigned i = 0; i != NumBytes; ++i) { - Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff); - } - } - - bool mayNeedRelaxation(const MCInst&) const override { - return false; - } - - void relaxInstruction(const MCInst&, llvm::MCInst&) const override { - llvm_unreachable("Cannot relax instructions"); - } - - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { - return createAArch64ELFObjectWriter(OS, OSABI, IsLittle); - } -}; - -} // end anonymous namespace - -bool -ELFAArch64AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, - uint64_t Value, - const MCRelaxableFragment *DF, - const MCAsmLayout &Layout) const { - // Correct for now. With all instructions 32-bit only very low-level - // considerations could make you select something which may fail. - return false; -} - - -bool AArch64AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { - // Can't emit NOP with size not multiple of 32-bits - if (Count % 4 != 0) - return false; - - uint64_t NumNops = Count / 4; - for (uint64_t i = 0; i != NumNops; ++i) - OW->Write32(0xd503201f); - - return true; -} - -static unsigned ADRImmBits(unsigned Value) { - unsigned lo2 = Value & 0x3; - unsigned hi19 = (Value & 0x1fffff) >> 2; - - return (hi19 << 5) | (lo2 << 29); -} - -static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) { - switch (Kind) { - default: - llvm_unreachable("Unknown fixup kind!"); - case FK_Data_2: - assert((int64_t)Value >= -32768 && - (int64_t)Value <= 65536 && - "Out of range ABS16 fixup"); - return Value; - case FK_Data_4: - assert((int64_t)Value >= -(1LL << 31) && - (int64_t)Value <= (1LL << 32) - 1 && - "Out of range ABS32 fixup"); - return Value; - case FK_Data_8: - return Value; - - case AArch64::fixup_a64_ld_gottprel_prel19: - // R_AARCH64_LD_GOTTPREL_PREL19: Set a load-literal immediate to bits 1F - // FFFC of G(TPREL(S+A)) - P; check -2^20 <= X < 2^20. - case AArch64::fixup_a64_ld_prel: - // R_AARCH64_LD_PREL_LO19: Sets a load-literal (immediate) value to bits - // 1F FFFC of S+A-P, checking that -2^20 <= S+A-P < 2^20. - assert((int64_t)Value >= -(1LL << 20) && - (int64_t)Value < (1LL << 20) && "Out of range LDR (lit) fixup"); - return (Value & 0x1ffffc) << 3; - - case AArch64::fixup_a64_adr_prel: - // R_AARCH64_ADR_PREL_LO21: Sets an ADR immediate value to bits 1F FFFF of - // the result of S+A-P, checking that -2^20 <= S+A-P < 2^20. - assert((int64_t)Value >= -(1LL << 20) && - (int64_t)Value < (1LL << 20) && "Out of range ADR fixup"); - return ADRImmBits(Value & 0x1fffff); - - case AArch64::fixup_a64_adr_prel_page: - // R_AARCH64_ADR_PREL_PG_HI21: Sets an ADRP immediate value to bits 1 FFFF - // F000 of the result of the operation, checking that -2^32 <= result < - // 2^32. - assert((int64_t)Value >= -(1LL << 32) && - (int64_t)Value < (1LL << 32) && "Out of range ADRP fixup"); - return ADRImmBits((Value & 0x1fffff000ULL) >> 12); - - case AArch64::fixup_a64_add_dtprel_hi12: - // R_AARCH64_TLSLD_ADD_DTPREL_LO12: Set an ADD immediate field to bits - // FF F000 of DTPREL(S+A), check 0 <= X < 2^24. 
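ADRImmBits above splits the 21-bit ADR/ADRP immediate into immlo (instruction bits 30:29) and immhi (bits 23:5). A standalone sketch of the same packing (packADRImm is an illustrative name):

    // Standalone sketch of the ADR/ADRP immediate packing above.
    #include <cassert>

    static unsigned packADRImm(unsigned value21) {
      unsigned lo2 = value21 & 0x3;                // immlo -> bits 30:29
      unsigned hi19 = (value21 & 0x1fffff) >> 2;   // immhi -> bits 23:5
      return (hi19 << 5) | (lo2 << 29);
    }

    int main() {
      assert(packADRImm(0x1) == (0x1u << 29));   // only immlo populated
      assert(packADRImm(0x4) == (0x1u << 5));    // only immhi populated
      assert(packADRImm(0x1fffff) == ((0x7ffffu << 5) | (0x3u << 29)));
    }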
- case AArch64::fixup_a64_add_tprel_hi12: - // R_AARCH64_TLSLD_ADD_TPREL_LO12: Set an ADD immediate field to bits - // FF F000 of TPREL(S+A), check 0 <= X < 2^24. - assert((int64_t)Value >= 0 && - (int64_t)Value < (1LL << 24) && "Out of range ADD fixup"); - return (Value & 0xfff000) >> 2; - - case AArch64::fixup_a64_add_dtprel_lo12: - // R_AARCH64_TLSLD_ADD_DTPREL_LO12: Set an ADD immediate field to bits - // FFF of DTPREL(S+A), check 0 <= X < 2^12. - case AArch64::fixup_a64_add_tprel_lo12: - // R_AARCH64_TLSLD_ADD_TPREL_LO12: Set an ADD immediate field to bits - // FFF of TPREL(S+A), check 0 <= X < 2^12. - assert((int64_t)Value >= 0 && - (int64_t)Value < (1LL << 12) && "Out of range ADD fixup"); - // ... fallthrough to no-checking versions ... - case AArch64::fixup_a64_add_dtprel_lo12_nc: - // R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC: Set an ADD immediate field to bits - // FFF of DTPREL(S+A) with no overflow check. - case AArch64::fixup_a64_add_tprel_lo12_nc: - // R_AARCH64_TLSLD_ADD_TPREL_LO12_NC: Set an ADD immediate field to bits - // FFF of TPREL(S+A) with no overflow check. - case AArch64::fixup_a64_tlsdesc_add_lo12_nc: - // R_AARCH64_TLSDESC_ADD_LO12_NC: Set an ADD immediate field to bits - // FFF of G(TLSDESC(S+A)), with no overflow check. - case AArch64::fixup_a64_add_lo12: - // R_AARCH64_ADD_ABS_LO12_NC: Sets an ADD immediate value to bits FFF of - // S+A, with no overflow check. - return (Value & 0xfff) << 10; - - case AArch64::fixup_a64_ldst8_dtprel_lo12: - // R_AARCH64_TLSLD_LDST8_DTPREL_LO12: Set an LD/ST offset field to bits FFF - // of DTPREL(S+A), check 0 <= X < 2^12. - case AArch64::fixup_a64_ldst8_tprel_lo12: - // R_AARCH64_TLSLE_LDST8_TPREL_LO12: Set an LD/ST offset field to bits FFF - // of DTPREL(S+A), check 0 <= X < 2^12. - assert((int64_t) Value >= 0 && - (int64_t) Value < (1LL << 12) && "Out of range LD/ST fixup"); - // ... fallthrough to no-checking versions ... - case AArch64::fixup_a64_ldst8_dtprel_lo12_nc: - // R_AARCH64_TLSLD_LDST8_DTPREL_LO12: Set an LD/ST offset field to bits FFF - // of DTPREL(S+A), with no overflow check. - case AArch64::fixup_a64_ldst8_tprel_lo12_nc: - // R_AARCH64_TLSLD_LDST8_TPREL_LO12: Set an LD/ST offset field to bits FFF - // of TPREL(S+A), with no overflow check. - case AArch64::fixup_a64_ldst8_lo12: - // R_AARCH64_LDST8_ABS_LO12_NC: Sets an LD/ST immediate value to bits FFF - // of S+A, with no overflow check. - return (Value & 0xfff) << 10; - - case AArch64::fixup_a64_ldst16_dtprel_lo12: - // R_AARCH64_TLSLD_LDST16_DTPREL_LO12: Set an LD/ST offset field to bits FFE - // of DTPREL(S+A), check 0 <= X < 2^12. - case AArch64::fixup_a64_ldst16_tprel_lo12: - // R_AARCH64_TLSLE_LDST16_TPREL_LO12: Set an LD/ST offset field to bits FFE - // of DTPREL(S+A), check 0 <= X < 2^12. - assert((int64_t) Value >= 0 && - (int64_t) Value < (1LL << 12) && "Out of range LD/ST fixup"); - // ... fallthrough to no-checking versions ... - case AArch64::fixup_a64_ldst16_dtprel_lo12_nc: - // R_AARCH64_TLSLD_LDST16_DTPREL_LO12: Set an LD/ST offset field to bits FFE - // of DTPREL(S+A), with no overflow check. - case AArch64::fixup_a64_ldst16_tprel_lo12_nc: - // R_AARCH64_TLSLD_LDST16_TPREL_LO12: Set an LD/ST offset field to bits FFE - // of TPREL(S+A), with no overflow check. - case AArch64::fixup_a64_ldst16_lo12: - // R_AARCH64_LDST16_ABS_LO12_NC: Sets an LD/ST immediate value to bits FFE - // of S+A, with no overflow check. 
- return (Value & 0xffe) << 9; - - case AArch64::fixup_a64_ldst32_dtprel_lo12: - // R_AARCH64_TLSLD_LDST32_DTPREL_LO12: Set an LD/ST offset field to bits FFC - // of DTPREL(S+A), check 0 <= X < 2^12. - case AArch64::fixup_a64_ldst32_tprel_lo12: - // R_AARCH64_TLSLE_LDST32_TPREL_LO12: Set an LD/ST offset field to bits FFC - // of DTPREL(S+A), check 0 <= X < 2^12. - assert((int64_t) Value >= 0 && - (int64_t) Value < (1LL << 12) && "Out of range LD/ST fixup"); - // ... fallthrough to no-checking versions ... - case AArch64::fixup_a64_ldst32_dtprel_lo12_nc: - // R_AARCH64_TLSLD_LDST32_DTPREL_LO12: Set an LD/ST offset field to bits FFC - // of DTPREL(S+A), with no overflow check. - case AArch64::fixup_a64_ldst32_tprel_lo12_nc: - // R_AARCH64_TLSLD_LDST32_TPREL_LO12: Set an LD/ST offset field to bits FFC - // of TPREL(S+A), with no overflow check. - case AArch64::fixup_a64_ldst32_lo12: - // R_AARCH64_LDST32_ABS_LO12_NC: Sets an LD/ST immediate value to bits FFC - // of S+A, with no overflow check. - return (Value & 0xffc) << 8; - - case AArch64::fixup_a64_ldst64_dtprel_lo12: - // R_AARCH64_TLSLD_LDST64_DTPREL_LO12: Set an LD/ST offset field to bits FF8 - // of DTPREL(S+A), check 0 <= X < 2^12. - case AArch64::fixup_a64_ldst64_tprel_lo12: - // R_AARCH64_TLSLE_LDST64_TPREL_LO12: Set an LD/ST offset field to bits FF8 - // of DTPREL(S+A), check 0 <= X < 2^12. - assert((int64_t) Value >= 0 && - (int64_t) Value < (1LL << 12) && "Out of range LD/ST fixup"); - // ... fallthrough to no-checking versions ... - case AArch64::fixup_a64_ldst64_dtprel_lo12_nc: - // R_AARCH64_TLSLD_LDST64_DTPREL_LO12: Set an LD/ST offset field to bits FF8 - // of DTPREL(S+A), with no overflow check. - case AArch64::fixup_a64_ldst64_tprel_lo12_nc: - // R_AARCH64_TLSLD_LDST64_TPREL_LO12: Set an LD/ST offset field to bits FF8 - // of TPREL(S+A), with no overflow check. - case AArch64::fixup_a64_ldst64_lo12: - // R_AARCH64_LDST64_ABS_LO12_NC: Sets an LD/ST immediate value to bits FF8 - // of S+A, with no overflow check. - return (Value & 0xff8) << 7; - - case AArch64::fixup_a64_ldst128_lo12: - // R_AARCH64_LDST128_ABS_LO12_NC: Sets an LD/ST immediate value to bits FF0 - // of S+A, with no overflow check. - return (Value & 0xff0) << 6; - - case AArch64::fixup_a64_movw_uabs_g0: - // R_AARCH64_MOVW_UABS_G0: Sets a MOVZ immediate field to bits FFFF of S+A - // with a check that S+A < 2^16 - assert(Value <= 0xffff && "Out of range move wide fixup"); - return (Value & 0xffff) << 5; - - case AArch64::fixup_a64_movw_dtprel_g0_nc: - // R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC: Sets a MOVK immediate field to bits - // FFFF of DTPREL(S+A) with no overflow check. - case AArch64::fixup_a64_movw_gottprel_g0_nc: - // R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC: Sets a MOVK immediate field to bits - // FFFF of G(TPREL(S+A)) - GOT with no overflow check. - case AArch64::fixup_a64_movw_tprel_g0_nc: - // R_AARCH64_TLSLE_MOVW_TPREL_G0_NC: Sets a MOVK immediate field to bits - // FFFF of TPREL(S+A) with no overflow check. - case AArch64::fixup_a64_movw_uabs_g0_nc: - // R_AARCH64_MOVW_UABS_G0_NC: Sets a MOVK immediate field to bits FFFF of - // S+A with no overflow check. 
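The LDST*_LO12 cases above all follow one pattern: the low 12 bits of the value are divided by the access size and placed in the imm12 field at bit 10, which is what the successive masks 0xfff/0xffe/0xffc/0xff8 and shifts 10/9/8/7 achieve. A sketch of the equivalent computation (encodeLdStLo12 is an illustrative name; it asserts alignment, which the _nc variants above do not check):

    // Standalone sketch of the scaled LD/ST LO12 encoding above.
    #include <cassert>
    #include <cstdint>

    static uint64_t encodeLdStLo12(uint64_t value, unsigned sizeBytes) {
      uint64_t lo12 = value & 0xfff;
      assert(lo12 % sizeBytes == 0 && "offset must be aligned to access size");
      return (lo12 / sizeBytes) << 10;   // imm12 field starts at bit 10
    }

    int main() {
      // Matches (Value & 0xfff) << 10, (Value & 0xffe) << 9,
      // (Value & 0xffc) << 8 and (Value & 0xff8) << 7 respectively.
      assert(encodeLdStLo12(0x0abc, 1) == ((0x0abcu & 0xfff) << 10));
      assert(encodeLdStLo12(0x0abc, 2) == ((0x0abcu & 0xffe) << 9));
      assert(encodeLdStLo12(0x0abc, 4) == ((0x0abcu & 0xffc) << 8));
      assert(encodeLdStLo12(0x0ab8, 8) == ((0x0ab8u & 0xff8) << 7));
    }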
- return (Value & 0xffff) << 5; - - case AArch64::fixup_a64_movw_uabs_g1: - // R_AARCH64_MOVW_UABS_G1: Sets a MOVZ immediate field to bits FFFF0000 of - // S+A with a check that S+A < 2^32 - assert(Value <= 0xffffffffull && "Out of range move wide fixup"); - return ((Value >> 16) & 0xffff) << 5; - - case AArch64::fixup_a64_movw_dtprel_g1_nc: - // R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC: Set a MOVK immediate field - // to bits FFFF0000 of DTPREL(S+A), with no overflow check. - case AArch64::fixup_a64_movw_tprel_g1_nc: - // R_AARCH64_TLSLD_MOVW_TPREL_G1_NC: Set a MOVK immediate field - // to bits FFFF0000 of TPREL(S+A), with no overflow check. - case AArch64::fixup_a64_movw_uabs_g1_nc: - // R_AARCH64_MOVW_UABS_G1_NC: Sets a MOVK immediate field to bits - // FFFF0000 of S+A with no overflow check. - return ((Value >> 16) & 0xffff) << 5; - - case AArch64::fixup_a64_movw_uabs_g2: - // R_AARCH64_MOVW_UABS_G2: Sets a MOVZ immediate field to bits FFFF 0000 - // 0000 of S+A with a check that S+A < 2^48 - assert(Value <= 0xffffffffffffull && "Out of range move wide fixup"); - return ((Value >> 32) & 0xffff) << 5; - - case AArch64::fixup_a64_movw_uabs_g2_nc: - // R_AARCH64_MOVW_UABS_G2: Sets a MOVK immediate field to bits FFFF 0000 - // 0000 of S+A with no overflow check. - return ((Value >> 32) & 0xffff) << 5; - - case AArch64::fixup_a64_movw_uabs_g3: - // R_AARCH64_MOVW_UABS_G3: Sets a MOVZ immediate field to bits FFFF 0000 - // 0000 0000 of S+A (no overflow check needed) - return ((Value >> 48) & 0xffff) << 5; - - case AArch64::fixup_a64_movw_dtprel_g0: - // R_AARCH64_TLSLD_MOVW_DTPREL_G0: Set a MOV[NZ] immediate field - // to bits FFFF of DTPREL(S+A). - case AArch64::fixup_a64_movw_tprel_g0: - // R_AARCH64_TLSLE_MOVW_TPREL_G0: Set a MOV[NZ] immediate field to - // bits FFFF of TPREL(S+A). - case AArch64::fixup_a64_movw_sabs_g0: { - // R_AARCH64_MOVW_SABS_G0: Sets MOV[NZ] immediate field using bits FFFF of - // S+A (see notes below); check -2^16 <= S+A < 2^16. (notes say that we - // should convert between MOVN and MOVZ to achieve our goals). - int64_t Signed = Value; - assert(Signed >= -(1LL << 16) && Signed < (1LL << 16) - && "Out of range move wide fixup"); - if (Signed >= 0) { - Value = (Value & 0xffff) << 5; - // Bit 30 converts the MOVN encoding into a MOVZ - Value |= 1 << 30; - } else { - // MCCodeEmitter should have encoded a MOVN, which is fine. - Value = (~Value & 0xffff) << 5; - } - return Value; - } - - case AArch64::fixup_a64_movw_dtprel_g1: - // R_AARCH64_TLSLD_MOVW_DTPREL_G1: Set a MOV[NZ] immediate field - // to bits FFFF0000 of DTPREL(S+A). - case AArch64::fixup_a64_movw_gottprel_g1: - // R_AARCH64_TLSIE_MOVW_GOTTPREL_G1: Set a MOV[NZ] immediate field - // to bits FFFF0000 of G(TPREL(S+A)) - GOT. - case AArch64::fixup_a64_movw_tprel_g1: - // R_AARCH64_TLSLE_MOVW_TPREL_G1: Set a MOV[NZ] immediate field to - // bits FFFF0000 of TPREL(S+A). - case AArch64::fixup_a64_movw_sabs_g1: { - // R_AARCH64_MOVW_SABS_G1: Sets MOV[NZ] immediate field using bits FFFF 0000 - // of S+A (see notes below); check -2^32 <= S+A < 2^32. (notes say that we - // should convert between MOVN and MOVZ to achieve our goals). 
- int64_t Signed = Value; - assert(Signed >= -(1LL << 32) && Signed < (1LL << 32) - && "Out of range move wide fixup"); - if (Signed >= 0) { - Value = ((Value >> 16) & 0xffff) << 5; - // Bit 30 converts the MOVN encoding into a MOVZ - Value |= 1 << 30; - } else { - Value = ((~Value >> 16) & 0xffff) << 5; - } - return Value; - } - - case AArch64::fixup_a64_movw_dtprel_g2: - // R_AARCH64_TLSLD_MOVW_DTPREL_G2: Set a MOV[NZ] immediate field - // to bits FFFF 0000 0000 of DTPREL(S+A). - case AArch64::fixup_a64_movw_tprel_g2: - // R_AARCH64_TLSLE_MOVW_TPREL_G2: Set a MOV[NZ] immediate field to - // bits FFFF 0000 0000 of TPREL(S+A). - case AArch64::fixup_a64_movw_sabs_g2: { - // R_AARCH64_MOVW_SABS_G2: Sets MOV[NZ] immediate field using bits FFFF 0000 - // 0000 of S+A (see notes below); check -2^48 <= S+A < 2^48. (notes say that - // we should convert between MOVN and MOVZ to achieve our goals). - int64_t Signed = Value; - assert(Signed >= -(1LL << 48) && Signed < (1LL << 48) - && "Out of range move wide fixup"); - if (Signed >= 0) { - Value = ((Value >> 32) & 0xffff) << 5; - // Bit 30 converts the MOVN encoding into a MOVZ - Value |= 1 << 30; - } else { - Value = ((~Value >> 32) & 0xffff) << 5; - } - return Value; - } - - case AArch64::fixup_a64_tstbr: - // R_AARCH64_TSTBR14: Sets the immediate field of a TBZ/TBNZ instruction to - // bits FFFC of S+A-P, checking -2^15 <= S+A-P < 2^15. - assert((int64_t)Value >= -(1LL << 15) && - (int64_t)Value < (1LL << 15) && "Out of range TBZ/TBNZ fixup"); - return (Value & 0xfffc) << (5 - 2); - - case AArch64::fixup_a64_condbr: - // R_AARCH64_CONDBR19: Sets the immediate field of a conditional branch - // instruction to bits 1FFFFC of S+A-P, checking -2^20 <= S+A-P < 2^20. - assert((int64_t)Value >= -(1LL << 20) && - (int64_t)Value < (1LL << 20) && "Out of range B.cond fixup"); - return (Value & 0x1ffffc) << (5 - 2); - - case AArch64::fixup_a64_uncondbr: - // R_AARCH64_JUMP26 same as below (except to a linker, possibly). - case AArch64::fixup_a64_call: - // R_AARCH64_CALL26: Sets a CALL immediate field to bits FFFFFFC of S+A-P, - // checking that -2^27 <= S+A-P < 2^27. - assert((int64_t)Value >= -(1LL << 27) && - (int64_t)Value < (1LL << 27) && "Out of range branch fixup"); - return (Value & 0xffffffc) >> 2; - - case AArch64::fixup_a64_adr_gottprel_page: - // R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21: Set an ADRP immediate field to bits - // 1FFFFF000 of Page(G(TPREL(S+A))) - Page(P); check -2^32 <= X < 2^32. - case AArch64::fixup_a64_tlsdesc_adr_page: - // R_AARCH64_TLSDESC_ADR_PAGE: Set an ADRP immediate field to bits 1FFFFF000 - // of Page(G(TLSDESC(S+A))) - Page(P); check -2^32 <= X < 2^32. - case AArch64::fixup_a64_adr_prel_got_page: - // R_AARCH64_ADR_GOT_PAGE: Sets the immediate value of an ADRP to bits - // 1FFFFF000 of the operation, checking that -2^32 < Page(G(S))-Page(GOT) < - // 2^32. - assert((int64_t)Value >= -(1LL << 32) && - (int64_t)Value < (1LL << 32) && "Out of range ADRP fixup"); - return ADRImmBits((Value & 0x1fffff000ULL) >> 12); - - case AArch64::fixup_a64_ld64_gottprel_lo12_nc: - // R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC: Set an LD offset field to bits FF8 - // of X, with no overflow check. Check that X & 7 == 0. - case AArch64::fixup_a64_tlsdesc_ld64_lo12_nc: - // R_AARCH64_TLSDESC_LD64_LO12_NC: Set an LD offset field to bits FF8 of - // G(TLSDESC(S+A)), with no overflow check. Check that X & 7 == 0. 
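The MOVW_SABS_G* cases above all rely on the same trick: a non-negative value is encoded directly and bit 30 is set, turning the assembled MOVN into a MOVZ, while a negative value keeps the MOVN and encodes its bitwise complement. A sketch for the G0 case (fixupMovSAbsG0 is an illustrative name):

    // Standalone sketch of the MOVN/MOVZ flip for signed move-wide fixups.
    #include <cassert>
    #include <cstdint>

    static uint64_t fixupMovSAbsG0(uint64_t value) {
      int64_t s = (int64_t)value;
      if (s >= 0)
        return ((value & 0xffff) << 5) | (1u << 30);  // encode as MOVZ
      return (~value & 0xffff) << 5;                  // keep MOVN, encode ~value
    }

    int main() {
      assert(fixupMovSAbsG0(0x1234) == ((0x1234u << 5) | (1u << 30)));
      assert(fixupMovSAbsG0((uint64_t)-2) == (0x1u << 5));  // MOVN #1 yields -2
    }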
- case AArch64::fixup_a64_ld64_got_lo12_nc: - // R_AARCH64_LD64_GOT_LO12_NC: Sets the LD/ST immediate field to bits FF8 of - // G(S) with no overflow check. Check X & 7 == 0 - assert(((int64_t)Value & 7) == 0 && "Misaligned fixup"); - return (Value & 0xff8) << 7; - - case AArch64::fixup_a64_tlsdesc_call: - // R_AARCH64_TLSDESC_CALL: For relaxation only. - return 0; - } -} - -MCAsmBackend * -llvm::createAArch64leAsmBackend(const Target &T, const MCRegisterInfo &MRI, - StringRef TT, StringRef CPU) { - Triple TheTriple(TT); - return new ELFAArch64AsmBackend(T, TT, TheTriple.getOS(), /*isLittle*/ true); -} - -MCAsmBackend * -llvm::createAArch64beAsmBackend(const Target &T, const MCRegisterInfo &MRI, - StringRef TT, StringRef CPU) { - Triple TheTriple(TT); - return new ELFAArch64AsmBackend(T, TT, TheTriple.getOS(), /*isLittle*/ false); -} diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp deleted file mode 100644 index a5fe9141e65..00000000000 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp +++ /dev/null @@ -1,291 +0,0 @@ -//===-- AArch64ELFObjectWriter.cpp - AArch64 ELF Writer -------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file handles ELF-specific object emission, converting LLVM's internal -// fixups into the appropriate relocations. -// -//===----------------------------------------------------------------------===// - -#include "MCTargetDesc/AArch64FixupKinds.h" -#include "MCTargetDesc/AArch64MCTargetDesc.h" -#include "llvm/MC/MCELFObjectWriter.h" -#include "llvm/MC/MCValue.h" -#include "llvm/Support/ErrorHandling.h" - -using namespace llvm; - -namespace { -class AArch64ELFObjectWriter : public MCELFObjectTargetWriter { -public: - AArch64ELFObjectWriter(uint8_t OSABI, bool IsLittleEndian); - - virtual ~AArch64ELFObjectWriter(); - -protected: - unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, - bool IsPCRel) const override; - -private: -}; -} - -AArch64ELFObjectWriter::AArch64ELFObjectWriter(uint8_t OSABI, bool IsLittleEndian) - : MCELFObjectTargetWriter(/*Is64Bit*/ true, OSABI, ELF::EM_AARCH64, - /*HasRelocationAddend*/ true) -{} - -AArch64ELFObjectWriter::~AArch64ELFObjectWriter() -{} - -unsigned AArch64ELFObjectWriter::GetRelocType(const MCValue &Target, - const MCFixup &Fixup, - bool IsPCRel) const { - unsigned Type; - if (IsPCRel) { - switch ((unsigned)Fixup.getKind()) { - default: - llvm_unreachable("Unimplemented fixup -> relocation"); - case FK_Data_8: - return ELF::R_AARCH64_PREL64; - case FK_Data_4: - return ELF::R_AARCH64_PREL32; - case FK_Data_2: - return ELF::R_AARCH64_PREL16; - case AArch64::fixup_a64_ld_prel: - Type = ELF::R_AARCH64_LD_PREL_LO19; - break; - case AArch64::fixup_a64_adr_prel: - Type = ELF::R_AARCH64_ADR_PREL_LO21; - break; - case AArch64::fixup_a64_adr_prel_page: - Type = ELF::R_AARCH64_ADR_PREL_PG_HI21; - break; - case AArch64::fixup_a64_adr_prel_got_page: - Type = ELF::R_AARCH64_ADR_GOT_PAGE; - break; - case AArch64::fixup_a64_tstbr: - Type = ELF::R_AARCH64_TSTBR14; - break; - case AArch64::fixup_a64_condbr: - Type = ELF::R_AARCH64_CONDBR19; - break; - case AArch64::fixup_a64_uncondbr: - Type = ELF::R_AARCH64_JUMP26; - break; - case AArch64::fixup_a64_call: - Type = ELF::R_AARCH64_CALL26; - 
break; - case AArch64::fixup_a64_adr_gottprel_page: - Type = ELF::R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21; - break; - case AArch64::fixup_a64_ld_gottprel_prel19: - Type = ELF::R_AARCH64_TLSIE_LD_GOTTPREL_PREL19; - break; - case AArch64::fixup_a64_tlsdesc_adr_page: - Type = ELF::R_AARCH64_TLSDESC_ADR_PAGE; - break; - } - } else { - switch ((unsigned)Fixup.getKind()) { - default: - llvm_unreachable("Unimplemented fixup -> relocation"); - case FK_Data_8: - return ELF::R_AARCH64_ABS64; - case FK_Data_4: - return ELF::R_AARCH64_ABS32; - case FK_Data_2: - return ELF::R_AARCH64_ABS16; - case AArch64::fixup_a64_add_lo12: - Type = ELF::R_AARCH64_ADD_ABS_LO12_NC; - break; - case AArch64::fixup_a64_ld64_got_lo12_nc: - Type = ELF::R_AARCH64_LD64_GOT_LO12_NC; - break; - case AArch64::fixup_a64_ldst8_lo12: - Type = ELF::R_AARCH64_LDST8_ABS_LO12_NC; - break; - case AArch64::fixup_a64_ldst16_lo12: - Type = ELF::R_AARCH64_LDST16_ABS_LO12_NC; - break; - case AArch64::fixup_a64_ldst32_lo12: - Type = ELF::R_AARCH64_LDST32_ABS_LO12_NC; - break; - case AArch64::fixup_a64_ldst64_lo12: - Type = ELF::R_AARCH64_LDST64_ABS_LO12_NC; - break; - case AArch64::fixup_a64_ldst128_lo12: - Type = ELF::R_AARCH64_LDST128_ABS_LO12_NC; - break; - case AArch64::fixup_a64_movw_uabs_g0: - Type = ELF::R_AARCH64_MOVW_UABS_G0; - break; - case AArch64::fixup_a64_movw_uabs_g0_nc: - Type = ELF::R_AARCH64_MOVW_UABS_G0_NC; - break; - case AArch64::fixup_a64_movw_uabs_g1: - Type = ELF::R_AARCH64_MOVW_UABS_G1; - break; - case AArch64::fixup_a64_movw_uabs_g1_nc: - Type = ELF::R_AARCH64_MOVW_UABS_G1_NC; - break; - case AArch64::fixup_a64_movw_uabs_g2: - Type = ELF::R_AARCH64_MOVW_UABS_G2; - break; - case AArch64::fixup_a64_movw_uabs_g2_nc: - Type = ELF::R_AARCH64_MOVW_UABS_G2_NC; - break; - case AArch64::fixup_a64_movw_uabs_g3: - Type = ELF::R_AARCH64_MOVW_UABS_G3; - break; - case AArch64::fixup_a64_movw_sabs_g0: - Type = ELF::R_AARCH64_MOVW_SABS_G0; - break; - case AArch64::fixup_a64_movw_sabs_g1: - Type = ELF::R_AARCH64_MOVW_SABS_G1; - break; - case AArch64::fixup_a64_movw_sabs_g2: - Type = ELF::R_AARCH64_MOVW_SABS_G2; - break; - - // TLS Local-dynamic block - case AArch64::fixup_a64_movw_dtprel_g2: - Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G2; - break; - case AArch64::fixup_a64_movw_dtprel_g1: - Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G1; - break; - case AArch64::fixup_a64_movw_dtprel_g1_nc: - Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC; - break; - case AArch64::fixup_a64_movw_dtprel_g0: - Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G0; - break; - case AArch64::fixup_a64_movw_dtprel_g0_nc: - Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC; - break; - case AArch64::fixup_a64_add_dtprel_hi12: - Type = ELF::R_AARCH64_TLSLD_ADD_DTPREL_HI12; - break; - case AArch64::fixup_a64_add_dtprel_lo12: - Type = ELF::R_AARCH64_TLSLD_ADD_DTPREL_LO12; - break; - case AArch64::fixup_a64_add_dtprel_lo12_nc: - Type = ELF::R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC; - break; - case AArch64::fixup_a64_ldst8_dtprel_lo12: - Type = ELF::R_AARCH64_TLSLD_LDST8_DTPREL_LO12; - break; - case AArch64::fixup_a64_ldst8_dtprel_lo12_nc: - Type = ELF::R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC; - break; - case AArch64::fixup_a64_ldst16_dtprel_lo12: - Type = ELF::R_AARCH64_TLSLD_LDST16_DTPREL_LO12; - break; - case AArch64::fixup_a64_ldst16_dtprel_lo12_nc: - Type = ELF::R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC; - break; - case AArch64::fixup_a64_ldst32_dtprel_lo12: - Type = ELF::R_AARCH64_TLSLD_LDST32_DTPREL_LO12; - break; - case AArch64::fixup_a64_ldst32_dtprel_lo12_nc: - Type = 
ELF::R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC; - break; - case AArch64::fixup_a64_ldst64_dtprel_lo12: - Type = ELF::R_AARCH64_TLSLD_LDST64_DTPREL_LO12; - break; - case AArch64::fixup_a64_ldst64_dtprel_lo12_nc: - Type = ELF::R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC; - break; - - // TLS initial-exec block - case AArch64::fixup_a64_movw_gottprel_g1: - Type = ELF::R_AARCH64_TLSIE_MOVW_GOTTPREL_G1; - break; - case AArch64::fixup_a64_movw_gottprel_g0_nc: - Type = ELF::R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC; - break; - case AArch64::fixup_a64_ld64_gottprel_lo12_nc: - Type = ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC; - break; - - // TLS local-exec block - case AArch64::fixup_a64_movw_tprel_g2: - Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G2; - break; - case AArch64::fixup_a64_movw_tprel_g1: - Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G1; - break; - case AArch64::fixup_a64_movw_tprel_g1_nc: - Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G1_NC; - break; - case AArch64::fixup_a64_movw_tprel_g0: - Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G0; - break; - case AArch64::fixup_a64_movw_tprel_g0_nc: - Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G0_NC; - break; - case AArch64::fixup_a64_add_tprel_hi12: - Type = ELF::R_AARCH64_TLSLE_ADD_TPREL_HI12; - break; - case AArch64::fixup_a64_add_tprel_lo12: - Type = ELF::R_AARCH64_TLSLE_ADD_TPREL_LO12; - break; - case AArch64::fixup_a64_add_tprel_lo12_nc: - Type = ELF::R_AARCH64_TLSLE_ADD_TPREL_LO12_NC; - break; - case AArch64::fixup_a64_ldst8_tprel_lo12: - Type = ELF::R_AARCH64_TLSLE_LDST8_TPREL_LO12; - break; - case AArch64::fixup_a64_ldst8_tprel_lo12_nc: - Type = ELF::R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC; - break; - case AArch64::fixup_a64_ldst16_tprel_lo12: - Type = ELF::R_AARCH64_TLSLE_LDST16_TPREL_LO12; - break; - case AArch64::fixup_a64_ldst16_tprel_lo12_nc: - Type = ELF::R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC; - break; - case AArch64::fixup_a64_ldst32_tprel_lo12: - Type = ELF::R_AARCH64_TLSLE_LDST32_TPREL_LO12; - break; - case AArch64::fixup_a64_ldst32_tprel_lo12_nc: - Type = ELF::R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC; - break; - case AArch64::fixup_a64_ldst64_tprel_lo12: - Type = ELF::R_AARCH64_TLSLE_LDST64_TPREL_LO12; - break; - case AArch64::fixup_a64_ldst64_tprel_lo12_nc: - Type = ELF::R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC; - break; - - // TLS general-dynamic block - case AArch64::fixup_a64_tlsdesc_adr_page: - Type = ELF::R_AARCH64_TLSDESC_ADR_PAGE; - break; - case AArch64::fixup_a64_tlsdesc_ld64_lo12_nc: - Type = ELF::R_AARCH64_TLSDESC_LD64_LO12_NC; - break; - case AArch64::fixup_a64_tlsdesc_add_lo12_nc: - Type = ELF::R_AARCH64_TLSDESC_ADD_LO12_NC; - break; - case AArch64::fixup_a64_tlsdesc_call: - Type = ELF::R_AARCH64_TLSDESC_CALL; - break; - } - } - - return Type; -} - -MCObjectWriter *llvm::createAArch64ELFObjectWriter(raw_ostream &OS, - uint8_t OSABI, - bool IsLittleEndian) { - MCELFObjectTargetWriter *MOTW = new AArch64ELFObjectWriter(OSABI, IsLittleEndian); - return createELFObjectWriter(MOTW, OS, IsLittleEndian); -} diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp deleted file mode 100644 index df2cb383758..00000000000 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp +++ /dev/null @@ -1,161 +0,0 @@ -//===- lib/MC/AArch64ELFStreamer.cpp - ELF Object Output for AArch64 ------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// This file assembles .s files and emits AArch64 ELF .o object files. Different -// from generic ELF streamer in emitting mapping symbols ($x and $d) to delimit -// regions of data and code. -// -//===----------------------------------------------------------------------===// - -#include "llvm/MC/MCELFStreamer.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/Twine.h" -#include "llvm/MC/MCAsmBackend.h" -#include "llvm/MC/MCAssembler.h" -#include "llvm/MC/MCCodeEmitter.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCELF.h" -#include "llvm/MC/MCELFStreamer.h" -#include "llvm/MC/MCELFSymbolFlags.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCObjectStreamer.h" -#include "llvm/MC/MCSection.h" -#include "llvm/MC/MCSectionELF.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/MC/MCValue.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ELF.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -namespace { - -/// Extend the generic ELFStreamer class so that it can emit mapping symbols at -/// the appropriate points in the object files. These symbols are defined in the -/// AArch64 ELF ABI: -/// infocenter.arm.com/help/topic/com.arm.doc.ihi0056a/IHI0056A_aaelf64.pdf -/// -/// In brief: $x or $d should be emitted at the start of each contiguous region -/// of A64 code or data in a section. In practice, this emission does not rely -/// on explicit assembler directives but on inherent properties of the -/// directives doing the emission (e.g. ".byte" is data, "add x0, x0, x0" an -/// instruction). -/// -/// As a result this system is orthogonal to the DataRegion infrastructure used -/// by MachO. Beware! -class AArch64ELFStreamer : public MCELFStreamer { -public: - AArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS, - MCCodeEmitter *Emitter) - : MCELFStreamer(Context, TAB, OS, Emitter), MappingSymbolCounter(0), - LastEMS(EMS_None) {} - - ~AArch64ELFStreamer() {} - - void ChangeSection(const MCSection *Section, - const MCExpr *Subsection) override { - // We have to keep track of the mapping symbol state of any sections we - // use. Each one should start off as EMS_None, which is provided as the - // default constructor by DenseMap::lookup. - LastMappingSymbols[getPreviousSection().first] = LastEMS; - LastEMS = LastMappingSymbols.lookup(Section); - - MCELFStreamer::ChangeSection(Section, Subsection); - } - - /// This function is the one used to emit instruction data into the ELF - /// streamer. We override it to add the appropriate mapping symbol if - /// necessary. - void EmitInstruction(const MCInst& Inst, - const MCSubtargetInfo &STI) override { - EmitA64MappingSymbol(); - MCELFStreamer::EmitInstruction(Inst, STI); - } - - /// This is one of the functions used to emit data into an ELF section, so the - /// AArch64 streamer overrides it to add the appropriate mapping symbol ($d) - /// if necessary. - void EmitBytes(StringRef Data) override { - EmitDataMappingSymbol(); - MCELFStreamer::EmitBytes(Data); - } - - /// This is one of the functions used to emit data into an ELF section, so the - /// AArch64 streamer overrides it to add the appropriate mapping symbol ($d) - /// if necessary. 
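The mapping-symbol scheme described in the comment above reduces to a small per-section state machine: remember whether the last thing emitted into a section was A64 code or data, and emit a fresh "$x" or "$d" local label only when that changes. A self-contained sketch of the idea, using made-up names rather than the MC streamer API:

#include <cstdio>
#include <map>
#include <string>

enum class Region { None, Code, Data };

struct MappingState {
  std::map<std::string, Region> LastInSection; // last region kind per section
  int Counter = 0;

  // Call before emitting an instruction or data into `Section`.
  void before(const std::string &Section, Region Want) {
    Region &Last = LastInSection[Section];     // defaults to Region::None
    if (Last == Want)
      return;                                  // still in the same kind of run
    const char *Sym = (Want == Region::Code) ? "$x" : "$d";
    std::printf("%s.%d:\n", Sym, Counter++);   // emit a local mapping label
    Last = Want;
  }
};

int main() {
  MappingState S;
  S.before(".text", Region::Code);  // prints "$x.0:"
  S.before(".text", Region::Code);  // same region, no new symbol
  S.before(".text", Region::Data);  // prints "$d.1:"
}

Running this prints "$x.0:" and "$d.1:" once each even though before() is called three times, which is the same no-duplicate behaviour EmitA64MappingSymbol and EmitDataMappingSymbol implement below.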
- void EmitValueImpl(const MCExpr *Value, unsigned Size, - const SMLoc &Loc) override { - EmitDataMappingSymbol(); - MCELFStreamer::EmitValueImpl(Value, Size, Loc); - } - -private: - enum ElfMappingSymbol { - EMS_None, - EMS_A64, - EMS_Data - }; - - void EmitDataMappingSymbol() { - if (LastEMS == EMS_Data) return; - EmitMappingSymbol("$d"); - LastEMS = EMS_Data; - } - - void EmitA64MappingSymbol() { - if (LastEMS == EMS_A64) return; - EmitMappingSymbol("$x"); - LastEMS = EMS_A64; - } - - void EmitMappingSymbol(StringRef Name) { - MCSymbol *Start = getContext().CreateTempSymbol(); - EmitLabel(Start); - - MCSymbol *Symbol = - getContext().GetOrCreateSymbol(Name + "." + - Twine(MappingSymbolCounter++)); - - MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); - MCELF::SetType(SD, ELF::STT_NOTYPE); - MCELF::SetBinding(SD, ELF::STB_LOCAL); - SD.setExternal(false); - AssignSection(Symbol, getCurrentSection().first); - - const MCExpr *Value = MCSymbolRefExpr::Create(Start, getContext()); - Symbol->setVariableValue(Value); - } - - int64_t MappingSymbolCounter; - - DenseMap<const MCSection *, ElfMappingSymbol> LastMappingSymbols; - ElfMappingSymbol LastEMS; - - /// @} -}; -} - -namespace llvm { - MCELFStreamer* createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, - raw_ostream &OS, MCCodeEmitter *Emitter, - bool RelaxAll, bool NoExecStack) { - AArch64ELFStreamer *S = new AArch64ELFStreamer(Context, TAB, OS, Emitter); - if (RelaxAll) - S->getAssembler().setRelaxAll(true); - if (NoExecStack) - S->getAssembler().setNoExecStack(true); - return S; - } -} - - diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h deleted file mode 100644 index 5a89ca50cee..00000000000 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h +++ /dev/null @@ -1,27 +0,0 @@ -//===-- AArch64ELFStreamer.h - ELF Streamer for AArch64 ---------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements ELF streamer information for the AArch64 backend. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_AARCH64_ELF_STREAMER_H -#define LLVM_AARCH64_ELF_STREAMER_H - -#include "llvm/MC/MCELFStreamer.h" - -namespace llvm { - - MCELFStreamer* createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, - raw_ostream &OS, - MCCodeEmitter *Emitter, - bool RelaxAll, bool NoExecStack); -} - -#endif // AArch64_ELF_STREAMER_H diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h deleted file mode 100644 index eeb122d3849..00000000000 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h +++ /dev/null @@ -1,113 +0,0 @@ -//=- AArch64/AArch64FixupKinds.h - AArch64 Specific Fixup Entries -*- C++ -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file describes the LLVM fixups applied to MCInsts in the AArch64 -// backend. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_AARCH64_AARCH64FIXUPKINDS_H -#define LLVM_AARCH64_AARCH64FIXUPKINDS_H - -#include "llvm/MC/MCFixup.h" - -namespace llvm { - namespace AArch64 { - enum Fixups { - fixup_a64_ld_prel = FirstTargetFixupKind, - fixup_a64_adr_prel, - fixup_a64_adr_prel_page, - - fixup_a64_add_lo12, - - fixup_a64_ldst8_lo12, - fixup_a64_ldst16_lo12, - fixup_a64_ldst32_lo12, - fixup_a64_ldst64_lo12, - fixup_a64_ldst128_lo12, - - fixup_a64_tstbr, - fixup_a64_condbr, - fixup_a64_uncondbr, - fixup_a64_call, - - fixup_a64_movw_uabs_g0, - fixup_a64_movw_uabs_g0_nc, - fixup_a64_movw_uabs_g1, - fixup_a64_movw_uabs_g1_nc, - fixup_a64_movw_uabs_g2, - fixup_a64_movw_uabs_g2_nc, - fixup_a64_movw_uabs_g3, - - fixup_a64_movw_sabs_g0, - fixup_a64_movw_sabs_g1, - fixup_a64_movw_sabs_g2, - - fixup_a64_adr_prel_got_page, - fixup_a64_ld64_got_lo12_nc, - - // Produce offsets relative to the module's dynamic TLS area. - fixup_a64_movw_dtprel_g2, - fixup_a64_movw_dtprel_g1, - fixup_a64_movw_dtprel_g1_nc, - fixup_a64_movw_dtprel_g0, - fixup_a64_movw_dtprel_g0_nc, - fixup_a64_add_dtprel_hi12, - fixup_a64_add_dtprel_lo12, - fixup_a64_add_dtprel_lo12_nc, - fixup_a64_ldst8_dtprel_lo12, - fixup_a64_ldst8_dtprel_lo12_nc, - fixup_a64_ldst16_dtprel_lo12, - fixup_a64_ldst16_dtprel_lo12_nc, - fixup_a64_ldst32_dtprel_lo12, - fixup_a64_ldst32_dtprel_lo12_nc, - fixup_a64_ldst64_dtprel_lo12, - fixup_a64_ldst64_dtprel_lo12_nc, - - // Produce the GOT entry containing a variable's address in TLS's - // initial-exec mode. - fixup_a64_movw_gottprel_g1, - fixup_a64_movw_gottprel_g0_nc, - fixup_a64_adr_gottprel_page, - fixup_a64_ld64_gottprel_lo12_nc, - fixup_a64_ld_gottprel_prel19, - - // Produce offsets relative to the thread pointer: TPIDR_EL0. - fixup_a64_movw_tprel_g2, - fixup_a64_movw_tprel_g1, - fixup_a64_movw_tprel_g1_nc, - fixup_a64_movw_tprel_g0, - fixup_a64_movw_tprel_g0_nc, - fixup_a64_add_tprel_hi12, - fixup_a64_add_tprel_lo12, - fixup_a64_add_tprel_lo12_nc, - fixup_a64_ldst8_tprel_lo12, - fixup_a64_ldst8_tprel_lo12_nc, - fixup_a64_ldst16_tprel_lo12, - fixup_a64_ldst16_tprel_lo12_nc, - fixup_a64_ldst32_tprel_lo12, - fixup_a64_ldst32_tprel_lo12_nc, - fixup_a64_ldst64_tprel_lo12, - fixup_a64_ldst64_tprel_lo12_nc, - - // Produce the special fixups used by the general-dynamic TLS model. - fixup_a64_tlsdesc_adr_page, - fixup_a64_tlsdesc_ld64_lo12_nc, - fixup_a64_tlsdesc_add_lo12_nc, - fixup_a64_tlsdesc_call, - - - // Marker - LastTargetFixupKind, - NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind - }; - } -} - -#endif diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp deleted file mode 100644 index b090a55eb99..00000000000 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp +++ /dev/null @@ -1,46 +0,0 @@ -//===-- AArch64MCAsmInfo.cpp - AArch64 asm properties ---------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the declarations of the AArch64MCAsmInfo properties. 
-// -//===----------------------------------------------------------------------===// - -#include "AArch64MCAsmInfo.h" -#include "llvm/ADT/Triple.h" - -using namespace llvm; - -AArch64ELFMCAsmInfo::AArch64ELFMCAsmInfo(StringRef TT) { - Triple TheTriple(TT); - if (TheTriple.getArch() == Triple::aarch64_be) - IsLittleEndian = false; - - PointerSize = 8; - - // ".comm align is in bytes but .align is pow-2." - AlignmentIsInBytes = false; - - CommentString = "//"; - Code32Directive = ".code\t32"; - - Data16bitsDirective = "\t.hword\t"; - Data32bitsDirective = "\t.word\t"; - Data64bitsDirective = "\t.xword\t"; - - HasLEB128 = true; - SupportsDebugInformation = true; - - // Exceptions handling - ExceptionsType = ExceptionHandling::DwarfCFI; - - UseIntegratedAssembler = true; -} - -// Pin the vtable to this file. -void AArch64ELFMCAsmInfo::anchor() {} diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h deleted file mode 100644 index 78fd5d5b4fe..00000000000 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h +++ /dev/null @@ -1,29 +0,0 @@ -//==-- AArch64MCAsmInfo.h - AArch64 asm properties -------------*- C++ -*--===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the declaration of the AArch64MCAsmInfo class. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_AARCH64TARGETASMINFO_H -#define LLVM_AARCH64TARGETASMINFO_H - -#include "llvm/MC/MCAsmInfoELF.h" - -namespace llvm { - -struct AArch64ELFMCAsmInfo : public MCAsmInfoELF { - explicit AArch64ELFMCAsmInfo(StringRef TT); -private: - void anchor() override; -}; - -} // namespace llvm - -#endif diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp deleted file mode 100644 index 7ff46d71df9..00000000000 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp +++ /dev/null @@ -1,613 +0,0 @@ -//=- AArch64/AArch64MCCodeEmitter.cpp - Convert AArch64 code to machine code =// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the AArch64MCCodeEmitter class. 
-// -//===----------------------------------------------------------------------===// - -#include "MCTargetDesc/AArch64FixupKinds.h" -#include "MCTargetDesc/AArch64MCExpr.h" -#include "MCTargetDesc/AArch64MCTargetDesc.h" -#include "Utils/AArch64BaseInfo.h" -#include "llvm/MC/MCCodeEmitter.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCInstrInfo.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -#define DEBUG_TYPE "mccodeemitter" - -namespace { -class AArch64MCCodeEmitter : public MCCodeEmitter { - AArch64MCCodeEmitter(const AArch64MCCodeEmitter &) LLVM_DELETED_FUNCTION; - void operator=(const AArch64MCCodeEmitter &) LLVM_DELETED_FUNCTION; - MCContext &Ctx; - -public: - AArch64MCCodeEmitter(MCContext &ctx) : Ctx(ctx) {} - - ~AArch64MCCodeEmitter() {} - - unsigned getAddSubImmOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - - unsigned getAdrpLabelOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - - template<int MemSize> - unsigned getOffsetUImm12OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - return getOffsetUImm12OpValue(MI, OpIdx, Fixups, STI, MemSize); - } - - unsigned getOffsetUImm12OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI, - int MemSize) const; - - unsigned getBitfield32LSLOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - unsigned getBitfield64LSLOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - - unsigned getShiftRightImm8(const MCInst &MI, unsigned Op, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - unsigned getShiftRightImm16(const MCInst &MI, unsigned Op, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - unsigned getShiftRightImm32(const MCInst &MI, unsigned Op, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - unsigned getShiftRightImm64(const MCInst &MI, unsigned Op, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - - unsigned getShiftLeftImm8(const MCInst &MI, unsigned Op, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - unsigned getShiftLeftImm16(const MCInst &MI, unsigned Op, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - unsigned getShiftLeftImm32(const MCInst &MI, unsigned Op, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - unsigned getShiftLeftImm64(const MCInst &MI, unsigned Op, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - - // Labels are handled mostly the same way: a symbol is needed, and - // just gets some fixup attached. 
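The comment above ("a symbol is needed, and just gets some fixup attached") names the pattern used by nearly every operand-encoding hook in this file: an operand that is already an immediate is encoded directly, while a still-symbolic operand contributes zero bits now and records a fixup for the assembler or linker to resolve later. A stripped-down, stand-alone sketch of that pattern with hypothetical types, not the real MCOperand/MCFixup API:

#include <cstdint>
#include <vector>

// Hypothetical stand-ins for MCOperand and MCFixup.
struct Operand {
  bool IsImm;
  int64_t Imm;          // valid when IsImm
  const char *Symbol;   // valid when !IsImm
};

struct Fixup {
  const char *Symbol;
  int Kind;             // e.g. one of the fixup_a64_* kinds
};

// Encode an operand: immediates go straight into the field, symbolic operands
// get a fixup recorded and contribute 0 bits for now.
static uint32_t encodeOrFixup(const Operand &Op, int FixupKind,
                              std::vector<Fixup> &Fixups) {
  if (Op.IsImm)
    return static_cast<uint32_t>(Op.Imm);
  Fixups.push_back({Op.Symbol, FixupKind});
  return 0;
}

getAddressWithFixup below is the real counterpart: whenever the operand is an expression it pushes an MCFixup of the requested kind and returns 0.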
- template<AArch64::Fixups fixupDesired> - unsigned getLabelOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - - unsigned getLoadLitLabelOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - - - unsigned getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - - - unsigned getAddressWithFixup(const MCOperand &MO, - unsigned FixupKind, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - - - // getBinaryCodeForInstr - TableGen'erated function for getting the - // binary encoding for an instruction. - uint64_t getBinaryCodeForInstr(const MCInst &MI, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - - /// getMachineOpValue - Return binary encoding of operand. If the machine - /// operand requires relocation, record the relocation and return zero. - unsigned getMachineOpValue(const MCInst &MI,const MCOperand &MO, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - - - void EmitByte(unsigned char C, raw_ostream &OS) const { - OS << (char)C; - } - - void EmitInstruction(uint32_t Val, raw_ostream &OS) const { - // Output the constant in little endian byte order. - for (unsigned i = 0; i != 4; ++i) { - EmitByte(Val & 0xff, OS); - Val >>= 8; - } - } - - - void EncodeInstruction(const MCInst &MI, raw_ostream &OS, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const override; - - template<int hasRs, int hasRt2> unsigned - fixLoadStoreExclusive(const MCInst &MI, unsigned EncodedValue, - const MCSubtargetInfo &STI) const; - - unsigned fixMOVZ(const MCInst &MI, unsigned EncodedValue, - const MCSubtargetInfo &STI) const; - - unsigned fixMulHigh(const MCInst &MI, unsigned EncodedValue, - const MCSubtargetInfo &STI) const; - - -}; - -} // end anonymous namespace - -unsigned AArch64MCCodeEmitter::getAddressWithFixup(const MCOperand &MO, - unsigned FixupKind, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - if (!MO.isExpr()) { - // This can occur for manually decoded or constructed MCInsts, but neither - // the assembly-parser nor instruction selection will currently produce an - // MCInst that's not a symbol reference. 
- assert(MO.isImm() && "Unexpected address requested"); - return MO.getImm(); - } - - const MCExpr *Expr = MO.getExpr(); - MCFixupKind Kind = MCFixupKind(FixupKind); - Fixups.push_back(MCFixup::Create(0, Expr, Kind)); - - return 0; -} - -unsigned AArch64MCCodeEmitter:: -getOffsetUImm12OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI, - int MemSize) const { - const MCOperand &ImmOp = MI.getOperand(OpIdx); - if (ImmOp.isImm()) - return ImmOp.getImm(); - - assert(ImmOp.isExpr() && "Unexpected operand type"); - const AArch64MCExpr *Expr = cast<AArch64MCExpr>(ImmOp.getExpr()); - unsigned FixupKind; - - - switch (Expr->getKind()) { - default: llvm_unreachable("Unexpected operand modifier"); - case AArch64MCExpr::VK_AARCH64_LO12: { - static const unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_lo12, - AArch64::fixup_a64_ldst16_lo12, - AArch64::fixup_a64_ldst32_lo12, - AArch64::fixup_a64_ldst64_lo12, - AArch64::fixup_a64_ldst128_lo12 }; - assert(MemSize <= 16 && "Invalid fixup for operation"); - FixupKind = FixupsBySize[Log2_32(MemSize)]; - break; - } - case AArch64MCExpr::VK_AARCH64_GOT_LO12: - assert(MemSize == 8 && "Invalid fixup for operation"); - FixupKind = AArch64::fixup_a64_ld64_got_lo12_nc; - break; - case AArch64MCExpr::VK_AARCH64_DTPREL_LO12: { - static const unsigned FixupsBySize[] = { - AArch64::fixup_a64_ldst8_dtprel_lo12, - AArch64::fixup_a64_ldst16_dtprel_lo12, - AArch64::fixup_a64_ldst32_dtprel_lo12, - AArch64::fixup_a64_ldst64_dtprel_lo12 - }; - assert(MemSize <= 8 && "Invalid fixup for operation"); - FixupKind = FixupsBySize[Log2_32(MemSize)]; - break; - } - case AArch64MCExpr::VK_AARCH64_DTPREL_LO12_NC: { - static const unsigned FixupsBySize[] = { - AArch64::fixup_a64_ldst8_dtprel_lo12_nc, - AArch64::fixup_a64_ldst16_dtprel_lo12_nc, - AArch64::fixup_a64_ldst32_dtprel_lo12_nc, - AArch64::fixup_a64_ldst64_dtprel_lo12_nc - }; - assert(MemSize <= 8 && "Invalid fixup for operation"); - FixupKind = FixupsBySize[Log2_32(MemSize)]; - break; - } - case AArch64MCExpr::VK_AARCH64_GOTTPREL_LO12: - assert(MemSize == 8 && "Invalid fixup for operation"); - FixupKind = AArch64::fixup_a64_ld64_gottprel_lo12_nc; - break; - case AArch64MCExpr::VK_AARCH64_TPREL_LO12:{ - static const unsigned FixupsBySize[] = { - AArch64::fixup_a64_ldst8_tprel_lo12, - AArch64::fixup_a64_ldst16_tprel_lo12, - AArch64::fixup_a64_ldst32_tprel_lo12, - AArch64::fixup_a64_ldst64_tprel_lo12 - }; - assert(MemSize <= 8 && "Invalid fixup for operation"); - FixupKind = FixupsBySize[Log2_32(MemSize)]; - break; - } - case AArch64MCExpr::VK_AARCH64_TPREL_LO12_NC: { - static const unsigned FixupsBySize[] = { - AArch64::fixup_a64_ldst8_tprel_lo12_nc, - AArch64::fixup_a64_ldst16_tprel_lo12_nc, - AArch64::fixup_a64_ldst32_tprel_lo12_nc, - AArch64::fixup_a64_ldst64_tprel_lo12_nc - }; - assert(MemSize <= 8 && "Invalid fixup for operation"); - FixupKind = FixupsBySize[Log2_32(MemSize)]; - break; - } - case AArch64MCExpr::VK_AARCH64_TLSDESC_LO12: - assert(MemSize == 8 && "Invalid fixup for operation"); - FixupKind = AArch64::fixup_a64_tlsdesc_ld64_lo12_nc; - break; - } - - return getAddressWithFixup(ImmOp, FixupKind, Fixups, STI); -} - -unsigned -AArch64MCCodeEmitter::getAddSubImmOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - if (MO.isImm()) - return static_cast<unsigned>(MO.getImm()); - - assert(MO.isExpr()); - - unsigned FixupKind = 0; - 
switch(cast<AArch64MCExpr>(MO.getExpr())->getKind()) { - default: llvm_unreachable("Invalid expression modifier"); - case AArch64MCExpr::VK_AARCH64_LO12: - FixupKind = AArch64::fixup_a64_add_lo12; break; - case AArch64MCExpr::VK_AARCH64_DTPREL_HI12: - FixupKind = AArch64::fixup_a64_add_dtprel_hi12; break; - case AArch64MCExpr::VK_AARCH64_DTPREL_LO12: - FixupKind = AArch64::fixup_a64_add_dtprel_lo12; break; - case AArch64MCExpr::VK_AARCH64_DTPREL_LO12_NC: - FixupKind = AArch64::fixup_a64_add_dtprel_lo12_nc; break; - case AArch64MCExpr::VK_AARCH64_TPREL_HI12: - FixupKind = AArch64::fixup_a64_add_tprel_hi12; break; - case AArch64MCExpr::VK_AARCH64_TPREL_LO12: - FixupKind = AArch64::fixup_a64_add_tprel_lo12; break; - case AArch64MCExpr::VK_AARCH64_TPREL_LO12_NC: - FixupKind = AArch64::fixup_a64_add_tprel_lo12_nc; break; - case AArch64MCExpr::VK_AARCH64_TLSDESC_LO12: - FixupKind = AArch64::fixup_a64_tlsdesc_add_lo12_nc; break; - } - - return getAddressWithFixup(MO, FixupKind, Fixups, STI); -} - -unsigned -AArch64MCCodeEmitter::getAdrpLabelOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - - const MCOperand &MO = MI.getOperand(OpIdx); - if (MO.isImm()) - return static_cast<unsigned>(MO.getImm()); - - assert(MO.isExpr()); - - unsigned Modifier = AArch64MCExpr::VK_AARCH64_None; - if (const AArch64MCExpr *Expr = dyn_cast<AArch64MCExpr>(MO.getExpr())) - Modifier = Expr->getKind(); - - unsigned FixupKind = 0; - switch(Modifier) { - case AArch64MCExpr::VK_AARCH64_None: - FixupKind = AArch64::fixup_a64_adr_prel_page; - break; - case AArch64MCExpr::VK_AARCH64_GOT: - FixupKind = AArch64::fixup_a64_adr_prel_got_page; - break; - case AArch64MCExpr::VK_AARCH64_GOTTPREL: - FixupKind = AArch64::fixup_a64_adr_gottprel_page; - break; - case AArch64MCExpr::VK_AARCH64_TLSDESC: - FixupKind = AArch64::fixup_a64_tlsdesc_adr_page; - break; - default: - llvm_unreachable("Unknown symbol reference kind for ADRP instruction"); - } - - return getAddressWithFixup(MO, FixupKind, Fixups, STI); -} - -unsigned -AArch64MCCodeEmitter::getBitfield32LSLOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - - const MCOperand &MO = MI.getOperand(OpIdx); - assert(MO.isImm() && "Only immediate expected for shift"); - - return ((32 - MO.getImm()) & 0x1f) | (31 - MO.getImm()) << 6; -} - -unsigned -AArch64MCCodeEmitter::getBitfield64LSLOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - - const MCOperand &MO = MI.getOperand(OpIdx); - assert(MO.isImm() && "Only immediate expected for shift"); - - return ((64 - MO.getImm()) & 0x3f) | (63 - MO.getImm()) << 6; -} - -unsigned AArch64MCCodeEmitter::getShiftRightImm8( - const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - return 8 - MI.getOperand(Op).getImm(); -} - -unsigned AArch64MCCodeEmitter::getShiftRightImm16( - const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - return 16 - MI.getOperand(Op).getImm(); -} - -unsigned AArch64MCCodeEmitter::getShiftRightImm32( - const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - return 32 - MI.getOperand(Op).getImm(); -} - -unsigned AArch64MCCodeEmitter::getShiftRightImm64( - const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - return 64 - 
MI.getOperand(Op).getImm(); -} - -unsigned AArch64MCCodeEmitter::getShiftLeftImm8( - const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - return MI.getOperand(Op).getImm() - 8; -} - -unsigned AArch64MCCodeEmitter::getShiftLeftImm16( - const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - return MI.getOperand(Op).getImm() - 16; -} - -unsigned AArch64MCCodeEmitter::getShiftLeftImm32( - const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - return MI.getOperand(Op).getImm() - 32; -} - -unsigned AArch64MCCodeEmitter::getShiftLeftImm64( - const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - return MI.getOperand(Op).getImm() - 64; -} - -template<AArch64::Fixups fixupDesired> unsigned -AArch64MCCodeEmitter::getLabelOpValue(const MCInst &MI, - unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - - if (MO.isExpr()) - return getAddressWithFixup(MO, fixupDesired, Fixups, STI); - - assert(MO.isImm()); - return MO.getImm(); -} - -unsigned -AArch64MCCodeEmitter::getLoadLitLabelOpValue(const MCInst &MI, - unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - - if (MO.isImm()) - return MO.getImm(); - - assert(MO.isExpr()); - - unsigned FixupKind; - if (isa<AArch64MCExpr>(MO.getExpr())) { - assert(dyn_cast<AArch64MCExpr>(MO.getExpr())->getKind() - == AArch64MCExpr::VK_AARCH64_GOTTPREL - && "Invalid symbol modifier for literal load"); - FixupKind = AArch64::fixup_a64_ld_gottprel_prel19; - } else { - FixupKind = AArch64::fixup_a64_ld_prel; - } - - return getAddressWithFixup(MO, FixupKind, Fixups, STI); -} - - -unsigned -AArch64MCCodeEmitter::getMachineOpValue(const MCInst &MI, - const MCOperand &MO, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - if (MO.isReg()) { - return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg()); - } else if (MO.isImm()) { - return static_cast<unsigned>(MO.getImm()); - } - - llvm_unreachable("Unable to encode MCOperand!"); - return 0; -} - -unsigned -AArch64MCCodeEmitter::getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &UImm16MO = MI.getOperand(OpIdx); - const MCOperand &ShiftMO = MI.getOperand(OpIdx + 1); - - unsigned Result = static_cast<unsigned>(ShiftMO.getImm()) << 16; - - if (UImm16MO.isImm()) { - Result |= UImm16MO.getImm(); - return Result; - } - - const AArch64MCExpr *A64E = cast<AArch64MCExpr>(UImm16MO.getExpr()); - AArch64::Fixups requestedFixup; - switch (A64E->getKind()) { - default: llvm_unreachable("unexpected expression modifier"); - case AArch64MCExpr::VK_AARCH64_ABS_G0: - requestedFixup = AArch64::fixup_a64_movw_uabs_g0; break; - case AArch64MCExpr::VK_AARCH64_ABS_G0_NC: - requestedFixup = AArch64::fixup_a64_movw_uabs_g0_nc; break; - case AArch64MCExpr::VK_AARCH64_ABS_G1: - requestedFixup = AArch64::fixup_a64_movw_uabs_g1; break; - case AArch64MCExpr::VK_AARCH64_ABS_G1_NC: - requestedFixup = AArch64::fixup_a64_movw_uabs_g1_nc; break; - case AArch64MCExpr::VK_AARCH64_ABS_G2: - requestedFixup = AArch64::fixup_a64_movw_uabs_g2; break; - case AArch64MCExpr::VK_AARCH64_ABS_G2_NC: - requestedFixup = AArch64::fixup_a64_movw_uabs_g2_nc; break; - case 
AArch64MCExpr::VK_AARCH64_ABS_G3: - requestedFixup = AArch64::fixup_a64_movw_uabs_g3; break; - case AArch64MCExpr::VK_AARCH64_SABS_G0: - requestedFixup = AArch64::fixup_a64_movw_sabs_g0; break; - case AArch64MCExpr::VK_AARCH64_SABS_G1: - requestedFixup = AArch64::fixup_a64_movw_sabs_g1; break; - case AArch64MCExpr::VK_AARCH64_SABS_G2: - requestedFixup = AArch64::fixup_a64_movw_sabs_g2; break; - case AArch64MCExpr::VK_AARCH64_DTPREL_G2: - requestedFixup = AArch64::fixup_a64_movw_dtprel_g2; break; - case AArch64MCExpr::VK_AARCH64_DTPREL_G1: - requestedFixup = AArch64::fixup_a64_movw_dtprel_g1; break; - case AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC: - requestedFixup = AArch64::fixup_a64_movw_dtprel_g1_nc; break; - case AArch64MCExpr::VK_AARCH64_DTPREL_G0: - requestedFixup = AArch64::fixup_a64_movw_dtprel_g0; break; - case AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC: - requestedFixup = AArch64::fixup_a64_movw_dtprel_g0_nc; break; - case AArch64MCExpr::VK_AARCH64_GOTTPREL_G1: - requestedFixup = AArch64::fixup_a64_movw_gottprel_g1; break; - case AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC: - requestedFixup = AArch64::fixup_a64_movw_gottprel_g0_nc; break; - case AArch64MCExpr::VK_AARCH64_TPREL_G2: - requestedFixup = AArch64::fixup_a64_movw_tprel_g2; break; - case AArch64MCExpr::VK_AARCH64_TPREL_G1: - requestedFixup = AArch64::fixup_a64_movw_tprel_g1; break; - case AArch64MCExpr::VK_AARCH64_TPREL_G1_NC: - requestedFixup = AArch64::fixup_a64_movw_tprel_g1_nc; break; - case AArch64MCExpr::VK_AARCH64_TPREL_G0: - requestedFixup = AArch64::fixup_a64_movw_tprel_g0; break; - case AArch64MCExpr::VK_AARCH64_TPREL_G0_NC: - requestedFixup = AArch64::fixup_a64_movw_tprel_g0_nc; break; - } - - return Result | getAddressWithFixup(UImm16MO, requestedFixup, Fixups, STI); -} - -template<int hasRs, int hasRt2> unsigned -AArch64MCCodeEmitter::fixLoadStoreExclusive(const MCInst &MI, - unsigned EncodedValue, - const MCSubtargetInfo &STI) const { - if (!hasRs) EncodedValue |= 0x001F0000; - if (!hasRt2) EncodedValue |= 0x00007C00; - - return EncodedValue; -} - -unsigned -AArch64MCCodeEmitter::fixMOVZ(const MCInst &MI, unsigned EncodedValue, - const MCSubtargetInfo &STI) const { - // If one of the signed fixup kinds is applied to a MOVZ instruction, the - // eventual result could be either a MOVZ or a MOVN. It's the MCCodeEmitter's - // job to ensure that any bits possibly affected by this are 0. This means we - // must zero out bit 30 (essentially emitting a MOVN). - MCOperand UImm16MO = MI.getOperand(1); - - // Nothing to do if there's no fixup. - if (UImm16MO.isImm()) - return EncodedValue; - - const AArch64MCExpr *A64E = cast<AArch64MCExpr>(UImm16MO.getExpr()); - switch (A64E->getKind()) { - case AArch64MCExpr::VK_AARCH64_SABS_G0: - case AArch64MCExpr::VK_AARCH64_SABS_G1: - case AArch64MCExpr::VK_AARCH64_SABS_G2: - case AArch64MCExpr::VK_AARCH64_DTPREL_G2: - case AArch64MCExpr::VK_AARCH64_DTPREL_G1: - case AArch64MCExpr::VK_AARCH64_DTPREL_G0: - case AArch64MCExpr::VK_AARCH64_GOTTPREL_G1: - case AArch64MCExpr::VK_AARCH64_TPREL_G2: - case AArch64MCExpr::VK_AARCH64_TPREL_G1: - case AArch64MCExpr::VK_AARCH64_TPREL_G0: - return EncodedValue & ~(1u << 30); - default: - // Nothing to do for an unsigned fixup. - return EncodedValue; - } - - llvm_unreachable("Should have returned by now"); -} - -unsigned -AArch64MCCodeEmitter::fixMulHigh(const MCInst &MI, - unsigned EncodedValue, - const MCSubtargetInfo &STI) const { - // The Ra field of SMULH and UMULH is unused: it should be assembled as 31 - // (i.e. 
all bits 1) but is ignored by the processor. - EncodedValue |= 0x1f << 10; - return EncodedValue; -} - -MCCodeEmitter *llvm::createAArch64MCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, - MCContext &Ctx) { - return new AArch64MCCodeEmitter(Ctx); -} - -void AArch64MCCodeEmitter:: -EncodeInstruction(const MCInst &MI, raw_ostream &OS, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - if (MI.getOpcode() == AArch64::TLSDESCCALL) { - // This is a directive which applies an R_AARCH64_TLSDESC_CALL to the - // following (BLR) instruction. It doesn't emit any code itself so it - // doesn't go through the normal TableGenerated channels. - MCFixupKind Fixup = MCFixupKind(AArch64::fixup_a64_tlsdesc_call); - const MCExpr *Expr; - Expr = AArch64MCExpr::CreateTLSDesc(MI.getOperand(0).getExpr(), Ctx); - Fixups.push_back(MCFixup::Create(0, Expr, Fixup)); - return; - } - - uint32_t Binary = getBinaryCodeForInstr(MI, Fixups, STI); - - EmitInstruction(Binary, OS); -} - - -#include "AArch64GenMCCodeEmitter.inc" diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp deleted file mode 100644 index 7aef9c57bf3..00000000000 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp +++ /dev/null @@ -1,179 +0,0 @@ -//===-- AArch64MCExpr.cpp - AArch64 specific MC expression classes --------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the implementation of the assembly expression modifiers -// accepted by the AArch64 architecture (e.g. ":lo12:", ":gottprel_g1:", ...). 
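As a rough illustration of how these modifier expressions are built, the Create* helpers declared in AArch64MCExpr.h wrap an ordinary symbol reference in a variant kind that later prints as ":lo12:", ":got:" and so on, and that steers fixup selection in the code emitter. A hedged fragment, assuming the (since-removed) backend headers are still on the include path:

#include "MCTargetDesc/AArch64MCExpr.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
using namespace llvm;

// Build the operand expression the assembler would create for
//   add x0, x0, #:lo12:var
static const MCExpr *buildLo12(MCContext &Ctx) {
  MCSymbol *Var = Ctx.GetOrCreateSymbol("var");          // plain symbol "var"
  const MCExpr *Ref = MCSymbolRefExpr::Create(Var, Ctx); // bare reference
  // Wrapping it selects VK_AARCH64_LO12, which PrintImpl renders as
  // ":lo12:var" and getAddSubImmOpValue maps to fixup_a64_add_lo12.
  return AArch64MCExpr::CreateLo12(Ref, Ctx);
}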
-// -//===----------------------------------------------------------------------===// - -#include "AArch64MCExpr.h" -#include "llvm/MC/MCAssembler.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCELF.h" -#include "llvm/Object/ELF.h" - -using namespace llvm; - -#define DEBUG_TYPE "aarch64mcexpr" - -const AArch64MCExpr* -AArch64MCExpr::Create(VariantKind Kind, const MCExpr *Expr, - MCContext &Ctx) { - return new (Ctx) AArch64MCExpr(Kind, Expr); -} - -void AArch64MCExpr::PrintImpl(raw_ostream &OS) const { - switch (Kind) { - default: llvm_unreachable("Invalid kind!"); - case VK_AARCH64_GOT: OS << ":got:"; break; - case VK_AARCH64_GOT_LO12: OS << ":got_lo12:"; break; - case VK_AARCH64_LO12: OS << ":lo12:"; break; - case VK_AARCH64_ABS_G0: OS << ":abs_g0:"; break; - case VK_AARCH64_ABS_G0_NC: OS << ":abs_g0_nc:"; break; - case VK_AARCH64_ABS_G1: OS << ":abs_g1:"; break; - case VK_AARCH64_ABS_G1_NC: OS << ":abs_g1_nc:"; break; - case VK_AARCH64_ABS_G2: OS << ":abs_g2:"; break; - case VK_AARCH64_ABS_G2_NC: OS << ":abs_g2_nc:"; break; - case VK_AARCH64_ABS_G3: OS << ":abs_g3:"; break; - case VK_AARCH64_SABS_G0: OS << ":abs_g0_s:"; break; - case VK_AARCH64_SABS_G1: OS << ":abs_g1_s:"; break; - case VK_AARCH64_SABS_G2: OS << ":abs_g2_s:"; break; - case VK_AARCH64_DTPREL_G2: OS << ":dtprel_g2:"; break; - case VK_AARCH64_DTPREL_G1: OS << ":dtprel_g1:"; break; - case VK_AARCH64_DTPREL_G1_NC: OS << ":dtprel_g1_nc:"; break; - case VK_AARCH64_DTPREL_G0: OS << ":dtprel_g0:"; break; - case VK_AARCH64_DTPREL_G0_NC: OS << ":dtprel_g0_nc:"; break; - case VK_AARCH64_DTPREL_HI12: OS << ":dtprel_hi12:"; break; - case VK_AARCH64_DTPREL_LO12: OS << ":dtprel_lo12:"; break; - case VK_AARCH64_DTPREL_LO12_NC: OS << ":dtprel_lo12_nc:"; break; - case VK_AARCH64_GOTTPREL_G1: OS << ":gottprel_g1:"; break; - case VK_AARCH64_GOTTPREL_G0_NC: OS << ":gottprel_g0_nc:"; break; - case VK_AARCH64_GOTTPREL: OS << ":gottprel:"; break; - case VK_AARCH64_GOTTPREL_LO12: OS << ":gottprel_lo12:"; break; - case VK_AARCH64_TPREL_G2: OS << ":tprel_g2:"; break; - case VK_AARCH64_TPREL_G1: OS << ":tprel_g1:"; break; - case VK_AARCH64_TPREL_G1_NC: OS << ":tprel_g1_nc:"; break; - case VK_AARCH64_TPREL_G0: OS << ":tprel_g0:"; break; - case VK_AARCH64_TPREL_G0_NC: OS << ":tprel_g0_nc:"; break; - case VK_AARCH64_TPREL_HI12: OS << ":tprel_hi12:"; break; - case VK_AARCH64_TPREL_LO12: OS << ":tprel_lo12:"; break; - case VK_AARCH64_TPREL_LO12_NC: OS << ":tprel_lo12_nc:"; break; - case VK_AARCH64_TLSDESC: OS << ":tlsdesc:"; break; - case VK_AARCH64_TLSDESC_LO12: OS << ":tlsdesc_lo12:"; break; - - } - - const MCExpr *Expr = getSubExpr(); - if (Expr->getKind() != MCExpr::SymbolRef) - OS << '('; - Expr->print(OS); - if (Expr->getKind() != MCExpr::SymbolRef) - OS << ')'; -} - -bool -AArch64MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, - const MCAsmLayout *Layout) const { - return getSubExpr()->EvaluateAsRelocatable(Res, Layout); -} - -static void fixELFSymbolsInTLSFixupsImpl(const MCExpr *Expr, MCAssembler &Asm) { - switch (Expr->getKind()) { - case MCExpr::Target: - llvm_unreachable("Can't handle nested target expression"); - break; - case MCExpr::Constant: - break; - - case MCExpr::Binary: { - const MCBinaryExpr *BE = cast<MCBinaryExpr>(Expr); - fixELFSymbolsInTLSFixupsImpl(BE->getLHS(), Asm); - fixELFSymbolsInTLSFixupsImpl(BE->getRHS(), Asm); - break; - } - - case MCExpr::SymbolRef: { - // We're known to be under a TLS fixup, so any symbol should be - // modified. There should be only one. 
- const MCSymbolRefExpr &SymRef = *cast<MCSymbolRefExpr>(Expr); - MCSymbolData &SD = Asm.getOrCreateSymbolData(SymRef.getSymbol()); - MCELF::SetType(SD, ELF::STT_TLS); - break; - } - - case MCExpr::Unary: - fixELFSymbolsInTLSFixupsImpl(cast<MCUnaryExpr>(Expr)->getSubExpr(), Asm); - break; - } -} - -void AArch64MCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const { - switch (getKind()) { - default: - return; - case VK_AARCH64_DTPREL_G2: - case VK_AARCH64_DTPREL_G1: - case VK_AARCH64_DTPREL_G1_NC: - case VK_AARCH64_DTPREL_G0: - case VK_AARCH64_DTPREL_G0_NC: - case VK_AARCH64_DTPREL_HI12: - case VK_AARCH64_DTPREL_LO12: - case VK_AARCH64_DTPREL_LO12_NC: - case VK_AARCH64_GOTTPREL_G1: - case VK_AARCH64_GOTTPREL_G0_NC: - case VK_AARCH64_GOTTPREL: - case VK_AARCH64_GOTTPREL_LO12: - case VK_AARCH64_TPREL_G2: - case VK_AARCH64_TPREL_G1: - case VK_AARCH64_TPREL_G1_NC: - case VK_AARCH64_TPREL_G0: - case VK_AARCH64_TPREL_G0_NC: - case VK_AARCH64_TPREL_HI12: - case VK_AARCH64_TPREL_LO12: - case VK_AARCH64_TPREL_LO12_NC: - case VK_AARCH64_TLSDESC: - case VK_AARCH64_TLSDESC_LO12: - break; - } - - fixELFSymbolsInTLSFixupsImpl(getSubExpr(), Asm); -} - -// FIXME: This basically copies MCObjectStreamer::AddValueSymbols. Perhaps -// that method should be made public? -// FIXME: really do above: now that two backends are using it. -static void AddValueSymbolsImpl(const MCExpr *Value, MCAssembler *Asm) { - switch (Value->getKind()) { - case MCExpr::Target: - llvm_unreachable("Can't handle nested target expr!"); - break; - - case MCExpr::Constant: - break; - - case MCExpr::Binary: { - const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value); - AddValueSymbolsImpl(BE->getLHS(), Asm); - AddValueSymbolsImpl(BE->getRHS(), Asm); - break; - } - - case MCExpr::SymbolRef: - Asm->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol()); - break; - - case MCExpr::Unary: - AddValueSymbolsImpl(cast<MCUnaryExpr>(Value)->getSubExpr(), Asm); - break; - } -} - -void AArch64MCExpr::AddValueSymbols(MCAssembler *Asm) const { - AddValueSymbolsImpl(getSubExpr(), Asm); -} diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h deleted file mode 100644 index 23128fefb0b..00000000000 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h +++ /dev/null @@ -1,187 +0,0 @@ -//==- AArch64MCExpr.h - AArch64 specific MC expression classes --*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file describes AArch64-specific MCExprs, used for modifiers like -// ":lo12:" or ":gottprel_g1:". 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_AARCH64MCEXPR_H -#define LLVM_AARCH64MCEXPR_H - -#include "llvm/MC/MCExpr.h" - -namespace llvm { - -class AArch64MCExpr : public MCTargetExpr { -public: - enum VariantKind { - VK_AARCH64_None, - VK_AARCH64_GOT, // :got: modifier in assembly - VK_AARCH64_GOT_LO12, // :got_lo12: - VK_AARCH64_LO12, // :lo12: - - VK_AARCH64_ABS_G0, // :abs_g0: - VK_AARCH64_ABS_G0_NC, // :abs_g0_nc: - VK_AARCH64_ABS_G1, - VK_AARCH64_ABS_G1_NC, - VK_AARCH64_ABS_G2, - VK_AARCH64_ABS_G2_NC, - VK_AARCH64_ABS_G3, - - VK_AARCH64_SABS_G0, // :abs_g0_s: - VK_AARCH64_SABS_G1, - VK_AARCH64_SABS_G2, - - VK_AARCH64_DTPREL_G2, // :dtprel_g2: - VK_AARCH64_DTPREL_G1, - VK_AARCH64_DTPREL_G1_NC, - VK_AARCH64_DTPREL_G0, - VK_AARCH64_DTPREL_G0_NC, - VK_AARCH64_DTPREL_HI12, - VK_AARCH64_DTPREL_LO12, - VK_AARCH64_DTPREL_LO12_NC, - - VK_AARCH64_GOTTPREL_G1, // :gottprel: - VK_AARCH64_GOTTPREL_G0_NC, - VK_AARCH64_GOTTPREL, - VK_AARCH64_GOTTPREL_LO12, - - VK_AARCH64_TPREL_G2, // :tprel: - VK_AARCH64_TPREL_G1, - VK_AARCH64_TPREL_G1_NC, - VK_AARCH64_TPREL_G0, - VK_AARCH64_TPREL_G0_NC, - VK_AARCH64_TPREL_HI12, - VK_AARCH64_TPREL_LO12, - VK_AARCH64_TPREL_LO12_NC, - - VK_AARCH64_TLSDESC, // :tlsdesc: - VK_AARCH64_TLSDESC_LO12 - }; - -private: - const VariantKind Kind; - const MCExpr *Expr; - - explicit AArch64MCExpr(VariantKind _Kind, const MCExpr *_Expr) - : Kind(_Kind), Expr(_Expr) {} - -public: - /// @name Construction - /// @{ - - static const AArch64MCExpr *Create(VariantKind Kind, const MCExpr *Expr, - MCContext &Ctx); - - static const AArch64MCExpr *CreateLo12(const MCExpr *Expr, MCContext &Ctx) { - return Create(VK_AARCH64_LO12, Expr, Ctx); - } - - static const AArch64MCExpr *CreateGOT(const MCExpr *Expr, MCContext &Ctx) { - return Create(VK_AARCH64_GOT, Expr, Ctx); - } - - static const AArch64MCExpr *CreateGOTLo12(const MCExpr *Expr, - MCContext &Ctx) { - return Create(VK_AARCH64_GOT_LO12, Expr, Ctx); - } - - static const AArch64MCExpr *CreateDTPREL_G1(const MCExpr *Expr, - MCContext &Ctx) { - return Create(VK_AARCH64_DTPREL_G1, Expr, Ctx); - } - - static const AArch64MCExpr *CreateDTPREL_G0_NC(const MCExpr *Expr, - MCContext &Ctx) { - return Create(VK_AARCH64_DTPREL_G0_NC, Expr, Ctx); - } - - static const AArch64MCExpr *CreateGOTTPREL(const MCExpr *Expr, - MCContext &Ctx) { - return Create(VK_AARCH64_GOTTPREL, Expr, Ctx); - } - - static const AArch64MCExpr *CreateGOTTPRELLo12(const MCExpr *Expr, - MCContext &Ctx) { - return Create(VK_AARCH64_GOTTPREL_LO12, Expr, Ctx); - } - - static const AArch64MCExpr *CreateTLSDesc(const MCExpr *Expr, - MCContext &Ctx) { - return Create(VK_AARCH64_TLSDESC, Expr, Ctx); - } - - static const AArch64MCExpr *CreateTLSDescLo12(const MCExpr *Expr, - MCContext &Ctx) { - return Create(VK_AARCH64_TLSDESC_LO12, Expr, Ctx); - } - - static const AArch64MCExpr *CreateTPREL_G1(const MCExpr *Expr, - MCContext &Ctx) { - return Create(VK_AARCH64_TPREL_G1, Expr, Ctx); - } - - static const AArch64MCExpr *CreateTPREL_G0_NC(const MCExpr *Expr, - MCContext &Ctx) { - return Create(VK_AARCH64_TPREL_G0_NC, Expr, Ctx); - } - - static const AArch64MCExpr *CreateABS_G3(const MCExpr *Expr, - MCContext &Ctx) { - return Create(VK_AARCH64_ABS_G3, Expr, Ctx); - } - - static const AArch64MCExpr *CreateABS_G2_NC(const MCExpr *Expr, - MCContext &Ctx) { - return Create(VK_AARCH64_ABS_G2_NC, Expr, Ctx); - } - - static const AArch64MCExpr *CreateABS_G1_NC(const MCExpr *Expr, - MCContext &Ctx) { - return Create(VK_AARCH64_ABS_G1_NC, 
Expr, Ctx); - } - - static const AArch64MCExpr *CreateABS_G0_NC(const MCExpr *Expr, - MCContext &Ctx) { - return Create(VK_AARCH64_ABS_G0_NC, Expr, Ctx); - } - - /// @} - /// @name Accessors - /// @{ - - /// getOpcode - Get the kind of this expression. - VariantKind getKind() const { return Kind; } - - /// getSubExpr - Get the child of this expression. - const MCExpr *getSubExpr() const { return Expr; } - - /// @} - - void PrintImpl(raw_ostream &OS) const override; - bool EvaluateAsRelocatableImpl(MCValue &Res, - const MCAsmLayout *Layout) const override; - void AddValueSymbols(MCAssembler *) const override; - const MCSection *FindAssociatedSection() const override { - return getSubExpr()->FindAssociatedSection(); - } - - void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override; - - static bool classof(const MCExpr *E) { - return E->getKind() == MCExpr::Target; - } - - static bool classof(const AArch64MCExpr *) { return true; } - -}; -} // end namespace llvm - -#endif diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp deleted file mode 100644 index 599949c0435..00000000000 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp +++ /dev/null @@ -1,221 +0,0 @@ -//===-- AArch64MCTargetDesc.cpp - AArch64 Target Descriptions -------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file provides AArch64 specific target descriptions. -// -//===----------------------------------------------------------------------===// - -#include "AArch64MCTargetDesc.h" -#include "AArch64ELFStreamer.h" -#include "AArch64MCAsmInfo.h" -#include "InstPrinter/AArch64InstPrinter.h" -#include "llvm/ADT/APInt.h" -#include "llvm/MC/MCCodeGenInfo.h" -#include "llvm/MC/MCInstrAnalysis.h" -#include "llvm/MC/MCInstrInfo.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/TargetRegistry.h" - -using namespace llvm; - -#define GET_REGINFO_MC_DESC -#include "AArch64GenRegisterInfo.inc" - -#define GET_INSTRINFO_MC_DESC -#include "AArch64GenInstrInfo.inc" - -#define GET_SUBTARGETINFO_MC_DESC -#include "AArch64GenSubtargetInfo.inc" - -MCSubtargetInfo *AArch64_MC::createAArch64MCSubtargetInfo(StringRef TT, - StringRef CPU, - StringRef FS) { - MCSubtargetInfo *X = new MCSubtargetInfo(); - InitAArch64MCSubtargetInfo(X, TT, CPU, FS); - return X; -} - - -static MCInstrInfo *createAArch64MCInstrInfo() { - MCInstrInfo *X = new MCInstrInfo(); - InitAArch64MCInstrInfo(X); - return X; -} - -static MCRegisterInfo *createAArch64MCRegisterInfo(StringRef Triple) { - MCRegisterInfo *X = new MCRegisterInfo(); - InitAArch64MCRegisterInfo(X, AArch64::X30); - return X; -} - -static MCAsmInfo *createAArch64MCAsmInfo(const MCRegisterInfo &MRI, - StringRef TT) { - Triple TheTriple(TT); - - MCAsmInfo *MAI = new AArch64ELFMCAsmInfo(TT); - unsigned Reg = MRI.getDwarfRegNum(AArch64::XSP, true); - MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(nullptr, Reg, 0); - MAI->addInitialFrameState(Inst); - - return MAI; -} - -static MCCodeGenInfo *createAArch64MCCodeGenInfo(StringRef TT, Reloc::Model RM, - CodeModel::Model CM, - CodeGenOpt::Level OL) { - MCCodeGenInfo *X = new MCCodeGenInfo(); - if (RM == 
Reloc::Default || RM == Reloc::DynamicNoPIC) { - // On ELF platforms the default static relocation model has a smart enough - // linker to cope with referencing external symbols defined in a shared - // library. Hence DynamicNoPIC doesn't need to be promoted to PIC. - RM = Reloc::Static; - } - - if (CM == CodeModel::Default) - CM = CodeModel::Small; - else if (CM == CodeModel::JITDefault) { - // The default MCJIT memory managers make no guarantees about where they can - // find an executable page; JITed code needs to be able to refer to globals - // no matter how far away they are. - CM = CodeModel::Large; - } - - X->InitMCCodeGenInfo(RM, CM, OL); - return X; -} - -static MCStreamer *createMCStreamer(const Target &T, StringRef TT, - MCContext &Ctx, MCAsmBackend &MAB, - raw_ostream &OS, - MCCodeEmitter *Emitter, - const MCSubtargetInfo &STI, - bool RelaxAll, - bool NoExecStack) { - Triple TheTriple(TT); - - return createAArch64ELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack); -} - - -static MCInstPrinter *createAArch64MCInstPrinter(const Target &T, - unsigned SyntaxVariant, - const MCAsmInfo &MAI, - const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) { - if (SyntaxVariant == 0) - return new AArch64InstPrinter(MAI, MII, MRI, STI); - return nullptr; -} - -namespace { - -class AArch64MCInstrAnalysis : public MCInstrAnalysis { -public: - AArch64MCInstrAnalysis(const MCInstrInfo *Info) : MCInstrAnalysis(Info) {} - - bool isUnconditionalBranch(const MCInst &Inst) const override { - if (Inst.getOpcode() == AArch64::Bcc - && Inst.getOperand(0).getImm() == A64CC::AL) - return true; - return MCInstrAnalysis::isUnconditionalBranch(Inst); - } - - bool isConditionalBranch(const MCInst &Inst) const override { - if (Inst.getOpcode() == AArch64::Bcc - && Inst.getOperand(0).getImm() == A64CC::AL) - return false; - return MCInstrAnalysis::isConditionalBranch(Inst); - } - - bool evaluateBranch(const MCInst &Inst, uint64_t Addr, - uint64_t Size, uint64_t &Target) const override { - unsigned LblOperand = Inst.getOpcode() == AArch64::Bcc ? 1 : 0; - // FIXME: We only handle PCRel branches for now. - if (Info->get(Inst.getOpcode()).OpInfo[LblOperand].OperandType - != MCOI::OPERAND_PCREL) - return false; - - int64_t Imm = Inst.getOperand(LblOperand).getImm(); - Target = Addr + Imm; - return true; - } -}; - -} - -static MCInstrAnalysis *createAArch64MCInstrAnalysis(const MCInstrInfo *Info) { - return new AArch64MCInstrAnalysis(Info); -} - - - -extern "C" void LLVMInitializeAArch64TargetMC() { - // Register the MC asm info. - RegisterMCAsmInfoFn A(TheAArch64leTarget, createAArch64MCAsmInfo); - RegisterMCAsmInfoFn B(TheAArch64beTarget, createAArch64MCAsmInfo); - - // Register the MC codegen info. - TargetRegistry::RegisterMCCodeGenInfo(TheAArch64leTarget, - createAArch64MCCodeGenInfo); - TargetRegistry::RegisterMCCodeGenInfo(TheAArch64beTarget, - createAArch64MCCodeGenInfo); - - // Register the MC instruction info. - TargetRegistry::RegisterMCInstrInfo(TheAArch64leTarget, - createAArch64MCInstrInfo); - TargetRegistry::RegisterMCInstrInfo(TheAArch64beTarget, - createAArch64MCInstrInfo); - - // Register the MC register info. - TargetRegistry::RegisterMCRegInfo(TheAArch64leTarget, - createAArch64MCRegisterInfo); - TargetRegistry::RegisterMCRegInfo(TheAArch64beTarget, - createAArch64MCRegisterInfo); - - // Register the MC subtarget info. 
- using AArch64_MC::createAArch64MCSubtargetInfo; - TargetRegistry::RegisterMCSubtargetInfo(TheAArch64leTarget, - createAArch64MCSubtargetInfo); - TargetRegistry::RegisterMCSubtargetInfo(TheAArch64beTarget, - createAArch64MCSubtargetInfo); - - // Register the MC instruction analyzer. - TargetRegistry::RegisterMCInstrAnalysis(TheAArch64leTarget, - createAArch64MCInstrAnalysis); - TargetRegistry::RegisterMCInstrAnalysis(TheAArch64beTarget, - createAArch64MCInstrAnalysis); - - // Register the MC Code Emitter - TargetRegistry::RegisterMCCodeEmitter(TheAArch64leTarget, - createAArch64MCCodeEmitter); - TargetRegistry::RegisterMCCodeEmitter(TheAArch64beTarget, - createAArch64MCCodeEmitter); - - // Register the asm backend. - TargetRegistry::RegisterMCAsmBackend(TheAArch64leTarget, - createAArch64leAsmBackend); - TargetRegistry::RegisterMCAsmBackend(TheAArch64beTarget, - createAArch64beAsmBackend); - - // Register the object streamer. - TargetRegistry::RegisterMCObjectStreamer(TheAArch64leTarget, - createMCStreamer); - TargetRegistry::RegisterMCObjectStreamer(TheAArch64beTarget, - createMCStreamer); - - // Register the MCInstPrinter. - TargetRegistry::RegisterMCInstPrinter(TheAArch64leTarget, - createAArch64MCInstPrinter); - TargetRegistry::RegisterMCInstPrinter(TheAArch64beTarget, - createAArch64MCInstPrinter); -} diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h deleted file mode 100644 index bd8beaf16b0..00000000000 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h +++ /dev/null @@ -1,72 +0,0 @@ -//===-- AArch64MCTargetDesc.h - AArch64 Target Descriptions -----*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file provides AArch64 specific target descriptions. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_AARCH64MCTARGETDESC_H -#define LLVM_AARCH64MCTARGETDESC_H - -#include "llvm/Support/DataTypes.h" - -namespace llvm { -class MCAsmBackend; -class MCCodeEmitter; -class MCContext; -class MCInstrInfo; -class MCObjectWriter; -class MCRegisterInfo; -class MCSubtargetInfo; -class StringRef; -class Target; -class raw_ostream; - -extern Target TheAArch64leTarget; -extern Target TheAArch64beTarget; - -namespace AArch64_MC { - MCSubtargetInfo *createAArch64MCSubtargetInfo(StringRef TT, StringRef CPU, - StringRef FS); -} - -MCCodeEmitter *createAArch64MCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, - MCContext &Ctx); - -MCObjectWriter *createAArch64ELFObjectWriter(raw_ostream &OS, - uint8_t OSABI, - bool IsLittleEndian); - -MCAsmBackend *createAArch64leAsmBackend(const Target &T, - const MCRegisterInfo &MRI, - StringRef TT, StringRef CPU); - -MCAsmBackend *createAArch64beAsmBackend(const Target &T, - const MCRegisterInfo &MRI, - StringRef TT, StringRef CPU); - -} // End llvm namespace - -// Defines symbolic names for AArch64 registers. This defines a mapping from -// register name to register number. -// -#define GET_REGINFO_ENUM -#include "AArch64GenRegisterInfo.inc" - -// Defines symbolic names for the AArch64 instructions. 
-// -#define GET_INSTRINFO_ENUM -#include "AArch64GenInstrInfo.inc" - -#define GET_SUBTARGETINFO_ENUM -#include "AArch64GenSubtargetInfo.inc" - -#endif diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt b/llvm/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt deleted file mode 100644 index 54c4465b60d..00000000000 --- a/llvm/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -add_llvm_library(LLVMAArch64Desc - AArch64AsmBackend.cpp - AArch64ELFObjectWriter.cpp - AArch64ELFStreamer.cpp - AArch64MCAsmInfo.cpp - AArch64MCCodeEmitter.cpp - AArch64MCExpr.cpp - AArch64MCTargetDesc.cpp - ) diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt b/llvm/lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt deleted file mode 100644 index 37c8035a49f..00000000000 --- a/llvm/lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt +++ /dev/null @@ -1,24 +0,0 @@ -;===- ./lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt ----------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. -; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[component_0] -type = Library -name = AArch64Desc -parent = AArch64 -required_libraries = AArch64AsmPrinter AArch64Info MC Support -add_to_library_groups = AArch64 - diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/Makefile b/llvm/lib/Target/AArch64/MCTargetDesc/Makefile deleted file mode 100644 index 5779ac5ac60..00000000000 --- a/llvm/lib/Target/AArch64/MCTargetDesc/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/AArch64/TargetDesc/Makefile --------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. -LIBRARYNAME = LLVMAArch64Desc - -# Hack: we need to include 'main' target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/llvm/lib/Target/AArch64/Makefile b/llvm/lib/Target/AArch64/Makefile deleted file mode 100644 index 641bb83c477..00000000000 --- a/llvm/lib/Target/AArch64/Makefile +++ /dev/null @@ -1,30 +0,0 @@ -##===- lib/Target/AArch64/Makefile -------------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../.. -LIBRARYNAME = LLVMAArch64CodeGen -TARGET = AArch64 - -# Make sure that tblgen is run, first thing. 
-BUILT_SOURCES = AArch64GenAsmMatcher.inc \ - AArch64GenAsmWriter.inc \ - AArch64GenCallingConv.inc \ - AArch64GenDAGISel.inc \ - AArch64GenDisassemblerTables.inc \ - AArch64GenInstrInfo.inc \ - AArch64GenMCCodeEmitter.inc \ - AArch64GenMCPseudoLowering.inc \ - AArch64GenRegisterInfo.inc \ - AArch64GenSubtargetInfo.inc - -DIRS = InstPrinter AsmParser Disassembler TargetInfo MCTargetDesc Utils - -include $(LEVEL)/Makefile.common - - diff --git a/llvm/lib/Target/AArch64/README.txt b/llvm/lib/Target/AArch64/README.txt deleted file mode 100644 index 601990f17de..00000000000 --- a/llvm/lib/Target/AArch64/README.txt +++ /dev/null @@ -1,2 +0,0 @@ -This file will contain changes that need to be made before AArch64 can become an -officially supported target. Currently a placeholder. diff --git a/llvm/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp b/llvm/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp deleted file mode 100644 index 9281e4e1d93..00000000000 --- a/llvm/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp +++ /dev/null @@ -1,27 +0,0 @@ -//===-- AArch64TargetInfo.cpp - AArch64 Target Implementation -------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the key registration step for the architecture. -// -//===----------------------------------------------------------------------===// - -#include "AArch64.h" -#include "llvm/IR/Module.h" -#include "llvm/Support/TargetRegistry.h" -using namespace llvm; - -Target llvm::TheAArch64leTarget; -Target llvm::TheAArch64beTarget; - -extern "C" void LLVMInitializeAArch64TargetInfo() { - RegisterTarget<Triple::aarch64, /*HasJIT=*/true> - X(TheAArch64leTarget, "aarch64", "AArch64 (ARM 64-bit little endian target)"); - RegisterTarget<Triple::aarch64_be, /*HasJIT=*/true> - Y(TheAArch64beTarget, "aarch64_be", "AArch64 (ARM 64-bit big endian target)"); -} diff --git a/llvm/lib/Target/AArch64/TargetInfo/CMakeLists.txt b/llvm/lib/Target/AArch64/TargetInfo/CMakeLists.txt deleted file mode 100644 index ee734c64726..00000000000 --- a/llvm/lib/Target/AArch64/TargetInfo/CMakeLists.txt +++ /dev/null @@ -1,3 +0,0 @@ -add_llvm_library(LLVMAArch64Info - AArch64TargetInfo.cpp - ) diff --git a/llvm/lib/Target/AArch64/TargetInfo/LLVMBuild.txt b/llvm/lib/Target/AArch64/TargetInfo/LLVMBuild.txt deleted file mode 100644 index 64291723981..00000000000 --- a/llvm/lib/Target/AArch64/TargetInfo/LLVMBuild.txt +++ /dev/null @@ -1,23 +0,0 @@ -;===- ./lib/Target/AArch64/TargetInfo/LLVMBuild.txt ------------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. 
-; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[component_0] -type = Library -name = AArch64Info -parent = AArch64 -required_libraries = Support -add_to_library_groups = AArch64 diff --git a/llvm/lib/Target/AArch64/TargetInfo/Makefile b/llvm/lib/Target/AArch64/TargetInfo/Makefile deleted file mode 100644 index 9dc9aa4bccf..00000000000 --- a/llvm/lib/Target/AArch64/TargetInfo/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/AArch64/TargetInfo/Makefile --------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMAArch64Info - -# Hack: we need to include 'main' target directory to grab private headers -CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp deleted file mode 100644 index 2a97cd63256..00000000000 --- a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp +++ /dev/null @@ -1,1173 +0,0 @@ -//===-- AArch64BaseInfo.cpp - AArch64 Base encoding information------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file provides basic encoding and assembly information for AArch64. -// -//===----------------------------------------------------------------------===// -#include "AArch64BaseInfo.h" -#include "llvm/ADT/APFloat.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/Support/Regex.h" - -using namespace llvm; - -StringRef NamedImmMapper::toString(uint32_t Value, bool &Valid) const { - for (unsigned i = 0; i < NumPairs; ++i) { - if (Pairs[i].Value == Value) { - Valid = true; - return Pairs[i].Name; - } - } - - Valid = false; - return StringRef(); -} - -uint32_t NamedImmMapper::fromString(StringRef Name, bool &Valid) const { - std::string LowerCaseName = Name.lower(); - for (unsigned i = 0; i < NumPairs; ++i) { - if (Pairs[i].Name == LowerCaseName) { - Valid = true; - return Pairs[i].Value; - } - } - - Valid = false; - return -1; -} - -bool NamedImmMapper::validImm(uint32_t Value) const { - return Value < TooBigImm; -} - -const NamedImmMapper::Mapping A64AT::ATMapper::ATPairs[] = { - {"s1e1r", S1E1R}, - {"s1e2r", S1E2R}, - {"s1e3r", S1E3R}, - {"s1e1w", S1E1W}, - {"s1e2w", S1E2W}, - {"s1e3w", S1E3W}, - {"s1e0r", S1E0R}, - {"s1e0w", S1E0W}, - {"s12e1r", S12E1R}, - {"s12e1w", S12E1W}, - {"s12e0r", S12E0R}, - {"s12e0w", S12E0W}, -}; - -A64AT::ATMapper::ATMapper() - : NamedImmMapper(ATPairs, 0) {} - -const NamedImmMapper::Mapping A64DB::DBarrierMapper::DBarrierPairs[] = { - {"oshld", OSHLD}, - {"oshst", OSHST}, - {"osh", OSH}, - {"nshld", NSHLD}, - {"nshst", NSHST}, - {"nsh", NSH}, - {"ishld", ISHLD}, - {"ishst", ISHST}, - {"ish", ISH}, - {"ld", LD}, - {"st", ST}, - {"sy", SY} -}; - -A64DB::DBarrierMapper::DBarrierMapper() - : NamedImmMapper(DBarrierPairs, 16u) {} - -const NamedImmMapper::Mapping A64DC::DCMapper::DCPairs[] = { - {"zva", ZVA}, - {"ivac", IVAC}, - {"isw", ISW}, - {"cvac", 
CVAC}, - {"csw", CSW}, - {"cvau", CVAU}, - {"civac", CIVAC}, - {"cisw", CISW} -}; - -A64DC::DCMapper::DCMapper() - : NamedImmMapper(DCPairs, 0) {} - -const NamedImmMapper::Mapping A64IC::ICMapper::ICPairs[] = { - {"ialluis", IALLUIS}, - {"iallu", IALLU}, - {"ivau", IVAU} -}; - -A64IC::ICMapper::ICMapper() - : NamedImmMapper(ICPairs, 0) {} - -const NamedImmMapper::Mapping A64ISB::ISBMapper::ISBPairs[] = { - {"sy", SY}, -}; - -A64ISB::ISBMapper::ISBMapper() - : NamedImmMapper(ISBPairs, 16) {} - -const NamedImmMapper::Mapping A64PRFM::PRFMMapper::PRFMPairs[] = { - {"pldl1keep", PLDL1KEEP}, - {"pldl1strm", PLDL1STRM}, - {"pldl2keep", PLDL2KEEP}, - {"pldl2strm", PLDL2STRM}, - {"pldl3keep", PLDL3KEEP}, - {"pldl3strm", PLDL3STRM}, - {"plil1keep", PLIL1KEEP}, - {"plil1strm", PLIL1STRM}, - {"plil2keep", PLIL2KEEP}, - {"plil2strm", PLIL2STRM}, - {"plil3keep", PLIL3KEEP}, - {"plil3strm", PLIL3STRM}, - {"pstl1keep", PSTL1KEEP}, - {"pstl1strm", PSTL1STRM}, - {"pstl2keep", PSTL2KEEP}, - {"pstl2strm", PSTL2STRM}, - {"pstl3keep", PSTL3KEEP}, - {"pstl3strm", PSTL3STRM} -}; - -A64PRFM::PRFMMapper::PRFMMapper() - : NamedImmMapper(PRFMPairs, 32) {} - -const NamedImmMapper::Mapping A64PState::PStateMapper::PStatePairs[] = { - {"spsel", SPSel}, - {"daifset", DAIFSet}, - {"daifclr", DAIFClr} -}; - -A64PState::PStateMapper::PStateMapper() - : NamedImmMapper(PStatePairs, 0) {} - -const NamedImmMapper::Mapping A64SysReg::MRSMapper::MRSPairs[] = { - {"mdccsr_el0", MDCCSR_EL0}, - {"dbgdtrrx_el0", DBGDTRRX_EL0}, - {"mdrar_el1", MDRAR_EL1}, - {"oslsr_el1", OSLSR_EL1}, - {"dbgauthstatus_el1", DBGAUTHSTATUS_EL1}, - {"pmceid0_el0", PMCEID0_EL0}, - {"pmceid1_el0", PMCEID1_EL0}, - {"midr_el1", MIDR_EL1}, - {"ccsidr_el1", CCSIDR_EL1}, - {"clidr_el1", CLIDR_EL1}, - {"ctr_el0", CTR_EL0}, - {"mpidr_el1", MPIDR_EL1}, - {"revidr_el1", REVIDR_EL1}, - {"aidr_el1", AIDR_EL1}, - {"dczid_el0", DCZID_EL0}, - {"id_pfr0_el1", ID_PFR0_EL1}, - {"id_pfr1_el1", ID_PFR1_EL1}, - {"id_dfr0_el1", ID_DFR0_EL1}, - {"id_afr0_el1", ID_AFR0_EL1}, - {"id_mmfr0_el1", ID_MMFR0_EL1}, - {"id_mmfr1_el1", ID_MMFR1_EL1}, - {"id_mmfr2_el1", ID_MMFR2_EL1}, - {"id_mmfr3_el1", ID_MMFR3_EL1}, - {"id_isar0_el1", ID_ISAR0_EL1}, - {"id_isar1_el1", ID_ISAR1_EL1}, - {"id_isar2_el1", ID_ISAR2_EL1}, - {"id_isar3_el1", ID_ISAR3_EL1}, - {"id_isar4_el1", ID_ISAR4_EL1}, - {"id_isar5_el1", ID_ISAR5_EL1}, - {"id_aa64pfr0_el1", ID_AA64PFR0_EL1}, - {"id_aa64pfr1_el1", ID_AA64PFR1_EL1}, - {"id_aa64dfr0_el1", ID_AA64DFR0_EL1}, - {"id_aa64dfr1_el1", ID_AA64DFR1_EL1}, - {"id_aa64afr0_el1", ID_AA64AFR0_EL1}, - {"id_aa64afr1_el1", ID_AA64AFR1_EL1}, - {"id_aa64isar0_el1", ID_AA64ISAR0_EL1}, - {"id_aa64isar1_el1", ID_AA64ISAR1_EL1}, - {"id_aa64mmfr0_el1", ID_AA64MMFR0_EL1}, - {"id_aa64mmfr1_el1", ID_AA64MMFR1_EL1}, - {"mvfr0_el1", MVFR0_EL1}, - {"mvfr1_el1", MVFR1_EL1}, - {"mvfr2_el1", MVFR2_EL1}, - {"rvbar_el1", RVBAR_EL1}, - {"rvbar_el2", RVBAR_EL2}, - {"rvbar_el3", RVBAR_EL3}, - {"isr_el1", ISR_EL1}, - {"cntpct_el0", CNTPCT_EL0}, - {"cntvct_el0", CNTVCT_EL0}, - - // Trace registers - {"trcstatr", TRCSTATR}, - {"trcidr8", TRCIDR8}, - {"trcidr9", TRCIDR9}, - {"trcidr10", TRCIDR10}, - {"trcidr11", TRCIDR11}, - {"trcidr12", TRCIDR12}, - {"trcidr13", TRCIDR13}, - {"trcidr0", TRCIDR0}, - {"trcidr1", TRCIDR1}, - {"trcidr2", TRCIDR2}, - {"trcidr3", TRCIDR3}, - {"trcidr4", TRCIDR4}, - {"trcidr5", TRCIDR5}, - {"trcidr6", TRCIDR6}, - {"trcidr7", TRCIDR7}, - {"trcoslsr", TRCOSLSR}, - {"trcpdsr", TRCPDSR}, - {"trcdevaff0", TRCDEVAFF0}, - {"trcdevaff1", TRCDEVAFF1}, - {"trclsr", TRCLSR}, - 
{"trcauthstatus", TRCAUTHSTATUS}, - {"trcdevarch", TRCDEVARCH}, - {"trcdevid", TRCDEVID}, - {"trcdevtype", TRCDEVTYPE}, - {"trcpidr4", TRCPIDR4}, - {"trcpidr5", TRCPIDR5}, - {"trcpidr6", TRCPIDR6}, - {"trcpidr7", TRCPIDR7}, - {"trcpidr0", TRCPIDR0}, - {"trcpidr1", TRCPIDR1}, - {"trcpidr2", TRCPIDR2}, - {"trcpidr3", TRCPIDR3}, - {"trccidr0", TRCCIDR0}, - {"trccidr1", TRCCIDR1}, - {"trccidr2", TRCCIDR2}, - {"trccidr3", TRCCIDR3}, - - // GICv3 registers - {"icc_iar1_el1", ICC_IAR1_EL1}, - {"icc_iar0_el1", ICC_IAR0_EL1}, - {"icc_hppir1_el1", ICC_HPPIR1_EL1}, - {"icc_hppir0_el1", ICC_HPPIR0_EL1}, - {"icc_rpr_el1", ICC_RPR_EL1}, - {"ich_vtr_el2", ICH_VTR_EL2}, - {"ich_eisr_el2", ICH_EISR_EL2}, - {"ich_elsr_el2", ICH_ELSR_EL2} -}; - -A64SysReg::MRSMapper::MRSMapper() { - InstPairs = &MRSPairs[0]; - NumInstPairs = llvm::array_lengthof(MRSPairs); -} - -const NamedImmMapper::Mapping A64SysReg::MSRMapper::MSRPairs[] = { - {"dbgdtrtx_el0", DBGDTRTX_EL0}, - {"oslar_el1", OSLAR_EL1}, - {"pmswinc_el0", PMSWINC_EL0}, - - // Trace registers - {"trcoslar", TRCOSLAR}, - {"trclar", TRCLAR}, - - // GICv3 registers - {"icc_eoir1_el1", ICC_EOIR1_EL1}, - {"icc_eoir0_el1", ICC_EOIR0_EL1}, - {"icc_dir_el1", ICC_DIR_EL1}, - {"icc_sgi1r_el1", ICC_SGI1R_EL1}, - {"icc_asgi1r_el1", ICC_ASGI1R_EL1}, - {"icc_sgi0r_el1", ICC_SGI0R_EL1} -}; - -A64SysReg::MSRMapper::MSRMapper() { - InstPairs = &MSRPairs[0]; - NumInstPairs = llvm::array_lengthof(MSRPairs); -} - - -const NamedImmMapper::Mapping A64SysReg::SysRegMapper::SysRegPairs[] = { - {"osdtrrx_el1", OSDTRRX_EL1}, - {"osdtrtx_el1", OSDTRTX_EL1}, - {"teecr32_el1", TEECR32_EL1}, - {"mdccint_el1", MDCCINT_EL1}, - {"mdscr_el1", MDSCR_EL1}, - {"dbgdtr_el0", DBGDTR_EL0}, - {"oseccr_el1", OSECCR_EL1}, - {"dbgvcr32_el2", DBGVCR32_EL2}, - {"dbgbvr0_el1", DBGBVR0_EL1}, - {"dbgbvr1_el1", DBGBVR1_EL1}, - {"dbgbvr2_el1", DBGBVR2_EL1}, - {"dbgbvr3_el1", DBGBVR3_EL1}, - {"dbgbvr4_el1", DBGBVR4_EL1}, - {"dbgbvr5_el1", DBGBVR5_EL1}, - {"dbgbvr6_el1", DBGBVR6_EL1}, - {"dbgbvr7_el1", DBGBVR7_EL1}, - {"dbgbvr8_el1", DBGBVR8_EL1}, - {"dbgbvr9_el1", DBGBVR9_EL1}, - {"dbgbvr10_el1", DBGBVR10_EL1}, - {"dbgbvr11_el1", DBGBVR11_EL1}, - {"dbgbvr12_el1", DBGBVR12_EL1}, - {"dbgbvr13_el1", DBGBVR13_EL1}, - {"dbgbvr14_el1", DBGBVR14_EL1}, - {"dbgbvr15_el1", DBGBVR15_EL1}, - {"dbgbcr0_el1", DBGBCR0_EL1}, - {"dbgbcr1_el1", DBGBCR1_EL1}, - {"dbgbcr2_el1", DBGBCR2_EL1}, - {"dbgbcr3_el1", DBGBCR3_EL1}, - {"dbgbcr4_el1", DBGBCR4_EL1}, - {"dbgbcr5_el1", DBGBCR5_EL1}, - {"dbgbcr6_el1", DBGBCR6_EL1}, - {"dbgbcr7_el1", DBGBCR7_EL1}, - {"dbgbcr8_el1", DBGBCR8_EL1}, - {"dbgbcr9_el1", DBGBCR9_EL1}, - {"dbgbcr10_el1", DBGBCR10_EL1}, - {"dbgbcr11_el1", DBGBCR11_EL1}, - {"dbgbcr12_el1", DBGBCR12_EL1}, - {"dbgbcr13_el1", DBGBCR13_EL1}, - {"dbgbcr14_el1", DBGBCR14_EL1}, - {"dbgbcr15_el1", DBGBCR15_EL1}, - {"dbgwvr0_el1", DBGWVR0_EL1}, - {"dbgwvr1_el1", DBGWVR1_EL1}, - {"dbgwvr2_el1", DBGWVR2_EL1}, - {"dbgwvr3_el1", DBGWVR3_EL1}, - {"dbgwvr4_el1", DBGWVR4_EL1}, - {"dbgwvr5_el1", DBGWVR5_EL1}, - {"dbgwvr6_el1", DBGWVR6_EL1}, - {"dbgwvr7_el1", DBGWVR7_EL1}, - {"dbgwvr8_el1", DBGWVR8_EL1}, - {"dbgwvr9_el1", DBGWVR9_EL1}, - {"dbgwvr10_el1", DBGWVR10_EL1}, - {"dbgwvr11_el1", DBGWVR11_EL1}, - {"dbgwvr12_el1", DBGWVR12_EL1}, - {"dbgwvr13_el1", DBGWVR13_EL1}, - {"dbgwvr14_el1", DBGWVR14_EL1}, - {"dbgwvr15_el1", DBGWVR15_EL1}, - {"dbgwcr0_el1", DBGWCR0_EL1}, - {"dbgwcr1_el1", DBGWCR1_EL1}, - {"dbgwcr2_el1", DBGWCR2_EL1}, - {"dbgwcr3_el1", DBGWCR3_EL1}, - {"dbgwcr4_el1", DBGWCR4_EL1}, - {"dbgwcr5_el1", DBGWCR5_EL1}, - 
{"dbgwcr6_el1", DBGWCR6_EL1}, - {"dbgwcr7_el1", DBGWCR7_EL1}, - {"dbgwcr8_el1", DBGWCR8_EL1}, - {"dbgwcr9_el1", DBGWCR9_EL1}, - {"dbgwcr10_el1", DBGWCR10_EL1}, - {"dbgwcr11_el1", DBGWCR11_EL1}, - {"dbgwcr12_el1", DBGWCR12_EL1}, - {"dbgwcr13_el1", DBGWCR13_EL1}, - {"dbgwcr14_el1", DBGWCR14_EL1}, - {"dbgwcr15_el1", DBGWCR15_EL1}, - {"teehbr32_el1", TEEHBR32_EL1}, - {"osdlr_el1", OSDLR_EL1}, - {"dbgprcr_el1", DBGPRCR_EL1}, - {"dbgclaimset_el1", DBGCLAIMSET_EL1}, - {"dbgclaimclr_el1", DBGCLAIMCLR_EL1}, - {"csselr_el1", CSSELR_EL1}, - {"vpidr_el2", VPIDR_EL2}, - {"vmpidr_el2", VMPIDR_EL2}, - {"sctlr_el1", SCTLR_EL1}, - {"sctlr_el2", SCTLR_EL2}, - {"sctlr_el3", SCTLR_EL3}, - {"actlr_el1", ACTLR_EL1}, - {"actlr_el2", ACTLR_EL2}, - {"actlr_el3", ACTLR_EL3}, - {"cpacr_el1", CPACR_EL1}, - {"hcr_el2", HCR_EL2}, - {"scr_el3", SCR_EL3}, - {"mdcr_el2", MDCR_EL2}, - {"sder32_el3", SDER32_EL3}, - {"cptr_el2", CPTR_EL2}, - {"cptr_el3", CPTR_EL3}, - {"hstr_el2", HSTR_EL2}, - {"hacr_el2", HACR_EL2}, - {"mdcr_el3", MDCR_EL3}, - {"ttbr0_el1", TTBR0_EL1}, - {"ttbr0_el2", TTBR0_EL2}, - {"ttbr0_el3", TTBR0_EL3}, - {"ttbr1_el1", TTBR1_EL1}, - {"tcr_el1", TCR_EL1}, - {"tcr_el2", TCR_EL2}, - {"tcr_el3", TCR_EL3}, - {"vttbr_el2", VTTBR_EL2}, - {"vtcr_el2", VTCR_EL2}, - {"dacr32_el2", DACR32_EL2}, - {"spsr_el1", SPSR_EL1}, - {"spsr_el2", SPSR_EL2}, - {"spsr_el3", SPSR_EL3}, - {"elr_el1", ELR_EL1}, - {"elr_el2", ELR_EL2}, - {"elr_el3", ELR_EL3}, - {"sp_el0", SP_EL0}, - {"sp_el1", SP_EL1}, - {"sp_el2", SP_EL2}, - {"spsel", SPSel}, - {"nzcv", NZCV}, - {"daif", DAIF}, - {"currentel", CurrentEL}, - {"spsr_irq", SPSR_irq}, - {"spsr_abt", SPSR_abt}, - {"spsr_und", SPSR_und}, - {"spsr_fiq", SPSR_fiq}, - {"fpcr", FPCR}, - {"fpsr", FPSR}, - {"dspsr_el0", DSPSR_EL0}, - {"dlr_el0", DLR_EL0}, - {"ifsr32_el2", IFSR32_EL2}, - {"afsr0_el1", AFSR0_EL1}, - {"afsr0_el2", AFSR0_EL2}, - {"afsr0_el3", AFSR0_EL3}, - {"afsr1_el1", AFSR1_EL1}, - {"afsr1_el2", AFSR1_EL2}, - {"afsr1_el3", AFSR1_EL3}, - {"esr_el1", ESR_EL1}, - {"esr_el2", ESR_EL2}, - {"esr_el3", ESR_EL3}, - {"fpexc32_el2", FPEXC32_EL2}, - {"far_el1", FAR_EL1}, - {"far_el2", FAR_EL2}, - {"far_el3", FAR_EL3}, - {"hpfar_el2", HPFAR_EL2}, - {"par_el1", PAR_EL1}, - {"pmcr_el0", PMCR_EL0}, - {"pmcntenset_el0", PMCNTENSET_EL0}, - {"pmcntenclr_el0", PMCNTENCLR_EL0}, - {"pmovsclr_el0", PMOVSCLR_EL0}, - {"pmselr_el0", PMSELR_EL0}, - {"pmccntr_el0", PMCCNTR_EL0}, - {"pmxevtyper_el0", PMXEVTYPER_EL0}, - {"pmxevcntr_el0", PMXEVCNTR_EL0}, - {"pmuserenr_el0", PMUSERENR_EL0}, - {"pmintenset_el1", PMINTENSET_EL1}, - {"pmintenclr_el1", PMINTENCLR_EL1}, - {"pmovsset_el0", PMOVSSET_EL0}, - {"mair_el1", MAIR_EL1}, - {"mair_el2", MAIR_EL2}, - {"mair_el3", MAIR_EL3}, - {"amair_el1", AMAIR_EL1}, - {"amair_el2", AMAIR_EL2}, - {"amair_el3", AMAIR_EL3}, - {"vbar_el1", VBAR_EL1}, - {"vbar_el2", VBAR_EL2}, - {"vbar_el3", VBAR_EL3}, - {"rmr_el1", RMR_EL1}, - {"rmr_el2", RMR_EL2}, - {"rmr_el3", RMR_EL3}, - {"contextidr_el1", CONTEXTIDR_EL1}, - {"tpidr_el0", TPIDR_EL0}, - {"tpidr_el2", TPIDR_EL2}, - {"tpidr_el3", TPIDR_EL3}, - {"tpidrro_el0", TPIDRRO_EL0}, - {"tpidr_el1", TPIDR_EL1}, - {"cntfrq_el0", CNTFRQ_EL0}, - {"cntvoff_el2", CNTVOFF_EL2}, - {"cntkctl_el1", CNTKCTL_EL1}, - {"cnthctl_el2", CNTHCTL_EL2}, - {"cntp_tval_el0", CNTP_TVAL_EL0}, - {"cnthp_tval_el2", CNTHP_TVAL_EL2}, - {"cntps_tval_el1", CNTPS_TVAL_EL1}, - {"cntp_ctl_el0", CNTP_CTL_EL0}, - {"cnthp_ctl_el2", CNTHP_CTL_EL2}, - {"cntps_ctl_el1", CNTPS_CTL_EL1}, - {"cntp_cval_el0", CNTP_CVAL_EL0}, - {"cnthp_cval_el2", CNTHP_CVAL_EL2}, - 
{"cntps_cval_el1", CNTPS_CVAL_EL1}, - {"cntv_tval_el0", CNTV_TVAL_EL0}, - {"cntv_ctl_el0", CNTV_CTL_EL0}, - {"cntv_cval_el0", CNTV_CVAL_EL0}, - {"pmevcntr0_el0", PMEVCNTR0_EL0}, - {"pmevcntr1_el0", PMEVCNTR1_EL0}, - {"pmevcntr2_el0", PMEVCNTR2_EL0}, - {"pmevcntr3_el0", PMEVCNTR3_EL0}, - {"pmevcntr4_el0", PMEVCNTR4_EL0}, - {"pmevcntr5_el0", PMEVCNTR5_EL0}, - {"pmevcntr6_el0", PMEVCNTR6_EL0}, - {"pmevcntr7_el0", PMEVCNTR7_EL0}, - {"pmevcntr8_el0", PMEVCNTR8_EL0}, - {"pmevcntr9_el0", PMEVCNTR9_EL0}, - {"pmevcntr10_el0", PMEVCNTR10_EL0}, - {"pmevcntr11_el0", PMEVCNTR11_EL0}, - {"pmevcntr12_el0", PMEVCNTR12_EL0}, - {"pmevcntr13_el0", PMEVCNTR13_EL0}, - {"pmevcntr14_el0", PMEVCNTR14_EL0}, - {"pmevcntr15_el0", PMEVCNTR15_EL0}, - {"pmevcntr16_el0", PMEVCNTR16_EL0}, - {"pmevcntr17_el0", PMEVCNTR17_EL0}, - {"pmevcntr18_el0", PMEVCNTR18_EL0}, - {"pmevcntr19_el0", PMEVCNTR19_EL0}, - {"pmevcntr20_el0", PMEVCNTR20_EL0}, - {"pmevcntr21_el0", PMEVCNTR21_EL0}, - {"pmevcntr22_el0", PMEVCNTR22_EL0}, - {"pmevcntr23_el0", PMEVCNTR23_EL0}, - {"pmevcntr24_el0", PMEVCNTR24_EL0}, - {"pmevcntr25_el0", PMEVCNTR25_EL0}, - {"pmevcntr26_el0", PMEVCNTR26_EL0}, - {"pmevcntr27_el0", PMEVCNTR27_EL0}, - {"pmevcntr28_el0", PMEVCNTR28_EL0}, - {"pmevcntr29_el0", PMEVCNTR29_EL0}, - {"pmevcntr30_el0", PMEVCNTR30_EL0}, - {"pmccfiltr_el0", PMCCFILTR_EL0}, - {"pmevtyper0_el0", PMEVTYPER0_EL0}, - {"pmevtyper1_el0", PMEVTYPER1_EL0}, - {"pmevtyper2_el0", PMEVTYPER2_EL0}, - {"pmevtyper3_el0", PMEVTYPER3_EL0}, - {"pmevtyper4_el0", PMEVTYPER4_EL0}, - {"pmevtyper5_el0", PMEVTYPER5_EL0}, - {"pmevtyper6_el0", PMEVTYPER6_EL0}, - {"pmevtyper7_el0", PMEVTYPER7_EL0}, - {"pmevtyper8_el0", PMEVTYPER8_EL0}, - {"pmevtyper9_el0", PMEVTYPER9_EL0}, - {"pmevtyper10_el0", PMEVTYPER10_EL0}, - {"pmevtyper11_el0", PMEVTYPER11_EL0}, - {"pmevtyper12_el0", PMEVTYPER12_EL0}, - {"pmevtyper13_el0", PMEVTYPER13_EL0}, - {"pmevtyper14_el0", PMEVTYPER14_EL0}, - {"pmevtyper15_el0", PMEVTYPER15_EL0}, - {"pmevtyper16_el0", PMEVTYPER16_EL0}, - {"pmevtyper17_el0", PMEVTYPER17_EL0}, - {"pmevtyper18_el0", PMEVTYPER18_EL0}, - {"pmevtyper19_el0", PMEVTYPER19_EL0}, - {"pmevtyper20_el0", PMEVTYPER20_EL0}, - {"pmevtyper21_el0", PMEVTYPER21_EL0}, - {"pmevtyper22_el0", PMEVTYPER22_EL0}, - {"pmevtyper23_el0", PMEVTYPER23_EL0}, - {"pmevtyper24_el0", PMEVTYPER24_EL0}, - {"pmevtyper25_el0", PMEVTYPER25_EL0}, - {"pmevtyper26_el0", PMEVTYPER26_EL0}, - {"pmevtyper27_el0", PMEVTYPER27_EL0}, - {"pmevtyper28_el0", PMEVTYPER28_EL0}, - {"pmevtyper29_el0", PMEVTYPER29_EL0}, - {"pmevtyper30_el0", PMEVTYPER30_EL0}, - - // Trace registers - {"trcprgctlr", TRCPRGCTLR}, - {"trcprocselr", TRCPROCSELR}, - {"trcconfigr", TRCCONFIGR}, - {"trcauxctlr", TRCAUXCTLR}, - {"trceventctl0r", TRCEVENTCTL0R}, - {"trceventctl1r", TRCEVENTCTL1R}, - {"trcstallctlr", TRCSTALLCTLR}, - {"trctsctlr", TRCTSCTLR}, - {"trcsyncpr", TRCSYNCPR}, - {"trcccctlr", TRCCCCTLR}, - {"trcbbctlr", TRCBBCTLR}, - {"trctraceidr", TRCTRACEIDR}, - {"trcqctlr", TRCQCTLR}, - {"trcvictlr", TRCVICTLR}, - {"trcviiectlr", TRCVIIECTLR}, - {"trcvissctlr", TRCVISSCTLR}, - {"trcvipcssctlr", TRCVIPCSSCTLR}, - {"trcvdctlr", TRCVDCTLR}, - {"trcvdsacctlr", TRCVDSACCTLR}, - {"trcvdarcctlr", TRCVDARCCTLR}, - {"trcseqevr0", TRCSEQEVR0}, - {"trcseqevr1", TRCSEQEVR1}, - {"trcseqevr2", TRCSEQEVR2}, - {"trcseqrstevr", TRCSEQRSTEVR}, - {"trcseqstr", TRCSEQSTR}, - {"trcextinselr", TRCEXTINSELR}, - {"trccntrldvr0", TRCCNTRLDVR0}, - {"trccntrldvr1", TRCCNTRLDVR1}, - {"trccntrldvr2", TRCCNTRLDVR2}, - {"trccntrldvr3", TRCCNTRLDVR3}, - {"trccntctlr0", TRCCNTCTLR0}, 
- {"trccntctlr1", TRCCNTCTLR1}, - {"trccntctlr2", TRCCNTCTLR2}, - {"trccntctlr3", TRCCNTCTLR3}, - {"trccntvr0", TRCCNTVR0}, - {"trccntvr1", TRCCNTVR1}, - {"trccntvr2", TRCCNTVR2}, - {"trccntvr3", TRCCNTVR3}, - {"trcimspec0", TRCIMSPEC0}, - {"trcimspec1", TRCIMSPEC1}, - {"trcimspec2", TRCIMSPEC2}, - {"trcimspec3", TRCIMSPEC3}, - {"trcimspec4", TRCIMSPEC4}, - {"trcimspec5", TRCIMSPEC5}, - {"trcimspec6", TRCIMSPEC6}, - {"trcimspec7", TRCIMSPEC7}, - {"trcrsctlr2", TRCRSCTLR2}, - {"trcrsctlr3", TRCRSCTLR3}, - {"trcrsctlr4", TRCRSCTLR4}, - {"trcrsctlr5", TRCRSCTLR5}, - {"trcrsctlr6", TRCRSCTLR6}, - {"trcrsctlr7", TRCRSCTLR7}, - {"trcrsctlr8", TRCRSCTLR8}, - {"trcrsctlr9", TRCRSCTLR9}, - {"trcrsctlr10", TRCRSCTLR10}, - {"trcrsctlr11", TRCRSCTLR11}, - {"trcrsctlr12", TRCRSCTLR12}, - {"trcrsctlr13", TRCRSCTLR13}, - {"trcrsctlr14", TRCRSCTLR14}, - {"trcrsctlr15", TRCRSCTLR15}, - {"trcrsctlr16", TRCRSCTLR16}, - {"trcrsctlr17", TRCRSCTLR17}, - {"trcrsctlr18", TRCRSCTLR18}, - {"trcrsctlr19", TRCRSCTLR19}, - {"trcrsctlr20", TRCRSCTLR20}, - {"trcrsctlr21", TRCRSCTLR21}, - {"trcrsctlr22", TRCRSCTLR22}, - {"trcrsctlr23", TRCRSCTLR23}, - {"trcrsctlr24", TRCRSCTLR24}, - {"trcrsctlr25", TRCRSCTLR25}, - {"trcrsctlr26", TRCRSCTLR26}, - {"trcrsctlr27", TRCRSCTLR27}, - {"trcrsctlr28", TRCRSCTLR28}, - {"trcrsctlr29", TRCRSCTLR29}, - {"trcrsctlr30", TRCRSCTLR30}, - {"trcrsctlr31", TRCRSCTLR31}, - {"trcssccr0", TRCSSCCR0}, - {"trcssccr1", TRCSSCCR1}, - {"trcssccr2", TRCSSCCR2}, - {"trcssccr3", TRCSSCCR3}, - {"trcssccr4", TRCSSCCR4}, - {"trcssccr5", TRCSSCCR5}, - {"trcssccr6", TRCSSCCR6}, - {"trcssccr7", TRCSSCCR7}, - {"trcsscsr0", TRCSSCSR0}, - {"trcsscsr1", TRCSSCSR1}, - {"trcsscsr2", TRCSSCSR2}, - {"trcsscsr3", TRCSSCSR3}, - {"trcsscsr4", TRCSSCSR4}, - {"trcsscsr5", TRCSSCSR5}, - {"trcsscsr6", TRCSSCSR6}, - {"trcsscsr7", TRCSSCSR7}, - {"trcsspcicr0", TRCSSPCICR0}, - {"trcsspcicr1", TRCSSPCICR1}, - {"trcsspcicr2", TRCSSPCICR2}, - {"trcsspcicr3", TRCSSPCICR3}, - {"trcsspcicr4", TRCSSPCICR4}, - {"trcsspcicr5", TRCSSPCICR5}, - {"trcsspcicr6", TRCSSPCICR6}, - {"trcsspcicr7", TRCSSPCICR7}, - {"trcpdcr", TRCPDCR}, - {"trcacvr0", TRCACVR0}, - {"trcacvr1", TRCACVR1}, - {"trcacvr2", TRCACVR2}, - {"trcacvr3", TRCACVR3}, - {"trcacvr4", TRCACVR4}, - {"trcacvr5", TRCACVR5}, - {"trcacvr6", TRCACVR6}, - {"trcacvr7", TRCACVR7}, - {"trcacvr8", TRCACVR8}, - {"trcacvr9", TRCACVR9}, - {"trcacvr10", TRCACVR10}, - {"trcacvr11", TRCACVR11}, - {"trcacvr12", TRCACVR12}, - {"trcacvr13", TRCACVR13}, - {"trcacvr14", TRCACVR14}, - {"trcacvr15", TRCACVR15}, - {"trcacatr0", TRCACATR0}, - {"trcacatr1", TRCACATR1}, - {"trcacatr2", TRCACATR2}, - {"trcacatr3", TRCACATR3}, - {"trcacatr4", TRCACATR4}, - {"trcacatr5", TRCACATR5}, - {"trcacatr6", TRCACATR6}, - {"trcacatr7", TRCACATR7}, - {"trcacatr8", TRCACATR8}, - {"trcacatr9", TRCACATR9}, - {"trcacatr10", TRCACATR10}, - {"trcacatr11", TRCACATR11}, - {"trcacatr12", TRCACATR12}, - {"trcacatr13", TRCACATR13}, - {"trcacatr14", TRCACATR14}, - {"trcacatr15", TRCACATR15}, - {"trcdvcvr0", TRCDVCVR0}, - {"trcdvcvr1", TRCDVCVR1}, - {"trcdvcvr2", TRCDVCVR2}, - {"trcdvcvr3", TRCDVCVR3}, - {"trcdvcvr4", TRCDVCVR4}, - {"trcdvcvr5", TRCDVCVR5}, - {"trcdvcvr6", TRCDVCVR6}, - {"trcdvcvr7", TRCDVCVR7}, - {"trcdvcmr0", TRCDVCMR0}, - {"trcdvcmr1", TRCDVCMR1}, - {"trcdvcmr2", TRCDVCMR2}, - {"trcdvcmr3", TRCDVCMR3}, - {"trcdvcmr4", TRCDVCMR4}, - {"trcdvcmr5", TRCDVCMR5}, - {"trcdvcmr6", TRCDVCMR6}, - {"trcdvcmr7", TRCDVCMR7}, - {"trccidcvr0", TRCCIDCVR0}, - {"trccidcvr1", TRCCIDCVR1}, - {"trccidcvr2", TRCCIDCVR2}, - 
{"trccidcvr3", TRCCIDCVR3}, - {"trccidcvr4", TRCCIDCVR4}, - {"trccidcvr5", TRCCIDCVR5}, - {"trccidcvr6", TRCCIDCVR6}, - {"trccidcvr7", TRCCIDCVR7}, - {"trcvmidcvr0", TRCVMIDCVR0}, - {"trcvmidcvr1", TRCVMIDCVR1}, - {"trcvmidcvr2", TRCVMIDCVR2}, - {"trcvmidcvr3", TRCVMIDCVR3}, - {"trcvmidcvr4", TRCVMIDCVR4}, - {"trcvmidcvr5", TRCVMIDCVR5}, - {"trcvmidcvr6", TRCVMIDCVR6}, - {"trcvmidcvr7", TRCVMIDCVR7}, - {"trccidcctlr0", TRCCIDCCTLR0}, - {"trccidcctlr1", TRCCIDCCTLR1}, - {"trcvmidcctlr0", TRCVMIDCCTLR0}, - {"trcvmidcctlr1", TRCVMIDCCTLR1}, - {"trcitctrl", TRCITCTRL}, - {"trcclaimset", TRCCLAIMSET}, - {"trcclaimclr", TRCCLAIMCLR}, - - // GICv3 registers - {"icc_bpr1_el1", ICC_BPR1_EL1}, - {"icc_bpr0_el1", ICC_BPR0_EL1}, - {"icc_pmr_el1", ICC_PMR_EL1}, - {"icc_ctlr_el1", ICC_CTLR_EL1}, - {"icc_ctlr_el3", ICC_CTLR_EL3}, - {"icc_sre_el1", ICC_SRE_EL1}, - {"icc_sre_el2", ICC_SRE_EL2}, - {"icc_sre_el3", ICC_SRE_EL3}, - {"icc_igrpen0_el1", ICC_IGRPEN0_EL1}, - {"icc_igrpen1_el1", ICC_IGRPEN1_EL1}, - {"icc_igrpen1_el3", ICC_IGRPEN1_EL3}, - {"icc_seien_el1", ICC_SEIEN_EL1}, - {"icc_ap0r0_el1", ICC_AP0R0_EL1}, - {"icc_ap0r1_el1", ICC_AP0R1_EL1}, - {"icc_ap0r2_el1", ICC_AP0R2_EL1}, - {"icc_ap0r3_el1", ICC_AP0R3_EL1}, - {"icc_ap1r0_el1", ICC_AP1R0_EL1}, - {"icc_ap1r1_el1", ICC_AP1R1_EL1}, - {"icc_ap1r2_el1", ICC_AP1R2_EL1}, - {"icc_ap1r3_el1", ICC_AP1R3_EL1}, - {"ich_ap0r0_el2", ICH_AP0R0_EL2}, - {"ich_ap0r1_el2", ICH_AP0R1_EL2}, - {"ich_ap0r2_el2", ICH_AP0R2_EL2}, - {"ich_ap0r3_el2", ICH_AP0R3_EL2}, - {"ich_ap1r0_el2", ICH_AP1R0_EL2}, - {"ich_ap1r1_el2", ICH_AP1R1_EL2}, - {"ich_ap1r2_el2", ICH_AP1R2_EL2}, - {"ich_ap1r3_el2", ICH_AP1R3_EL2}, - {"ich_hcr_el2", ICH_HCR_EL2}, - {"ich_misr_el2", ICH_MISR_EL2}, - {"ich_vmcr_el2", ICH_VMCR_EL2}, - {"ich_vseir_el2", ICH_VSEIR_EL2}, - {"ich_lr0_el2", ICH_LR0_EL2}, - {"ich_lr1_el2", ICH_LR1_EL2}, - {"ich_lr2_el2", ICH_LR2_EL2}, - {"ich_lr3_el2", ICH_LR3_EL2}, - {"ich_lr4_el2", ICH_LR4_EL2}, - {"ich_lr5_el2", ICH_LR5_EL2}, - {"ich_lr6_el2", ICH_LR6_EL2}, - {"ich_lr7_el2", ICH_LR7_EL2}, - {"ich_lr8_el2", ICH_LR8_EL2}, - {"ich_lr9_el2", ICH_LR9_EL2}, - {"ich_lr10_el2", ICH_LR10_EL2}, - {"ich_lr11_el2", ICH_LR11_EL2}, - {"ich_lr12_el2", ICH_LR12_EL2}, - {"ich_lr13_el2", ICH_LR13_EL2}, - {"ich_lr14_el2", ICH_LR14_EL2}, - {"ich_lr15_el2", ICH_LR15_EL2} -}; - -uint32_t -A64SysReg::SysRegMapper::fromString(StringRef Name, bool &Valid) const { - // First search the registers shared by all - std::string NameLower = Name.lower(); - for (unsigned i = 0; i < array_lengthof(SysRegPairs); ++i) { - if (SysRegPairs[i].Name == NameLower) { - Valid = true; - return SysRegPairs[i].Value; - } - } - - // Now try the instruction-specific registers (either read-only or - // write-only). 
- for (unsigned i = 0; i < NumInstPairs; ++i) { - if (InstPairs[i].Name == NameLower) { - Valid = true; - return InstPairs[i].Value; - } - } - - // Try to parse an S<op0>_<op1>_<Cn>_<Cm>_<op2> register name, where the bits - // are: 11 xxx 1x11 xxxx xxx - Regex GenericRegPattern("^s3_([0-7])_c(1[15])_c([0-9]|1[0-5])_([0-7])$"); - - SmallVector<StringRef, 4> Ops; - if (!GenericRegPattern.match(NameLower, &Ops)) { - Valid = false; - return -1; - } - - uint32_t Op0 = 3, Op1 = 0, CRn = 0, CRm = 0, Op2 = 0; - uint32_t Bits; - Ops[1].getAsInteger(10, Op1); - Ops[2].getAsInteger(10, CRn); - Ops[3].getAsInteger(10, CRm); - Ops[4].getAsInteger(10, Op2); - Bits = (Op0 << 14) | (Op1 << 11) | (CRn << 7) | (CRm << 3) | Op2; - - Valid = true; - return Bits; -} - -std::string -A64SysReg::SysRegMapper::toString(uint32_t Bits, bool &Valid) const { - for (unsigned i = 0; i < array_lengthof(SysRegPairs); ++i) { - if (SysRegPairs[i].Value == Bits) { - Valid = true; - return SysRegPairs[i].Name; - } - } - - for (unsigned i = 0; i < NumInstPairs; ++i) { - if (InstPairs[i].Value == Bits) { - Valid = true; - return InstPairs[i].Name; - } - } - - uint32_t Op0 = (Bits >> 14) & 0x3; - uint32_t Op1 = (Bits >> 11) & 0x7; - uint32_t CRn = (Bits >> 7) & 0xf; - uint32_t CRm = (Bits >> 3) & 0xf; - uint32_t Op2 = Bits & 0x7; - - // Only combinations matching: 11 xxx 1x11 xxxx xxx are valid for a generic - // name. - if (Op0 != 3 || (CRn != 11 && CRn != 15)) { - Valid = false; - return ""; - } - - assert(Op0 == 3 && (CRn == 11 || CRn == 15) && "Invalid generic sysreg"); - - Valid = true; - return "s3_" + utostr(Op1) + "_c" + utostr(CRn) - + "_c" + utostr(CRm) + "_" + utostr(Op2); -} - -const NamedImmMapper::Mapping A64TLBI::TLBIMapper::TLBIPairs[] = { - {"ipas2e1is", IPAS2E1IS}, - {"ipas2le1is", IPAS2LE1IS}, - {"vmalle1is", VMALLE1IS}, - {"alle2is", ALLE2IS}, - {"alle3is", ALLE3IS}, - {"vae1is", VAE1IS}, - {"vae2is", VAE2IS}, - {"vae3is", VAE3IS}, - {"aside1is", ASIDE1IS}, - {"vaae1is", VAAE1IS}, - {"alle1is", ALLE1IS}, - {"vale1is", VALE1IS}, - {"vale2is", VALE2IS}, - {"vale3is", VALE3IS}, - {"vmalls12e1is", VMALLS12E1IS}, - {"vaale1is", VAALE1IS}, - {"ipas2e1", IPAS2E1}, - {"ipas2le1", IPAS2LE1}, - {"vmalle1", VMALLE1}, - {"alle2", ALLE2}, - {"alle3", ALLE3}, - {"vae1", VAE1}, - {"vae2", VAE2}, - {"vae3", VAE3}, - {"aside1", ASIDE1}, - {"vaae1", VAAE1}, - {"alle1", ALLE1}, - {"vale1", VALE1}, - {"vale2", VALE2}, - {"vale3", VALE3}, - {"vmalls12e1", VMALLS12E1}, - {"vaale1", VAALE1} -}; - -A64TLBI::TLBIMapper::TLBIMapper() - : NamedImmMapper(TLBIPairs, 0) {} - -bool A64Imms::isFPImm(const APFloat &Val, uint32_t &Imm8Bits) { - const fltSemantics &Sem = Val.getSemantics(); - unsigned FracBits = APFloat::semanticsPrecision(Sem) - 1; - - uint32_t ExpMask; - switch (FracBits) { - case 10: // IEEE half-precision - ExpMask = 0x1f; - break; - case 23: // IEEE single-precision - ExpMask = 0xff; - break; - case 52: // IEEE double-precision - ExpMask = 0x7ff; - break; - case 112: // IEEE quad-precision - // No immediates are valid for double precision. 
- return false; - default: - llvm_unreachable("Only half, single and double precision supported"); - } - - uint32_t ExpStart = FracBits; - uint64_t FracMask = (1ULL << FracBits) - 1; - - uint32_t Sign = Val.isNegative(); - - uint64_t Bits= Val.bitcastToAPInt().getLimitedValue(); - uint64_t Fraction = Bits & FracMask; - int32_t Exponent = ((Bits >> ExpStart) & ExpMask); - Exponent -= ExpMask >> 1; - - // S[d] = imm8<7>:NOT(imm8<6>):Replicate(imm8<6>, 5):imm8<5:0>:Zeros(19) - // D[d] = imm8<7>:NOT(imm8<6>):Replicate(imm8<6>, 8):imm8<5:0>:Zeros(48) - // This translates to: only 4 bits of fraction; -3 <= exp <= 4. - uint64_t A64FracStart = FracBits - 4; - uint64_t A64FracMask = 0xf; - - // Are there too many fraction bits? - if (Fraction & ~(A64FracMask << A64FracStart)) - return false; - - if (Exponent < -3 || Exponent > 4) - return false; - - uint32_t PackedFraction = (Fraction >> A64FracStart) & A64FracMask; - uint32_t PackedExp = (Exponent + 7) & 0x7; - - Imm8Bits = (Sign << 7) | (PackedExp << 4) | PackedFraction; - return true; -} - -// Encoding of the immediate for logical (immediate) instructions: -// -// | N | imms | immr | size | R | S | -// |---+--------+--------+------+--------------+--------------| -// | 1 | ssssss | rrrrrr | 64 | UInt(rrrrrr) | UInt(ssssss) | -// | 0 | 0sssss | xrrrrr | 32 | UInt(rrrrr) | UInt(sssss) | -// | 0 | 10ssss | xxrrrr | 16 | UInt(rrrr) | UInt(ssss) | -// | 0 | 110sss | xxxrrr | 8 | UInt(rrr) | UInt(sss) | -// | 0 | 1110ss | xxxxrr | 4 | UInt(rr) | UInt(ss) | -// | 0 | 11110s | xxxxxr | 2 | UInt(r) | UInt(s) | -// | 0 | 11111x | - | | UNALLOCATED | | -// -// Columns 'R', 'S' and 'size' specify a "bitmask immediate" of size bits in -// which the lower S+1 bits are ones and the remaining bits are zero, then -// rotated right by R bits, which is then replicated across the datapath. -// -// + Values of 'N', 'imms' and 'immr' which do not match the above table are -// RESERVED. -// + If all 's' bits in the imms field are set then the instruction is -// RESERVED. -// + The 'x' bits in the 'immr' field are IGNORED. - -bool A64Imms::isLogicalImm(unsigned RegWidth, uint64_t Imm, uint32_t &Bits) { - int RepeatWidth; - int Rotation = 0; - int Num1s = 0; - - // Because there are S+1 ones in the replicated mask, an immediate of all - // zeros is not allowed. Filtering it here is probably more efficient. - if (Imm == 0) return false; - - for (RepeatWidth = RegWidth; RepeatWidth > 1; RepeatWidth /= 2) { - uint64_t RepeatMask = RepeatWidth == 64 ? -1 : (1ULL << RepeatWidth) - 1; - uint64_t ReplicatedMask = Imm & RepeatMask; - - if (ReplicatedMask == 0) continue; - - // First we have to make sure the mask is actually repeated in each slot for - // this width-specifier. - bool IsReplicatedMask = true; - for (unsigned i = RepeatWidth; i < RegWidth; i += RepeatWidth) { - if (((Imm >> i) & RepeatMask) != ReplicatedMask) { - IsReplicatedMask = false; - break; - } - } - if (!IsReplicatedMask) continue; - - // Now we have to work out the amount of rotation needed. The first part of - // this calculation is actually independent of RepeatWidth, but the complex - // case will depend on it. - Rotation = countTrailingZeros(Imm); - if (Rotation == 0) { - // There were no leading zeros, which means it's either in place or there - // are 1s at each end (e.g. 0x8003 needs rotating). - Rotation = RegWidth == 64 ? 
CountLeadingOnes_64(Imm) - : CountLeadingOnes_32(Imm); - Rotation = RepeatWidth - Rotation; - } - - uint64_t ReplicatedOnes = ReplicatedMask; - if (Rotation != 0 && Rotation != 64) - ReplicatedOnes = (ReplicatedMask >> Rotation) - | ((ReplicatedMask << (RepeatWidth - Rotation)) & RepeatMask); - - // Of course, they may not actually be ones, so we have to check that: - if (!isMask_64(ReplicatedOnes)) - continue; - - Num1s = CountTrailingOnes_64(ReplicatedOnes); - - // We know we've got an almost valid encoding (certainly, if this is invalid - // no other parameters would work). - break; - } - - // The encodings which would produce all 1s are RESERVED. - if (RepeatWidth == 1 || Num1s == RepeatWidth) return false; - - uint32_t N = RepeatWidth == 64; - uint32_t ImmR = RepeatWidth - Rotation; - uint32_t ImmS = Num1s - 1; - - switch (RepeatWidth) { - default: break; // No action required for other valid rotations. - case 16: ImmS |= 0x20; break; // 10ssss - case 8: ImmS |= 0x30; break; // 110sss - case 4: ImmS |= 0x38; break; // 1110ss - case 2: ImmS |= 0x3c; break; // 11110s - } - - Bits = ImmS | (ImmR << 6) | (N << 12); - - return true; -} - - -bool A64Imms::isLogicalImmBits(unsigned RegWidth, uint32_t Bits, - uint64_t &Imm) { - uint32_t N = Bits >> 12; - uint32_t ImmR = (Bits >> 6) & 0x3f; - uint32_t ImmS = Bits & 0x3f; - - // N=1 encodes a 64-bit replication and is invalid for the 32-bit - // instructions. - if (RegWidth == 32 && N != 0) return false; - - int Width = 0; - if (N == 1) - Width = 64; - else if ((ImmS & 0x20) == 0) - Width = 32; - else if ((ImmS & 0x10) == 0) - Width = 16; - else if ((ImmS & 0x08) == 0) - Width = 8; - else if ((ImmS & 0x04) == 0) - Width = 4; - else if ((ImmS & 0x02) == 0) - Width = 2; - else { - // ImmS is 0b11111x: UNALLOCATED - return false; - } - - int Num1s = (ImmS & (Width - 1)) + 1; - - // All encodings which would map to -1 (signed) are RESERVED. - if (Num1s == Width) return false; - - int Rotation = (ImmR & (Width - 1)); - uint64_t Mask = (1ULL << Num1s) - 1; - uint64_t WidthMask = Width == 64 ? -1 : (1ULL << Width) - 1; - if (Rotation != 0 && Rotation != 64) - Mask = (Mask >> Rotation) - | ((Mask << (Width - Rotation)) & WidthMask); - - Imm = Mask; - for (unsigned i = 1; i < RegWidth / Width; ++i) { - Mask <<= Width; - Imm |= Mask; - } - - return true; -} - -bool A64Imms::isMOVZImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift) { - // If high bits are set then a 32-bit MOVZ can't possibly work. - if (RegWidth == 32 && (Value & ~0xffffffffULL)) - return false; - - for (int i = 0; i < RegWidth; i += 16) { - // If the value is 0 when we mask out all the bits that could be set with - // the current LSL value then it's representable. - if ((Value & ~(0xffffULL << i)) == 0) { - Shift = i / 16; - UImm16 = (Value >> i) & 0xffff; - return true; - } - } - return false; -} - -bool A64Imms::isMOVNImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift) { - // MOVN is defined to set its register to NOT(LSL(imm16, shift)). - - // We have to be a little careful about a 32-bit register: 0xffff_1234 *is* - // representable, but ~0xffff_1234 == 0xffff_ffff_0000_edcb which is not - // a valid input for isMOVZImm. - if (RegWidth == 32 && (Value & ~0xffffffffULL)) - return false; - - uint64_t MOVZEquivalent = RegWidth == 32 ? 
~Value & 0xffffffff : ~Value;
-
-  return isMOVZImm(RegWidth, MOVZEquivalent, UImm16, Shift);
-}
-
-bool A64Imms::isOnlyMOVNImm(int RegWidth, uint64_t Value,
-                            int &UImm16, int &Shift) {
-  if (isMOVZImm(RegWidth, Value, UImm16, Shift))
-    return false;
-
-  return isMOVNImm(RegWidth, Value, UImm16, Shift);
-}
-
-// decodeNeonModShiftImm - Decode a Neon OpCmode value into the
-// shift amount and the shift type (shift zeros or ones in) and
-// returns whether the OpCmode value implies a shift operation.
-bool A64Imms::decodeNeonModShiftImm(unsigned OpCmode, unsigned &ShiftImm,
-                                    unsigned &ShiftOnesIn) {
-  ShiftImm = 0;
-  ShiftOnesIn = false;
-  bool HasShift = true;
-
-  if (OpCmode == 0xe) {
-    // movi byte
-    HasShift = false;
-  } else if (OpCmode == 0x1e) {
-    // movi 64-bit bytemask
-    HasShift = false;
-  } else if ((OpCmode & 0xc) == 0x8) {
-    // shift zeros, per halfword
-    ShiftImm = ((OpCmode & 0x2) >> 1);
-  } else if ((OpCmode & 0x8) == 0) {
-    // shift zeros, per word
-    ShiftImm = ((OpCmode & 0x6) >> 1);
-  } else if ((OpCmode & 0xe) == 0xc) {
-    // shift ones, per word
-    ShiftOnesIn = true;
-    ShiftImm = (OpCmode & 0x1);
-  } else {
-    // per byte, per bytemask
-    llvm_unreachable("Unsupported Neon modified immediate");
-  }
-
-  return HasShift;
-}
-
-// decodeNeonModImm - Decode a NEON modified immediate and OpCmode values
-// into the element value and the element size in bits.
-uint64_t A64Imms::decodeNeonModImm(unsigned Val, unsigned OpCmode,
-                                   unsigned &EltBits) {
-  uint64_t DecodedVal = Val;
-  EltBits = 0;
-
-  if (OpCmode == 0xe) {
-    // movi byte
-    EltBits = 8;
-  } else if (OpCmode == 0x1e) {
-    // movi 64-bit bytemask
-    DecodedVal = 0;
-    for (unsigned ByteNum = 0; ByteNum < 8; ++ByteNum) {
-      if ((Val >> ByteNum) & 1)
-        DecodedVal |= (uint64_t)0xff << (8 * ByteNum);
-    }
-    EltBits = 64;
-  } else if ((OpCmode & 0xc) == 0x8) {
-    // shift zeros, per halfword
-    EltBits = 16;
-  } else if ((OpCmode & 0x8) == 0) {
-    // shift zeros, per word
-    EltBits = 32;
-  } else if ((OpCmode & 0xe) == 0xc) {
-    // shift ones, per word
-    EltBits = 32;
-  } else {
-    llvm_unreachable("Unsupported Neon modified immediate");
-  }
-  return DecodedVal;
-}
diff --git a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
deleted file mode 100644
index 39b042b7208..00000000000
--- a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ /dev/null
@@ -1,1138 +0,0 @@
-//===-- AArch64BaseInfo.h - Top level definitions for AArch64- --*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains small standalone helper functions and enum definitions for
-// the AArch64 target useful for the compiler back-end and the MC libraries.
-// As such, it deliberately does not include references to LLVM core
-// code gen types, passes, etc.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_AARCH64_BASEINFO_H
-#define LLVM_AARCH64_BASEINFO_H
-
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringSwitch.h"
-#include "llvm/Support/ErrorHandling.h"
-
-namespace llvm {
-
-// Enums corresponding to AArch64 condition codes
-namespace A64CC {
-  // The CondCodes constants map directly to the 4-bit encoding of the
-  // condition field for predicated instructions.
-  enum CondCodes {   // Meaning (integer)          Meaning (floating-point)
-    EQ = 0,        // Equal                      Equal
-    NE,            // Not equal                  Not equal, or unordered
-    HS,            // Unsigned higher or same    >, ==, or unordered
-    LO,            // Unsigned lower             Less than
-    MI,            // Minus, negative            Less than
-    PL,            // Plus, positive or zero     >, ==, or unordered
-    VS,            // Overflow                   Unordered
-    VC,            // No overflow                Ordered
-    HI,            // Unsigned higher            Greater than, or unordered
-    LS,            // Unsigned lower or same     Less than or equal
-    GE,            // Greater than or equal      Greater than or equal
-    LT,            // Less than                  Less than, or unordered
-    GT,            // Signed greater than        Greater than
-    LE,            // Signed less than or equal  <, ==, or unordered
-    AL,            // Always (unconditional)     Always (unconditional)
-    NV,            // Always (unconditional)     Always (unconditional)
-    // Note the NV exists purely to disassemble 0b1111. Execution
-    // is "always".
-    Invalid
-  };
-
-} // namespace A64CC
-
-inline static const char *A64CondCodeToString(A64CC::CondCodes CC) {
-  switch (CC) {
-  default: llvm_unreachable("Unknown condition code");
-  case A64CC::EQ: return "eq";
-  case A64CC::NE: return "ne";
-  case A64CC::HS: return "hs";
-  case A64CC::LO: return "lo";
-  case A64CC::MI: return "mi";
-  case A64CC::PL: return "pl";
-  case A64CC::VS: return "vs";
-  case A64CC::VC: return "vc";
-  case A64CC::HI: return "hi";
-  case A64CC::LS: return "ls";
-  case A64CC::GE: return "ge";
-  case A64CC::LT: return "lt";
-  case A64CC::GT: return "gt";
-  case A64CC::LE: return "le";
-  case A64CC::AL: return "al";
-  case A64CC::NV: return "nv";
-  }
-}
-
-inline static A64CC::CondCodes A64StringToCondCode(StringRef CondStr) {
-  return StringSwitch<A64CC::CondCodes>(CondStr.lower())
-             .Case("eq", A64CC::EQ)
-             .Case("ne", A64CC::NE)
-             .Case("hs", A64CC::HS)
-             .Case("cs", A64CC::HS)
-             .Case("lo", A64CC::LO)
-             .Case("cc", A64CC::LO)
-             .Case("mi", A64CC::MI)
-             .Case("pl", A64CC::PL)
-             .Case("vs", A64CC::VS)
-             .Case("vc", A64CC::VC)
-             .Case("hi", A64CC::HI)
-             .Case("ls", A64CC::LS)
-             .Case("ge", A64CC::GE)
-             .Case("lt", A64CC::LT)
-             .Case("gt", A64CC::GT)
-             .Case("le", A64CC::LE)
-             .Case("al", A64CC::AL)
-             .Case("nv", A64CC::NV)
-             .Default(A64CC::Invalid);
-}
-
-inline static A64CC::CondCodes A64InvertCondCode(A64CC::CondCodes CC) {
-  // It turns out that the condition codes have been designed so that in order
-  // to reverse the intent of the condition you only have to invert the low bit:
-
-  return static_cast<A64CC::CondCodes>(static_cast<unsigned>(CC) ^ 0x1);
-}
-
-/// Instances of this class can perform bidirectional mapping from random
-/// identifier strings to operand encodings. For example "MSR" takes a named
-/// system-register which must be encoded somehow and decoded for printing. This
-/// central location means that the information for those transformations is not
-/// duplicated and remains in sync.
-///
-/// FIXME: currently the algorithm is a completely unoptimised linear
-/// search. Obviously this could be improved, but we would probably want to work
-/// out just how often these instructions are emitted before working on it. It
-/// might even be optimal to just reorder the tables for the common instructions
-/// rather than changing the algorithm.
-struct NamedImmMapper { - struct Mapping { - const char *Name; - uint32_t Value; - }; - - template<int N> - NamedImmMapper(const Mapping (&Pairs)[N], uint32_t TooBigImm) - : Pairs(&Pairs[0]), NumPairs(N), TooBigImm(TooBigImm) {} - - StringRef toString(uint32_t Value, bool &Valid) const; - uint32_t fromString(StringRef Name, bool &Valid) const; - - /// Many of the instructions allow an alternative assembly form consisting of - /// a simple immediate. Currently the only valid forms are ranges [0, N) where - /// N being 0 indicates no immediate syntax-form is allowed. - bool validImm(uint32_t Value) const; -protected: - const Mapping *Pairs; - size_t NumPairs; - uint32_t TooBigImm; -}; - -namespace A64AT { - enum ATValues { - Invalid = -1, // Op0 Op1 CRn CRm Op2 - S1E1R = 0x43c0, // 01 000 0111 1000 000 - S1E2R = 0x63c0, // 01 100 0111 1000 000 - S1E3R = 0x73c0, // 01 110 0111 1000 000 - S1E1W = 0x43c1, // 01 000 0111 1000 001 - S1E2W = 0x63c1, // 01 100 0111 1000 001 - S1E3W = 0x73c1, // 01 110 0111 1000 001 - S1E0R = 0x43c2, // 01 000 0111 1000 010 - S1E0W = 0x43c3, // 01 000 0111 1000 011 - S12E1R = 0x63c4, // 01 100 0111 1000 100 - S12E1W = 0x63c5, // 01 100 0111 1000 101 - S12E0R = 0x63c6, // 01 100 0111 1000 110 - S12E0W = 0x63c7 // 01 100 0111 1000 111 - }; - - struct ATMapper : NamedImmMapper { - const static Mapping ATPairs[]; - - ATMapper(); - }; - -} -namespace A64DB { - enum DBValues { - Invalid = -1, - OSHLD = 0x1, - OSHST = 0x2, - OSH = 0x3, - NSHLD = 0x5, - NSHST = 0x6, - NSH = 0x7, - ISHLD = 0x9, - ISHST = 0xa, - ISH = 0xb, - LD = 0xd, - ST = 0xe, - SY = 0xf - }; - - struct DBarrierMapper : NamedImmMapper { - const static Mapping DBarrierPairs[]; - - DBarrierMapper(); - }; -} - -namespace A64DC { - enum DCValues { - Invalid = -1, // Op1 CRn CRm Op2 - ZVA = 0x5ba1, // 01 011 0111 0100 001 - IVAC = 0x43b1, // 01 000 0111 0110 001 - ISW = 0x43b2, // 01 000 0111 0110 010 - CVAC = 0x5bd1, // 01 011 0111 1010 001 - CSW = 0x43d2, // 01 000 0111 1010 010 - CVAU = 0x5bd9, // 01 011 0111 1011 001 - CIVAC = 0x5bf1, // 01 011 0111 1110 001 - CISW = 0x43f2 // 01 000 0111 1110 010 - }; - - struct DCMapper : NamedImmMapper { - const static Mapping DCPairs[]; - - DCMapper(); - }; - -} - -namespace A64IC { - enum ICValues { - Invalid = -1, // Op1 CRn CRm Op2 - IALLUIS = 0x0388, // 000 0111 0001 000 - IALLU = 0x03a8, // 000 0111 0101 000 - IVAU = 0x1ba9 // 011 0111 0101 001 - }; - - - struct ICMapper : NamedImmMapper { - const static Mapping ICPairs[]; - - ICMapper(); - }; - - static inline bool NeedsRegister(ICValues Val) { - return Val == IVAU; - } -} - -namespace A64ISB { - enum ISBValues { - Invalid = -1, - SY = 0xf - }; - struct ISBMapper : NamedImmMapper { - const static Mapping ISBPairs[]; - - ISBMapper(); - }; -} - -namespace A64PRFM { - enum PRFMValues { - Invalid = -1, - PLDL1KEEP = 0x00, - PLDL1STRM = 0x01, - PLDL2KEEP = 0x02, - PLDL2STRM = 0x03, - PLDL3KEEP = 0x04, - PLDL3STRM = 0x05, - PLIL1KEEP = 0x08, - PLIL1STRM = 0x09, - PLIL2KEEP = 0x0a, - PLIL2STRM = 0x0b, - PLIL3KEEP = 0x0c, - PLIL3STRM = 0x0d, - PSTL1KEEP = 0x10, - PSTL1STRM = 0x11, - PSTL2KEEP = 0x12, - PSTL2STRM = 0x13, - PSTL3KEEP = 0x14, - PSTL3STRM = 0x15 - }; - - struct PRFMMapper : NamedImmMapper { - const static Mapping PRFMPairs[]; - - PRFMMapper(); - }; -} - -namespace A64PState { - enum PStateValues { - Invalid = -1, - SPSel = 0x05, - DAIFSet = 0x1e, - DAIFClr = 0x1f - }; - - struct PStateMapper : NamedImmMapper { - const static Mapping PStatePairs[]; - - PStateMapper(); - }; - -} - -namespace A64SE { - enum 
ShiftExtSpecifiers { - Invalid = -1, - LSL, - MSL, - LSR, - ASR, - ROR, - - UXTB, - UXTH, - UXTW, - UXTX, - - SXTB, - SXTH, - SXTW, - SXTX - }; -} - -namespace A64Layout { - enum VectorLayout { - Invalid = -1, - VL_8B, - VL_4H, - VL_2S, - VL_1D, - - VL_16B, - VL_8H, - VL_4S, - VL_2D, - - // Bare layout for the 128-bit vector - // (only show ".b", ".h", ".s", ".d" without vector number) - VL_B, - VL_H, - VL_S, - VL_D - }; -} - -inline static const char * -A64VectorLayoutToString(A64Layout::VectorLayout Layout) { - switch (Layout) { - case A64Layout::VL_8B: return ".8b"; - case A64Layout::VL_4H: return ".4h"; - case A64Layout::VL_2S: return ".2s"; - case A64Layout::VL_1D: return ".1d"; - case A64Layout::VL_16B: return ".16b"; - case A64Layout::VL_8H: return ".8h"; - case A64Layout::VL_4S: return ".4s"; - case A64Layout::VL_2D: return ".2d"; - case A64Layout::VL_B: return ".b"; - case A64Layout::VL_H: return ".h"; - case A64Layout::VL_S: return ".s"; - case A64Layout::VL_D: return ".d"; - default: llvm_unreachable("Unknown Vector Layout"); - } -} - -inline static A64Layout::VectorLayout -A64StringToVectorLayout(StringRef LayoutStr) { - return StringSwitch<A64Layout::VectorLayout>(LayoutStr) - .Case(".8b", A64Layout::VL_8B) - .Case(".4h", A64Layout::VL_4H) - .Case(".2s", A64Layout::VL_2S) - .Case(".1d", A64Layout::VL_1D) - .Case(".16b", A64Layout::VL_16B) - .Case(".8h", A64Layout::VL_8H) - .Case(".4s", A64Layout::VL_4S) - .Case(".2d", A64Layout::VL_2D) - .Case(".b", A64Layout::VL_B) - .Case(".h", A64Layout::VL_H) - .Case(".s", A64Layout::VL_S) - .Case(".d", A64Layout::VL_D) - .Default(A64Layout::Invalid); -} - -namespace A64SysReg { - enum SysRegROValues { - MDCCSR_EL0 = 0x9808, // 10 011 0000 0001 000 - DBGDTRRX_EL0 = 0x9828, // 10 011 0000 0101 000 - MDRAR_EL1 = 0x8080, // 10 000 0001 0000 000 - OSLSR_EL1 = 0x808c, // 10 000 0001 0001 100 - DBGAUTHSTATUS_EL1 = 0x83f6, // 10 000 0111 1110 110 - PMCEID0_EL0 = 0xdce6, // 11 011 1001 1100 110 - PMCEID1_EL0 = 0xdce7, // 11 011 1001 1100 111 - MIDR_EL1 = 0xc000, // 11 000 0000 0000 000 - CCSIDR_EL1 = 0xc800, // 11 001 0000 0000 000 - CLIDR_EL1 = 0xc801, // 11 001 0000 0000 001 - CTR_EL0 = 0xd801, // 11 011 0000 0000 001 - MPIDR_EL1 = 0xc005, // 11 000 0000 0000 101 - REVIDR_EL1 = 0xc006, // 11 000 0000 0000 110 - AIDR_EL1 = 0xc807, // 11 001 0000 0000 111 - DCZID_EL0 = 0xd807, // 11 011 0000 0000 111 - ID_PFR0_EL1 = 0xc008, // 11 000 0000 0001 000 - ID_PFR1_EL1 = 0xc009, // 11 000 0000 0001 001 - ID_DFR0_EL1 = 0xc00a, // 11 000 0000 0001 010 - ID_AFR0_EL1 = 0xc00b, // 11 000 0000 0001 011 - ID_MMFR0_EL1 = 0xc00c, // 11 000 0000 0001 100 - ID_MMFR1_EL1 = 0xc00d, // 11 000 0000 0001 101 - ID_MMFR2_EL1 = 0xc00e, // 11 000 0000 0001 110 - ID_MMFR3_EL1 = 0xc00f, // 11 000 0000 0001 111 - ID_ISAR0_EL1 = 0xc010, // 11 000 0000 0010 000 - ID_ISAR1_EL1 = 0xc011, // 11 000 0000 0010 001 - ID_ISAR2_EL1 = 0xc012, // 11 000 0000 0010 010 - ID_ISAR3_EL1 = 0xc013, // 11 000 0000 0010 011 - ID_ISAR4_EL1 = 0xc014, // 11 000 0000 0010 100 - ID_ISAR5_EL1 = 0xc015, // 11 000 0000 0010 101 - ID_AA64PFR0_EL1 = 0xc020, // 11 000 0000 0100 000 - ID_AA64PFR1_EL1 = 0xc021, // 11 000 0000 0100 001 - ID_AA64DFR0_EL1 = 0xc028, // 11 000 0000 0101 000 - ID_AA64DFR1_EL1 = 0xc029, // 11 000 0000 0101 001 - ID_AA64AFR0_EL1 = 0xc02c, // 11 000 0000 0101 100 - ID_AA64AFR1_EL1 = 0xc02d, // 11 000 0000 0101 101 - ID_AA64ISAR0_EL1 = 0xc030, // 11 000 0000 0110 000 - ID_AA64ISAR1_EL1 = 0xc031, // 11 000 0000 0110 001 - ID_AA64MMFR0_EL1 = 0xc038, // 11 000 0000 0111 000 - 
ID_AA64MMFR1_EL1 = 0xc039, // 11 000 0000 0111 001 - MVFR0_EL1 = 0xc018, // 11 000 0000 0011 000 - MVFR1_EL1 = 0xc019, // 11 000 0000 0011 001 - MVFR2_EL1 = 0xc01a, // 11 000 0000 0011 010 - RVBAR_EL1 = 0xc601, // 11 000 1100 0000 001 - RVBAR_EL2 = 0xe601, // 11 100 1100 0000 001 - RVBAR_EL3 = 0xf601, // 11 110 1100 0000 001 - ISR_EL1 = 0xc608, // 11 000 1100 0001 000 - CNTPCT_EL0 = 0xdf01, // 11 011 1110 0000 001 - CNTVCT_EL0 = 0xdf02, // 11 011 1110 0000 010 - - // Trace registers - TRCSTATR = 0x8818, // 10 001 0000 0011 000 - TRCIDR8 = 0x8806, // 10 001 0000 0000 110 - TRCIDR9 = 0x880e, // 10 001 0000 0001 110 - TRCIDR10 = 0x8816, // 10 001 0000 0010 110 - TRCIDR11 = 0x881e, // 10 001 0000 0011 110 - TRCIDR12 = 0x8826, // 10 001 0000 0100 110 - TRCIDR13 = 0x882e, // 10 001 0000 0101 110 - TRCIDR0 = 0x8847, // 10 001 0000 1000 111 - TRCIDR1 = 0x884f, // 10 001 0000 1001 111 - TRCIDR2 = 0x8857, // 10 001 0000 1010 111 - TRCIDR3 = 0x885f, // 10 001 0000 1011 111 - TRCIDR4 = 0x8867, // 10 001 0000 1100 111 - TRCIDR5 = 0x886f, // 10 001 0000 1101 111 - TRCIDR6 = 0x8877, // 10 001 0000 1110 111 - TRCIDR7 = 0x887f, // 10 001 0000 1111 111 - TRCOSLSR = 0x888c, // 10 001 0001 0001 100 - TRCPDSR = 0x88ac, // 10 001 0001 0101 100 - TRCDEVAFF0 = 0x8bd6, // 10 001 0111 1010 110 - TRCDEVAFF1 = 0x8bde, // 10 001 0111 1011 110 - TRCLSR = 0x8bee, // 10 001 0111 1101 110 - TRCAUTHSTATUS = 0x8bf6, // 10 001 0111 1110 110 - TRCDEVARCH = 0x8bfe, // 10 001 0111 1111 110 - TRCDEVID = 0x8b97, // 10 001 0111 0010 111 - TRCDEVTYPE = 0x8b9f, // 10 001 0111 0011 111 - TRCPIDR4 = 0x8ba7, // 10 001 0111 0100 111 - TRCPIDR5 = 0x8baf, // 10 001 0111 0101 111 - TRCPIDR6 = 0x8bb7, // 10 001 0111 0110 111 - TRCPIDR7 = 0x8bbf, // 10 001 0111 0111 111 - TRCPIDR0 = 0x8bc7, // 10 001 0111 1000 111 - TRCPIDR1 = 0x8bcf, // 10 001 0111 1001 111 - TRCPIDR2 = 0x8bd7, // 10 001 0111 1010 111 - TRCPIDR3 = 0x8bdf, // 10 001 0111 1011 111 - TRCCIDR0 = 0x8be7, // 10 001 0111 1100 111 - TRCCIDR1 = 0x8bef, // 10 001 0111 1101 111 - TRCCIDR2 = 0x8bf7, // 10 001 0111 1110 111 - TRCCIDR3 = 0x8bff, // 10 001 0111 1111 111 - - // GICv3 registers - ICC_IAR1_EL1 = 0xc660, // 11 000 1100 1100 000 - ICC_IAR0_EL1 = 0xc640, // 11 000 1100 1000 000 - ICC_HPPIR1_EL1 = 0xc662, // 11 000 1100 1100 010 - ICC_HPPIR0_EL1 = 0xc642, // 11 000 1100 1000 010 - ICC_RPR_EL1 = 0xc65b, // 11 000 1100 1011 011 - ICH_VTR_EL2 = 0xe659, // 11 100 1100 1011 001 - ICH_EISR_EL2 = 0xe65b, // 11 100 1100 1011 011 - ICH_ELSR_EL2 = 0xe65d // 11 100 1100 1011 101 - }; - - enum SysRegWOValues { - DBGDTRTX_EL0 = 0x9828, // 10 011 0000 0101 000 - OSLAR_EL1 = 0x8084, // 10 000 0001 0000 100 - PMSWINC_EL0 = 0xdce4, // 11 011 1001 1100 100 - - // Trace Registers - TRCOSLAR = 0x8884, // 10 001 0001 0000 100 - TRCLAR = 0x8be6, // 10 001 0111 1100 110 - - // GICv3 registers - ICC_EOIR1_EL1 = 0xc661, // 11 000 1100 1100 001 - ICC_EOIR0_EL1 = 0xc641, // 11 000 1100 1000 001 - ICC_DIR_EL1 = 0xc659, // 11 000 1100 1011 001 - ICC_SGI1R_EL1 = 0xc65d, // 11 000 1100 1011 101 - ICC_ASGI1R_EL1 = 0xc65e, // 11 000 1100 1011 110 - ICC_SGI0R_EL1 = 0xc65f // 11 000 1100 1011 111 - }; - - enum SysRegValues { - Invalid = -1, // Op0 Op1 CRn CRm Op2 - OSDTRRX_EL1 = 0x8002, // 10 000 0000 0000 010 - OSDTRTX_EL1 = 0x801a, // 10 000 0000 0011 010 - TEECR32_EL1 = 0x9000, // 10 010 0000 0000 000 - MDCCINT_EL1 = 0x8010, // 10 000 0000 0010 000 - MDSCR_EL1 = 0x8012, // 10 000 0000 0010 010 - DBGDTR_EL0 = 0x9820, // 10 011 0000 0100 000 - OSECCR_EL1 = 0x8032, // 10 000 0000 0110 010 - DBGVCR32_EL2 = 0xa038, 
// 10 100 0000 0111 000 - DBGBVR0_EL1 = 0x8004, // 10 000 0000 0000 100 - DBGBVR1_EL1 = 0x800c, // 10 000 0000 0001 100 - DBGBVR2_EL1 = 0x8014, // 10 000 0000 0010 100 - DBGBVR3_EL1 = 0x801c, // 10 000 0000 0011 100 - DBGBVR4_EL1 = 0x8024, // 10 000 0000 0100 100 - DBGBVR5_EL1 = 0x802c, // 10 000 0000 0101 100 - DBGBVR6_EL1 = 0x8034, // 10 000 0000 0110 100 - DBGBVR7_EL1 = 0x803c, // 10 000 0000 0111 100 - DBGBVR8_EL1 = 0x8044, // 10 000 0000 1000 100 - DBGBVR9_EL1 = 0x804c, // 10 000 0000 1001 100 - DBGBVR10_EL1 = 0x8054, // 10 000 0000 1010 100 - DBGBVR11_EL1 = 0x805c, // 10 000 0000 1011 100 - DBGBVR12_EL1 = 0x8064, // 10 000 0000 1100 100 - DBGBVR13_EL1 = 0x806c, // 10 000 0000 1101 100 - DBGBVR14_EL1 = 0x8074, // 10 000 0000 1110 100 - DBGBVR15_EL1 = 0x807c, // 10 000 0000 1111 100 - DBGBCR0_EL1 = 0x8005, // 10 000 0000 0000 101 - DBGBCR1_EL1 = 0x800d, // 10 000 0000 0001 101 - DBGBCR2_EL1 = 0x8015, // 10 000 0000 0010 101 - DBGBCR3_EL1 = 0x801d, // 10 000 0000 0011 101 - DBGBCR4_EL1 = 0x8025, // 10 000 0000 0100 101 - DBGBCR5_EL1 = 0x802d, // 10 000 0000 0101 101 - DBGBCR6_EL1 = 0x8035, // 10 000 0000 0110 101 - DBGBCR7_EL1 = 0x803d, // 10 000 0000 0111 101 - DBGBCR8_EL1 = 0x8045, // 10 000 0000 1000 101 - DBGBCR9_EL1 = 0x804d, // 10 000 0000 1001 101 - DBGBCR10_EL1 = 0x8055, // 10 000 0000 1010 101 - DBGBCR11_EL1 = 0x805d, // 10 000 0000 1011 101 - DBGBCR12_EL1 = 0x8065, // 10 000 0000 1100 101 - DBGBCR13_EL1 = 0x806d, // 10 000 0000 1101 101 - DBGBCR14_EL1 = 0x8075, // 10 000 0000 1110 101 - DBGBCR15_EL1 = 0x807d, // 10 000 0000 1111 101 - DBGWVR0_EL1 = 0x8006, // 10 000 0000 0000 110 - DBGWVR1_EL1 = 0x800e, // 10 000 0000 0001 110 - DBGWVR2_EL1 = 0x8016, // 10 000 0000 0010 110 - DBGWVR3_EL1 = 0x801e, // 10 000 0000 0011 110 - DBGWVR4_EL1 = 0x8026, // 10 000 0000 0100 110 - DBGWVR5_EL1 = 0x802e, // 10 000 0000 0101 110 - DBGWVR6_EL1 = 0x8036, // 10 000 0000 0110 110 - DBGWVR7_EL1 = 0x803e, // 10 000 0000 0111 110 - DBGWVR8_EL1 = 0x8046, // 10 000 0000 1000 110 - DBGWVR9_EL1 = 0x804e, // 10 000 0000 1001 110 - DBGWVR10_EL1 = 0x8056, // 10 000 0000 1010 110 - DBGWVR11_EL1 = 0x805e, // 10 000 0000 1011 110 - DBGWVR12_EL1 = 0x8066, // 10 000 0000 1100 110 - DBGWVR13_EL1 = 0x806e, // 10 000 0000 1101 110 - DBGWVR14_EL1 = 0x8076, // 10 000 0000 1110 110 - DBGWVR15_EL1 = 0x807e, // 10 000 0000 1111 110 - DBGWCR0_EL1 = 0x8007, // 10 000 0000 0000 111 - DBGWCR1_EL1 = 0x800f, // 10 000 0000 0001 111 - DBGWCR2_EL1 = 0x8017, // 10 000 0000 0010 111 - DBGWCR3_EL1 = 0x801f, // 10 000 0000 0011 111 - DBGWCR4_EL1 = 0x8027, // 10 000 0000 0100 111 - DBGWCR5_EL1 = 0x802f, // 10 000 0000 0101 111 - DBGWCR6_EL1 = 0x8037, // 10 000 0000 0110 111 - DBGWCR7_EL1 = 0x803f, // 10 000 0000 0111 111 - DBGWCR8_EL1 = 0x8047, // 10 000 0000 1000 111 - DBGWCR9_EL1 = 0x804f, // 10 000 0000 1001 111 - DBGWCR10_EL1 = 0x8057, // 10 000 0000 1010 111 - DBGWCR11_EL1 = 0x805f, // 10 000 0000 1011 111 - DBGWCR12_EL1 = 0x8067, // 10 000 0000 1100 111 - DBGWCR13_EL1 = 0x806f, // 10 000 0000 1101 111 - DBGWCR14_EL1 = 0x8077, // 10 000 0000 1110 111 - DBGWCR15_EL1 = 0x807f, // 10 000 0000 1111 111 - TEEHBR32_EL1 = 0x9080, // 10 010 0001 0000 000 - OSDLR_EL1 = 0x809c, // 10 000 0001 0011 100 - DBGPRCR_EL1 = 0x80a4, // 10 000 0001 0100 100 - DBGCLAIMSET_EL1 = 0x83c6, // 10 000 0111 1000 110 - DBGCLAIMCLR_EL1 = 0x83ce, // 10 000 0111 1001 110 - CSSELR_EL1 = 0xd000, // 11 010 0000 0000 000 - VPIDR_EL2 = 0xe000, // 11 100 0000 0000 000 - VMPIDR_EL2 = 0xe005, // 11 100 0000 0000 101 - CPACR_EL1 = 0xc082, // 11 000 0001 0000 010 - 
SCTLR_EL1 = 0xc080, // 11 000 0001 0000 000 - SCTLR_EL2 = 0xe080, // 11 100 0001 0000 000 - SCTLR_EL3 = 0xf080, // 11 110 0001 0000 000 - ACTLR_EL1 = 0xc081, // 11 000 0001 0000 001 - ACTLR_EL2 = 0xe081, // 11 100 0001 0000 001 - ACTLR_EL3 = 0xf081, // 11 110 0001 0000 001 - HCR_EL2 = 0xe088, // 11 100 0001 0001 000 - SCR_EL3 = 0xf088, // 11 110 0001 0001 000 - MDCR_EL2 = 0xe089, // 11 100 0001 0001 001 - SDER32_EL3 = 0xf089, // 11 110 0001 0001 001 - CPTR_EL2 = 0xe08a, // 11 100 0001 0001 010 - CPTR_EL3 = 0xf08a, // 11 110 0001 0001 010 - HSTR_EL2 = 0xe08b, // 11 100 0001 0001 011 - HACR_EL2 = 0xe08f, // 11 100 0001 0001 111 - MDCR_EL3 = 0xf099, // 11 110 0001 0011 001 - TTBR0_EL1 = 0xc100, // 11 000 0010 0000 000 - TTBR0_EL2 = 0xe100, // 11 100 0010 0000 000 - TTBR0_EL3 = 0xf100, // 11 110 0010 0000 000 - TTBR1_EL1 = 0xc101, // 11 000 0010 0000 001 - TCR_EL1 = 0xc102, // 11 000 0010 0000 010 - TCR_EL2 = 0xe102, // 11 100 0010 0000 010 - TCR_EL3 = 0xf102, // 11 110 0010 0000 010 - VTTBR_EL2 = 0xe108, // 11 100 0010 0001 000 - VTCR_EL2 = 0xe10a, // 11 100 0010 0001 010 - DACR32_EL2 = 0xe180, // 11 100 0011 0000 000 - SPSR_EL1 = 0xc200, // 11 000 0100 0000 000 - SPSR_EL2 = 0xe200, // 11 100 0100 0000 000 - SPSR_EL3 = 0xf200, // 11 110 0100 0000 000 - ELR_EL1 = 0xc201, // 11 000 0100 0000 001 - ELR_EL2 = 0xe201, // 11 100 0100 0000 001 - ELR_EL3 = 0xf201, // 11 110 0100 0000 001 - SP_EL0 = 0xc208, // 11 000 0100 0001 000 - SP_EL1 = 0xe208, // 11 100 0100 0001 000 - SP_EL2 = 0xf208, // 11 110 0100 0001 000 - SPSel = 0xc210, // 11 000 0100 0010 000 - NZCV = 0xda10, // 11 011 0100 0010 000 - DAIF = 0xda11, // 11 011 0100 0010 001 - CurrentEL = 0xc212, // 11 000 0100 0010 010 - SPSR_irq = 0xe218, // 11 100 0100 0011 000 - SPSR_abt = 0xe219, // 11 100 0100 0011 001 - SPSR_und = 0xe21a, // 11 100 0100 0011 010 - SPSR_fiq = 0xe21b, // 11 100 0100 0011 011 - FPCR = 0xda20, // 11 011 0100 0100 000 - FPSR = 0xda21, // 11 011 0100 0100 001 - DSPSR_EL0 = 0xda28, // 11 011 0100 0101 000 - DLR_EL0 = 0xda29, // 11 011 0100 0101 001 - IFSR32_EL2 = 0xe281, // 11 100 0101 0000 001 - AFSR0_EL1 = 0xc288, // 11 000 0101 0001 000 - AFSR0_EL2 = 0xe288, // 11 100 0101 0001 000 - AFSR0_EL3 = 0xf288, // 11 110 0101 0001 000 - AFSR1_EL1 = 0xc289, // 11 000 0101 0001 001 - AFSR1_EL2 = 0xe289, // 11 100 0101 0001 001 - AFSR1_EL3 = 0xf289, // 11 110 0101 0001 001 - ESR_EL1 = 0xc290, // 11 000 0101 0010 000 - ESR_EL2 = 0xe290, // 11 100 0101 0010 000 - ESR_EL3 = 0xf290, // 11 110 0101 0010 000 - FPEXC32_EL2 = 0xe298, // 11 100 0101 0011 000 - FAR_EL1 = 0xc300, // 11 000 0110 0000 000 - FAR_EL2 = 0xe300, // 11 100 0110 0000 000 - FAR_EL3 = 0xf300, // 11 110 0110 0000 000 - HPFAR_EL2 = 0xe304, // 11 100 0110 0000 100 - PAR_EL1 = 0xc3a0, // 11 000 0111 0100 000 - PMCR_EL0 = 0xdce0, // 11 011 1001 1100 000 - PMCNTENSET_EL0 = 0xdce1, // 11 011 1001 1100 001 - PMCNTENCLR_EL0 = 0xdce2, // 11 011 1001 1100 010 - PMOVSCLR_EL0 = 0xdce3, // 11 011 1001 1100 011 - PMSELR_EL0 = 0xdce5, // 11 011 1001 1100 101 - PMCCNTR_EL0 = 0xdce8, // 11 011 1001 1101 000 - PMXEVTYPER_EL0 = 0xdce9, // 11 011 1001 1101 001 - PMXEVCNTR_EL0 = 0xdcea, // 11 011 1001 1101 010 - PMUSERENR_EL0 = 0xdcf0, // 11 011 1001 1110 000 - PMINTENSET_EL1 = 0xc4f1, // 11 000 1001 1110 001 - PMINTENCLR_EL1 = 0xc4f2, // 11 000 1001 1110 010 - PMOVSSET_EL0 = 0xdcf3, // 11 011 1001 1110 011 - MAIR_EL1 = 0xc510, // 11 000 1010 0010 000 - MAIR_EL2 = 0xe510, // 11 100 1010 0010 000 - MAIR_EL3 = 0xf510, // 11 110 1010 0010 000 - AMAIR_EL1 = 0xc518, // 11 000 1010 0011 000 - 
AMAIR_EL2 = 0xe518, // 11 100 1010 0011 000 - AMAIR_EL3 = 0xf518, // 11 110 1010 0011 000 - VBAR_EL1 = 0xc600, // 11 000 1100 0000 000 - VBAR_EL2 = 0xe600, // 11 100 1100 0000 000 - VBAR_EL3 = 0xf600, // 11 110 1100 0000 000 - RMR_EL1 = 0xc602, // 11 000 1100 0000 010 - RMR_EL2 = 0xe602, // 11 100 1100 0000 010 - RMR_EL3 = 0xf602, // 11 110 1100 0000 010 - CONTEXTIDR_EL1 = 0xc681, // 11 000 1101 0000 001 - TPIDR_EL0 = 0xde82, // 11 011 1101 0000 010 - TPIDR_EL2 = 0xe682, // 11 100 1101 0000 010 - TPIDR_EL3 = 0xf682, // 11 110 1101 0000 010 - TPIDRRO_EL0 = 0xde83, // 11 011 1101 0000 011 - TPIDR_EL1 = 0xc684, // 11 000 1101 0000 100 - CNTFRQ_EL0 = 0xdf00, // 11 011 1110 0000 000 - CNTVOFF_EL2 = 0xe703, // 11 100 1110 0000 011 - CNTKCTL_EL1 = 0xc708, // 11 000 1110 0001 000 - CNTHCTL_EL2 = 0xe708, // 11 100 1110 0001 000 - CNTP_TVAL_EL0 = 0xdf10, // 11 011 1110 0010 000 - CNTHP_TVAL_EL2 = 0xe710, // 11 100 1110 0010 000 - CNTPS_TVAL_EL1 = 0xff10, // 11 111 1110 0010 000 - CNTP_CTL_EL0 = 0xdf11, // 11 011 1110 0010 001 - CNTHP_CTL_EL2 = 0xe711, // 11 100 1110 0010 001 - CNTPS_CTL_EL1 = 0xff11, // 11 111 1110 0010 001 - CNTP_CVAL_EL0 = 0xdf12, // 11 011 1110 0010 010 - CNTHP_CVAL_EL2 = 0xe712, // 11 100 1110 0010 010 - CNTPS_CVAL_EL1 = 0xff12, // 11 111 1110 0010 010 - CNTV_TVAL_EL0 = 0xdf18, // 11 011 1110 0011 000 - CNTV_CTL_EL0 = 0xdf19, // 11 011 1110 0011 001 - CNTV_CVAL_EL0 = 0xdf1a, // 11 011 1110 0011 010 - PMEVCNTR0_EL0 = 0xdf40, // 11 011 1110 1000 000 - PMEVCNTR1_EL0 = 0xdf41, // 11 011 1110 1000 001 - PMEVCNTR2_EL0 = 0xdf42, // 11 011 1110 1000 010 - PMEVCNTR3_EL0 = 0xdf43, // 11 011 1110 1000 011 - PMEVCNTR4_EL0 = 0xdf44, // 11 011 1110 1000 100 - PMEVCNTR5_EL0 = 0xdf45, // 11 011 1110 1000 101 - PMEVCNTR6_EL0 = 0xdf46, // 11 011 1110 1000 110 - PMEVCNTR7_EL0 = 0xdf47, // 11 011 1110 1000 111 - PMEVCNTR8_EL0 = 0xdf48, // 11 011 1110 1001 000 - PMEVCNTR9_EL0 = 0xdf49, // 11 011 1110 1001 001 - PMEVCNTR10_EL0 = 0xdf4a, // 11 011 1110 1001 010 - PMEVCNTR11_EL0 = 0xdf4b, // 11 011 1110 1001 011 - PMEVCNTR12_EL0 = 0xdf4c, // 11 011 1110 1001 100 - PMEVCNTR13_EL0 = 0xdf4d, // 11 011 1110 1001 101 - PMEVCNTR14_EL0 = 0xdf4e, // 11 011 1110 1001 110 - PMEVCNTR15_EL0 = 0xdf4f, // 11 011 1110 1001 111 - PMEVCNTR16_EL0 = 0xdf50, // 11 011 1110 1010 000 - PMEVCNTR17_EL0 = 0xdf51, // 11 011 1110 1010 001 - PMEVCNTR18_EL0 = 0xdf52, // 11 011 1110 1010 010 - PMEVCNTR19_EL0 = 0xdf53, // 11 011 1110 1010 011 - PMEVCNTR20_EL0 = 0xdf54, // 11 011 1110 1010 100 - PMEVCNTR21_EL0 = 0xdf55, // 11 011 1110 1010 101 - PMEVCNTR22_EL0 = 0xdf56, // 11 011 1110 1010 110 - PMEVCNTR23_EL0 = 0xdf57, // 11 011 1110 1010 111 - PMEVCNTR24_EL0 = 0xdf58, // 11 011 1110 1011 000 - PMEVCNTR25_EL0 = 0xdf59, // 11 011 1110 1011 001 - PMEVCNTR26_EL0 = 0xdf5a, // 11 011 1110 1011 010 - PMEVCNTR27_EL0 = 0xdf5b, // 11 011 1110 1011 011 - PMEVCNTR28_EL0 = 0xdf5c, // 11 011 1110 1011 100 - PMEVCNTR29_EL0 = 0xdf5d, // 11 011 1110 1011 101 - PMEVCNTR30_EL0 = 0xdf5e, // 11 011 1110 1011 110 - PMCCFILTR_EL0 = 0xdf7f, // 11 011 1110 1111 111 - PMEVTYPER0_EL0 = 0xdf60, // 11 011 1110 1100 000 - PMEVTYPER1_EL0 = 0xdf61, // 11 011 1110 1100 001 - PMEVTYPER2_EL0 = 0xdf62, // 11 011 1110 1100 010 - PMEVTYPER3_EL0 = 0xdf63, // 11 011 1110 1100 011 - PMEVTYPER4_EL0 = 0xdf64, // 11 011 1110 1100 100 - PMEVTYPER5_EL0 = 0xdf65, // 11 011 1110 1100 101 - PMEVTYPER6_EL0 = 0xdf66, // 11 011 1110 1100 110 - PMEVTYPER7_EL0 = 0xdf67, // 11 011 1110 1100 111 - PMEVTYPER8_EL0 = 0xdf68, // 11 011 1110 1101 000 - PMEVTYPER9_EL0 = 0xdf69, // 11 011 1110 
1101 001 - PMEVTYPER10_EL0 = 0xdf6a, // 11 011 1110 1101 010 - PMEVTYPER11_EL0 = 0xdf6b, // 11 011 1110 1101 011 - PMEVTYPER12_EL0 = 0xdf6c, // 11 011 1110 1101 100 - PMEVTYPER13_EL0 = 0xdf6d, // 11 011 1110 1101 101 - PMEVTYPER14_EL0 = 0xdf6e, // 11 011 1110 1101 110 - PMEVTYPER15_EL0 = 0xdf6f, // 11 011 1110 1101 111 - PMEVTYPER16_EL0 = 0xdf70, // 11 011 1110 1110 000 - PMEVTYPER17_EL0 = 0xdf71, // 11 011 1110 1110 001 - PMEVTYPER18_EL0 = 0xdf72, // 11 011 1110 1110 010 - PMEVTYPER19_EL0 = 0xdf73, // 11 011 1110 1110 011 - PMEVTYPER20_EL0 = 0xdf74, // 11 011 1110 1110 100 - PMEVTYPER21_EL0 = 0xdf75, // 11 011 1110 1110 101 - PMEVTYPER22_EL0 = 0xdf76, // 11 011 1110 1110 110 - PMEVTYPER23_EL0 = 0xdf77, // 11 011 1110 1110 111 - PMEVTYPER24_EL0 = 0xdf78, // 11 011 1110 1111 000 - PMEVTYPER25_EL0 = 0xdf79, // 11 011 1110 1111 001 - PMEVTYPER26_EL0 = 0xdf7a, // 11 011 1110 1111 010 - PMEVTYPER27_EL0 = 0xdf7b, // 11 011 1110 1111 011 - PMEVTYPER28_EL0 = 0xdf7c, // 11 011 1110 1111 100 - PMEVTYPER29_EL0 = 0xdf7d, // 11 011 1110 1111 101 - PMEVTYPER30_EL0 = 0xdf7e, // 11 011 1110 1111 110 - - // Trace registers - TRCPRGCTLR = 0x8808, // 10 001 0000 0001 000 - TRCPROCSELR = 0x8810, // 10 001 0000 0010 000 - TRCCONFIGR = 0x8820, // 10 001 0000 0100 000 - TRCAUXCTLR = 0x8830, // 10 001 0000 0110 000 - TRCEVENTCTL0R = 0x8840, // 10 001 0000 1000 000 - TRCEVENTCTL1R = 0x8848, // 10 001 0000 1001 000 - TRCSTALLCTLR = 0x8858, // 10 001 0000 1011 000 - TRCTSCTLR = 0x8860, // 10 001 0000 1100 000 - TRCSYNCPR = 0x8868, // 10 001 0000 1101 000 - TRCCCCTLR = 0x8870, // 10 001 0000 1110 000 - TRCBBCTLR = 0x8878, // 10 001 0000 1111 000 - TRCTRACEIDR = 0x8801, // 10 001 0000 0000 001 - TRCQCTLR = 0x8809, // 10 001 0000 0001 001 - TRCVICTLR = 0x8802, // 10 001 0000 0000 010 - TRCVIIECTLR = 0x880a, // 10 001 0000 0001 010 - TRCVISSCTLR = 0x8812, // 10 001 0000 0010 010 - TRCVIPCSSCTLR = 0x881a, // 10 001 0000 0011 010 - TRCVDCTLR = 0x8842, // 10 001 0000 1000 010 - TRCVDSACCTLR = 0x884a, // 10 001 0000 1001 010 - TRCVDARCCTLR = 0x8852, // 10 001 0000 1010 010 - TRCSEQEVR0 = 0x8804, // 10 001 0000 0000 100 - TRCSEQEVR1 = 0x880c, // 10 001 0000 0001 100 - TRCSEQEVR2 = 0x8814, // 10 001 0000 0010 100 - TRCSEQRSTEVR = 0x8834, // 10 001 0000 0110 100 - TRCSEQSTR = 0x883c, // 10 001 0000 0111 100 - TRCEXTINSELR = 0x8844, // 10 001 0000 1000 100 - TRCCNTRLDVR0 = 0x8805, // 10 001 0000 0000 101 - TRCCNTRLDVR1 = 0x880d, // 10 001 0000 0001 101 - TRCCNTRLDVR2 = 0x8815, // 10 001 0000 0010 101 - TRCCNTRLDVR3 = 0x881d, // 10 001 0000 0011 101 - TRCCNTCTLR0 = 0x8825, // 10 001 0000 0100 101 - TRCCNTCTLR1 = 0x882d, // 10 001 0000 0101 101 - TRCCNTCTLR2 = 0x8835, // 10 001 0000 0110 101 - TRCCNTCTLR3 = 0x883d, // 10 001 0000 0111 101 - TRCCNTVR0 = 0x8845, // 10 001 0000 1000 101 - TRCCNTVR1 = 0x884d, // 10 001 0000 1001 101 - TRCCNTVR2 = 0x8855, // 10 001 0000 1010 101 - TRCCNTVR3 = 0x885d, // 10 001 0000 1011 101 - TRCIMSPEC0 = 0x8807, // 10 001 0000 0000 111 - TRCIMSPEC1 = 0x880f, // 10 001 0000 0001 111 - TRCIMSPEC2 = 0x8817, // 10 001 0000 0010 111 - TRCIMSPEC3 = 0x881f, // 10 001 0000 0011 111 - TRCIMSPEC4 = 0x8827, // 10 001 0000 0100 111 - TRCIMSPEC5 = 0x882f, // 10 001 0000 0101 111 - TRCIMSPEC6 = 0x8837, // 10 001 0000 0110 111 - TRCIMSPEC7 = 0x883f, // 10 001 0000 0111 111 - TRCRSCTLR2 = 0x8890, // 10 001 0001 0010 000 - TRCRSCTLR3 = 0x8898, // 10 001 0001 0011 000 - TRCRSCTLR4 = 0x88a0, // 10 001 0001 0100 000 - TRCRSCTLR5 = 0x88a8, // 10 001 0001 0101 000 - TRCRSCTLR6 = 0x88b0, // 10 001 0001 0110 000 - 
TRCRSCTLR7 = 0x88b8, // 10 001 0001 0111 000 - TRCRSCTLR8 = 0x88c0, // 10 001 0001 1000 000 - TRCRSCTLR9 = 0x88c8, // 10 001 0001 1001 000 - TRCRSCTLR10 = 0x88d0, // 10 001 0001 1010 000 - TRCRSCTLR11 = 0x88d8, // 10 001 0001 1011 000 - TRCRSCTLR12 = 0x88e0, // 10 001 0001 1100 000 - TRCRSCTLR13 = 0x88e8, // 10 001 0001 1101 000 - TRCRSCTLR14 = 0x88f0, // 10 001 0001 1110 000 - TRCRSCTLR15 = 0x88f8, // 10 001 0001 1111 000 - TRCRSCTLR16 = 0x8881, // 10 001 0001 0000 001 - TRCRSCTLR17 = 0x8889, // 10 001 0001 0001 001 - TRCRSCTLR18 = 0x8891, // 10 001 0001 0010 001 - TRCRSCTLR19 = 0x8899, // 10 001 0001 0011 001 - TRCRSCTLR20 = 0x88a1, // 10 001 0001 0100 001 - TRCRSCTLR21 = 0x88a9, // 10 001 0001 0101 001 - TRCRSCTLR22 = 0x88b1, // 10 001 0001 0110 001 - TRCRSCTLR23 = 0x88b9, // 10 001 0001 0111 001 - TRCRSCTLR24 = 0x88c1, // 10 001 0001 1000 001 - TRCRSCTLR25 = 0x88c9, // 10 001 0001 1001 001 - TRCRSCTLR26 = 0x88d1, // 10 001 0001 1010 001 - TRCRSCTLR27 = 0x88d9, // 10 001 0001 1011 001 - TRCRSCTLR28 = 0x88e1, // 10 001 0001 1100 001 - TRCRSCTLR29 = 0x88e9, // 10 001 0001 1101 001 - TRCRSCTLR30 = 0x88f1, // 10 001 0001 1110 001 - TRCRSCTLR31 = 0x88f9, // 10 001 0001 1111 001 - TRCSSCCR0 = 0x8882, // 10 001 0001 0000 010 - TRCSSCCR1 = 0x888a, // 10 001 0001 0001 010 - TRCSSCCR2 = 0x8892, // 10 001 0001 0010 010 - TRCSSCCR3 = 0x889a, // 10 001 0001 0011 010 - TRCSSCCR4 = 0x88a2, // 10 001 0001 0100 010 - TRCSSCCR5 = 0x88aa, // 10 001 0001 0101 010 - TRCSSCCR6 = 0x88b2, // 10 001 0001 0110 010 - TRCSSCCR7 = 0x88ba, // 10 001 0001 0111 010 - TRCSSCSR0 = 0x88c2, // 10 001 0001 1000 010 - TRCSSCSR1 = 0x88ca, // 10 001 0001 1001 010 - TRCSSCSR2 = 0x88d2, // 10 001 0001 1010 010 - TRCSSCSR3 = 0x88da, // 10 001 0001 1011 010 - TRCSSCSR4 = 0x88e2, // 10 001 0001 1100 010 - TRCSSCSR5 = 0x88ea, // 10 001 0001 1101 010 - TRCSSCSR6 = 0x88f2, // 10 001 0001 1110 010 - TRCSSCSR7 = 0x88fa, // 10 001 0001 1111 010 - TRCSSPCICR0 = 0x8883, // 10 001 0001 0000 011 - TRCSSPCICR1 = 0x888b, // 10 001 0001 0001 011 - TRCSSPCICR2 = 0x8893, // 10 001 0001 0010 011 - TRCSSPCICR3 = 0x889b, // 10 001 0001 0011 011 - TRCSSPCICR4 = 0x88a3, // 10 001 0001 0100 011 - TRCSSPCICR5 = 0x88ab, // 10 001 0001 0101 011 - TRCSSPCICR6 = 0x88b3, // 10 001 0001 0110 011 - TRCSSPCICR7 = 0x88bb, // 10 001 0001 0111 011 - TRCPDCR = 0x88a4, // 10 001 0001 0100 100 - TRCACVR0 = 0x8900, // 10 001 0010 0000 000 - TRCACVR1 = 0x8910, // 10 001 0010 0010 000 - TRCACVR2 = 0x8920, // 10 001 0010 0100 000 - TRCACVR3 = 0x8930, // 10 001 0010 0110 000 - TRCACVR4 = 0x8940, // 10 001 0010 1000 000 - TRCACVR5 = 0x8950, // 10 001 0010 1010 000 - TRCACVR6 = 0x8960, // 10 001 0010 1100 000 - TRCACVR7 = 0x8970, // 10 001 0010 1110 000 - TRCACVR8 = 0x8901, // 10 001 0010 0000 001 - TRCACVR9 = 0x8911, // 10 001 0010 0010 001 - TRCACVR10 = 0x8921, // 10 001 0010 0100 001 - TRCACVR11 = 0x8931, // 10 001 0010 0110 001 - TRCACVR12 = 0x8941, // 10 001 0010 1000 001 - TRCACVR13 = 0x8951, // 10 001 0010 1010 001 - TRCACVR14 = 0x8961, // 10 001 0010 1100 001 - TRCACVR15 = 0x8971, // 10 001 0010 1110 001 - TRCACATR0 = 0x8902, // 10 001 0010 0000 010 - TRCACATR1 = 0x8912, // 10 001 0010 0010 010 - TRCACATR2 = 0x8922, // 10 001 0010 0100 010 - TRCACATR3 = 0x8932, // 10 001 0010 0110 010 - TRCACATR4 = 0x8942, // 10 001 0010 1000 010 - TRCACATR5 = 0x8952, // 10 001 0010 1010 010 - TRCACATR6 = 0x8962, // 10 001 0010 1100 010 - TRCACATR7 = 0x8972, // 10 001 0010 1110 010 - TRCACATR8 = 0x8903, // 10 001 0010 0000 011 - TRCACATR9 = 0x8913, // 10 001 0010 0010 011 - 
TRCACATR10 = 0x8923, // 10 001 0010 0100 011 - TRCACATR11 = 0x8933, // 10 001 0010 0110 011 - TRCACATR12 = 0x8943, // 10 001 0010 1000 011 - TRCACATR13 = 0x8953, // 10 001 0010 1010 011 - TRCACATR14 = 0x8963, // 10 001 0010 1100 011 - TRCACATR15 = 0x8973, // 10 001 0010 1110 011 - TRCDVCVR0 = 0x8904, // 10 001 0010 0000 100 - TRCDVCVR1 = 0x8924, // 10 001 0010 0100 100 - TRCDVCVR2 = 0x8944, // 10 001 0010 1000 100 - TRCDVCVR3 = 0x8964, // 10 001 0010 1100 100 - TRCDVCVR4 = 0x8905, // 10 001 0010 0000 101 - TRCDVCVR5 = 0x8925, // 10 001 0010 0100 101 - TRCDVCVR6 = 0x8945, // 10 001 0010 1000 101 - TRCDVCVR7 = 0x8965, // 10 001 0010 1100 101 - TRCDVCMR0 = 0x8906, // 10 001 0010 0000 110 - TRCDVCMR1 = 0x8926, // 10 001 0010 0100 110 - TRCDVCMR2 = 0x8946, // 10 001 0010 1000 110 - TRCDVCMR3 = 0x8966, // 10 001 0010 1100 110 - TRCDVCMR4 = 0x8907, // 10 001 0010 0000 111 - TRCDVCMR5 = 0x8927, // 10 001 0010 0100 111 - TRCDVCMR6 = 0x8947, // 10 001 0010 1000 111 - TRCDVCMR7 = 0x8967, // 10 001 0010 1100 111 - TRCCIDCVR0 = 0x8980, // 10 001 0011 0000 000 - TRCCIDCVR1 = 0x8990, // 10 001 0011 0010 000 - TRCCIDCVR2 = 0x89a0, // 10 001 0011 0100 000 - TRCCIDCVR3 = 0x89b0, // 10 001 0011 0110 000 - TRCCIDCVR4 = 0x89c0, // 10 001 0011 1000 000 - TRCCIDCVR5 = 0x89d0, // 10 001 0011 1010 000 - TRCCIDCVR6 = 0x89e0, // 10 001 0011 1100 000 - TRCCIDCVR7 = 0x89f0, // 10 001 0011 1110 000 - TRCVMIDCVR0 = 0x8981, // 10 001 0011 0000 001 - TRCVMIDCVR1 = 0x8991, // 10 001 0011 0010 001 - TRCVMIDCVR2 = 0x89a1, // 10 001 0011 0100 001 - TRCVMIDCVR3 = 0x89b1, // 10 001 0011 0110 001 - TRCVMIDCVR4 = 0x89c1, // 10 001 0011 1000 001 - TRCVMIDCVR5 = 0x89d1, // 10 001 0011 1010 001 - TRCVMIDCVR6 = 0x89e1, // 10 001 0011 1100 001 - TRCVMIDCVR7 = 0x89f1, // 10 001 0011 1110 001 - TRCCIDCCTLR0 = 0x8982, // 10 001 0011 0000 010 - TRCCIDCCTLR1 = 0x898a, // 10 001 0011 0001 010 - TRCVMIDCCTLR0 = 0x8992, // 10 001 0011 0010 010 - TRCVMIDCCTLR1 = 0x899a, // 10 001 0011 0011 010 - TRCITCTRL = 0x8b84, // 10 001 0111 0000 100 - TRCCLAIMSET = 0x8bc6, // 10 001 0111 1000 110 - TRCCLAIMCLR = 0x8bce, // 10 001 0111 1001 110 - - // GICv3 registers - ICC_BPR1_EL1 = 0xc663, // 11 000 1100 1100 011 - ICC_BPR0_EL1 = 0xc643, // 11 000 1100 1000 011 - ICC_PMR_EL1 = 0xc230, // 11 000 0100 0110 000 - ICC_CTLR_EL1 = 0xc664, // 11 000 1100 1100 100 - ICC_CTLR_EL3 = 0xf664, // 11 110 1100 1100 100 - ICC_SRE_EL1 = 0xc665, // 11 000 1100 1100 101 - ICC_SRE_EL2 = 0xe64d, // 11 100 1100 1001 101 - ICC_SRE_EL3 = 0xf665, // 11 110 1100 1100 101 - ICC_IGRPEN0_EL1 = 0xc666, // 11 000 1100 1100 110 - ICC_IGRPEN1_EL1 = 0xc667, // 11 000 1100 1100 111 - ICC_IGRPEN1_EL3 = 0xf667, // 11 110 1100 1100 111 - ICC_SEIEN_EL1 = 0xc668, // 11 000 1100 1101 000 - ICC_AP0R0_EL1 = 0xc644, // 11 000 1100 1000 100 - ICC_AP0R1_EL1 = 0xc645, // 11 000 1100 1000 101 - ICC_AP0R2_EL1 = 0xc646, // 11 000 1100 1000 110 - ICC_AP0R3_EL1 = 0xc647, // 11 000 1100 1000 111 - ICC_AP1R0_EL1 = 0xc648, // 11 000 1100 1001 000 - ICC_AP1R1_EL1 = 0xc649, // 11 000 1100 1001 001 - ICC_AP1R2_EL1 = 0xc64a, // 11 000 1100 1001 010 - ICC_AP1R3_EL1 = 0xc64b, // 11 000 1100 1001 011 - ICH_AP0R0_EL2 = 0xe640, // 11 100 1100 1000 000 - ICH_AP0R1_EL2 = 0xe641, // 11 100 1100 1000 001 - ICH_AP0R2_EL2 = 0xe642, // 11 100 1100 1000 010 - ICH_AP0R3_EL2 = 0xe643, // 11 100 1100 1000 011 - ICH_AP1R0_EL2 = 0xe648, // 11 100 1100 1001 000 - ICH_AP1R1_EL2 = 0xe649, // 11 100 1100 1001 001 - ICH_AP1R2_EL2 = 0xe64a, // 11 100 1100 1001 010 - ICH_AP1R3_EL2 = 0xe64b, // 11 100 1100 1001 011 - ICH_HCR_EL2 = 
0xe658, // 11 100 1100 1011 000 - ICH_MISR_EL2 = 0xe65a, // 11 100 1100 1011 010 - ICH_VMCR_EL2 = 0xe65f, // 11 100 1100 1011 111 - ICH_VSEIR_EL2 = 0xe64c, // 11 100 1100 1001 100 - ICH_LR0_EL2 = 0xe660, // 11 100 1100 1100 000 - ICH_LR1_EL2 = 0xe661, // 11 100 1100 1100 001 - ICH_LR2_EL2 = 0xe662, // 11 100 1100 1100 010 - ICH_LR3_EL2 = 0xe663, // 11 100 1100 1100 011 - ICH_LR4_EL2 = 0xe664, // 11 100 1100 1100 100 - ICH_LR5_EL2 = 0xe665, // 11 100 1100 1100 101 - ICH_LR6_EL2 = 0xe666, // 11 100 1100 1100 110 - ICH_LR7_EL2 = 0xe667, // 11 100 1100 1100 111 - ICH_LR8_EL2 = 0xe668, // 11 100 1100 1101 000 - ICH_LR9_EL2 = 0xe669, // 11 100 1100 1101 001 - ICH_LR10_EL2 = 0xe66a, // 11 100 1100 1101 010 - ICH_LR11_EL2 = 0xe66b, // 11 100 1100 1101 011 - ICH_LR12_EL2 = 0xe66c, // 11 100 1100 1101 100 - ICH_LR13_EL2 = 0xe66d, // 11 100 1100 1101 101 - ICH_LR14_EL2 = 0xe66e, // 11 100 1100 1101 110 - ICH_LR15_EL2 = 0xe66f // 11 100 1100 1101 111 - }; - - // Note that these do not inherit from NamedImmMapper. This class is - // sufficiently different in its behaviour that I don't believe it's worth - // burdening the common NamedImmMapper with abstractions only needed in - // this one case. - struct SysRegMapper { - static const NamedImmMapper::Mapping SysRegPairs[]; - - const NamedImmMapper::Mapping *InstPairs; - size_t NumInstPairs; - - SysRegMapper() {} - uint32_t fromString(StringRef Name, bool &Valid) const; - std::string toString(uint32_t Bits, bool &Valid) const; - }; - - struct MSRMapper : SysRegMapper { - static const NamedImmMapper::Mapping MSRPairs[]; - MSRMapper(); - }; - - struct MRSMapper : SysRegMapper { - static const NamedImmMapper::Mapping MRSPairs[]; - MRSMapper(); - }; - - uint32_t ParseGenericRegister(StringRef Name, bool &Valid); -} - -namespace A64TLBI { - enum TLBIValues { - Invalid = -1, // Op0 Op1 CRn CRm Op2 - IPAS2E1IS = 0x6401, // 01 100 1000 0000 001 - IPAS2LE1IS = 0x6405, // 01 100 1000 0000 101 - VMALLE1IS = 0x4418, // 01 000 1000 0011 000 - ALLE2IS = 0x6418, // 01 100 1000 0011 000 - ALLE3IS = 0x7418, // 01 110 1000 0011 000 - VAE1IS = 0x4419, // 01 000 1000 0011 001 - VAE2IS = 0x6419, // 01 100 1000 0011 001 - VAE3IS = 0x7419, // 01 110 1000 0011 001 - ASIDE1IS = 0x441a, // 01 000 1000 0011 010 - VAAE1IS = 0x441b, // 01 000 1000 0011 011 - ALLE1IS = 0x641c, // 01 100 1000 0011 100 - VALE1IS = 0x441d, // 01 000 1000 0011 101 - VALE2IS = 0x641d, // 01 100 1000 0011 101 - VALE3IS = 0x741d, // 01 110 1000 0011 101 - VMALLS12E1IS = 0x641e, // 01 100 1000 0011 110 - VAALE1IS = 0x441f, // 01 000 1000 0011 111 - IPAS2E1 = 0x6421, // 01 100 1000 0100 001 - IPAS2LE1 = 0x6425, // 01 100 1000 0100 101 - VMALLE1 = 0x4438, // 01 000 1000 0111 000 - ALLE2 = 0x6438, // 01 100 1000 0111 000 - ALLE3 = 0x7438, // 01 110 1000 0111 000 - VAE1 = 0x4439, // 01 000 1000 0111 001 - VAE2 = 0x6439, // 01 100 1000 0111 001 - VAE3 = 0x7439, // 01 110 1000 0111 001 - ASIDE1 = 0x443a, // 01 000 1000 0111 010 - VAAE1 = 0x443b, // 01 000 1000 0111 011 - ALLE1 = 0x643c, // 01 100 1000 0111 100 - VALE1 = 0x443d, // 01 000 1000 0111 101 - VALE2 = 0x643d, // 01 100 1000 0111 101 - VALE3 = 0x743d, // 01 110 1000 0111 101 - VMALLS12E1 = 0x643e, // 01 100 1000 0111 110 - VAALE1 = 0x443f // 01 000 1000 0111 111 - }; - - struct TLBIMapper : NamedImmMapper { - const static Mapping TLBIPairs[]; - - TLBIMapper(); - }; - - static inline bool NeedsRegister(TLBIValues Val) { - switch (Val) { - case VMALLE1IS: - case ALLE2IS: - case ALLE3IS: - case ALLE1IS: - case VMALLS12E1IS: - case VMALLE1: - case ALLE2: - 
case ALLE3: - case ALLE1: - case VMALLS12E1: - return false; - default: - return true; - } - } -} - -namespace AArch64II { - - enum TOF { - //===--------------------------------------------------------------===// - // AArch64 Specific MachineOperand flags. - - MO_NO_FLAG, - - // MO_GOT - Represents a relocation referring to the GOT entry of a given - // symbol. Used in adrp. - MO_GOT, - - // MO_GOT_LO12 - Represents a relocation referring to the low 12 bits of the - // GOT entry of a given symbol. Used in ldr only. - MO_GOT_LO12, - - // MO_DTPREL_* - Represents a relocation referring to the offset from a - // module's dynamic thread pointer. Used in the local-dynamic TLS access - // model. - MO_DTPREL_G1, - MO_DTPREL_G0_NC, - - // MO_GOTTPREL_* - Represents a relocation referring to a GOT entry - // providing the offset of a variable from the thread-pointer. Used in - // initial-exec TLS model where this offset is assigned in the static thread - // block and thus known by the dynamic linker. - MO_GOTTPREL, - MO_GOTTPREL_LO12, - - // MO_TLSDESC_* - Represents a relocation referring to a GOT entry providing - // a TLS descriptor chosen by the dynamic linker. Used for the - // general-dynamic and local-dynamic TLS access models where very little is - // known at link-time. - MO_TLSDESC, - MO_TLSDESC_LO12, - - // MO_TPREL_* - Represents a relocation referring to the offset of a - // variable from the thread pointer itself. Used in the local-exec TLS - // access model. - MO_TPREL_G1, - MO_TPREL_G0_NC, - - // MO_LO12 - On a symbol operand, this represents a relocation containing - // lower 12 bits of the address. Used in add/sub/ldr/str. - MO_LO12, - - // MO_ABS_G* - Represent the 16-bit granules of an absolute reference using - // movz/movk instructions. - MO_ABS_G3, - MO_ABS_G2_NC, - MO_ABS_G1_NC, - MO_ABS_G0_NC - }; -} - -class APFloat; - -namespace A64Imms { - bool isFPImm(const APFloat &Val, uint32_t &Imm8Bits); - - inline bool isFPImm(const APFloat &Val) { - uint32_t Imm8; - return isFPImm(Val, Imm8); - } - - bool isLogicalImm(unsigned RegWidth, uint64_t Imm, uint32_t &Bits); - bool isLogicalImmBits(unsigned RegWidth, uint32_t Bits, uint64_t &Imm); - - bool isMOVZImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift); - bool isMOVNImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift); - - // We sometimes want to know whether the immediate is representable with a - // MOVN but *not* with a MOVZ (because that would take priority). - bool isOnlyMOVNImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift); - - uint64_t decodeNeonModImm(unsigned Val, unsigned OpCmode, unsigned &EltBits); - bool decodeNeonModShiftImm(unsigned OpCmode, unsigned &ShiftImm, - unsigned &ShiftOnesIn); - } - -} // end namespace llvm; - -#endif diff --git a/llvm/lib/Target/AArch64/Utils/CMakeLists.txt b/llvm/lib/Target/AArch64/Utils/CMakeLists.txt deleted file mode 100644 index 8ee03a7571b..00000000000 --- a/llvm/lib/Target/AArch64/Utils/CMakeLists.txt +++ /dev/null @@ -1,3 +0,0 @@ -add_llvm_library(LLVMAArch64Utils - AArch64BaseInfo.cpp - ) diff --git a/llvm/lib/Target/AArch64/Utils/LLVMBuild.txt b/llvm/lib/Target/AArch64/Utils/LLVMBuild.txt deleted file mode 100644 index 4acecc935e2..00000000000 --- a/llvm/lib/Target/AArch64/Utils/LLVMBuild.txt +++ /dev/null @@ -1,23 +0,0 @@ -;===- ./lib/Target/AArch64/Utils/LLVMBuild.txt ----------------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License.
See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. -; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[component_0] -type = Library -name = AArch64Utils -parent = AArch64 -required_libraries = Support -add_to_library_groups = AArch64 diff --git a/llvm/lib/Target/AArch64/Utils/Makefile b/llvm/lib/Target/AArch64/Utils/Makefile deleted file mode 100644 index 0f4a6452712..00000000000 --- a/llvm/lib/Target/AArch64/Utils/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/AArch64/Utils/Makefile -------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMAArch64Utils - -# Hack: we need to include 'main' AArch64 target directory to grab private headers -#CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common |
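
A note on the encodings above, not taken from the deleted sources: the hexadecimal values in the SysReg* enums (and likewise the A64TLBI values) pack the five operand fields shown in the trailing comments (Op0 Op1 CRn CRm Op2) into a single 16-bit immediate, with Op0 in bits 15:14, Op1 in 13:11, CRn in 10:7, CRm in 6:3 and Op2 in 2:0. A minimal stand-alone C++ sketch of that packing, checked against two constants listed in the diff:

#include <cassert>
#include <cstdint>

// Pack (Op0, Op1, CRn, CRm, Op2) exactly as the enum comments lay the
// fields out: Op0[15:14] Op1[13:11] CRn[10:7] CRm[6:3] Op2[2:0].
constexpr uint16_t packSysReg(unsigned Op0, unsigned Op1, unsigned CRn,
                              unsigned CRm, unsigned Op2) {
  return static_cast<uint16_t>((Op0 << 14) | (Op1 << 11) | (CRn << 7) |
                               (CRm << 3) | Op2);
}

int main() {
  // NZCV is listed as 0xda10 with fields "11 011 0100 0010 000".
  assert(packSysReg(0b11, 0b011, 0b0100, 0b0010, 0b000) == 0xda10);
  // ID_AA64MMFR1_EL1 is listed as 0xc039 with fields "11 000 0000 0111 001".
  assert(packSysReg(0b11, 0b000, 0b0000, 0b0111, 0b001) == 0xc039);
  return 0;
}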
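
Similarly, the MO_ABS_G3 .. MO_ABS_G0_NC operand flags in AArch64II refer to the four 16-bit granules of a 64-bit absolute reference as materialised by a movz/movk sequence. A small illustrative helper, assuming only that granule N means bits [16*N+15 : 16*N] of the value (the helper name is invented for this sketch):

#include <cstdint>
#include <cstdio>

// Return the Nth 16-bit granule (N = 0..3) of a 64-bit constant.
static uint16_t absGranule(uint64_t Value, unsigned N) {
  return static_cast<uint16_t>(Value >> (16 * N));
}

int main() {
  const uint64_t Addr = 0x1122334455667788ULL;
  // A movz/movk sequence would materialise Addr granule by granule:
  //   movz x0, #0x7788              ; G0
  //   movk x0, #0x5566, lsl #16     ; G1
  //   movk x0, #0x3344, lsl #32     ; G2
  //   movk x0, #0x1122, lsl #48     ; G3
  for (unsigned N = 0; N < 4; ++N)
    std::printf("G%u = 0x%04x\n", N, absGranule(Addr, N));
  return 0;
}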
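
Finally, the comment next to A64Imms::isOnlyMOVNImm notes that a value may be representable with MOVN yet rejected because MOVZ would also work and takes priority. The rule of thumb: a value fits MOVZ when, within the register width, at most one 16-bit granule is non-zero, and fits MOVN when the same holds for its bitwise complement. A hedged sketch of that check (these helper names are not from the deleted header):

#include <cstdint>

// True if, within RegWidth bits, at most one 16-bit granule of V is non-zero.
static bool hasAtMostOneGranule(uint64_t V, int RegWidth) {
  if (RegWidth == 32)
    V &= 0xffffffffULL;
  int NonZero = 0;
  for (int Shift = 0; Shift < RegWidth; Shift += 16)
    if ((V >> Shift) & 0xffff)
      ++NonZero;
  return NonZero <= 1;
}

static bool fitsMOVZ(uint64_t V, int RegWidth) {
  return hasAtMostOneGranule(V, RegWidth);
}

static bool fitsMOVN(uint64_t V, int RegWidth) {
  return hasAtMostOneGranule(~V, RegWidth);
}

// Representable with MOVN but *not* with MOVZ (MOVZ takes priority).
static bool fitsOnlyMOVN(uint64_t V, int RegWidth) {
  return fitsMOVN(V, RegWidth) && !fitsMOVZ(V, RegWidth);
}

int main() {
  // 0xffff0000ffffffff fits MOVN (its complement 0x0000ffff00000000 has a
  // single non-zero granule) but not MOVZ, so this returns 0.
  return fitsOnlyMOVN(0xffff0000ffffffffULL, 64) ? 0 : 1;
}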