author | Ulrich Weigand <ulrich.weigand@de.ibm.com> | 2016-11-28 13:34:08 +0000 |
---|---|---|
committer | Ulrich Weigand <ulrich.weigand@de.ibm.com> | 2016-11-28 13:34:08 +0000 |
commit | 524f276c744e15e17e2384f03933390d1957b2dc (patch) | |
tree | 2825ca78c72e71fe14eba2d4fba22746a8ae8363 /llvm/lib/Target | |
parent | 79724fc0ae09671b2e75e6b1345c1e3fe6e49d1d (diff) | |
download | bcm5719-llvm-524f276c744e15e17e2384f03933390d1957b2dc.tar.gz bcm5719-llvm-524f276c744e15e17e2384f03933390d1957b2dc.zip |
[SystemZ] Improve use of conditional instructions
This patch moves formation of LOC-type instructions from (late)
IfConversion to the early if-conversion pass, and in some cases
additionally creates them directly from select instructions
during DAG instruction selection.
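As a hypothetical illustration (not taken from the patch or its tests), this is the shape of source code affected: a simple select that previously compiled to a compare, a conditional branch and a move can now become a compare followed by a single load-on-condition.

```cpp
// Hypothetical example (not from the patch or its tests): the kind of select
// this change targets.  Previously the ternary below was usually lowered to a
// conditional branch over a register move; with this patch, on z196 and later
// the backend can instead emit a compare followed by a single LOCR/LOCGR
// (or LOCHI/LOCGHI when one operand is a small constant).
int pickNonNegative(int Value, int Fallback) {
  return Value >= 0 ? Value : Fallback;
}
```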
To make early if-conversion work, the patch implements the
canInsertSelect / insertSelect callbacks. It also implements
the commuteInstructionImpl and FoldImmediate callbacks to
enable generation of the full range of LOC instructions.
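The following is a minimal sketch of the idea behind those two hooks, using plain structs rather than the real MachineInstr/TargetInstrInfo API; all names and types here are illustrative only.

```cpp
#include <utility>

// Simplified sketch: a constant materialized by LHI/LGHI can be folded into a
// conditional move, turning LOCR/LOCGR into LOCHI/LOCGHI.  If the constant
// feeds the "false" operand, the conditional move is first commuted by
// swapping its operands and inverting the condition-code mask.
struct CondMove {
  unsigned FalseReg, TrueReg;   // operands 1 and 2 of a LOCR-style instruction
  unsigned CCValid, CCMask;     // condition under which TrueReg is selected
  bool HasImm = false;
  int Imm = 0;
};

// Commuting a conditional move swaps its operands and inverts the CC mask
// within the valid bits.
void commuteCondMove(CondMove &MI) {
  std::swap(MI.FalseReg, MI.TrueReg);
  MI.CCMask ^= MI.CCValid;
}

// Fold ImmVal (known to be the value of register DefReg) into the conditional
// move, if DefReg is one of its value operands.
bool foldImmediate(CondMove &MI, unsigned DefReg, int ImmVal) {
  if (MI.TrueReg != DefReg) {
    if (MI.FalseReg != DefReg)
      return false;             // the constant does not feed this instruction
    commuteCondMove(MI);        // move the constant into the "true" slot
  }
  MI.HasImm = true;             // LOCR -> LOCHI in spirit
  MI.Imm = ImmVal;
  return true;
}
```

The real hooks operate on MachineInstr operands and defer the actual operand swap to TargetInstrInfo::commuteInstructionImpl, as shown in SystemZInstrInfo.cpp below.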
Finally, the patch adds support for all instructions of the
load-store-on-condition-2 facility, which makes LOC-type instructions
available for high registers as well.
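A minimal sketch of how the new register-class-agnostic "Mux" pseudos are resolved after register allocation (the enum values and function signatures are illustrative, not LLVM's):

```cpp
// Once register allocation has chosen low (GR32) or high (GRH32) halves,
// low halves keep the original z196 opcodes while high halves use the new
// load/store-on-condition-2 forms.
enum Opcode { LOC, LOCFH, LOCHI, LOCHHI, STOC, STOCFH };

Opcode expandLOCMux(bool IsHighReg)   { return IsHighReg ? LOCFH  : LOC;   }
Opcode expandLOCHIMux(bool IsHighReg) { return IsHighReg ? LOCHHI : LOCHI; }
Opcode expandSTOCMux(bool IsHighReg)  { return IsHighReg ? STOCFH : STOC;  }
```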
Due to the use of the GRX32 register class to enable high registers,
we now also have to handle cases for which no single hardware
instruction exists (a conditional move from a low register to a high
register, or vice versa). These are converted back into a branch sequence
after register allocation. Since the expandPostRAPseudo callback is not
allowed to create new basic blocks, this requires a simple new pass,
modelled on the ARM/AArch64 ExpandPseudo passes.
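The semantics that this fallback expansion has to preserve can be sketched as follows (illustrative only; the actual pass rewrites MachineBasicBlocks rather than values):

```cpp
// When LOCRMux mixes a low and a high register and no single LOCR/LOCFHR
// exists, the pseudo is replaced by:
//
//   current block: branch to "rest" when CC does NOT match, else fall through
//   move block:    dest = src (a plain copy, which may cross low/high halves)
//   rest block:    remainder of the original block
unsigned locrMuxSemantics(bool CCMatches, unsigned Dest, unsigned Src) {
  if (CCMatches)   // the "move" block is only reached when the CC matches
    Dest = Src;
  return Dest;     // otherwise Dest keeps its previous value
}
```

This mirrors the structure of expandLOCRMux in the new SystemZExpandPseudo.cpp below.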
Overall, this patch causes significantly more LOC-type instructions
to be used, and results in a measurable performance improvement.
llvm-svn: 288028
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- | llvm/lib/Target/SystemZ/CMakeLists.txt | 1
-rw-r--r-- | llvm/lib/Target/SystemZ/SystemZ.h | 1
-rw-r--r-- | llvm/lib/Target/SystemZ/SystemZExpandPseudo.cpp | 153
-rw-r--r-- | llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp | 10
-rw-r--r-- | llvm/lib/Target/SystemZ/SystemZISelLowering.cpp | 24
-rw-r--r-- | llvm/lib/Target/SystemZ/SystemZISelLowering.h | 3
-rw-r--r-- | llvm/lib/Target/SystemZ/SystemZInstrFormats.td | 199
-rw-r--r-- | llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp | 227
-rw-r--r-- | llvm/lib/Target/SystemZ/SystemZInstrInfo.h | 30
-rw-r--r-- | llvm/lib/Target/SystemZ/SystemZInstrInfo.td | 86
-rw-r--r-- | llvm/lib/Target/SystemZ/SystemZScheduleZ13.td | 12
-rw-r--r-- | llvm/lib/Target/SystemZ/SystemZScheduleZ196.td | 3
-rw-r--r-- | llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td | 3
-rw-r--r-- | llvm/lib/Target/SystemZ/SystemZSubtarget.h | 3
-rw-r--r-- | llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp | 8
15 files changed, 616 insertions, 147 deletions
diff --git a/llvm/lib/Target/SystemZ/CMakeLists.txt b/llvm/lib/Target/SystemZ/CMakeLists.txt index 939a2dd05cd..138e14a25b7 100644 --- a/llvm/lib/Target/SystemZ/CMakeLists.txt +++ b/llvm/lib/Target/SystemZ/CMakeLists.txt @@ -16,6 +16,7 @@ add_llvm_target(SystemZCodeGen SystemZCallingConv.cpp SystemZConstantPoolValue.cpp SystemZElimCompare.cpp + SystemZExpandPseudo.cpp SystemZFrameLowering.cpp SystemZHazardRecognizer.cpp SystemZISelDAGToDAG.cpp diff --git a/llvm/lib/Target/SystemZ/SystemZ.h b/llvm/lib/Target/SystemZ/SystemZ.h index c8ea9641fb6..9a8e508e411 100644 --- a/llvm/lib/Target/SystemZ/SystemZ.h +++ b/llvm/lib/Target/SystemZ/SystemZ.h @@ -175,6 +175,7 @@ static inline bool isImmHF(uint64_t Val) { FunctionPass *createSystemZISelDag(SystemZTargetMachine &TM, CodeGenOpt::Level OptLevel); FunctionPass *createSystemZElimComparePass(SystemZTargetMachine &TM); +FunctionPass *createSystemZExpandPseudoPass(SystemZTargetMachine &TM); FunctionPass *createSystemZShortenInstPass(SystemZTargetMachine &TM); FunctionPass *createSystemZLongBranchPass(SystemZTargetMachine &TM); FunctionPass *createSystemZLDCleanupPass(SystemZTargetMachine &TM); diff --git a/llvm/lib/Target/SystemZ/SystemZExpandPseudo.cpp b/llvm/lib/Target/SystemZ/SystemZExpandPseudo.cpp new file mode 100644 index 00000000000..92ce8089c24 --- /dev/null +++ b/llvm/lib/Target/SystemZ/SystemZExpandPseudo.cpp @@ -0,0 +1,153 @@ +//==-- SystemZExpandPseudo.cpp - Expand pseudo instructions -------*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a pass that expands pseudo instructions into target +// instructions to allow proper scheduling and other late optimizations. This +// pass should be run after register allocation but before the post-regalloc +// scheduling pass. +// +//===----------------------------------------------------------------------===// + +#include "SystemZ.h" +#include "SystemZInstrInfo.h" +#include "SystemZSubtarget.h" +#include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +using namespace llvm; + +#define SYSTEMZ_EXPAND_PSEUDO_NAME "SystemZ pseudo instruction expansion pass" + +namespace llvm { + void initializeSystemZExpandPseudoPass(PassRegistry&); +} + +namespace { +class SystemZExpandPseudo : public MachineFunctionPass { +public: + static char ID; + SystemZExpandPseudo() : MachineFunctionPass(ID) { + initializeSystemZExpandPseudoPass(*PassRegistry::getPassRegistry()); + } + + const SystemZInstrInfo *TII; + + bool runOnMachineFunction(MachineFunction &Fn) override; + + StringRef getPassName() const override { return SYSTEMZ_EXPAND_PSEUDO_NAME; } + +private: + bool expandMBB(MachineBasicBlock &MBB); + bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + bool expandLOCRMux(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); +}; +char SystemZExpandPseudo::ID = 0; +} + +INITIALIZE_PASS(SystemZExpandPseudo, "systemz-expand-pseudo", + SYSTEMZ_EXPAND_PSEUDO_NAME, false, false) + +/// \brief Returns an instance of the pseudo instruction expansion pass. 
+FunctionPass *llvm::createSystemZExpandPseudoPass(SystemZTargetMachine &TM) { + return new SystemZExpandPseudo(); +} + +// MI is a load-register-on-condition pseudo instruction that could not be +// handled as a single hardware instruction. Replace it by a branch sequence. +bool SystemZExpandPseudo::expandLOCRMux(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + MachineFunction &MF = *MBB.getParent(); + const BasicBlock *BB = MBB.getBasicBlock(); + MachineInstr &MI = *MBBI; + DebugLoc DL = MI.getDebugLoc(); + unsigned DestReg = MI.getOperand(0).getReg(); + unsigned SrcReg = MI.getOperand(2).getReg(); + unsigned CCValid = MI.getOperand(3).getImm(); + unsigned CCMask = MI.getOperand(4).getImm(); + + LivePhysRegs LiveRegs(&TII->getRegisterInfo()); + LiveRegs.addLiveOuts(MBB); + for (auto I = std::prev(MBB.end()); I != MBBI; --I) + LiveRegs.stepBackward(*I); + + // Splice MBB at MI, moving the rest of the block into RestMBB. + MachineBasicBlock *RestMBB = MF.CreateMachineBasicBlock(BB); + MF.insert(std::next(MachineFunction::iterator(MBB)), RestMBB); + RestMBB->splice(RestMBB->begin(), &MBB, MI, MBB.end()); + RestMBB->transferSuccessors(&MBB); + for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I) + RestMBB->addLiveIn(*I); + + // Create a new block MoveMBB to hold the move instruction. + MachineBasicBlock *MoveMBB = MF.CreateMachineBasicBlock(BB); + MF.insert(std::next(MachineFunction::iterator(MBB)), MoveMBB); + MoveMBB->addLiveIn(SrcReg); + for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I) + MoveMBB->addLiveIn(*I); + + // At the end of MBB, create a conditional branch to RestMBB if the + // condition is false, otherwise fall through to MoveMBB. + BuildMI(&MBB, DL, TII->get(SystemZ::BRC)) + .addImm(CCValid).addImm(CCMask ^ CCValid).addMBB(RestMBB); + MBB.addSuccessor(RestMBB); + MBB.addSuccessor(MoveMBB); + + // In MoveMBB, emit an instruction to move SrcReg into DestReg, + // then fall through to RestMBB. + TII->copyPhysReg(*MoveMBB, MoveMBB->end(), DL, DestReg, SrcReg, + MI.getOperand(2).isKill()); + MoveMBB->addSuccessor(RestMBB); + + NextMBBI = MBB.end(); + MI.eraseFromParent(); + return true; +} + +/// \brief If MBBI references a pseudo instruction that should be expanded here, +/// do the expansion and return true. Otherwise return false. +bool SystemZExpandPseudo::expandMI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + MachineInstr &MI = *MBBI; + switch (MI.getOpcode()) { + case SystemZ::LOCRMux: + return expandLOCRMux(MBB, MBBI, NextMBBI); + default: + break; + } + return false; +} + +/// \brief Iterate over the instructions in basic block MBB and expand any +/// pseudo instructions. Return true if anything was modified. 
+bool SystemZExpandPseudo::expandMBB(MachineBasicBlock &MBB) { + bool Modified = false; + + MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + while (MBBI != E) { + MachineBasicBlock::iterator NMBBI = std::next(MBBI); + Modified |= expandMI(MBB, MBBI, NMBBI); + MBBI = NMBBI; + } + + return Modified; +} + +bool SystemZExpandPseudo::runOnMachineFunction(MachineFunction &MF) { + TII = static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo()); + + bool Modified = false; + for (auto &MBB : MF) + Modified |= expandMBB(MBB); + return Modified; +} + diff --git a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index f23a3e27ec3..920b6e430e8 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -1296,8 +1296,14 @@ void SystemZDAGToDAGISel::Select(SDNode *Node) { SDValue Op0 = Node->getOperand(0); SDValue Op1 = Node->getOperand(1); // Prefer to put any load first, so that it can be matched as a - // conditional load. - if (Op1.getOpcode() == ISD::LOAD && Op0.getOpcode() != ISD::LOAD) { + // conditional load. Likewise for constants in range for LOCHI. + if ((Op1.getOpcode() == ISD::LOAD && Op0.getOpcode() != ISD::LOAD) || + (Subtarget->hasLoadStoreOnCond2() && + Node->getValueType(0).isInteger() && + Op1.getOpcode() == ISD::Constant && + isInt<16>(cast<ConstantSDNode>(Op1)->getSExtValue()) && + !(Op0.getOpcode() == ISD::Constant && + isInt<16>(cast<ConstantSDNode>(Op0)->getSExtValue())))) { SDValue CCValid = Node->getOperand(2); SDValue CCMask = Node->getOperand(3); uint64_t ConstCCValid = diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 2ddee39754c..2081809def7 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -5224,7 +5224,8 @@ static unsigned forceReg(MachineInstr &MI, MachineOperand &Base, // Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI. MachineBasicBlock * SystemZTargetLowering::emitSelect(MachineInstr &MI, - MachineBasicBlock *MBB) const { + MachineBasicBlock *MBB, + unsigned LOCROpcode) const { const SystemZInstrInfo *TII = static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo()); @@ -5235,6 +5236,15 @@ SystemZTargetLowering::emitSelect(MachineInstr &MI, unsigned CCMask = MI.getOperand(4).getImm(); DebugLoc DL = MI.getDebugLoc(); + // Use LOCROpcode if possible. + if (LOCROpcode && Subtarget.hasLoadStoreOnCond()) { + BuildMI(*MBB, MI, DL, TII->get(LOCROpcode), DestReg) + .addReg(FalseReg).addReg(TrueReg) + .addImm(CCValid).addImm(CCMask); + MI.eraseFromParent(); + return MBB; + } + MachineBasicBlock *StartMBB = MBB; MachineBasicBlock *JoinMBB = splitBlockBefore(MI, MBB); MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB); @@ -6020,12 +6030,16 @@ MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter( MachineInstr &MI, MachineBasicBlock *MBB) const { switch (MI.getOpcode()) { case SystemZ::Select32Mux: + return emitSelect(MI, MBB, + Subtarget.hasLoadStoreOnCond2()? 
SystemZ::LOCRMux : 0); case SystemZ::Select32: - case SystemZ::SelectF32: + return emitSelect(MI, MBB, SystemZ::LOCR); case SystemZ::Select64: + return emitSelect(MI, MBB, SystemZ::LOCGR); + case SystemZ::SelectF32: case SystemZ::SelectF64: case SystemZ::SelectF128: - return emitSelect(MI, MBB); + return emitSelect(MI, MBB, 0); case SystemZ::CondStore8Mux: return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false); @@ -6035,6 +6049,10 @@ MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter( return emitCondStore(MI, MBB, SystemZ::STHMux, 0, false); case SystemZ::CondStore16MuxInv: return emitCondStore(MI, MBB, SystemZ::STHMux, 0, true); + case SystemZ::CondStore32Mux: + return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, false); + case SystemZ::CondStore32MuxInv: + return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, true); case SystemZ::CondStore8: return emitCondStore(MI, MBB, SystemZ::STC, 0, false); case SystemZ::CondStore8Inv: diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h index 84c25975783..7a21a474c11 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h @@ -561,7 +561,8 @@ private: MachineBasicBlock *Target) const; // Implement EmitInstrWithCustomInserter for individual operation types. - MachineBasicBlock *emitSelect(MachineInstr &MI, MachineBasicBlock *BB) const; + MachineBasicBlock *emitSelect(MachineInstr &MI, MachineBasicBlock *BB, + unsigned LOCROpcode) const; MachineBasicBlock *emitCondStore(MachineInstr &MI, MachineBasicBlock *BB, unsigned StoreOpcode, unsigned STOCOpcode, bool Invert) const; diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td index 94d9c72bbb9..7a3aa40fc9d 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td @@ -2201,77 +2201,6 @@ class UnaryRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator, let OpType = "reg"; } -// These instructions are generated by if conversion. The old value of R1 -// is added as an implicit use. -class CondUnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1, - RegisterOperand cls2> - : InstRRFc<opcode, (outs cls1:$R1), (ins cls2:$R2, cond4:$valid, cond4:$M3), - mnemonic#"$M3\t$R1, $R2", []> { - let CCMaskLast = 1; -} - -class CondUnaryRIE<string mnemonic, bits<16> opcode, RegisterOperand cls, - Immediate imm> - : InstRIEg<opcode, (outs cls:$R1), (ins imm:$I2, cond4:$valid, cond4:$M3), - mnemonic#"$M3\t$R1, $I2", []> { - let CCMaskLast = 1; -} - -// Like CondUnaryRRF, but used for the raw assembly form. The condition-code -// mask is the third operand rather than being part of the mnemonic. -class AsmCondUnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1, - RegisterOperand cls2> - : InstRRFc<opcode, (outs cls1:$R1), - (ins cls1:$R1src, cls2:$R2, imm32zx4:$M3), - mnemonic#"\t$R1, $R2, $M3", []> { - let Constraints = "$R1 = $R1src"; - let DisableEncoding = "$R1src"; -} - -class AsmCondUnaryRIE<string mnemonic, bits<16> opcode, RegisterOperand cls, - Immediate imm> - : InstRIEg<opcode, (outs cls:$R1), - (ins cls:$R1src, imm:$I2, imm32zx4:$M3), - mnemonic#"\t$R1, $I2, $M3", []> { - let Constraints = "$R1 = $R1src"; - let DisableEncoding = "$R1src"; -} - -// Like CondUnaryRRF, but with a fixed CC mask. 
-class FixedCondUnaryRRF<CondVariant V, string mnemonic, bits<16> opcode, - RegisterOperand cls1, RegisterOperand cls2> - : InstRRFc<opcode, (outs cls1:$R1), (ins cls1:$R1src, cls2:$R2), - mnemonic#V.suffix#"\t$R1, $R2", []> { - let Constraints = "$R1 = $R1src"; - let DisableEncoding = "$R1src"; - let isAsmParserOnly = V.alternate; - let M3 = V.ccmask; -} - -class FixedCondUnaryRIE<CondVariant V, string mnemonic, bits<16> opcode, - RegisterOperand cls, Immediate imm> - : InstRIEg<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2), - mnemonic#V.suffix#"\t$R1, $I2", []> { - let Constraints = "$R1 = $R1src"; - let DisableEncoding = "$R1src"; - let isAsmParserOnly = V.alternate; - let M3 = V.ccmask; -} - -multiclass CondUnaryRRFPair<string mnemonic, bits<16> opcode, - RegisterOperand cls1, RegisterOperand cls2> { - let isCodeGenOnly = 1 in - def "" : CondUnaryRRF<mnemonic, opcode, cls1, cls2>; - def Asm : AsmCondUnaryRRF<mnemonic, opcode, cls1, cls2>; -} - -multiclass CondUnaryRIEPair<string mnemonic, bits<16> opcode, - RegisterOperand cls, Immediate imm> { - let isCodeGenOnly = 1 in - def "" : CondUnaryRIE<mnemonic, opcode, cls, imm>; - def Asm : AsmCondUnaryRIE<mnemonic, opcode, cls, imm>; -} - class UnaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator, RegisterOperand cls, Immediate imm> : InstRIa<opcode, (outs cls:$R1), (ins imm:$I2), @@ -2578,6 +2507,45 @@ class BinaryRRFe<string mnemonic, bits<16> opcode, RegisterOperand cls1, let M4 = 0; } +class CondBinaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1, + RegisterOperand cls2> + : InstRRFc<opcode, (outs cls1:$R1), + (ins cls1:$R1src, cls2:$R2, cond4:$valid, cond4:$M3), + mnemonic#"$M3\t$R1, $R2", []> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; + let CCMaskLast = 1; +} + +// Like CondBinaryRRF, but used for the raw assembly form. The condition-code +// mask is the third operand rather than being part of the mnemonic. +class AsmCondBinaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1, + RegisterOperand cls2> + : InstRRFc<opcode, (outs cls1:$R1), + (ins cls1:$R1src, cls2:$R2, imm32zx4:$M3), + mnemonic#"\t$R1, $R2, $M3", []> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; +} + +// Like CondBinaryRRF, but with a fixed CC mask. 
+class FixedCondBinaryRRF<CondVariant V, string mnemonic, bits<16> opcode, + RegisterOperand cls1, RegisterOperand cls2> + : InstRRFc<opcode, (outs cls1:$R1), (ins cls1:$R1src, cls2:$R2), + mnemonic#V.suffix#"\t$R1, $R2", []> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; + let isAsmParserOnly = V.alternate; + let M3 = V.ccmask; +} + +multiclass CondBinaryRRFPair<string mnemonic, bits<16> opcode, + RegisterOperand cls1, RegisterOperand cls2> { + let isCodeGenOnly = 1 in + def "" : CondBinaryRRF<mnemonic, opcode, cls1, cls2>; + def Asm : AsmCondBinaryRRF<mnemonic, opcode, cls1, cls2>; +} + class BinaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator, RegisterOperand cls, Immediate imm> : InstRIa<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2), @@ -2605,6 +2573,47 @@ multiclass BinaryRIAndK<string mnemonic, bits<12> opcode1, bits<16> opcode2, } } +class CondBinaryRIE<string mnemonic, bits<16> opcode, RegisterOperand cls, + Immediate imm> + : InstRIEg<opcode, (outs cls:$R1), + (ins cls:$R1src, imm:$I2, cond4:$valid, cond4:$M3), + mnemonic#"$M3\t$R1, $I2", + [(set cls:$R1, (z_select_ccmask imm:$I2, cls:$R1src, + cond4:$valid, cond4:$M3))]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; + let CCMaskLast = 1; +} + +// Like CondBinaryRIE, but used for the raw assembly form. The condition-code +// mask is the third operand rather than being part of the mnemonic. +class AsmCondBinaryRIE<string mnemonic, bits<16> opcode, RegisterOperand cls, + Immediate imm> + : InstRIEg<opcode, (outs cls:$R1), + (ins cls:$R1src, imm:$I2, imm32zx4:$M3), + mnemonic#"\t$R1, $I2, $M3", []> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; +} + +// Like CondBinaryRIE, but with a fixed CC mask. +class FixedCondBinaryRIE<CondVariant V, string mnemonic, bits<16> opcode, + RegisterOperand cls, Immediate imm> + : InstRIEg<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2), + mnemonic#V.suffix#"\t$R1, $I2", []> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; + let isAsmParserOnly = V.alternate; + let M3 = V.ccmask; +} + +multiclass CondBinaryRIEPair<string mnemonic, bits<16> opcode, + RegisterOperand cls, Immediate imm> { + let isCodeGenOnly = 1 in + def "" : CondBinaryRIE<mnemonic, opcode, cls, imm>; + def Asm : AsmCondBinaryRIE<mnemonic, opcode, cls, imm>; +} + class BinaryRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator, RegisterOperand cls, Immediate imm> : InstRILa<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2), @@ -3644,6 +3653,54 @@ class CompareRXYPseudo<SDPatternOperator operator, RegisterOperand cls, let AccessBytes = bytes; } +// Like CondBinaryRRF, but expanded after RA depending on the choice of +// register. +class CondBinaryRRFPseudo<RegisterOperand cls1, RegisterOperand cls2> + : Pseudo<(outs cls1:$R1), + (ins cls1:$R1src, cls2:$R2, cond4:$valid, cond4:$M3), []> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; + let CCMaskLast = 1; +} + +// Like CondBinaryRIE, but expanded after RA depending on the choice of +// register. +class CondBinaryRIEPseudo<RegisterOperand cls, Immediate imm> + : Pseudo<(outs cls:$R1), + (ins cls:$R1src, imm:$I2, cond4:$valid, cond4:$M3), + [(set cls:$R1, (z_select_ccmask imm:$I2, cls:$R1src, + cond4:$valid, cond4:$M3))]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; + let CCMaskLast = 1; +} + +// Like CondUnaryRSY, but expanded after RA depending on the choice of +// register. 
+class CondUnaryRSYPseudo<SDPatternOperator operator, RegisterOperand cls, + bits<5> bytes, AddressingMode mode = bdaddr20only> + : Pseudo<(outs cls:$R1), + (ins cls:$R1src, mode:$BD2, cond4:$valid, cond4:$R3), + [(set cls:$R1, + (z_select_ccmask (operator mode:$BD2), cls:$R1src, + cond4:$valid, cond4:$R3))]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; + let mayLoad = 1; + let AccessBytes = bytes; + let CCMaskLast = 1; +} + +// Like CondStoreRSY, but expanded after RA depending on the choice of +// register. +class CondStoreRSYPseudo<RegisterOperand cls, bits<5> bytes, + AddressingMode mode = bdaddr20only> + : Pseudo<(outs), (ins cls:$R1, mode:$BD2, cond4:$valid, cond4:$R3), []> { + let mayStore = 1; + let AccessBytes = bytes; + let CCMaskLast = 1; +} + // Like StoreRXY, but expanded after RA depending on the choice of register. class StoreRXYPseudo<SDPatternOperator operator, RegisterOperand cls, bits<5> bytes, AddressingMode mode = bdxaddr20only> diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp index 7374083e6e6..8f1b5575902 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -149,6 +149,37 @@ void SystemZInstrInfo::expandRXYPseudo(MachineInstr &MI, unsigned LowOpcode, MI.setDesc(get(Opcode)); } +// MI is a load-on-condition pseudo instruction with a single register +// (source or destination) operand. Replace it with LowOpcode if the +// register is a low GR32 and HighOpcode if the register is a high GR32. +void SystemZInstrInfo::expandLOCPseudo(MachineInstr &MI, unsigned LowOpcode, + unsigned HighOpcode) const { + unsigned Reg = MI.getOperand(0).getReg(); + unsigned Opcode = isHighReg(Reg) ? HighOpcode : LowOpcode; + MI.setDesc(get(Opcode)); +} + +// MI is a load-register-on-condition pseudo instruction. Replace it with +// LowOpcode if source and destination are both low GR32s and HighOpcode if +// source and destination are both high GR32s. +void SystemZInstrInfo::expandLOCRPseudo(MachineInstr &MI, unsigned LowOpcode, + unsigned HighOpcode) const { + unsigned DestReg = MI.getOperand(0).getReg(); + unsigned SrcReg = MI.getOperand(2).getReg(); + bool DestIsHigh = isHighReg(DestReg); + bool SrcIsHigh = isHighReg(SrcReg); + + if (!DestIsHigh && !SrcIsHigh) + MI.setDesc(get(LowOpcode)); + else if (DestIsHigh && SrcIsHigh) + MI.setDesc(get(HighOpcode)); + + // If we were unable to implement the pseudo with a single instruction, we + // need to convert it back into a branch sequence. This cannot be done here + // since the caller of expandPostRAPseudo does not handle changes to the CFG + // correctly. This change is defered to the SystemZExpandPseudo pass. +} + // MI is an RR-style pseudo instruction that zero-extends the low Size bits // of one GRX32 into another. Replace it with LowOpcode if both operands // are low registers, otherwise use RISB[LH]G. 
@@ -222,6 +253,36 @@ void SystemZInstrInfo::emitGRX32Move(MachineBasicBlock &MBB, .addImm(32 - Size).addImm(128 + 31).addImm(Rotate); } + +MachineInstr *SystemZInstrInfo::commuteInstructionImpl(MachineInstr &MI, + bool NewMI, + unsigned OpIdx1, + unsigned OpIdx2) const { + auto cloneIfNew = [NewMI](MachineInstr &MI) -> MachineInstr & { + if (NewMI) + return *MI.getParent()->getParent()->CloneMachineInstr(&MI); + return MI; + }; + + switch (MI.getOpcode()) { + case SystemZ::LOCRMux: + case SystemZ::LOCFHR: + case SystemZ::LOCR: + case SystemZ::LOCGR: { + auto &WorkingMI = cloneIfNew(MI); + // Invert condition. + unsigned CCValid = WorkingMI.getOperand(3).getImm(); + unsigned CCMask = WorkingMI.getOperand(4).getImm(); + WorkingMI.getOperand(4).setImm(CCMask ^ CCValid); + return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, + OpIdx1, OpIdx2); + } + default: + return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); + } +} + + // If MI is a simple load or store for a frame object, return the register // it loads or stores and set FrameIndex to the index of the frame object. // Return 0 otherwise. @@ -525,30 +586,128 @@ bool SystemZInstrInfo::optimizeCompareInstr( removeIPMBasedCompare(Compare, SrcReg, MRI, &RI); } -// If Opcode is a move that has a conditional variant, return that variant, -// otherwise return 0. -static unsigned getConditionalMove(unsigned Opcode) { - switch (Opcode) { - case SystemZ::LR: return SystemZ::LOCR; - case SystemZ::LGR: return SystemZ::LOCGR; - default: return 0; + +bool SystemZInstrInfo::canInsertSelect(const MachineBasicBlock &MBB, + ArrayRef<MachineOperand> Pred, + unsigned TrueReg, unsigned FalseReg, + int &CondCycles, int &TrueCycles, + int &FalseCycles) const { + // Not all subtargets have LOCR instructions. + if (!STI.hasLoadStoreOnCond()) + return false; + if (Pred.size() != 2) + return false; + + // Check register classes. + const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + const TargetRegisterClass *RC = + RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg)); + if (!RC) + return false; + + // We have LOCR instructions for 32 and 64 bit general purpose registers. + if ((STI.hasLoadStoreOnCond2() && + SystemZ::GRX32BitRegClass.hasSubClassEq(RC)) || + SystemZ::GR32BitRegClass.hasSubClassEq(RC) || + SystemZ::GR64BitRegClass.hasSubClassEq(RC)) { + CondCycles = 2; + TrueCycles = 2; + FalseCycles = 2; + return true; } + + // Can't do anything else. 
+ return false; } -static unsigned getConditionalLoadImmediate(unsigned Opcode) { - switch (Opcode) { - case SystemZ::LHI: return SystemZ::LOCHI; - case SystemZ::LGHI: return SystemZ::LOCGHI; - default: return 0; +void SystemZInstrInfo::insertSelect(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + const DebugLoc &DL, unsigned DstReg, + ArrayRef<MachineOperand> Pred, + unsigned TrueReg, + unsigned FalseReg) const { + MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + const TargetRegisterClass *RC = MRI.getRegClass(DstReg); + + assert(Pred.size() == 2 && "Invalid condition"); + unsigned CCValid = Pred[0].getImm(); + unsigned CCMask = Pred[1].getImm(); + + unsigned Opc; + if (SystemZ::GRX32BitRegClass.hasSubClassEq(RC)) { + if (STI.hasLoadStoreOnCond2()) + Opc = SystemZ::LOCRMux; + else { + Opc = SystemZ::LOCR; + MRI.constrainRegClass(DstReg, &SystemZ::GR32BitRegClass); + } + } else if (SystemZ::GR64BitRegClass.hasSubClassEq(RC)) + Opc = SystemZ::LOCGR; + else + llvm_unreachable("Invalid register class"); + + BuildMI(MBB, I, DL, get(Opc), DstReg) + .addReg(FalseReg).addReg(TrueReg) + .addImm(CCValid).addImm(CCMask); +} + +bool SystemZInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, + unsigned Reg, + MachineRegisterInfo *MRI) const { + unsigned DefOpc = DefMI.getOpcode(); + if (DefOpc != SystemZ::LHIMux && DefOpc != SystemZ::LHI && + DefOpc != SystemZ::LGHI) + return false; + if (DefMI.getOperand(0).getReg() != Reg) + return false; + int32_t ImmVal = (int32_t)DefMI.getOperand(1).getImm(); + + unsigned UseOpc = UseMI.getOpcode(); + unsigned NewUseOpc; + unsigned UseIdx; + int CommuteIdx = -1; + switch (UseOpc) { + case SystemZ::LOCRMux: + if (!STI.hasLoadStoreOnCond2()) + return false; + NewUseOpc = SystemZ::LOCHIMux; + if (UseMI.getOperand(2).getReg() == Reg) + UseIdx = 2; + else if (UseMI.getOperand(1).getReg() == Reg) + UseIdx = 2, CommuteIdx = 1; + else + return false; + break; + case SystemZ::LOCGR: + if (!STI.hasLoadStoreOnCond2()) + return false; + NewUseOpc = SystemZ::LOCGHI; + if (UseMI.getOperand(2).getReg() == Reg) + UseIdx = 2; + else if (UseMI.getOperand(1).getReg() == Reg) + UseIdx = 2, CommuteIdx = 1; + else + return false; + break; + default: + return false; } + + if (CommuteIdx != -1) + if (!commuteInstruction(UseMI, false, CommuteIdx, UseIdx)) + return false; + + bool DeleteDef = MRI->hasOneNonDBGUse(Reg); + UseMI.setDesc(get(NewUseOpc)); + UseMI.getOperand(UseIdx).ChangeToImmediate(ImmVal); + if (DeleteDef) + DefMI.eraseFromParent(); + + return true; } bool SystemZInstrInfo::isPredicable(MachineInstr &MI) const { unsigned Opcode = MI.getOpcode(); - if (STI.hasLoadStoreOnCond() && getConditionalMove(Opcode)) - return true; - if (STI.hasLoadStoreOnCond2() && getConditionalLoadImmediate(Opcode)) - return true; if (Opcode == SystemZ::Return || Opcode == SystemZ::Trap || Opcode == SystemZ::CallJG || @@ -600,26 +759,6 @@ bool SystemZInstrInfo::PredicateInstruction( unsigned CCMask = Pred[1].getImm(); assert(CCMask > 0 && CCMask < 15 && "Invalid predicate"); unsigned Opcode = MI.getOpcode(); - if (STI.hasLoadStoreOnCond()) { - if (unsigned CondOpcode = getConditionalMove(Opcode)) { - MI.setDesc(get(CondOpcode)); - MachineInstrBuilder(*MI.getParent()->getParent(), MI) - .addImm(CCValid) - .addImm(CCMask) - .addReg(SystemZ::CC, RegState::Implicit); - return true; - } - } - if (STI.hasLoadStoreOnCond2()) { - if (unsigned CondOpcode = getConditionalLoadImmediate(Opcode)) { - MI.setDesc(get(CondOpcode)); - 
MachineInstrBuilder(*MI.getParent()->getParent(), MI) - .addImm(CCValid) - .addImm(CCMask) - .addReg(SystemZ::CC, RegState::Implicit); - return true; - } - } if (Opcode == SystemZ::Trap) { MI.setDesc(get(SystemZ::CondTrap)); MachineInstrBuilder(*MI.getParent()->getParent(), MI) @@ -1090,6 +1229,18 @@ bool SystemZInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { expandRXYPseudo(MI, SystemZ::L, SystemZ::LFH); return true; + case SystemZ::LOCMux: + expandLOCPseudo(MI, SystemZ::LOC, SystemZ::LOCFH); + return true; + + case SystemZ::LOCHIMux: + expandLOCPseudo(MI, SystemZ::LOCHI, SystemZ::LOCHHI); + return true; + + case SystemZ::LOCRMux: + expandLOCRPseudo(MI, SystemZ::LOCR, SystemZ::LOCFHR); + return true; + case SystemZ::STCMux: expandRXYPseudo(MI, SystemZ::STC, SystemZ::STCH); return true; @@ -1102,6 +1253,10 @@ bool SystemZInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { expandRXYPseudo(MI, SystemZ::ST, SystemZ::STFH); return true; + case SystemZ::STOCMux: + expandLOCPseudo(MI, SystemZ::STOC, SystemZ::STOCFH); + return true; + case SystemZ::LHIMux: expandRIPseudo(MI, SystemZ::LHI, SystemZ::IIHF, true); return true; diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h index e403963ff63..aadeb6b9f1d 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h @@ -142,6 +142,10 @@ class SystemZInstrInfo : public SystemZGenInstrInfo { unsigned LowOpcodeK, unsigned HighOpcode) const; void expandRXYPseudo(MachineInstr &MI, unsigned LowOpcode, unsigned HighOpcode) const; + void expandLOCPseudo(MachineInstr &MI, unsigned LowOpcode, + unsigned HighOpcode) const; + void expandLOCRPseudo(MachineInstr &MI, unsigned LowOpcode, + unsigned HighOpcode) const; void expandZExtPseudo(MachineInstr &MI, unsigned LowOpcode, unsigned Size) const; void expandLoadStackGuard(MachineInstr *MI) const; @@ -149,7 +153,23 @@ class SystemZInstrInfo : public SystemZGenInstrInfo { const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, unsigned LowLowOpcode, unsigned Size, bool KillSrc) const; virtual void anchor(); - + +protected: + /// Commutes the operands in the given instruction by changing the operands + /// order and/or changing the instruction's opcode and/or the immediate value + /// operand. + /// + /// The arguments 'CommuteOpIdx1' and 'CommuteOpIdx2' specify the operands + /// to be commuted. + /// + /// Do not call this method for a non-commutable instruction or + /// non-commutable operands. + /// Even though the instruction is commutable, the method may still + /// fail to commute the operands, null pointer is returned in such cases. 
+ MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI, + unsigned CommuteOpIdx1, + unsigned CommuteOpIdx2) const override; + public: explicit SystemZInstrInfo(SystemZSubtarget &STI); @@ -175,6 +195,14 @@ public: bool optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int Mask, int Value, const MachineRegisterInfo *MRI) const override; + bool canInsertSelect(const MachineBasicBlock&, ArrayRef<MachineOperand> Cond, + unsigned, unsigned, int&, int&, int&) const override; + void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + const DebugLoc &DL, unsigned DstReg, + ArrayRef<MachineOperand> Cond, unsigned TrueReg, + unsigned FalseReg) const override; + bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, unsigned Reg, + MachineRegisterInfo *MRI) const override; bool isPredicable(MachineInstr &MI) const override; bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, unsigned ExtraPredCycles, diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td index 105eb87884e..28c8557c78d 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td @@ -305,14 +305,17 @@ def Select32Mux : SelectWrapper<GRX32>, Requires<[FeatureHighWord]>; def Select32 : SelectWrapper<GR32>; def Select64 : SelectWrapper<GR64>; -// We don't define 32-bit Mux stores because the low-only STOC should -// always be used if possible. +// We don't define 32-bit Mux stores if we don't have STOCFH, because the +// low-only STOC should then always be used if possible. defm CondStore8Mux : CondStores<GRX32, nonvolatile_truncstorei8, nonvolatile_anyextloadi8, bdxaddr20only>, Requires<[FeatureHighWord]>; defm CondStore16Mux : CondStores<GRX32, nonvolatile_truncstorei16, nonvolatile_anyextloadi16, bdxaddr20only>, Requires<[FeatureHighWord]>; +defm CondStore32Mux : CondStores<GRX32, nonvolatile_store, + nonvolatile_load, bdxaddr20only>, + Requires<[FeatureLoadStoreOnCond2]>; defm CondStore8 : CondStores<GR32, nonvolatile_truncstorei8, nonvolatile_anyextloadi8, bdxaddr20only>; defm CondStore16 : CondStores<GR32, nonvolatile_truncstorei16, @@ -446,24 +449,57 @@ let mayLoad = 1, mayStore = 1, Defs = [CC] in //===----------------------------------------------------------------------===// let Predicates = [FeatureLoadStoreOnCond2], Uses = [CC] in { - // Load immediate on condition. Created by if-conversion. - defm LOCHI : CondUnaryRIEPair<"lochi", 0xEC42, GR32, imm32sx16>; - defm LOCGHI : CondUnaryRIEPair<"locghi", 0xEC46, GR64, imm64sx16>; + // Load immediate on condition. Matched via DAG pattern and created + // by the PeepholeOptimizer via FoldImmediate. + let hasSideEffects = 0 in { + // Expands to LOCHI or LOCHHI, depending on the choice of register. + def LOCHIMux : CondBinaryRIEPseudo<GRX32, imm32sx16>; + defm LOCHHI : CondBinaryRIEPair<"lochhi", 0xEC4E, GRH32, imm32sx16>; + defm LOCHI : CondBinaryRIEPair<"lochi", 0xEC42, GR32, imm32sx16>; + defm LOCGHI : CondBinaryRIEPair<"locghi", 0xEC46, GR64, imm64sx16>; + } + + // Move register on condition. Expanded from Select* pseudos and + // created by early if-conversion. + let hasSideEffects = 0, isCommutable = 1 in { + // Expands to LOCR or LOCFHR or a branch-and-move sequence, + // depending on the choice of registers. + def LOCRMux : CondBinaryRRFPseudo<GRX32, GRX32>; + defm LOCFHR : CondBinaryRRFPair<"locfhr", 0xB9E0, GRH32, GRH32>; + } + + // Load on condition. Matched via DAG pattern. 
+ // Expands to LOC or LOCFH, depending on the choice of register. + def LOCMux : CondUnaryRSYPseudo<nonvolatile_load, GRX32, 4>; + defm LOCFH : CondUnaryRSYPair<"locfh", 0xEBE0, nonvolatile_load, GRH32, 4>; + + // Store on condition. Expanded from CondStore* pseudos. + // Expands to STOC or STOCFH, depending on the choice of register. + def STOCMux : CondStoreRSYPseudo<GRX32, 4>; + defm STOCFH : CondStoreRSYPair<"stocfh", 0xEBE1, GRH32, 4>; // Define AsmParser extended mnemonics for each general condition-code mask. foreach V = [ "E", "NE", "H", "NH", "L", "NL", "HE", "NHE", "LE", "NLE", "Z", "NZ", "P", "NP", "M", "NM", "LH", "NLH", "O", "NO" ] in { - def LOCHIAsm#V : FixedCondUnaryRIE<CV<V>, "lochi", 0xEC42, GR32, - imm32sx16>; - def LOCGHIAsm#V : FixedCondUnaryRIE<CV<V>, "locghi", 0xEC46, GR64, - imm64sx16>; + def LOCHIAsm#V : FixedCondBinaryRIE<CV<V>, "lochi", 0xEC42, GR32, + imm32sx16>; + def LOCGHIAsm#V : FixedCondBinaryRIE<CV<V>, "locghi", 0xEC46, GR64, + imm64sx16>; + def LOCHHIAsm#V : FixedCondBinaryRIE<CV<V>, "lochhi", 0xEC4E, GRH32, + imm32sx16>; + def LOCFHRAsm#V : FixedCondBinaryRRF<CV<V>, "locfhr", 0xB9E0, GRH32, GRH32>; + def LOCFHAsm#V : FixedCondUnaryRSY<CV<V>, "locfh", 0xEBE0, GRH32, 4>; + def STOCFHAsm#V : FixedCondStoreRSY<CV<V>, "stocfh", 0xEBE1, GRH32, 4>; } } let Predicates = [FeatureLoadStoreOnCond], Uses = [CC] in { - // Move register on condition. Created by if-conversion. - defm LOCR : CondUnaryRRFPair<"locr", 0xB9F2, GR32, GR32>; - defm LOCGR : CondUnaryRRFPair<"locgr", 0xB9E2, GR64, GR64>; + // Move register on condition. Expanded from Select* pseudos and + // created by early if-conversion. + let hasSideEffects = 0, isCommutable = 1 in { + defm LOCR : CondBinaryRRFPair<"locr", 0xB9F2, GR32, GR32>; + defm LOCGR : CondBinaryRRFPair<"locgr", 0xB9E2, GR64, GR64>; + } // Load on condition. Matched via DAG pattern. defm LOC : CondUnaryRSYPair<"loc", 0xEBF2, nonvolatile_load, GR32, 4>; @@ -476,8 +512,8 @@ let Predicates = [FeatureLoadStoreOnCond], Uses = [CC] in { // Define AsmParser extended mnemonics for each general condition-code mask. foreach V = [ "E", "NE", "H", "NH", "L", "NL", "HE", "NHE", "LE", "NLE", "Z", "NZ", "P", "NP", "M", "NM", "LH", "NLH", "O", "NO" ] in { - def LOCRAsm#V : FixedCondUnaryRRF<CV<V>, "locr", 0xB9F2, GR32, GR32>; - def LOCGRAsm#V : FixedCondUnaryRRF<CV<V>, "locgr", 0xB9E2, GR64, GR64>; + def LOCRAsm#V : FixedCondBinaryRRF<CV<V>, "locr", 0xB9F2, GR32, GR32>; + def LOCGRAsm#V : FixedCondBinaryRRF<CV<V>, "locgr", 0xB9E2, GR64, GR64>; def LOCAsm#V : FixedCondUnaryRSY<CV<V>, "loc", 0xEBF2, GR32, 4>; def LOCGAsm#V : FixedCondUnaryRSY<CV<V>, "locg", 0xEBE2, GR64, 8>; def STOCAsm#V : FixedCondStoreRSY<CV<V>, "stoc", 0xEBF3, GR32, 4>; @@ -1108,17 +1144,19 @@ def MLG : BinaryRXY<"mlg", 0xE386, z_umul_lohi64, GR128, load, 8>; // Division and remainder //===----------------------------------------------------------------------===// -// Division and remainder, from registers. -def DSGFR : BinaryRRE<"dsgfr", 0xB91D, z_sdivrem32, GR128, GR32>; -def DSGR : BinaryRRE<"dsgr", 0xB90D, z_sdivrem64, GR128, GR64>; -def DLR : BinaryRRE<"dlr", 0xB997, z_udivrem32, GR128, GR32>; -def DLGR : BinaryRRE<"dlgr", 0xB987, z_udivrem64, GR128, GR64>; +let hasSideEffects = 1 in { // Do not speculatively execute. + // Division and remainder, from registers. 
+ def DSGFR : BinaryRRE<"dsgfr", 0xB91D, z_sdivrem32, GR128, GR32>; + def DSGR : BinaryRRE<"dsgr", 0xB90D, z_sdivrem64, GR128, GR64>; + def DLR : BinaryRRE<"dlr", 0xB997, z_udivrem32, GR128, GR32>; + def DLGR : BinaryRRE<"dlgr", 0xB987, z_udivrem64, GR128, GR64>; -// Division and remainder, from memory. -def DSGF : BinaryRXY<"dsgf", 0xE31D, z_sdivrem32, GR128, load, 4>; -def DSG : BinaryRXY<"dsg", 0xE30D, z_sdivrem64, GR128, load, 8>; -def DL : BinaryRXY<"dl", 0xE397, z_udivrem32, GR128, load, 4>; -def DLG : BinaryRXY<"dlg", 0xE387, z_udivrem64, GR128, load, 8>; + // Division and remainder, from memory. + def DSGF : BinaryRXY<"dsgf", 0xE31D, z_sdivrem32, GR128, load, 4>; + def DSG : BinaryRXY<"dsg", 0xE30D, z_sdivrem64, GR128, load, 8>; + def DL : BinaryRXY<"dl", 0xE397, z_udivrem32, GR128, load, 4>; + def DLG : BinaryRXY<"dlg", 0xE387, z_udivrem64, GR128, load, 8>; +} //===----------------------------------------------------------------------===// // Shifts diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td index e3a45b85024..0030ed1f950 100644 --- a/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td +++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td @@ -23,7 +23,7 @@ def Z13Model : SchedMachineModel { let PostRAScheduler = 1; // Extra cycles for a mispredicted branch. - let MispredictPenalty = 8; + let MispredictPenalty = 20; } let SchedModel = Z13Model in { @@ -161,6 +161,7 @@ def : InstRW<[FXa], (instregex "Select(32|64|32Mux)$")>; def : InstRW<[FXa], (instregex "CondStore16(Inv)?$")>; def : InstRW<[FXa], (instregex "CondStore16Mux(Inv)?$")>; def : InstRW<[FXa], (instregex "CondStore32(Inv)?$")>; +def : InstRW<[FXa], (instregex "CondStore32Mux(Inv)?$")>; def : InstRW<[FXa], (instregex "CondStore64(Inv)?$")>; def : InstRW<[FXa], (instregex "CondStore8(Inv)?$")>; def : InstRW<[FXa], (instregex "CondStore8Mux(Inv)?$")>; @@ -214,10 +215,11 @@ def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MVST$")>; // Conditional move instructions //===----------------------------------------------------------------------===// -def : InstRW<[FXa, Lat2], (instregex "LOC(G)?R(Asm.*)?$")>; -def : InstRW<[FXa, Lat2], (instregex "LOC(G)?HI(Asm.*)?$")>; -def : InstRW<[FXa, LSU, Lat6], (instregex "LOC(G)?(Asm.*)?$")>; -def : InstRW<[FXb, LSU, Lat5], (instregex "STOC(G)?(Asm.*)?$")>; +def : InstRW<[FXa, Lat2], (instregex "LOCRMux$")>; +def : InstRW<[FXa, Lat2], (instregex "LOC(G|FH)?R(Asm.*)?$")>; +def : InstRW<[FXa, Lat2], (instregex "LOC(G|H)?HI(Asm.*)?$")>; +def : InstRW<[FXa, LSU, Lat6], (instregex "LOC(G|FH|Mux)?(Asm.*)?$")>; +def : InstRW<[FXb, LSU, Lat5], (instregex "STOC(G|FH|Mux)?(Asm.*)?$")>; //===----------------------------------------------------------------------===// // Sign extensions diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td index 4f28c519336..4d4a912b5d1 100644 --- a/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td +++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td @@ -23,7 +23,7 @@ def Z196Model : SchedMachineModel { let PostRAScheduler = 1; // Extra cycles for a mispredicted branch. 
- let MispredictPenalty = 8; + let MispredictPenalty = 16; } let SchedModel = Z196Model in { @@ -187,7 +187,6 @@ def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MVST$")>; //===----------------------------------------------------------------------===// def : InstRW<[FXU, Lat2, EndGroup], (instregex "LOC(G)?R(Asm.*)?$")>; -def : InstRW<[FXU, Lat2, EndGroup], (instregex "LOC(G)?HI(Asm.*)?$")>; def : InstRW<[FXU, LSU, Lat6, EndGroup], (instregex "LOC(G)?(Asm.*)?$")>; def : InstRW<[FXU, LSU, Lat5, EndGroup], (instregex "STOC(G)?(Asm.*)?$")>; diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td b/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td index 6380f16b889..69c70bbe28f 100644 --- a/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td +++ b/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td @@ -23,7 +23,7 @@ def ZEC12Model : SchedMachineModel { let PostRAScheduler = 1; // Extra cycles for a mispredicted branch. - let MispredictPenalty = 8; + let MispredictPenalty = 16; } let SchedModel = ZEC12Model in { @@ -189,7 +189,6 @@ def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MVST$")>; //===----------------------------------------------------------------------===// def : InstRW<[FXU, Lat2], (instregex "LOC(G)?R(Asm.*)?$")>; -def : InstRW<[FXU, Lat2], (instregex "LOC(G)?HI(Asm.*)?$")>; def : InstRW<[FXU, LSU, Lat6], (instregex "LOC(G)?(Asm.*)?$")>; def : InstRW<[FXU, LSU, Lat5], (instregex "STOC(G)?(Asm.*)?$")>; diff --git a/llvm/lib/Target/SystemZ/SystemZSubtarget.h b/llvm/lib/Target/SystemZ/SystemZSubtarget.h index a100eba5b83..9218a7831d9 100644 --- a/llvm/lib/Target/SystemZ/SystemZSubtarget.h +++ b/llvm/lib/Target/SystemZ/SystemZSubtarget.h @@ -78,6 +78,9 @@ public: // This is important for reducing register pressure in vector code. bool useAA() const override { return true; } + // Always enable the early if-conversion pass. + bool enableEarlyIfConversion() const override { return true; } + // Automatically generated by tblgen. void ParseSubtargetFeatures(StringRef CPU, StringRef FS); diff --git a/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp b/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp index b250774a3fe..33fdb8f9082 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp +++ b/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp @@ -122,6 +122,7 @@ public: void addIRPasses() override; bool addInstSelector() override; + bool addILPOpts() override; void addPreSched2() override; void addPreEmitPass() override; }; @@ -143,7 +144,14 @@ bool SystemZPassConfig::addInstSelector() { return false; } +bool SystemZPassConfig::addILPOpts() { + addPass(&EarlyIfConverterID); + return true; +} + void SystemZPassConfig::addPreSched2() { + addPass(createSystemZExpandPseudoPass(getSystemZTargetMachine())); + if (getOptLevel() != CodeGenOpt::None) addPass(&IfConverterID); } |