diff options
26 files changed, 1803 insertions, 171 deletions
diff --git a/llvm/lib/Target/SystemZ/CMakeLists.txt b/llvm/lib/Target/SystemZ/CMakeLists.txt index 939a2dd05cd..138e14a25b7 100644 --- a/llvm/lib/Target/SystemZ/CMakeLists.txt +++ b/llvm/lib/Target/SystemZ/CMakeLists.txt @@ -16,6 +16,7 @@ add_llvm_target(SystemZCodeGen SystemZCallingConv.cpp SystemZConstantPoolValue.cpp SystemZElimCompare.cpp + SystemZExpandPseudo.cpp SystemZFrameLowering.cpp SystemZHazardRecognizer.cpp SystemZISelDAGToDAG.cpp diff --git a/llvm/lib/Target/SystemZ/SystemZ.h b/llvm/lib/Target/SystemZ/SystemZ.h index c8ea9641fb6..9a8e508e411 100644 --- a/llvm/lib/Target/SystemZ/SystemZ.h +++ b/llvm/lib/Target/SystemZ/SystemZ.h @@ -175,6 +175,7 @@ static inline bool isImmHF(uint64_t Val) { FunctionPass *createSystemZISelDag(SystemZTargetMachine &TM, CodeGenOpt::Level OptLevel); FunctionPass *createSystemZElimComparePass(SystemZTargetMachine &TM); +FunctionPass *createSystemZExpandPseudoPass(SystemZTargetMachine &TM); FunctionPass *createSystemZShortenInstPass(SystemZTargetMachine &TM); FunctionPass *createSystemZLongBranchPass(SystemZTargetMachine &TM); FunctionPass *createSystemZLDCleanupPass(SystemZTargetMachine &TM); diff --git a/llvm/lib/Target/SystemZ/SystemZExpandPseudo.cpp b/llvm/lib/Target/SystemZ/SystemZExpandPseudo.cpp new file mode 100644 index 00000000000..92ce8089c24 --- /dev/null +++ b/llvm/lib/Target/SystemZ/SystemZExpandPseudo.cpp @@ -0,0 +1,153 @@ +//==-- SystemZExpandPseudo.cpp - Expand pseudo instructions -------*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a pass that expands pseudo instructions into target +// instructions to allow proper scheduling and other late optimizations. This +// pass should be run after register allocation but before the post-regalloc +// scheduling pass. 
+// +//===----------------------------------------------------------------------===// + +#include "SystemZ.h" +#include "SystemZInstrInfo.h" +#include "SystemZSubtarget.h" +#include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +using namespace llvm; + +#define SYSTEMZ_EXPAND_PSEUDO_NAME "SystemZ pseudo instruction expansion pass" + +namespace llvm { + void initializeSystemZExpandPseudoPass(PassRegistry&); +} + +namespace { +class SystemZExpandPseudo : public MachineFunctionPass { +public: + static char ID; + SystemZExpandPseudo() : MachineFunctionPass(ID) { + initializeSystemZExpandPseudoPass(*PassRegistry::getPassRegistry()); + } + + const SystemZInstrInfo *TII; + + bool runOnMachineFunction(MachineFunction &Fn) override; + + StringRef getPassName() const override { return SYSTEMZ_EXPAND_PSEUDO_NAME; } + +private: + bool expandMBB(MachineBasicBlock &MBB); + bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + bool expandLOCRMux(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); +}; +char SystemZExpandPseudo::ID = 0; +} + +INITIALIZE_PASS(SystemZExpandPseudo, "systemz-expand-pseudo", + SYSTEMZ_EXPAND_PSEUDO_NAME, false, false) + +/// \brief Returns an instance of the pseudo instruction expansion pass. +FunctionPass *llvm::createSystemZExpandPseudoPass(SystemZTargetMachine &TM) { + return new SystemZExpandPseudo(); +} + +// MI is a load-register-on-condition pseudo instruction that could not be +// handled as a single hardware instruction. Replace it by a branch sequence. 
+bool SystemZExpandPseudo::expandLOCRMux(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + MachineFunction &MF = *MBB.getParent(); + const BasicBlock *BB = MBB.getBasicBlock(); + MachineInstr &MI = *MBBI; + DebugLoc DL = MI.getDebugLoc(); + unsigned DestReg = MI.getOperand(0).getReg(); + unsigned SrcReg = MI.getOperand(2).getReg(); + unsigned CCValid = MI.getOperand(3).getImm(); + unsigned CCMask = MI.getOperand(4).getImm(); + + LivePhysRegs LiveRegs(&TII->getRegisterInfo()); + LiveRegs.addLiveOuts(MBB); + for (auto I = std::prev(MBB.end()); I != MBBI; --I) + LiveRegs.stepBackward(*I); + + // Splice MBB at MI, moving the rest of the block into RestMBB. + MachineBasicBlock *RestMBB = MF.CreateMachineBasicBlock(BB); + MF.insert(std::next(MachineFunction::iterator(MBB)), RestMBB); + RestMBB->splice(RestMBB->begin(), &MBB, MI, MBB.end()); + RestMBB->transferSuccessors(&MBB); + for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I) + RestMBB->addLiveIn(*I); + + // Create a new block MoveMBB to hold the move instruction. + MachineBasicBlock *MoveMBB = MF.CreateMachineBasicBlock(BB); + MF.insert(std::next(MachineFunction::iterator(MBB)), MoveMBB); + MoveMBB->addLiveIn(SrcReg); + for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I) + MoveMBB->addLiveIn(*I); + + // At the end of MBB, create a conditional branch to RestMBB if the + // condition is false, otherwise fall through to MoveMBB. + BuildMI(&MBB, DL, TII->get(SystemZ::BRC)) + .addImm(CCValid).addImm(CCMask ^ CCValid).addMBB(RestMBB); + MBB.addSuccessor(RestMBB); + MBB.addSuccessor(MoveMBB); + + // In MoveMBB, emit an instruction to move SrcReg into DestReg, + // then fall through to RestMBB. 
+ TII->copyPhysReg(*MoveMBB, MoveMBB->end(), DL, DestReg, SrcReg, + MI.getOperand(2).isKill()); + MoveMBB->addSuccessor(RestMBB); + + NextMBBI = MBB.end(); + MI.eraseFromParent(); + return true; +} + +/// \brief If MBBI references a pseudo instruction that should be expanded here, +/// do the expansion and return true. Otherwise return false. +bool SystemZExpandPseudo::expandMI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + MachineInstr &MI = *MBBI; + switch (MI.getOpcode()) { + case SystemZ::LOCRMux: + return expandLOCRMux(MBB, MBBI, NextMBBI); + default: + break; + } + return false; +} + +/// \brief Iterate over the instructions in basic block MBB and expand any +/// pseudo instructions. Return true if anything was modified. +bool SystemZExpandPseudo::expandMBB(MachineBasicBlock &MBB) { + bool Modified = false; + + MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + while (MBBI != E) { + MachineBasicBlock::iterator NMBBI = std::next(MBBI); + Modified |= expandMI(MBB, MBBI, NMBBI); + MBBI = NMBBI; + } + + return Modified; +} + +bool SystemZExpandPseudo::runOnMachineFunction(MachineFunction &MF) { + TII = static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo()); + + bool Modified = false; + for (auto &MBB : MF) + Modified |= expandMBB(MBB); + return Modified; +} + diff --git a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index f23a3e27ec3..920b6e430e8 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -1296,8 +1296,14 @@ void SystemZDAGToDAGISel::Select(SDNode *Node) { SDValue Op0 = Node->getOperand(0); SDValue Op1 = Node->getOperand(1); // Prefer to put any load first, so that it can be matched as a - // conditional load. - if (Op1.getOpcode() == ISD::LOAD && Op0.getOpcode() != ISD::LOAD) { + // conditional load. Likewise for constants in range for LOCHI. 
+ if ((Op1.getOpcode() == ISD::LOAD && Op0.getOpcode() != ISD::LOAD) || + (Subtarget->hasLoadStoreOnCond2() && + Node->getValueType(0).isInteger() && + Op1.getOpcode() == ISD::Constant && + isInt<16>(cast<ConstantSDNode>(Op1)->getSExtValue()) && + !(Op0.getOpcode() == ISD::Constant && + isInt<16>(cast<ConstantSDNode>(Op0)->getSExtValue())))) { SDValue CCValid = Node->getOperand(2); SDValue CCMask = Node->getOperand(3); uint64_t ConstCCValid = diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 2ddee39754c..2081809def7 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -5224,7 +5224,8 @@ static unsigned forceReg(MachineInstr &MI, MachineOperand &Base, // Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI. MachineBasicBlock * SystemZTargetLowering::emitSelect(MachineInstr &MI, - MachineBasicBlock *MBB) const { + MachineBasicBlock *MBB, + unsigned LOCROpcode) const { const SystemZInstrInfo *TII = static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo()); @@ -5235,6 +5236,15 @@ SystemZTargetLowering::emitSelect(MachineInstr &MI, unsigned CCMask = MI.getOperand(4).getImm(); DebugLoc DL = MI.getDebugLoc(); + // Use LOCROpcode if possible. + if (LOCROpcode && Subtarget.hasLoadStoreOnCond()) { + BuildMI(*MBB, MI, DL, TII->get(LOCROpcode), DestReg) + .addReg(FalseReg).addReg(TrueReg) + .addImm(CCValid).addImm(CCMask); + MI.eraseFromParent(); + return MBB; + } + MachineBasicBlock *StartMBB = MBB; MachineBasicBlock *JoinMBB = splitBlockBefore(MI, MBB); MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB); @@ -6020,12 +6030,16 @@ MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter( MachineInstr &MI, MachineBasicBlock *MBB) const { switch (MI.getOpcode()) { case SystemZ::Select32Mux: + return emitSelect(MI, MBB, + Subtarget.hasLoadStoreOnCond2()? 
SystemZ::LOCRMux : 0); case SystemZ::Select32: - case SystemZ::SelectF32: + return emitSelect(MI, MBB, SystemZ::LOCR); case SystemZ::Select64: + return emitSelect(MI, MBB, SystemZ::LOCGR); + case SystemZ::SelectF32: case SystemZ::SelectF64: case SystemZ::SelectF128: - return emitSelect(MI, MBB); + return emitSelect(MI, MBB, 0); case SystemZ::CondStore8Mux: return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false); @@ -6035,6 +6049,10 @@ MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter( return emitCondStore(MI, MBB, SystemZ::STHMux, 0, false); case SystemZ::CondStore16MuxInv: return emitCondStore(MI, MBB, SystemZ::STHMux, 0, true); + case SystemZ::CondStore32Mux: + return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, false); + case SystemZ::CondStore32MuxInv: + return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, true); case SystemZ::CondStore8: return emitCondStore(MI, MBB, SystemZ::STC, 0, false); case SystemZ::CondStore8Inv: diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h index 84c25975783..7a21a474c11 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h @@ -561,7 +561,8 @@ private: MachineBasicBlock *Target) const; // Implement EmitInstrWithCustomInserter for individual operation types. 
- MachineBasicBlock *emitSelect(MachineInstr &MI, MachineBasicBlock *BB) const; + MachineBasicBlock *emitSelect(MachineInstr &MI, MachineBasicBlock *BB, + unsigned LOCROpcode) const; MachineBasicBlock *emitCondStore(MachineInstr &MI, MachineBasicBlock *BB, unsigned StoreOpcode, unsigned STOCOpcode, bool Invert) const; diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td index 94d9c72bbb9..7a3aa40fc9d 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td @@ -2201,77 +2201,6 @@ class UnaryRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator, let OpType = "reg"; } -// These instructions are generated by if conversion. The old value of R1 -// is added as an implicit use. -class CondUnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1, - RegisterOperand cls2> - : InstRRFc<opcode, (outs cls1:$R1), (ins cls2:$R2, cond4:$valid, cond4:$M3), - mnemonic#"$M3\t$R1, $R2", []> { - let CCMaskLast = 1; -} - -class CondUnaryRIE<string mnemonic, bits<16> opcode, RegisterOperand cls, - Immediate imm> - : InstRIEg<opcode, (outs cls:$R1), (ins imm:$I2, cond4:$valid, cond4:$M3), - mnemonic#"$M3\t$R1, $I2", []> { - let CCMaskLast = 1; -} - -// Like CondUnaryRRF, but used for the raw assembly form. The condition-code -// mask is the third operand rather than being part of the mnemonic. 
-class AsmCondUnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1, - RegisterOperand cls2> - : InstRRFc<opcode, (outs cls1:$R1), - (ins cls1:$R1src, cls2:$R2, imm32zx4:$M3), - mnemonic#"\t$R1, $R2, $M3", []> { - let Constraints = "$R1 = $R1src"; - let DisableEncoding = "$R1src"; -} - -class AsmCondUnaryRIE<string mnemonic, bits<16> opcode, RegisterOperand cls, - Immediate imm> - : InstRIEg<opcode, (outs cls:$R1), - (ins cls:$R1src, imm:$I2, imm32zx4:$M3), - mnemonic#"\t$R1, $I2, $M3", []> { - let Constraints = "$R1 = $R1src"; - let DisableEncoding = "$R1src"; -} - -// Like CondUnaryRRF, but with a fixed CC mask. -class FixedCondUnaryRRF<CondVariant V, string mnemonic, bits<16> opcode, - RegisterOperand cls1, RegisterOperand cls2> - : InstRRFc<opcode, (outs cls1:$R1), (ins cls1:$R1src, cls2:$R2), - mnemonic#V.suffix#"\t$R1, $R2", []> { - let Constraints = "$R1 = $R1src"; - let DisableEncoding = "$R1src"; - let isAsmParserOnly = V.alternate; - let M3 = V.ccmask; -} - -class FixedCondUnaryRIE<CondVariant V, string mnemonic, bits<16> opcode, - RegisterOperand cls, Immediate imm> - : InstRIEg<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2), - mnemonic#V.suffix#"\t$R1, $I2", []> { - let Constraints = "$R1 = $R1src"; - let DisableEncoding = "$R1src"; - let isAsmParserOnly = V.alternate; - let M3 = V.ccmask; -} - -multiclass CondUnaryRRFPair<string mnemonic, bits<16> opcode, - RegisterOperand cls1, RegisterOperand cls2> { - let isCodeGenOnly = 1 in - def "" : CondUnaryRRF<mnemonic, opcode, cls1, cls2>; - def Asm : AsmCondUnaryRRF<mnemonic, opcode, cls1, cls2>; -} - -multiclass CondUnaryRIEPair<string mnemonic, bits<16> opcode, - RegisterOperand cls, Immediate imm> { - let isCodeGenOnly = 1 in - def "" : CondUnaryRIE<mnemonic, opcode, cls, imm>; - def Asm : AsmCondUnaryRIE<mnemonic, opcode, cls, imm>; -} - class UnaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator, RegisterOperand cls, Immediate imm> : InstRIa<opcode, (outs cls:$R1), (ins 
imm:$I2), @@ -2578,6 +2507,45 @@ class BinaryRRFe<string mnemonic, bits<16> opcode, RegisterOperand cls1, let M4 = 0; } +class CondBinaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1, + RegisterOperand cls2> + : InstRRFc<opcode, (outs cls1:$R1), + (ins cls1:$R1src, cls2:$R2, cond4:$valid, cond4:$M3), + mnemonic#"$M3\t$R1, $R2", []> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; + let CCMaskLast = 1; +} + +// Like CondBinaryRRF, but used for the raw assembly form. The condition-code +// mask is the third operand rather than being part of the mnemonic. +class AsmCondBinaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1, + RegisterOperand cls2> + : InstRRFc<opcode, (outs cls1:$R1), + (ins cls1:$R1src, cls2:$R2, imm32zx4:$M3), + mnemonic#"\t$R1, $R2, $M3", []> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; +} + +// Like CondBinaryRRF, but with a fixed CC mask. +class FixedCondBinaryRRF<CondVariant V, string mnemonic, bits<16> opcode, + RegisterOperand cls1, RegisterOperand cls2> + : InstRRFc<opcode, (outs cls1:$R1), (ins cls1:$R1src, cls2:$R2), + mnemonic#V.suffix#"\t$R1, $R2", []> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; + let isAsmParserOnly = V.alternate; + let M3 = V.ccmask; +} + +multiclass CondBinaryRRFPair<string mnemonic, bits<16> opcode, + RegisterOperand cls1, RegisterOperand cls2> { + let isCodeGenOnly = 1 in + def "" : CondBinaryRRF<mnemonic, opcode, cls1, cls2>; + def Asm : AsmCondBinaryRRF<mnemonic, opcode, cls1, cls2>; +} + class BinaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator, RegisterOperand cls, Immediate imm> : InstRIa<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2), @@ -2605,6 +2573,47 @@ multiclass BinaryRIAndK<string mnemonic, bits<12> opcode1, bits<16> opcode2, } } +class CondBinaryRIE<string mnemonic, bits<16> opcode, RegisterOperand cls, + Immediate imm> + : InstRIEg<opcode, (outs cls:$R1), + (ins cls:$R1src, imm:$I2, 
cond4:$valid, cond4:$M3), + mnemonic#"$M3\t$R1, $I2", + [(set cls:$R1, (z_select_ccmask imm:$I2, cls:$R1src, + cond4:$valid, cond4:$M3))]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; + let CCMaskLast = 1; +} + +// Like CondBinaryRIE, but used for the raw assembly form. The condition-code +// mask is the third operand rather than being part of the mnemonic. +class AsmCondBinaryRIE<string mnemonic, bits<16> opcode, RegisterOperand cls, + Immediate imm> + : InstRIEg<opcode, (outs cls:$R1), + (ins cls:$R1src, imm:$I2, imm32zx4:$M3), + mnemonic#"\t$R1, $I2, $M3", []> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; +} + +// Like CondBinaryRIE, but with a fixed CC mask. +class FixedCondBinaryRIE<CondVariant V, string mnemonic, bits<16> opcode, + RegisterOperand cls, Immediate imm> + : InstRIEg<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2), + mnemonic#V.suffix#"\t$R1, $I2", []> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; + let isAsmParserOnly = V.alternate; + let M3 = V.ccmask; +} + +multiclass CondBinaryRIEPair<string mnemonic, bits<16> opcode, + RegisterOperand cls, Immediate imm> { + let isCodeGenOnly = 1 in + def "" : CondBinaryRIE<mnemonic, opcode, cls, imm>; + def Asm : AsmCondBinaryRIE<mnemonic, opcode, cls, imm>; +} + class BinaryRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator, RegisterOperand cls, Immediate imm> : InstRILa<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2), @@ -3644,6 +3653,54 @@ class CompareRXYPseudo<SDPatternOperator operator, RegisterOperand cls, let AccessBytes = bytes; } +// Like CondBinaryRRF, but expanded after RA depending on the choice of +// register. 
+class CondBinaryRRFPseudo<RegisterOperand cls1, RegisterOperand cls2> + : Pseudo<(outs cls1:$R1), + (ins cls1:$R1src, cls2:$R2, cond4:$valid, cond4:$M3), []> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; + let CCMaskLast = 1; +} + +// Like CondBinaryRIE, but expanded after RA depending on the choice of +// register. +class CondBinaryRIEPseudo<RegisterOperand cls, Immediate imm> + : Pseudo<(outs cls:$R1), + (ins cls:$R1src, imm:$I2, cond4:$valid, cond4:$M3), + [(set cls:$R1, (z_select_ccmask imm:$I2, cls:$R1src, + cond4:$valid, cond4:$M3))]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; + let CCMaskLast = 1; +} + +// Like CondUnaryRSY, but expanded after RA depending on the choice of +// register. +class CondUnaryRSYPseudo<SDPatternOperator operator, RegisterOperand cls, + bits<5> bytes, AddressingMode mode = bdaddr20only> + : Pseudo<(outs cls:$R1), + (ins cls:$R1src, mode:$BD2, cond4:$valid, cond4:$R3), + [(set cls:$R1, + (z_select_ccmask (operator mode:$BD2), cls:$R1src, + cond4:$valid, cond4:$R3))]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; + let mayLoad = 1; + let AccessBytes = bytes; + let CCMaskLast = 1; +} + +// Like CondStoreRSY, but expanded after RA depending on the choice of +// register. +class CondStoreRSYPseudo<RegisterOperand cls, bits<5> bytes, + AddressingMode mode = bdaddr20only> + : Pseudo<(outs), (ins cls:$R1, mode:$BD2, cond4:$valid, cond4:$R3), []> { + let mayStore = 1; + let AccessBytes = bytes; + let CCMaskLast = 1; +} + // Like StoreRXY, but expanded after RA depending on the choice of register. 
class StoreRXYPseudo<SDPatternOperator operator, RegisterOperand cls, bits<5> bytes, AddressingMode mode = bdxaddr20only> diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp index 7374083e6e6..8f1b5575902 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -149,6 +149,37 @@ void SystemZInstrInfo::expandRXYPseudo(MachineInstr &MI, unsigned LowOpcode, MI.setDesc(get(Opcode)); } +// MI is a load-on-condition pseudo instruction with a single register +// (source or destination) operand. Replace it with LowOpcode if the +// register is a low GR32 and HighOpcode if the register is a high GR32. +void SystemZInstrInfo::expandLOCPseudo(MachineInstr &MI, unsigned LowOpcode, + unsigned HighOpcode) const { + unsigned Reg = MI.getOperand(0).getReg(); + unsigned Opcode = isHighReg(Reg) ? HighOpcode : LowOpcode; + MI.setDesc(get(Opcode)); +} + +// MI is a load-register-on-condition pseudo instruction. Replace it with +// LowOpcode if source and destination are both low GR32s and HighOpcode if +// source and destination are both high GR32s. +void SystemZInstrInfo::expandLOCRPseudo(MachineInstr &MI, unsigned LowOpcode, + unsigned HighOpcode) const { + unsigned DestReg = MI.getOperand(0).getReg(); + unsigned SrcReg = MI.getOperand(2).getReg(); + bool DestIsHigh = isHighReg(DestReg); + bool SrcIsHigh = isHighReg(SrcReg); + + if (!DestIsHigh && !SrcIsHigh) + MI.setDesc(get(LowOpcode)); + else if (DestIsHigh && SrcIsHigh) + MI.setDesc(get(HighOpcode)); + + // If we were unable to implement the pseudo with a single instruction, we + // need to convert it back into a branch sequence. This cannot be done here + // since the caller of expandPostRAPseudo does not handle changes to the CFG + // correctly. This change is deferred to the SystemZExpandPseudo pass. +} + // MI is an RR-style pseudo instruction that zero-extends the low Size bits 
Replace it with LowOpcode if both operands // are low registers, otherwise use RISB[LH]G. @@ -222,6 +253,36 @@ void SystemZInstrInfo::emitGRX32Move(MachineBasicBlock &MBB, .addImm(32 - Size).addImm(128 + 31).addImm(Rotate); } + +MachineInstr *SystemZInstrInfo::commuteInstructionImpl(MachineInstr &MI, + bool NewMI, + unsigned OpIdx1, + unsigned OpIdx2) const { + auto cloneIfNew = [NewMI](MachineInstr &MI) -> MachineInstr & { + if (NewMI) + return *MI.getParent()->getParent()->CloneMachineInstr(&MI); + return MI; + }; + + switch (MI.getOpcode()) { + case SystemZ::LOCRMux: + case SystemZ::LOCFHR: + case SystemZ::LOCR: + case SystemZ::LOCGR: { + auto &WorkingMI = cloneIfNew(MI); + // Invert condition. + unsigned CCValid = WorkingMI.getOperand(3).getImm(); + unsigned CCMask = WorkingMI.getOperand(4).getImm(); + WorkingMI.getOperand(4).setImm(CCMask ^ CCValid); + return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, + OpIdx1, OpIdx2); + } + default: + return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); + } +} + + // If MI is a simple load or store for a frame object, return the register // it loads or stores and set FrameIndex to the index of the frame object. // Return 0 otherwise. @@ -525,30 +586,128 @@ bool SystemZInstrInfo::optimizeCompareInstr( removeIPMBasedCompare(Compare, SrcReg, MRI, &RI); } -// If Opcode is a move that has a conditional variant, return that variant, -// otherwise return 0. -static unsigned getConditionalMove(unsigned Opcode) { - switch (Opcode) { - case SystemZ::LR: return SystemZ::LOCR; - case SystemZ::LGR: return SystemZ::LOCGR; - default: return 0; + +bool SystemZInstrInfo::canInsertSelect(const MachineBasicBlock &MBB, + ArrayRef<MachineOperand> Pred, + unsigned TrueReg, unsigned FalseReg, + int &CondCycles, int &TrueCycles, + int &FalseCycles) const { + // Not all subtargets have LOCR instructions. 
+ if (!STI.hasLoadStoreOnCond()) + return false; + if (Pred.size() != 2) + return false; + + // Check register classes. + const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + const TargetRegisterClass *RC = + RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg)); + if (!RC) + return false; + + // We have LOCR instructions for 32 and 64 bit general purpose registers. + if ((STI.hasLoadStoreOnCond2() && + SystemZ::GRX32BitRegClass.hasSubClassEq(RC)) || + SystemZ::GR32BitRegClass.hasSubClassEq(RC) || + SystemZ::GR64BitRegClass.hasSubClassEq(RC)) { + CondCycles = 2; + TrueCycles = 2; + FalseCycles = 2; + return true; } + + // Can't do anything else. + return false; } -static unsigned getConditionalLoadImmediate(unsigned Opcode) { - switch (Opcode) { - case SystemZ::LHI: return SystemZ::LOCHI; - case SystemZ::LGHI: return SystemZ::LOCGHI; - default: return 0; +void SystemZInstrInfo::insertSelect(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + const DebugLoc &DL, unsigned DstReg, + ArrayRef<MachineOperand> Pred, + unsigned TrueReg, + unsigned FalseReg) const { + MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + const TargetRegisterClass *RC = MRI.getRegClass(DstReg); + + assert(Pred.size() == 2 && "Invalid condition"); + unsigned CCValid = Pred[0].getImm(); + unsigned CCMask = Pred[1].getImm(); + + unsigned Opc; + if (SystemZ::GRX32BitRegClass.hasSubClassEq(RC)) { + if (STI.hasLoadStoreOnCond2()) + Opc = SystemZ::LOCRMux; + else { + Opc = SystemZ::LOCR; + MRI.constrainRegClass(DstReg, &SystemZ::GR32BitRegClass); + } + } else if (SystemZ::GR64BitRegClass.hasSubClassEq(RC)) + Opc = SystemZ::LOCGR; + else + llvm_unreachable("Invalid register class"); + + BuildMI(MBB, I, DL, get(Opc), DstReg) + .addReg(FalseReg).addReg(TrueReg) + .addImm(CCValid).addImm(CCMask); +} + +bool SystemZInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, + unsigned Reg, + MachineRegisterInfo *MRI) const { + unsigned DefOpc = 
DefMI.getOpcode(); + if (DefOpc != SystemZ::LHIMux && DefOpc != SystemZ::LHI && + DefOpc != SystemZ::LGHI) + return false; + if (DefMI.getOperand(0).getReg() != Reg) + return false; + int32_t ImmVal = (int32_t)DefMI.getOperand(1).getImm(); + + unsigned UseOpc = UseMI.getOpcode(); + unsigned NewUseOpc; + unsigned UseIdx; + int CommuteIdx = -1; + switch (UseOpc) { + case SystemZ::LOCRMux: + if (!STI.hasLoadStoreOnCond2()) + return false; + NewUseOpc = SystemZ::LOCHIMux; + if (UseMI.getOperand(2).getReg() == Reg) + UseIdx = 2; + else if (UseMI.getOperand(1).getReg() == Reg) + UseIdx = 2, CommuteIdx = 1; + else + return false; + break; + case SystemZ::LOCGR: + if (!STI.hasLoadStoreOnCond2()) + return false; + NewUseOpc = SystemZ::LOCGHI; + if (UseMI.getOperand(2).getReg() == Reg) + UseIdx = 2; + else if (UseMI.getOperand(1).getReg() == Reg) + UseIdx = 2, CommuteIdx = 1; + else + return false; + break; + default: + return false; } + + if (CommuteIdx != -1) + if (!commuteInstruction(UseMI, false, CommuteIdx, UseIdx)) + return false; + + bool DeleteDef = MRI->hasOneNonDBGUse(Reg); + UseMI.setDesc(get(NewUseOpc)); + UseMI.getOperand(UseIdx).ChangeToImmediate(ImmVal); + if (DeleteDef) + DefMI.eraseFromParent(); + + return true; } bool SystemZInstrInfo::isPredicable(MachineInstr &MI) const { unsigned Opcode = MI.getOpcode(); - if (STI.hasLoadStoreOnCond() && getConditionalMove(Opcode)) - return true; - if (STI.hasLoadStoreOnCond2() && getConditionalLoadImmediate(Opcode)) - return true; if (Opcode == SystemZ::Return || Opcode == SystemZ::Trap || Opcode == SystemZ::CallJG || @@ -600,26 +759,6 @@ bool SystemZInstrInfo::PredicateInstruction( unsigned CCMask = Pred[1].getImm(); assert(CCMask > 0 && CCMask < 15 && "Invalid predicate"); unsigned Opcode = MI.getOpcode(); - if (STI.hasLoadStoreOnCond()) { - if (unsigned CondOpcode = getConditionalMove(Opcode)) { - MI.setDesc(get(CondOpcode)); - MachineInstrBuilder(*MI.getParent()->getParent(), MI) - .addImm(CCValid) - .addImm(CCMask) 
- .addReg(SystemZ::CC, RegState::Implicit); - return true; - } - } - if (STI.hasLoadStoreOnCond2()) { - if (unsigned CondOpcode = getConditionalLoadImmediate(Opcode)) { - MI.setDesc(get(CondOpcode)); - MachineInstrBuilder(*MI.getParent()->getParent(), MI) - .addImm(CCValid) - .addImm(CCMask) - .addReg(SystemZ::CC, RegState::Implicit); - return true; - } - } if (Opcode == SystemZ::Trap) { MI.setDesc(get(SystemZ::CondTrap)); MachineInstrBuilder(*MI.getParent()->getParent(), MI) @@ -1090,6 +1229,18 @@ bool SystemZInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { expandRXYPseudo(MI, SystemZ::L, SystemZ::LFH); return true; + case SystemZ::LOCMux: + expandLOCPseudo(MI, SystemZ::LOC, SystemZ::LOCFH); + return true; + + case SystemZ::LOCHIMux: + expandLOCPseudo(MI, SystemZ::LOCHI, SystemZ::LOCHHI); + return true; + + case SystemZ::LOCRMux: + expandLOCRPseudo(MI, SystemZ::LOCR, SystemZ::LOCFHR); + return true; + case SystemZ::STCMux: expandRXYPseudo(MI, SystemZ::STC, SystemZ::STCH); return true; @@ -1102,6 +1253,10 @@ bool SystemZInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { expandRXYPseudo(MI, SystemZ::ST, SystemZ::STFH); return true; + case SystemZ::STOCMux: + expandLOCPseudo(MI, SystemZ::STOC, SystemZ::STOCFH); + return true; + case SystemZ::LHIMux: expandRIPseudo(MI, SystemZ::LHI, SystemZ::IIHF, true); return true; diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h index e403963ff63..aadeb6b9f1d 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h @@ -142,6 +142,10 @@ class SystemZInstrInfo : public SystemZGenInstrInfo { unsigned LowOpcodeK, unsigned HighOpcode) const; void expandRXYPseudo(MachineInstr &MI, unsigned LowOpcode, unsigned HighOpcode) const; + void expandLOCPseudo(MachineInstr &MI, unsigned LowOpcode, + unsigned HighOpcode) const; + void expandLOCRPseudo(MachineInstr &MI, unsigned LowOpcode, + unsigned HighOpcode) const; void 
expandZExtPseudo(MachineInstr &MI, unsigned LowOpcode, unsigned Size) const; void expandLoadStackGuard(MachineInstr *MI) const; @@ -149,7 +153,23 @@ class SystemZInstrInfo : public SystemZGenInstrInfo { const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, unsigned LowLowOpcode, unsigned Size, bool KillSrc) const; virtual void anchor(); - + +protected: + /// Commutes the operands in the given instruction by changing the operands + /// order and/or changing the instruction's opcode and/or the immediate value + /// operand. + /// + /// The arguments 'CommuteOpIdx1' and 'CommuteOpIdx2' specify the operands + /// to be commuted. + /// + /// Do not call this method for a non-commutable instruction or + /// non-commutable operands. + /// Even though the instruction is commutable, the method may still + /// fail to commute the operands, null pointer is returned in such cases. + MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI, + unsigned CommuteOpIdx1, + unsigned CommuteOpIdx2) const override; + public: explicit SystemZInstrInfo(SystemZSubtarget &STI); @@ -175,6 +195,14 @@ public: bool optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int Mask, int Value, const MachineRegisterInfo *MRI) const override; + bool canInsertSelect(const MachineBasicBlock&, ArrayRef<MachineOperand> Cond, + unsigned, unsigned, int&, int&, int&) const override; + void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + const DebugLoc &DL, unsigned DstReg, + ArrayRef<MachineOperand> Cond, unsigned TrueReg, + unsigned FalseReg) const override; + bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, unsigned Reg, + MachineRegisterInfo *MRI) const override; bool isPredicable(MachineInstr &MI) const override; bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, unsigned ExtraPredCycles, diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td index 
105eb87884e..28c8557c78d 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td @@ -305,14 +305,17 @@ def Select32Mux : SelectWrapper<GRX32>, Requires<[FeatureHighWord]>; def Select32 : SelectWrapper<GR32>; def Select64 : SelectWrapper<GR64>; -// We don't define 32-bit Mux stores because the low-only STOC should -// always be used if possible. +// We don't define 32-bit Mux stores if we don't have STOCFH, because the +// low-only STOC should then always be used if possible. defm CondStore8Mux : CondStores<GRX32, nonvolatile_truncstorei8, nonvolatile_anyextloadi8, bdxaddr20only>, Requires<[FeatureHighWord]>; defm CondStore16Mux : CondStores<GRX32, nonvolatile_truncstorei16, nonvolatile_anyextloadi16, bdxaddr20only>, Requires<[FeatureHighWord]>; +defm CondStore32Mux : CondStores<GRX32, nonvolatile_store, + nonvolatile_load, bdxaddr20only>, + Requires<[FeatureLoadStoreOnCond2]>; defm CondStore8 : CondStores<GR32, nonvolatile_truncstorei8, nonvolatile_anyextloadi8, bdxaddr20only>; defm CondStore16 : CondStores<GR32, nonvolatile_truncstorei16, @@ -446,24 +449,57 @@ let mayLoad = 1, mayStore = 1, Defs = [CC] in //===----------------------------------------------------------------------===// let Predicates = [FeatureLoadStoreOnCond2], Uses = [CC] in { - // Load immediate on condition. Created by if-conversion. - defm LOCHI : CondUnaryRIEPair<"lochi", 0xEC42, GR32, imm32sx16>; - defm LOCGHI : CondUnaryRIEPair<"locghi", 0xEC46, GR64, imm64sx16>; + // Load immediate on condition. Matched via DAG pattern and created + // by the PeepholeOptimizer via FoldImmediate. + let hasSideEffects = 0 in { + // Expands to LOCHI or LOCHHI, depending on the choice of register. 
+ def LOCHIMux : CondBinaryRIEPseudo<GRX32, imm32sx16>; + defm LOCHHI : CondBinaryRIEPair<"lochhi", 0xEC4E, GRH32, imm32sx16>; + defm LOCHI : CondBinaryRIEPair<"lochi", 0xEC42, GR32, imm32sx16>; + defm LOCGHI : CondBinaryRIEPair<"locghi", 0xEC46, GR64, imm64sx16>; + } + + // Move register on condition. Expanded from Select* pseudos and + // created by early if-conversion. + let hasSideEffects = 0, isCommutable = 1 in { + // Expands to LOCR or LOCFHR or a branch-and-move sequence, + // depending on the choice of registers. + def LOCRMux : CondBinaryRRFPseudo<GRX32, GRX32>; + defm LOCFHR : CondBinaryRRFPair<"locfhr", 0xB9E0, GRH32, GRH32>; + } + + // Load on condition. Matched via DAG pattern. + // Expands to LOC or LOCFH, depending on the choice of register. + def LOCMux : CondUnaryRSYPseudo<nonvolatile_load, GRX32, 4>; + defm LOCFH : CondUnaryRSYPair<"locfh", 0xEBE0, nonvolatile_load, GRH32, 4>; + + // Store on condition. Expanded from CondStore* pseudos. + // Expands to STOC or STOCFH, depending on the choice of register. + def STOCMux : CondStoreRSYPseudo<GRX32, 4>; + defm STOCFH : CondStoreRSYPair<"stocfh", 0xEBE1, GRH32, 4>; // Define AsmParser extended mnemonics for each general condition-code mask. 
foreach V = [ "E", "NE", "H", "NH", "L", "NL", "HE", "NHE", "LE", "NLE", "Z", "NZ", "P", "NP", "M", "NM", "LH", "NLH", "O", "NO" ] in { - def LOCHIAsm#V : FixedCondUnaryRIE<CV<V>, "lochi", 0xEC42, GR32, - imm32sx16>; - def LOCGHIAsm#V : FixedCondUnaryRIE<CV<V>, "locghi", 0xEC46, GR64, - imm64sx16>; + def LOCHIAsm#V : FixedCondBinaryRIE<CV<V>, "lochi", 0xEC42, GR32, + imm32sx16>; + def LOCGHIAsm#V : FixedCondBinaryRIE<CV<V>, "locghi", 0xEC46, GR64, + imm64sx16>; + def LOCHHIAsm#V : FixedCondBinaryRIE<CV<V>, "lochhi", 0xEC4E, GRH32, + imm32sx16>; + def LOCFHRAsm#V : FixedCondBinaryRRF<CV<V>, "locfhr", 0xB9E0, GRH32, GRH32>; + def LOCFHAsm#V : FixedCondUnaryRSY<CV<V>, "locfh", 0xEBE0, GRH32, 4>; + def STOCFHAsm#V : FixedCondStoreRSY<CV<V>, "stocfh", 0xEBE1, GRH32, 4>; } } let Predicates = [FeatureLoadStoreOnCond], Uses = [CC] in { - // Move register on condition. Created by if-conversion. - defm LOCR : CondUnaryRRFPair<"locr", 0xB9F2, GR32, GR32>; - defm LOCGR : CondUnaryRRFPair<"locgr", 0xB9E2, GR64, GR64>; + // Move register on condition. Expanded from Select* pseudos and + // created by early if-conversion. + let hasSideEffects = 0, isCommutable = 1 in { + defm LOCR : CondBinaryRRFPair<"locr", 0xB9F2, GR32, GR32>; + defm LOCGR : CondBinaryRRFPair<"locgr", 0xB9E2, GR64, GR64>; + } // Load on condition. Matched via DAG pattern. defm LOC : CondUnaryRSYPair<"loc", 0xEBF2, nonvolatile_load, GR32, 4>; @@ -476,8 +512,8 @@ let Predicates = [FeatureLoadStoreOnCond], Uses = [CC] in { // Define AsmParser extended mnemonics for each general condition-code mask. 
foreach V = [ "E", "NE", "H", "NH", "L", "NL", "HE", "NHE", "LE", "NLE", "Z", "NZ", "P", "NP", "M", "NM", "LH", "NLH", "O", "NO" ] in { - def LOCRAsm#V : FixedCondUnaryRRF<CV<V>, "locr", 0xB9F2, GR32, GR32>; - def LOCGRAsm#V : FixedCondUnaryRRF<CV<V>, "locgr", 0xB9E2, GR64, GR64>; + def LOCRAsm#V : FixedCondBinaryRRF<CV<V>, "locr", 0xB9F2, GR32, GR32>; + def LOCGRAsm#V : FixedCondBinaryRRF<CV<V>, "locgr", 0xB9E2, GR64, GR64>; def LOCAsm#V : FixedCondUnaryRSY<CV<V>, "loc", 0xEBF2, GR32, 4>; def LOCGAsm#V : FixedCondUnaryRSY<CV<V>, "locg", 0xEBE2, GR64, 8>; def STOCAsm#V : FixedCondStoreRSY<CV<V>, "stoc", 0xEBF3, GR32, 4>; @@ -1108,17 +1144,19 @@ def MLG : BinaryRXY<"mlg", 0xE386, z_umul_lohi64, GR128, load, 8>; // Division and remainder //===----------------------------------------------------------------------===// -// Division and remainder, from registers. -def DSGFR : BinaryRRE<"dsgfr", 0xB91D, z_sdivrem32, GR128, GR32>; -def DSGR : BinaryRRE<"dsgr", 0xB90D, z_sdivrem64, GR128, GR64>; -def DLR : BinaryRRE<"dlr", 0xB997, z_udivrem32, GR128, GR32>; -def DLGR : BinaryRRE<"dlgr", 0xB987, z_udivrem64, GR128, GR64>; +let hasSideEffects = 1 in { // Do not speculatively execute. + // Division and remainder, from registers. + def DSGFR : BinaryRRE<"dsgfr", 0xB91D, z_sdivrem32, GR128, GR32>; + def DSGR : BinaryRRE<"dsgr", 0xB90D, z_sdivrem64, GR128, GR64>; + def DLR : BinaryRRE<"dlr", 0xB997, z_udivrem32, GR128, GR32>; + def DLGR : BinaryRRE<"dlgr", 0xB987, z_udivrem64, GR128, GR64>; -// Division and remainder, from memory. -def DSGF : BinaryRXY<"dsgf", 0xE31D, z_sdivrem32, GR128, load, 4>; -def DSG : BinaryRXY<"dsg", 0xE30D, z_sdivrem64, GR128, load, 8>; -def DL : BinaryRXY<"dl", 0xE397, z_udivrem32, GR128, load, 4>; -def DLG : BinaryRXY<"dlg", 0xE387, z_udivrem64, GR128, load, 8>; + // Division and remainder, from memory. 
+ def DSGF : BinaryRXY<"dsgf", 0xE31D, z_sdivrem32, GR128, load, 4>; + def DSG : BinaryRXY<"dsg", 0xE30D, z_sdivrem64, GR128, load, 8>; + def DL : BinaryRXY<"dl", 0xE397, z_udivrem32, GR128, load, 4>; + def DLG : BinaryRXY<"dlg", 0xE387, z_udivrem64, GR128, load, 8>; +} //===----------------------------------------------------------------------===// // Shifts diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td index e3a45b85024..0030ed1f950 100644 --- a/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td +++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td @@ -23,7 +23,7 @@ def Z13Model : SchedMachineModel { let PostRAScheduler = 1; // Extra cycles for a mispredicted branch. - let MispredictPenalty = 8; + let MispredictPenalty = 20; } let SchedModel = Z13Model in { @@ -161,6 +161,7 @@ def : InstRW<[FXa], (instregex "Select(32|64|32Mux)$")>; def : InstRW<[FXa], (instregex "CondStore16(Inv)?$")>; def : InstRW<[FXa], (instregex "CondStore16Mux(Inv)?$")>; def : InstRW<[FXa], (instregex "CondStore32(Inv)?$")>; +def : InstRW<[FXa], (instregex "CondStore32Mux(Inv)?$")>; def : InstRW<[FXa], (instregex "CondStore64(Inv)?$")>; def : InstRW<[FXa], (instregex "CondStore8(Inv)?$")>; def : InstRW<[FXa], (instregex "CondStore8Mux(Inv)?$")>; @@ -214,10 +215,11 @@ def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MVST$")>; // Conditional move instructions //===----------------------------------------------------------------------===// -def : InstRW<[FXa, Lat2], (instregex "LOC(G)?R(Asm.*)?$")>; -def : InstRW<[FXa, Lat2], (instregex "LOC(G)?HI(Asm.*)?$")>; -def : InstRW<[FXa, LSU, Lat6], (instregex "LOC(G)?(Asm.*)?$")>; -def : InstRW<[FXb, LSU, Lat5], (instregex "STOC(G)?(Asm.*)?$")>; +def : InstRW<[FXa, Lat2], (instregex "LOCRMux$")>; +def : InstRW<[FXa, Lat2], (instregex "LOC(G|FH)?R(Asm.*)?$")>; +def : InstRW<[FXa, Lat2], (instregex "LOC(G|H)?HI(Asm.*)?$")>; +def : InstRW<[FXa, LSU, Lat6], (instregex "LOC(G|FH|Mux)?(Asm.*)?$")>; 
+def : InstRW<[FXb, LSU, Lat5], (instregex "STOC(G|FH|Mux)?(Asm.*)?$")>; //===----------------------------------------------------------------------===// // Sign extensions diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td index 4f28c519336..4d4a912b5d1 100644 --- a/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td +++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td @@ -23,7 +23,7 @@ def Z196Model : SchedMachineModel { let PostRAScheduler = 1; // Extra cycles for a mispredicted branch. - let MispredictPenalty = 8; + let MispredictPenalty = 16; } let SchedModel = Z196Model in { @@ -187,7 +187,6 @@ def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MVST$")>; //===----------------------------------------------------------------------===// def : InstRW<[FXU, Lat2, EndGroup], (instregex "LOC(G)?R(Asm.*)?$")>; -def : InstRW<[FXU, Lat2, EndGroup], (instregex "LOC(G)?HI(Asm.*)?$")>; def : InstRW<[FXU, LSU, Lat6, EndGroup], (instregex "LOC(G)?(Asm.*)?$")>; def : InstRW<[FXU, LSU, Lat5, EndGroup], (instregex "STOC(G)?(Asm.*)?$")>; diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td b/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td index 6380f16b889..69c70bbe28f 100644 --- a/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td +++ b/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td @@ -23,7 +23,7 @@ def ZEC12Model : SchedMachineModel { let PostRAScheduler = 1; // Extra cycles for a mispredicted branch. 
- let MispredictPenalty = 8; + let MispredictPenalty = 16; } let SchedModel = ZEC12Model in { @@ -189,7 +189,6 @@ def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MVST$")>; //===----------------------------------------------------------------------===// def : InstRW<[FXU, Lat2], (instregex "LOC(G)?R(Asm.*)?$")>; -def : InstRW<[FXU, Lat2], (instregex "LOC(G)?HI(Asm.*)?$")>; def : InstRW<[FXU, LSU, Lat6], (instregex "LOC(G)?(Asm.*)?$")>; def : InstRW<[FXU, LSU, Lat5], (instregex "STOC(G)?(Asm.*)?$")>; diff --git a/llvm/lib/Target/SystemZ/SystemZSubtarget.h b/llvm/lib/Target/SystemZ/SystemZSubtarget.h index a100eba5b83..9218a7831d9 100644 --- a/llvm/lib/Target/SystemZ/SystemZSubtarget.h +++ b/llvm/lib/Target/SystemZ/SystemZSubtarget.h @@ -78,6 +78,9 @@ public: // This is important for reducing register pressure in vector code. bool useAA() const override { return true; } + // Always enable the early if-conversion pass. + bool enableEarlyIfConversion() const override { return true; } + // Automatically generated by tblgen. 
void ParseSubtargetFeatures(StringRef CPU, StringRef FS); diff --git a/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp b/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp index b250774a3fe..33fdb8f9082 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp +++ b/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp @@ -122,6 +122,7 @@ public: void addIRPasses() override; bool addInstSelector() override; + bool addILPOpts() override; void addPreSched2() override; void addPreEmitPass() override; }; @@ -143,7 +144,14 @@ bool SystemZPassConfig::addInstSelector() { return false; } +bool SystemZPassConfig::addILPOpts() { + addPass(&EarlyIfConverterID); + return true; +} + void SystemZPassConfig::addPreSched2() { + addPass(createSystemZExpandPseudoPass(getSystemZTargetMachine())); + if (getOptLevel() != CodeGenOpt::None) addPass(&IfConverterID); } diff --git a/llvm/test/CodeGen/SystemZ/cond-li.ll b/llvm/test/CodeGen/SystemZ/cond-li.ll deleted file mode 100644 index a3e2f3fd125..00000000000 --- a/llvm/test/CodeGen/SystemZ/cond-li.ll +++ /dev/null @@ -1,23 +0,0 @@ -; Test LOCHI/LOCGHI -; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s - -; CHECK-LABEL: bar1: -; CHECK: lhi [[REG:%r[0-5]]], 42 -; CHECK: chi %r2, 0 -; CHECK: lochie [[REG]], 0 -define signext i32 @bar1(i32 signext %x) { - %cmp = icmp ne i32 %x, 0 - %.x = select i1 %cmp, i32 42, i32 0 - ret i32 %.x -} - -; CHECK-LABEL: bar2: -; CHECK: ltgr [[REG:%r[0-5]]], %r2 -; CHECK: lghi %r2, 42 -; CHECK: locghie %r2, 0 -define signext i64 @bar2(i64 signext %x) { - %cmp = icmp ne i64 %x, 0 - %.x = select i1 %cmp, i64 42, i64 0 - ret i64 %.x -} diff --git a/llvm/test/CodeGen/SystemZ/cond-load-01.ll b/llvm/test/CodeGen/SystemZ/cond-load-01.ll index d10551fd066..c7ec4105cbc 100644 --- a/llvm/test/CodeGen/SystemZ/cond-load-01.ll +++ b/llvm/test/CodeGen/SystemZ/cond-load-01.ll @@ -2,6 +2,10 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s +; Run the test again to make sure it still 
works the same even +; in the presence of the load-store-on-condition-2 facility. +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + declare i32 @foo(i32 *) ; Test the simple case. diff --git a/llvm/test/CodeGen/SystemZ/cond-load-03.ll b/llvm/test/CodeGen/SystemZ/cond-load-03.ll new file mode 100644 index 00000000000..4cce92ea718 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/cond-load-03.ll @@ -0,0 +1,159 @@ +; Test LOCFH. See comments in asm-18.ll about testing high-word operations. +; +; RUN: llc < %s -verify-machineinstrs -mtriple=s390x-linux-gnu -mcpu=z13 \ +; RUN: -no-integrated-as | FileCheck %s + +declare void @foo(i32 *) + +; Test the simple case. +define void @f1(i32 *%ptr, i32 %limit) { +; CHECK-LABEL: f1: +; CHECK-DAG: stepa [[REG:%r[0-5]]] +; CHECK-DAG: clfi %r3, 42 +; CHECK: locfhhe [[REG]], 0(%r2) +; CHECK: br %r14 + %easy = call i32 asm "stepa $0", "=h"() + %cond = icmp ult i32 %limit, 42 + %other = load i32, i32 *%ptr + %res = select i1 %cond, i32 %easy, i32 %other + call void asm sideeffect "stepb $0", "h"(i32 %res) + ret void +} + +; ...and again with the operands swapped. +define void @f2(i32 *%ptr, i32 %limit) { +; CHECK-LABEL: f2: +; CHECK-DAG: stepa [[REG:%r[0-5]]] +; CHECK-DAG: clfi %r3, 42 +; CHECK: locfhl [[REG]], 0(%r2) +; CHECK: br %r14 + %easy = call i32 asm "stepa $0", "=h"() + %cond = icmp ult i32 %limit, 42 + %other = load i32, i32 *%ptr + %res = select i1 %cond, i32 %other, i32 %easy + call void asm sideeffect "stepb $0", "h"(i32 %res) + ret void +} + +; Check the high end of the aligned LOC range. 
+define void @f3(i32 *%base, i32 %limit) { +; CHECK-LABEL: f3: +; CHECK-DAG: stepa [[REG:%r[0-5]]] +; CHECK-DAG: clfi %r3, 42 +; CHECK: locfhhe [[REG]], 524284(%r2) +; CHECK: br %r14 + %easy = call i32 asm "stepa $0", "=h"() + %ptr = getelementptr i32, i32 *%base, i64 131071 + %cond = icmp ult i32 %limit, 42 + %other = load i32, i32 *%ptr + %res = select i1 %cond, i32 %easy, i32 %other + call void asm sideeffect "stepb $0", "h"(i32 %res) + ret void +} + +; Check the next word up. Other sequences besides this one would be OK. +define void @f4(i32 *%base, i32 %limit) { +; CHECK-LABEL: f4: +; CHECK-DAG: stepa [[REG:%r[0-5]]] +; CHECK-DAG: agfi %r2, 524288 +; CHECK-DAG: clfi %r3, 42 +; CHECK: locfhhe [[REG]], 0(%r2) +; CHECK: br %r14 + %easy = call i32 asm "stepa $0", "=h"() + %ptr = getelementptr i32, i32 *%base, i64 131072 + %cond = icmp ult i32 %limit, 42 + %other = load i32, i32 *%ptr + %res = select i1 %cond, i32 %easy, i32 %other + call void asm sideeffect "stepb $0", "h"(i32 %res) + ret void +} + +; Check the low end of the LOC range. +define void @f5(i32 *%base, i32 %limit) { +; CHECK-LABEL: f5: +; CHECK-DAG: stepa [[REG:%r[0-5]]] +; CHECK-DAG: clfi %r3, 42 +; CHECK: locfhhe [[REG]], -524288(%r2) +; CHECK: br %r14 + %easy = call i32 asm "stepa $0", "=h"() + %ptr = getelementptr i32, i32 *%base, i64 -131072 + %cond = icmp ult i32 %limit, 42 + %other = load i32, i32 *%ptr + %res = select i1 %cond, i32 %easy, i32 %other + call void asm sideeffect "stepb $0", "h"(i32 %res) + ret void +} + +; Check the next word down, with the same comments as f4. 
+define void @f6(i32 *%base, i32 %limit) { +; CHECK-LABEL: f6: +; CHECK-DAG: stepa [[REG:%r[0-5]]] +; CHECK-DAG: clfi %r3, 42 +; CHECK-DAG: agfi %r2, -524292 +; CHECK-DAG: clfi %r3, 42 +; CHECK: locfhhe [[REG]], 0(%r2) +; CHECK: br %r14 + %easy = call i32 asm "stepa $0", "=h"() + %ptr = getelementptr i32, i32 *%base, i64 -131073 + %cond = icmp ult i32 %limit, 42 + %other = load i32, i32 *%ptr + %res = select i1 %cond, i32 %easy, i32 %other + call void asm sideeffect "stepb $0", "h"(i32 %res) + ret void +} + +; Try a frame index base. +define void @f7(i32 %alt, i32 %limit) { +; CHECK-LABEL: f7: +; CHECK: brasl %r14, foo@PLT +; CHECK: stepa [[REG:%r[0-5]]] +; CHECK: locfhhe [[REG]], {{[0-9]+}}(%r15) +; CHECK: br %r14 + %ptr = alloca i32 + call void @foo(i32 *%ptr) + %easy = call i32 asm "stepa $0", "=h"() + %cond = icmp ult i32 %limit, 42 + %other = load i32, i32 *%ptr + %res = select i1 %cond, i32 %easy, i32 %other + call void asm sideeffect "stepb $0", "h"(i32 %res) + ret void +} + +; Try a case when an index is involved. +define void @f8(i32 %limit, i64 %base, i64 %index) { +; CHECK-LABEL: f8: +; CHECK-DAG: stepa [[REG:%r[0-5]]] +; CHECK-DAG: clfi %r2, 42 +; CHECK: locfhhe [[REG]], 0({{%r[1-5]}}) +; CHECK: br %r14 + %easy = call i32 asm "stepa $0", "=h"() + %add = add i64 %base, %index + %ptr = inttoptr i64 %add to i32 * + %cond = icmp ult i32 %limit, 42 + %other = load i32, i32 *%ptr + %res = select i1 %cond, i32 %easy, i32 %other + call void asm sideeffect "stepb $0", "h"(i32 %res) + ret void +} + +; Test that conditionally-executed loads do not use LOC, since it is allowed +; to trap even when the condition is false. 
+define void @f9(i32 %limit, i32 *%ptr) { +; CHECK-LABEL: f9: +; CHECK-NOT: loc +; CHECK: lfh +; CHECK: br %r14 +entry: + %easy = call i32 asm "stepa $0", "=h"() + %cmp = icmp ule i32 %easy, %limit + br i1 %cmp, label %load, label %exit + +load: + %other = load i32, i32 *%ptr + br label %exit + +exit: + %res = phi i32 [ %easy, %entry ], [ %other, %load ] + call void asm sideeffect "stepb $0", "h"(i32 %res) + ret void +} diff --git a/llvm/test/CodeGen/SystemZ/cond-move-01.ll b/llvm/test/CodeGen/SystemZ/cond-move-01.ll index 088dee0232e..0be81c3ff80 100644 --- a/llvm/test/CodeGen/SystemZ/cond-move-01.ll +++ b/llvm/test/CodeGen/SystemZ/cond-move-01.ll @@ -1,6 +1,10 @@ ; Test LOCR and LOCGR. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 -verify-machineinstrs | FileCheck %s +; +; Run the test again to make sure it still works the same even +; in the presence of the load-store-on-condition-2 facility. +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -verify-machineinstrs | FileCheck %s ; Test LOCR. define i32 @f1(i32 %a, i32 %b, i32 %limit) { @@ -46,3 +50,76 @@ define i64 @f4(i64 %a, i64 %b, i64 %limit) { %res = select i1 %cond, i64 %a, i64 %b ret i64 %res } + +; Check that we also get LOCR as a result of early if-conversion. +define i32 @f5(i32 %a, i32 %b, i32 %limit) { +; CHECK-LABEL: f5: +; CHECK: clfi %r4, 41 +; CHECK: locrh %r2, %r3 +; CHECK: br %r14 +entry: + %cond = icmp ult i32 %limit, 42 + br i1 %cond, label %if.then, label %return + +if.then: + br label %return + +return: + %res = phi i32 [ %a, %if.then ], [ %b, %entry ] + ret i32 %res +} + +; ... and likewise for LOCGR. 
+define i64 @f6(i64 %a, i64 %b, i64 %limit) { +; CHECK-LABEL: f6: +; CHECK: clgfi %r4, 41 +; CHECK: locgrh %r2, %r3 +; CHECK: br %r14 +entry: + %cond = icmp ult i64 %limit, 42 + br i1 %cond, label %if.then, label %return + +if.then: + br label %return + +return: + %res = phi i64 [ %a, %if.then ], [ %b, %entry ] + ret i64 %res +} + +; Check that inverting the condition works as well. +define i32 @f7(i32 %a, i32 %b, i32 %limit) { +; CHECK-LABEL: f7: +; CHECK: clfi %r4, 41 +; CHECK: locrle %r2, %r3 +; CHECK: br %r14 +entry: + %cond = icmp ult i32 %limit, 42 + br i1 %cond, label %if.then, label %return + +if.then: + br label %return + +return: + %res = phi i32 [ %b, %if.then ], [ %a, %entry ] + ret i32 %res +} + +; ... and likewise for LOCGR. +define i64 @f8(i64 %a, i64 %b, i64 %limit) { +; CHECK-LABEL: f8: +; CHECK: clgfi %r4, 41 +; CHECK: locgrle %r2, %r3 +; CHECK: br %r14 +entry: + %cond = icmp ult i64 %limit, 42 + br i1 %cond, label %if.then, label %return + +if.then: + br label %return + +return: + %res = phi i64 [ %b, %if.then ], [ %a, %entry ] + ret i64 %res +} + diff --git a/llvm/test/CodeGen/SystemZ/cond-move-02.ll b/llvm/test/CodeGen/SystemZ/cond-move-02.ll new file mode 100644 index 00000000000..2e2bacd2532 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/cond-move-02.ll @@ -0,0 +1,138 @@ +; Test LOCHI and LOCGHI. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -verify-machineinstrs | FileCheck %s + +define i32 @f1(i32 %x) { +; CHECK-LABEL: f1: +; CHECK: lhi [[REG:%r[0-5]]], 0 +; CHECK: chi %r2, 0 +; CHECK: lochilh [[REG]], 42 +; CHECK: br %r14 + %cond = icmp ne i32 %x, 0 + %res = select i1 %cond, i32 42, i32 0 + ret i32 %res +} + +define i32 @f2(i32 %x, i32 %y) { +; CHECK-LABEL: f2: +; CHECK: chi %r2, 0 +; CHECK: lochilh %r3, 42 +; CHECK: br %r14 + %cond = icmp ne i32 %x, 0 + %res = select i1 %cond, i32 42, i32 %y + ret i32 %res +} + +define i32 @f3(i32 %x, i32 %y) { +; CHECK-LABEL: f3: +; CHECK: chi %r2, 0 +; CHECK: lochie %r3, 42 +; CHECK: br %r14 + %cond = icmp ne i32 %x, 0 + %res = select i1 %cond, i32 %y, i32 42 + ret i32 %res +} + +define i64 @f4(i64 %x) { +; CHECK-LABEL: f4: +; CHECK: lghi [[REG:%r[0-5]]], 0 +; CHECK: cghi %r2, 0 +; CHECK: locghilh [[REG]], 42 +; CHECK: br %r14 + %cond = icmp ne i64 %x, 0 + %res = select i1 %cond, i64 42, i64 0 + ret i64 %res +} + +define i64 @f5(i64 %x, i64 %y) { +; CHECK-LABEL: f5: +; CHECK: cghi %r2, 0 +; CHECK: locghilh %r3, 42 +; CHECK: br %r14 + %cond = icmp ne i64 %x, 0 + %res = select i1 %cond, i64 42, i64 %y + ret i64 %res +} + +define i64 @f6(i64 %x, i64 %y) { +; CHECK-LABEL: f6: +; CHECK: cghi %r2, 0 +; CHECK: locghie %r3, 42 +; CHECK: br %r14 + %cond = icmp ne i64 %x, 0 + %res = select i1 %cond, i64 %y, i64 42 + ret i64 %res +} + +; Check that we also get LOCHI as a result of early if-conversion. +define i32 @f7(i32 %x, i32 %y) { +; CHECK-LABEL: f7: +; CHECK: chi %r2, 0 +; CHECK: lochie %r3, 42 +; CHECK: br %r14 +entry: + %cond = icmp ne i32 %x, 0 + br i1 %cond, label %if.then, label %return + +if.then: + br label %return + +return: + %res = phi i32 [ %y, %if.then ], [ 42, %entry ] + ret i32 %res +} + +; ... and the same for LOCGHI. 
+define i64 @f8(i64 %x, i64 %y) { +; CHECK-LABEL: f8: +; CHECK: cghi %r2, 0 +; CHECK: locghie %r3, 42 +; CHECK: br %r14 +entry: + %cond = icmp ne i64 %x, 0 + br i1 %cond, label %if.then, label %return + +if.then: + br label %return + +return: + %res = phi i64 [ %y, %if.then ], [ 42, %entry ] + ret i64 %res +} + +; Check that inverting the condition works as well. +define i32 @f9(i32 %x, i32 %y) { +; CHECK-LABEL: f9: +; CHECK: chi %r2, 0 +; CHECK: lochilh %r3, 42 +; CHECK: br %r14 +entry: + %cond = icmp ne i32 %x, 0 + br i1 %cond, label %if.then, label %return + +if.then: + br label %return + +return: + %res = phi i32 [ 42, %if.then ], [ %y, %entry ] + ret i32 %res +} + +; ... and the same for LOCGHI. +define i64 @f10(i64 %x, i64 %y) { +; CHECK-LABEL: f10: +; CHECK: cghi %r2, 0 +; CHECK: locghilh %r3, 42 +; CHECK: br %r14 +entry: + %cond = icmp ne i64 %x, 0 + br i1 %cond, label %if.then, label %return + +if.then: + br label %return + +return: + %res = phi i64 [ 42, %if.then ], [ %y, %entry ] + ret i64 %res +} + diff --git a/llvm/test/CodeGen/SystemZ/cond-move-03.ll b/llvm/test/CodeGen/SystemZ/cond-move-03.ll new file mode 100644 index 00000000000..a9bf1c80310 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/cond-move-03.ll @@ -0,0 +1,213 @@ +; Test LOCFHR and LOCHHI. +; See comments in asm-18.ll about testing high-word operations. 
+; +; RUN: llc < %s -verify-machineinstrs -mtriple=s390x-linux-gnu -mcpu=z13 \ +; RUN: -no-integrated-as | FileCheck %s + +define void @f1(i32 %limit) { +; CHECK-LABEL: f1: +; CHECK-DAG: stepa [[REG1:%r[0-5]]] +; CHECK-DAG: stepb [[REG2:%r[0-5]]] +; CHECK-DAG: clfi %r2, 42 +; CHECK: locfhrl [[REG2]], [[REG1]] +; CHECK: stepc [[REG2]] +; CHECK: br %r14 + %a = call i32 asm sideeffect "stepa $0", "=h"() + %b = call i32 asm sideeffect "stepb $0", "=h"() + %cond = icmp ult i32 %limit, 42 + %res = select i1 %cond, i32 %a, i32 %b + call void asm sideeffect "stepc $0", "h"(i32 %res) + ret void +} + +; FIXME: We should commute the LOCRMux to save one move. +define void @f2(i32 %limit) { +; CHECK-LABEL: f2: +; CHECK-DAG: stepa [[REG1:%r[0-5]]] +; CHECK-DAG: stepb [[REG2:%r[0-5]]] +; CHECK-DAG: clijhe %r2, 42, +; CHECK: risblg [[REG2]], [[REG1]], 0, 159, 32 +; CHECK: risbhg [[REG1]], [[REG2]], 0, 159, 32 +; CHECK: stepc [[REG1]] +; CHECK: br %r14 + %dummy = call i32 asm sideeffect "dummy $0", "=h"() + %a = call i32 asm sideeffect "stepa $0", "=h"() + %b = call i32 asm sideeffect "stepb $0", "=r"() + %cond = icmp ult i32 %limit, 42 + %res = select i1 %cond, i32 %a, i32 %b + call void asm sideeffect "stepc $0", "h"(i32 %res) + call void asm sideeffect "dummy $0", "h"(i32 %dummy) + ret void +} + +define void @f3(i32 %limit) { +; CHECK-LABEL: f3: +; CHECK-DAG: stepa [[REG2:%r[0-5]]] +; CHECK-DAG: stepb [[REG1:%r[0-5]]] +; CHECK-DAG: clijhe %r2, 42, +; CHECK: risbhg [[REG1]], [[REG2]], 0, 159, 32 +; CHECK: stepc [[REG1]] +; CHECK: br %r14 + %dummy = call i32 asm sideeffect "dummy $0", "=h"() + %a = call i32 asm sideeffect "stepa $0", "=r"() + %b = call i32 asm sideeffect "stepb $0", "=h"() + %cond = icmp ult i32 %limit, 42 + %res = select i1 %cond, i32 %a, i32 %b + call void asm sideeffect "stepc $0", "h"(i32 %res) + call void asm sideeffect "dummy $0", "h"(i32 %dummy) + ret void +} + +; FIXME: We should commute the LOCRMux to save one move. 
+define void @f4(i32 %limit) { +; CHECK-LABEL: f4: +; CHECK-DAG: stepa [[REG1:%r[0-5]]] +; CHECK-DAG: stepb [[REG2:%r[0-5]]] +; CHECK-DAG: clijhe %r2, 42, +; CHECK: risbhg [[REG2]], [[REG1]], 0, 159, 32 +; CHECK: risblg [[REG1]], [[REG2]], 0, 159, 32 +; CHECK: stepc [[REG1]] +; CHECK: br %r14 + %dummy = call i32 asm sideeffect "dummy $0", "=h"() + %a = call i32 asm sideeffect "stepa $0", "=r"() + %b = call i32 asm sideeffect "stepb $0", "=h"() + %cond = icmp ult i32 %limit, 42 + %res = select i1 %cond, i32 %a, i32 %b + call void asm sideeffect "stepc $0", "r"(i32 %res) + call void asm sideeffect "dummy $0", "h"(i32 %dummy) + ret void +} + +define void @f5(i32 %limit) { +; CHECK-LABEL: f5: +; CHECK-DAG: stepa [[REG2:%r[0-5]]] +; CHECK-DAG: stepb [[REG1:%r[0-5]]] +; CHECK-DAG: clijhe %r2, 42, +; CHECK: risblg [[REG1]], [[REG2]], 0, 159, 32 +; CHECK: stepc [[REG1]] +; CHECK: br %r14 + %dummy = call i32 asm sideeffect "dummy $0", "=h"() + %a = call i32 asm sideeffect "stepa $0", "=h"() + %b = call i32 asm sideeffect "stepb $0", "=r"() + %cond = icmp ult i32 %limit, 42 + %res = select i1 %cond, i32 %a, i32 %b + call void asm sideeffect "stepc $0", "r"(i32 %res) + call void asm sideeffect "dummy $0", "h"(i32 %dummy) + ret void +} + +; Check that we also get LOCFHR as a result of early if-conversion. +define void @f6(i32 %limit) { +; CHECK-LABEL: f6: +; CHECK-DAG: stepa [[REG1:%r[0-5]]] +; CHECK-DAG: stepb [[REG2:%r[0-5]]] +; CHECK-DAG: clfi %r2, 41 +; CHECK: locfhrle [[REG2]], [[REG1]] +; CHECK: stepc [[REG2]] +; CHECK: br %r14 +entry: + %a = call i32 asm sideeffect "stepa $0", "=h"() + %b = call i32 asm sideeffect "stepb $0", "=h"() + %cond = icmp ult i32 %limit, 42 + br i1 %cond, label %if.then, label %return + +if.then: + br label %return + +return: + %res = phi i32 [ %a, %if.then ], [ %b, %entry ] + call void asm sideeffect "stepc $0", "h"(i32 %res) + ret void +} + +; Check that inverting the condition works as well. 
+define void @f7(i32 %limit) { +; CHECK-LABEL: f7: +; CHECK-DAG: stepa [[REG1:%r[0-5]]] +; CHECK-DAG: stepb [[REG2:%r[0-5]]] +; CHECK-DAG: clfi %r2, 41 +; CHECK: locfhrh [[REG2]], [[REG1]] +; CHECK: stepc [[REG2]] +; CHECK: br %r14 +entry: + %a = call i32 asm sideeffect "stepa $0", "=h"() + %b = call i32 asm sideeffect "stepb $0", "=h"() + %cond = icmp ult i32 %limit, 42 + br i1 %cond, label %if.then, label %return + +if.then: + br label %return + +return: + %res = phi i32 [ %b, %if.then ], [ %a, %entry ] + call void asm sideeffect "stepc $0", "h"(i32 %res) + ret void +} + +define void @f8(i32 %limit) { +; CHECK-LABEL: f8: +; CHECK: clfi %r2, 42 +; CHECK: lochhil [[REG:%r[0-5]]], 32767 +; CHECK: stepa [[REG]] +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %res = select i1 %cond, i32 32767, i32 0 + call void asm sideeffect "stepa $0", "h"(i32 %res) + ret void +} + +define void @f9(i32 %limit) { +; CHECK-LABEL: f9: +; CHECK: clfi %r2, 42 +; CHECK: lochhil [[REG:%r[0-5]]], -32768 +; CHECK: stepa [[REG]] +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %res = select i1 %cond, i32 -32768, i32 0 + call void asm sideeffect "stepa $0", "h"(i32 %res) + ret void +} + +; Check that we also get LOCHHI as a result of early if-conversion. +define void @f10(i32 %limit) { +; CHECK-LABEL: f10: +; CHECK-DAG: stepa [[REG:%r[0-5]]] +; CHECK-DAG: clfi %r2, 41 +; CHECK: lochhile [[REG]], 123 +; CHECK: stepb [[REG]] +; CHECK: br %r14 +entry: + %a = call i32 asm sideeffect "stepa $0", "=h"() + %cond = icmp ult i32 %limit, 42 + br i1 %cond, label %if.then, label %return + +if.then: + br label %return + +return: + %res = phi i32 [ 123, %if.then ], [ %a, %entry ] + call void asm sideeffect "stepb $0", "h"(i32 %res) + ret void +} + +; Check that inverting the condition works as well. 
+define void @f11(i32 %limit) { +; CHECK-LABEL: f11: +; CHECK-DAG: stepa [[REG:%r[0-5]]] +; CHECK-DAG: clfi %r2, 41 +; CHECK: lochhih [[REG]], 123 +; CHECK: stepb [[REG]] +; CHECK: br %r14 +entry: + %a = call i32 asm sideeffect "stepa $0", "=h"() + %cond = icmp ult i32 %limit, 42 + br i1 %cond, label %if.then, label %return + +if.then: + br label %return + +return: + %res = phi i32 [ %a, %if.then ], [ 123, %entry ] + call void asm sideeffect "stepb $0", "h"(i32 %res) + ret void +} diff --git a/llvm/test/CodeGen/SystemZ/cond-store-07.ll b/llvm/test/CodeGen/SystemZ/cond-store-07.ll index 35b1303f402..79b4f87006b 100644 --- a/llvm/test/CodeGen/SystemZ/cond-store-07.ll +++ b/llvm/test/CodeGen/SystemZ/cond-store-07.ll @@ -2,6 +2,10 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s +; Run the test again to make sure it still works the same even +; in the presence of the load-store-on-condition-2 facility. +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + declare void @foo(i32 *) ; Test the simple case, with the loaded value first. diff --git a/llvm/test/CodeGen/SystemZ/cond-store-09.ll b/llvm/test/CodeGen/SystemZ/cond-store-09.ll new file mode 100644 index 00000000000..bf7a8b88007 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/cond-store-09.ll @@ -0,0 +1,142 @@ +; Test STOCFHs that are presented as selects. +; See comments in asm-18.ll about testing high-word operations. +; +; RUN: llc < %s -verify-machineinstrs -mtriple=s390x-linux-gnu -mcpu=z13 \ +; RUN: -no-integrated-as | FileCheck %s + +declare void @foo(i32 *) + +; Test the simple case, with the loaded value first. 
+define void @f1(i32 *%ptr, i32 %limit) { +; CHECK-LABEL: f1: +; CHECK-DAG: stepa [[REG:%r[0-5]]] +; CHECK-DAG: clfi %r3, 42 +; CHECK: stocfhhe [[REG]], 0(%r2) +; CHECK: br %r14 + %alt = call i32 asm "stepa $0", "=h"() + %cond = icmp ult i32 %limit, 42 + %orig = load i32, i32 *%ptr + %res = select i1 %cond, i32 %orig, i32 %alt + store i32 %res, i32 *%ptr + ret void +} + +; ...and with the loaded value second +define void @f2(i32 *%ptr, i32 %limit) { +; CHECK-LABEL: f2: +; CHECK-DAG: stepa [[REG:%r[0-5]]] +; CHECK-DAG: clfi %r3, 42 +; CHECK: stocfhl [[REG]], 0(%r2) +; CHECK: br %r14 + %alt = call i32 asm "stepa $0", "=h"() + %cond = icmp ult i32 %limit, 42 + %orig = load i32, i32 *%ptr + %res = select i1 %cond, i32 %alt, i32 %orig + store i32 %res, i32 *%ptr + ret void +} + +; Check the high end of the aligned STOC range. +define void @f3(i32 *%base, i32 %limit) { +; CHECK-LABEL: f3: +; CHECK-DAG: stepa [[REG:%r[0-5]]] +; CHECK-DAG: clfi %r3, 42 +; CHECK: stocfhhe [[REG]], 524284(%r2) +; CHECK: br %r14 + %alt = call i32 asm "stepa $0", "=h"() + %ptr = getelementptr i32, i32 *%base, i64 131071 + %cond = icmp ult i32 %limit, 42 + %orig = load i32, i32 *%ptr + %res = select i1 %cond, i32 %orig, i32 %alt + store i32 %res, i32 *%ptr + ret void +} + +; Check the next word up. Other sequences besides this one would be OK. +define void @f4(i32 *%base, i32 %limit) { +; CHECK-LABEL: f4: +; CHECK-DAG: stepa [[REG:%r[0-5]]] +; CHECK-DAG: agfi %r2, 524288 +; CHECK-DAG: clfi %r3, 42 +; CHECK: stocfhhe [[REG]], 0(%r2) +; CHECK: br %r14 + %alt = call i32 asm "stepa $0", "=h"() + %ptr = getelementptr i32, i32 *%base, i64 131072 + %cond = icmp ult i32 %limit, 42 + %orig = load i32, i32 *%ptr + %res = select i1 %cond, i32 %orig, i32 %alt + store i32 %res, i32 *%ptr + ret void +} + +; Check the low end of the STOC range. 
+define void @f5(i32 *%base, i32 %limit) { +; CHECK-LABEL: f5: +; CHECK-DAG: stepa [[REG:%r[0-5]]] +; CHECK-DAG: clfi %r3, 42 +; CHECK: stocfhhe [[REG]], -524288(%r2) +; CHECK: br %r14 + %alt = call i32 asm "stepa $0", "=h"() + %ptr = getelementptr i32, i32 *%base, i64 -131072 + %cond = icmp ult i32 %limit, 42 + %orig = load i32, i32 *%ptr + %res = select i1 %cond, i32 %orig, i32 %alt + store i32 %res, i32 *%ptr + ret void +} + +; Check the next word down, with the same comments as f8. +define void @f6(i32 *%base, i32 %limit) { +; CHECK-LABEL: f6: +; CHECK-DAG: stepa [[REG:%r[0-5]]] +; CHECK-DAG: agfi %r2, -524292 +; CHECK-DAG: clfi %r3, 42 +; CHECK: stocfhhe [[REG]], 0(%r2) +; CHECK: br %r14 + %alt = call i32 asm "stepa $0", "=h"() + %ptr = getelementptr i32, i32 *%base, i64 -131073 + %cond = icmp ult i32 %limit, 42 + %orig = load i32, i32 *%ptr + %res = select i1 %cond, i32 %orig, i32 %alt + store i32 %res, i32 *%ptr + ret void +} + +; Try a frame index base. +define void @f7(i32 %limit) { +; CHECK-LABEL: f7: +; CHECK: brasl %r14, foo@PLT +; CHECK: stepa [[REG:%r[0-5]]] +; CHECK: stocfhhe [[REG]], {{[0-9]+}}(%r15) +; CHECK: brasl %r14, foo@PLT +; CHECK: br %r14 + %ptr = alloca i32 + call void @foo(i32 *%ptr) + %alt = call i32 asm "stepa $0", "=h"() + %cond = icmp ult i32 %limit, 42 + %orig = load i32, i32 *%ptr + %res = select i1 %cond, i32 %orig, i32 %alt + store i32 %res, i32 *%ptr + call void @foo(i32 *%ptr) + ret void +} + +; Test that conditionally-executed stores do not use STOC, since STOC +; is allowed to trap even when the condition is false. 
+define void @f8(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f8: +; CHECK-NOT: stoc +; CHECK: stfh +; CHECK: br %r14 +entry: + %val = call i32 asm "stepa $0", "=h"() + %cmp = icmp ule i32 %a, %b + br i1 %cmp, label %store, label %exit + +store: + store i32 %val, i32 *%dest + br label %exit + +exit: + ret void +} diff --git a/llvm/test/MC/Disassembler/SystemZ/insns-z13.txt b/llvm/test/MC/Disassembler/SystemZ/insns-z13.txt index b9d665726b8..5a983860df1 100644 --- a/llvm/test/MC/Disassembler/SystemZ/insns-z13.txt +++ b/llvm/test/MC/Disassembler/SystemZ/insns-z13.txt @@ -4414,3 +4414,196 @@ #CHECK: locghi %r11, 32512, 15 0xec 0xbf 0x7f 0x00 0x00 0x46 + +#CHECK: lochhi %r11, 42, 0 +0xec 0xb0 0x00 0x2a 0x00 0x4e + +#CHECK: lochhio %r11, 42 +0xec 0xb1 0x00 0x2a 0x00 0x4e + +#CHECK: lochhih %r11, 42 +0xec 0xb2 0x00 0x2a 0x00 0x4e + +#CHECK: lochhinle %r11, 42 +0xec 0xb3 0x00 0x2a 0x00 0x4e + +#CHECK: lochhil %r11, -1 +0xec 0xb4 0xff 0xff 0x00 0x4e + +#CHECK: lochhinhe %r11, 42 +0xec 0xb5 0x00 0x2a 0x00 0x4e + +#CHECK: lochhilh %r11, -1 +0xec 0xb6 0xff 0xff 0x00 0x4e + +#CHECK: lochhine %r11, 0 +0xec 0xb7 0x00 0x00 0x00 0x4e + +#CHECK: lochhie %r11, 0 +0xec 0xb8 0x00 0x00 0x00 0x4e + +#CHECK: lochhinlh %r11, 42 +0xec 0xb9 0x00 0x2a 0x00 0x4e + +#CHECK: lochhihe %r11, 255 +0xec 0xba 0x00 0xff 0x00 0x4e + +#CHECK: lochhinl %r11, 255 +0xec 0xbb 0x00 0xff 0x00 0x4e + +#CHECK: lochhile %r11, 32767 +0xec 0xbc 0x7f 0xff 0x00 0x4e + +#CHECK: lochhinh %r11, 32767 +0xec 0xbd 0x7f 0xff 0x00 0x4e + +#CHECK: lochhino %r11, 32512 +0xec 0xbe 0x7f 0x00 0x00 0x4e + +#CHECK: lochhi %r11, 32512, 15 +0xec 0xbf 0x7f 0x00 0x00 0x4e + +# CHECK: locfh %r7, 6399(%r8), 0 +0xeb 0x70 0x88 0xff 0x01 0xe0 + +# CHECK: locfho %r7, 6399(%r8) +0xeb 0x71 0x88 0xff 0x01 0xe0 + +# CHECK: locfhh %r7, 6399(%r8) +0xeb 0x72 0x88 0xff 0x01 0xe0 + +# CHECK: locfhnle %r7, 6399(%r8) +0xeb 0x73 0x88 0xff 0x01 0xe0 + +# CHECK: locfhl %r7, 6399(%r8) +0xeb 0x74 0x88 0xff 0x01 0xe0 + +# CHECK: locfhnhe %r7, 6399(%r8) 
+0xeb 0x75 0x88 0xff 0x01 0xe0 + +# CHECK: locfhlh %r7, 6399(%r8) +0xeb 0x76 0x88 0xff 0x01 0xe0 + +# CHECK: locfhne %r7, 6399(%r8) +0xeb 0x77 0x88 0xff 0x01 0xe0 + +# CHECK: locfhe %r7, 6399(%r8) +0xeb 0x78 0x88 0xff 0x01 0xe0 + +# CHECK: locfhnlh %r7, 6399(%r8) +0xeb 0x79 0x88 0xff 0x01 0xe0 + +# CHECK: locfhhe %r7, 6399(%r8) +0xeb 0x7a 0x88 0xff 0x01 0xe0 + +# CHECK: locfhnl %r7, 6399(%r8) +0xeb 0x7b 0x88 0xff 0x01 0xe0 + +# CHECK: locfhle %r7, 6399(%r8) +0xeb 0x7c 0x88 0xff 0x01 0xe0 + +# CHECK: locfhnh %r7, 6399(%r8) +0xeb 0x7d 0x88 0xff 0x01 0xe0 + +# CHECK: locfhno %r7, 6399(%r8) +0xeb 0x7e 0x88 0xff 0x01 0xe0 + +# CHECK: locfh %r7, 6399(%r8), 15 +0xeb 0x7f 0x88 0xff 0x01 0xe0 + +# CHECK: locfhr %r11, %r3, 0 +0xb9 0xe0 0x00 0xb3 + +# CHECK: locfhro %r11, %r3 +0xb9 0xe0 0x10 0xb3 + +# CHECK: locfhrh %r11, %r3 +0xb9 0xe0 0x20 0xb3 + +# CHECK: locfhrnle %r11, %r3 +0xb9 0xe0 0x30 0xb3 + +# CHECK: locfhrl %r11, %r3 +0xb9 0xe0 0x40 0xb3 + +# CHECK: locfhrnhe %r11, %r3 +0xb9 0xe0 0x50 0xb3 + +# CHECK: locfhrlh %r11, %r3 +0xb9 0xe0 0x60 0xb3 + +# CHECK: locfhrne %r11, %r3 +0xb9 0xe0 0x70 0xb3 + +# CHECK: locfhre %r11, %r3 +0xb9 0xe0 0x80 0xb3 + +# CHECK: locfhrnlh %r11, %r3 +0xb9 0xe0 0x90 0xb3 + +# CHECK: locfhrhe %r11, %r3 +0xb9 0xe0 0xa0 0xb3 + +# CHECK: locfhrnl %r11, %r3 +0xb9 0xe0 0xb0 0xb3 + +# CHECK: locfhrle %r11, %r3 +0xb9 0xe0 0xc0 0xb3 + +# CHECK: locfhrnh %r11, %r3 +0xb9 0xe0 0xd0 0xb3 + +# CHECK: locfhrno %r11, %r3 +0xb9 0xe0 0xe0 0xb3 + +# CHECK: locfhr %r11, %r3, 15 +0xb9 0xe0 0xf0 0xb3 + +# CHECK: stocfh %r1, 2(%r3), 0 +0xeb 0x10 0x30 0x02 0x00 0xe1 + +# CHECK: stocfho %r1, 2(%r3) +0xeb 0x11 0x30 0x02 0x00 0xe1 + +# CHECK: stocfhh %r1, 2(%r3) +0xeb 0x12 0x30 0x02 0x00 0xe1 + +# CHECK: stocfhnle %r1, 2(%r3) +0xeb 0x13 0x30 0x02 0x00 0xe1 + +# CHECK: stocfhl %r1, 2(%r3) +0xeb 0x14 0x30 0x02 0x00 0xe1 + +# CHECK: stocfhnhe %r1, 2(%r3) +0xeb 0x15 0x30 0x02 0x00 0xe1 + +# CHECK: stocfhlh %r1, 2(%r3) +0xeb 0x16 0x30 0x02 0x00 0xe1 + +# CHECK: stocfhne 
%r1, 2(%r3) +0xeb 0x17 0x30 0x02 0x00 0xe1 + +# CHECK: stocfhe %r1, 2(%r3) +0xeb 0x18 0x30 0x02 0x00 0xe1 + +# CHECK: stocfhnlh %r1, 2(%r3) +0xeb 0x19 0x30 0x02 0x00 0xe1 + +# CHECK: stocfhhe %r1, 2(%r3) +0xeb 0x1a 0x30 0x02 0x00 0xe1 + +# CHECK: stocfhnl %r1, 2(%r3) +0xeb 0x1b 0x30 0x02 0x00 0xe1 + +# CHECK: stocfhle %r1, 2(%r3) +0xeb 0x1c 0x30 0x02 0x00 0xe1 + +# CHECK: stocfhnh %r1, 2(%r3) +0xeb 0x1d 0x30 0x02 0x00 0xe1 + +# CHECK: stocfhno %r1, 2(%r3) +0xeb 0x1e 0x30 0x02 0x00 0xe1 + +# CHECK: stocfh %r1, 2(%r3), 15 +0xeb 0x1f 0x30 0x02 0x00 0xe1 + diff --git a/llvm/test/MC/SystemZ/insn-bad-z13.s b/llvm/test/MC/SystemZ/insn-bad-z13.s index 87f1ce8d2ab..db2de118bf3 100644 --- a/llvm/test/MC/SystemZ/insn-bad-z13.s +++ b/llvm/test/MC/SystemZ/insn-bad-z13.s @@ -1960,3 +1960,56 @@ locghie %f0, 0 locghie 0, %r0 +#CHECK: error: invalid operand +#CHECK: lochhie %r0, 66000 +#CHECK: error: invalid operand +#CHECK: lochhie %f0, 0 +#CHECK: error: invalid operand +#CHECK: lochhie 0, %r0 + + lochhie %r0, 66000 + lochhie %f0, 0 + lochhie 0, %r0 + +#CHECK: error: invalid operand +#CHECK: locfh %r0,0,-1 +#CHECK: error: invalid operand +#CHECK: locfh %r0,0,16 +#CHECK: error: invalid operand +#CHECK: locfh %r0,-524289,1 +#CHECK: error: invalid operand +#CHECK: locfh %r0,524288,1 +#CHECK: error: invalid use of indexed addressing +#CHECK: locfh %r0,0(%r1,%r2),1 + + locfh %r0,0,-1 + locfh %r0,0,16 + locfh %r0,-524289,1 + locfh %r0,524288,1 + locfh %r0,0(%r1,%r2),1 + +#CHECK: error: invalid operand +#CHECK: locfhr %r0,%r0,-1 +#CHECK: error: invalid operand +#CHECK: locfhr %r0,%r0,16 + + locfhr %r0,%r0,-1 + locfhr %r0,%r0,16 + +#CHECK: error: invalid operand +#CHECK: stocfh %r0,0,-1 +#CHECK: error: invalid operand +#CHECK: stocfh %r0,0,16 +#CHECK: error: invalid operand +#CHECK: stocfh %r0,-524289,1 +#CHECK: error: invalid operand +#CHECK: stocfh %r0,524288,1 +#CHECK: error: invalid use of indexed addressing +#CHECK: stocfh %r0,0(%r1,%r2),1 + + stocfh %r0,0,-1 + stocfh %r0,0,16 + 
stocfh %r0,-524289,1 + stocfh %r0,524288,1 + stocfh %r0,0(%r1,%r2),1 + diff --git a/llvm/test/MC/SystemZ/insn-good-z13.s b/llvm/test/MC/SystemZ/insn-good-z13.s index 73f3075975a..4fd6a664a29 100644 --- a/llvm/test/MC/SystemZ/insn-good-z13.s +++ b/llvm/test/MC/SystemZ/insn-good-z13.s @@ -6892,3 +6892,206 @@ locghinh %r11, 32767 locghino %r11, 32512 locghi %r11, 32512, 15 + +#CHECK: lochhi %r11, 42, 0 # encoding: [0xec,0xb0,0x00,0x2a,0x00,0x4e] +#CHECK: lochhio %r11, 42 # encoding: [0xec,0xb1,0x00,0x2a,0x00,0x4e] +#CHECK: lochhih %r11, 42 # encoding: [0xec,0xb2,0x00,0x2a,0x00,0x4e] +#CHECK: lochhinle %r11, 42 # encoding: [0xec,0xb3,0x00,0x2a,0x00,0x4e] +#CHECK: lochhil %r11, -1 # encoding: [0xec,0xb4,0xff,0xff,0x00,0x4e] +#CHECK: lochhinhe %r11, 42 # encoding: [0xec,0xb5,0x00,0x2a,0x00,0x4e] +#CHECK: lochhilh %r11, -1 # encoding: [0xec,0xb6,0xff,0xff,0x00,0x4e] +#CHECK: lochhine %r11, 0 # encoding: [0xec,0xb7,0x00,0x00,0x00,0x4e] +#CHECK: lochhie %r11, 0 # encoding: [0xec,0xb8,0x00,0x00,0x00,0x4e] +#CHECK: lochhinlh %r11, 42 # encoding: [0xec,0xb9,0x00,0x2a,0x00,0x4e] +#CHECK: lochhihe %r11, 255 # encoding: [0xec,0xba,0x00,0xff,0x00,0x4e] +#CHECK: lochhinl %r11, 255 # encoding: [0xec,0xbb,0x00,0xff,0x00,0x4e] +#CHECK: lochhile %r11, 32767 # encoding: [0xec,0xbc,0x7f,0xff,0x00,0x4e] +#CHECK: lochhinh %r11, 32767 # encoding: [0xec,0xbd,0x7f,0xff,0x00,0x4e] +#CHECK: lochhino %r11, 32512 # encoding: [0xec,0xbe,0x7f,0x00,0x00,0x4e] +#CHECK: lochhi %r11, 32512, 15 # encoding: [0xec,0xbf,0x7f,0x00,0x00,0x4e] + + lochhi %r11, 42, 0 + lochhio %r11, 42 + lochhih %r11, 42 + lochhinle %r11, 42 + lochhil %r11, -1 + lochhinhe %r11, 42 + lochhilh %r11, -1 + lochhine %r11, 0 + lochhie %r11, 0 + lochhinlh %r11, 42 + lochhihe %r11, 255 + lochhinl %r11, 255 + lochhile %r11, 32767 + lochhinh %r11, 32767 + lochhino %r11, 32512 + lochhi %r11, 32512, 15 + +#CHECK: locfh %r0, 0, 0 # encoding: [0xeb,0x00,0x00,0x00,0x00,0xe0] +#CHECK: locfh %r0, 0, 15 # encoding: 
[0xeb,0x0f,0x00,0x00,0x00,0xe0] +#CHECK: locfh %r0, -524288, 0 # encoding: [0xeb,0x00,0x00,0x00,0x80,0xe0] +#CHECK: locfh %r0, 524287, 0 # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0xe0] +#CHECK: locfh %r0, 0(%r1), 0 # encoding: [0xeb,0x00,0x10,0x00,0x00,0xe0] +#CHECK: locfh %r0, 0(%r15), 0 # encoding: [0xeb,0x00,0xf0,0x00,0x00,0xe0] +#CHECK: locfh %r15, 0, 0 # encoding: [0xeb,0xf0,0x00,0x00,0x00,0xe0] +#CHECK: locfh %r1, 4095(%r2), 3 # encoding: [0xeb,0x13,0x2f,0xff,0x00,0xe0] + + locfh %r0,0,0 + locfh %r0,0,15 + locfh %r0,-524288,0 + locfh %r0,524287,0 + locfh %r0,0(%r1),0 + locfh %r0,0(%r15),0 + locfh %r15,0,0 + locfh %r1,4095(%r2),3 + +#CHECK: locfho %r1, 2(%r3) # encoding: [0xeb,0x11,0x30,0x02,0x00,0xe0] +#CHECK: locfhh %r1, 2(%r3) # encoding: [0xeb,0x12,0x30,0x02,0x00,0xe0] +#CHECK: locfhp %r1, 2(%r3) # encoding: [0xeb,0x12,0x30,0x02,0x00,0xe0] +#CHECK: locfhnle %r1, 2(%r3) # encoding: [0xeb,0x13,0x30,0x02,0x00,0xe0] +#CHECK: locfhl %r1, 2(%r3) # encoding: [0xeb,0x14,0x30,0x02,0x00,0xe0] +#CHECK: locfhm %r1, 2(%r3) # encoding: [0xeb,0x14,0x30,0x02,0x00,0xe0] +#CHECK: locfhnhe %r1, 2(%r3) # encoding: [0xeb,0x15,0x30,0x02,0x00,0xe0] +#CHECK: locfhlh %r1, 2(%r3) # encoding: [0xeb,0x16,0x30,0x02,0x00,0xe0] +#CHECK: locfhne %r1, 2(%r3) # encoding: [0xeb,0x17,0x30,0x02,0x00,0xe0] +#CHECK: locfhnz %r1, 2(%r3) # encoding: [0xeb,0x17,0x30,0x02,0x00,0xe0] +#CHECK: locfhe %r1, 2(%r3) # encoding: [0xeb,0x18,0x30,0x02,0x00,0xe0] +#CHECK: locfhz %r1, 2(%r3) # encoding: [0xeb,0x18,0x30,0x02,0x00,0xe0] +#CHECK: locfhnlh %r1, 2(%r3) # encoding: [0xeb,0x19,0x30,0x02,0x00,0xe0] +#CHECK: locfhhe %r1, 2(%r3) # encoding: [0xeb,0x1a,0x30,0x02,0x00,0xe0] +#CHECK: locfhnl %r1, 2(%r3) # encoding: [0xeb,0x1b,0x30,0x02,0x00,0xe0] +#CHECK: locfhnm %r1, 2(%r3) # encoding: [0xeb,0x1b,0x30,0x02,0x00,0xe0] +#CHECK: locfhle %r1, 2(%r3) # encoding: [0xeb,0x1c,0x30,0x02,0x00,0xe0] +#CHECK: locfhnh %r1, 2(%r3) # encoding: [0xeb,0x1d,0x30,0x02,0x00,0xe0] +#CHECK: locfhnp %r1, 2(%r3) # encoding: 
[0xeb,0x1d,0x30,0x02,0x00,0xe0] +#CHECK: locfhno %r1, 2(%r3) # encoding: [0xeb,0x1e,0x30,0x02,0x00,0xe0] + + locfho %r1,2(%r3) + locfhh %r1,2(%r3) + locfhp %r1,2(%r3) + locfhnle %r1,2(%r3) + locfhl %r1,2(%r3) + locfhm %r1,2(%r3) + locfhnhe %r1,2(%r3) + locfhlh %r1,2(%r3) + locfhne %r1,2(%r3) + locfhnz %r1,2(%r3) + locfhe %r1,2(%r3) + locfhz %r1,2(%r3) + locfhnlh %r1,2(%r3) + locfhhe %r1,2(%r3) + locfhnl %r1,2(%r3) + locfhnm %r1,2(%r3) + locfhle %r1,2(%r3) + locfhnh %r1,2(%r3) + locfhnp %r1,2(%r3) + locfhno %r1,2(%r3) + +#CHECK: locfhr %r1, %r2, 0 # encoding: [0xb9,0xe0,0x00,0x12] +#CHECK: locfhr %r1, %r2, 15 # encoding: [0xb9,0xe0,0xf0,0x12] + + locfhr %r1,%r2,0 + locfhr %r1,%r2,15 + +#CHECK: locfhro %r1, %r3 # encoding: [0xb9,0xe0,0x10,0x13] +#CHECK: locfhrh %r1, %r3 # encoding: [0xb9,0xe0,0x20,0x13] +#CHECK: locfhrp %r1, %r3 # encoding: [0xb9,0xe0,0x20,0x13] +#CHECK: locfhrnle %r1, %r3 # encoding: [0xb9,0xe0,0x30,0x13] +#CHECK: locfhrl %r1, %r3 # encoding: [0xb9,0xe0,0x40,0x13] +#CHECK: locfhrm %r1, %r3 # encoding: [0xb9,0xe0,0x40,0x13] +#CHECK: locfhrnhe %r1, %r3 # encoding: [0xb9,0xe0,0x50,0x13] +#CHECK: locfhrlh %r1, %r3 # encoding: [0xb9,0xe0,0x60,0x13] +#CHECK: locfhrne %r1, %r3 # encoding: [0xb9,0xe0,0x70,0x13] +#CHECK: locfhrnz %r1, %r3 # encoding: [0xb9,0xe0,0x70,0x13] +#CHECK: locfhre %r1, %r3 # encoding: [0xb9,0xe0,0x80,0x13] +#CHECK: locfhrz %r1, %r3 # encoding: [0xb9,0xe0,0x80,0x13] +#CHECK: locfhrnlh %r1, %r3 # encoding: [0xb9,0xe0,0x90,0x13] +#CHECK: locfhrhe %r1, %r3 # encoding: [0xb9,0xe0,0xa0,0x13] +#CHECK: locfhrnl %r1, %r3 # encoding: [0xb9,0xe0,0xb0,0x13] +#CHECK: locfhrnm %r1, %r3 # encoding: [0xb9,0xe0,0xb0,0x13] +#CHECK: locfhrle %r1, %r3 # encoding: [0xb9,0xe0,0xc0,0x13] +#CHECK: locfhrnh %r1, %r3 # encoding: [0xb9,0xe0,0xd0,0x13] +#CHECK: locfhrnp %r1, %r3 # encoding: [0xb9,0xe0,0xd0,0x13] +#CHECK: locfhrno %r1, %r3 # encoding: [0xb9,0xe0,0xe0,0x13] + + locfhro %r1,%r3 + locfhrh %r1,%r3 + locfhrp %r1,%r3 + locfhrnle %r1,%r3 + locfhrl 
%r1,%r3 + locfhrm %r1,%r3 + locfhrnhe %r1,%r3 + locfhrlh %r1,%r3 + locfhrne %r1,%r3 + locfhrnz %r1,%r3 + locfhre %r1,%r3 + locfhrz %r1,%r3 + locfhrnlh %r1,%r3 + locfhrhe %r1,%r3 + locfhrnl %r1,%r3 + locfhrnm %r1,%r3 + locfhrle %r1,%r3 + locfhrnh %r1,%r3 + locfhrnp %r1,%r3 + locfhrno %r1,%r3 + +#CHECK: stocfh %r0, 0, 0 # encoding: [0xeb,0x00,0x00,0x00,0x00,0xe1] +#CHECK: stocfh %r0, 0, 15 # encoding: [0xeb,0x0f,0x00,0x00,0x00,0xe1] +#CHECK: stocfh %r0, -524288, 0 # encoding: [0xeb,0x00,0x00,0x00,0x80,0xe1] +#CHECK: stocfh %r0, 524287, 0 # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0xe1] +#CHECK: stocfh %r0, 0(%r1), 0 # encoding: [0xeb,0x00,0x10,0x00,0x00,0xe1] +#CHECK: stocfh %r0, 0(%r15), 0 # encoding: [0xeb,0x00,0xf0,0x00,0x00,0xe1] +#CHECK: stocfh %r15, 0, 0 # encoding: [0xeb,0xf0,0x00,0x00,0x00,0xe1] +#CHECK: stocfh %r1, 4095(%r2), 3 # encoding: [0xeb,0x13,0x2f,0xff,0x00,0xe1] + + stocfh %r0,0,0 + stocfh %r0,0,15 + stocfh %r0,-524288,0 + stocfh %r0,524287,0 + stocfh %r0,0(%r1),0 + stocfh %r0,0(%r15),0 + stocfh %r15,0,0 + stocfh %r1,4095(%r2),3 + +#CHECK: stocfho %r1, 2(%r3) # encoding: [0xeb,0x11,0x30,0x02,0x00,0xe1] +#CHECK: stocfhh %r1, 2(%r3) # encoding: [0xeb,0x12,0x30,0x02,0x00,0xe1] +#CHECK: stocfhp %r1, 2(%r3) # encoding: [0xeb,0x12,0x30,0x02,0x00,0xe1] +#CHECK: stocfhnle %r1, 2(%r3) # encoding: [0xeb,0x13,0x30,0x02,0x00,0xe1] +#CHECK: stocfhl %r1, 2(%r3) # encoding: [0xeb,0x14,0x30,0x02,0x00,0xe1] +#CHECK: stocfhm %r1, 2(%r3) # encoding: [0xeb,0x14,0x30,0x02,0x00,0xe1] +#CHECK: stocfhnhe %r1, 2(%r3) # encoding: [0xeb,0x15,0x30,0x02,0x00,0xe1] +#CHECK: stocfhlh %r1, 2(%r3) # encoding: [0xeb,0x16,0x30,0x02,0x00,0xe1] +#CHECK: stocfhne %r1, 2(%r3) # encoding: [0xeb,0x17,0x30,0x02,0x00,0xe1] +#CHECK: stocfhnz %r1, 2(%r3) # encoding: [0xeb,0x17,0x30,0x02,0x00,0xe1] +#CHECK: stocfhe %r1, 2(%r3) # encoding: [0xeb,0x18,0x30,0x02,0x00,0xe1] +#CHECK: stocfhz %r1, 2(%r3) # encoding: [0xeb,0x18,0x30,0x02,0x00,0xe1] +#CHECK: stocfhnlh %r1, 2(%r3) # encoding: 
[0xeb,0x19,0x30,0x02,0x00,0xe1] +#CHECK: stocfhhe %r1, 2(%r3) # encoding: [0xeb,0x1a,0x30,0x02,0x00,0xe1] +#CHECK: stocfhnl %r1, 2(%r3) # encoding: [0xeb,0x1b,0x30,0x02,0x00,0xe1] +#CHECK: stocfhnm %r1, 2(%r3) # encoding: [0xeb,0x1b,0x30,0x02,0x00,0xe1] +#CHECK: stocfhle %r1, 2(%r3) # encoding: [0xeb,0x1c,0x30,0x02,0x00,0xe1] +#CHECK: stocfhnh %r1, 2(%r3) # encoding: [0xeb,0x1d,0x30,0x02,0x00,0xe1] +#CHECK: stocfhnp %r1, 2(%r3) # encoding: [0xeb,0x1d,0x30,0x02,0x00,0xe1] +#CHECK: stocfhno %r1, 2(%r3) # encoding: [0xeb,0x1e,0x30,0x02,0x00,0xe1] + + stocfho %r1,2(%r3) + stocfhh %r1,2(%r3) + stocfhp %r1,2(%r3) + stocfhnle %r1,2(%r3) + stocfhl %r1,2(%r3) + stocfhm %r1,2(%r3) + stocfhnhe %r1,2(%r3) + stocfhlh %r1,2(%r3) + stocfhne %r1,2(%r3) + stocfhnz %r1,2(%r3) + stocfhe %r1,2(%r3) + stocfhz %r1,2(%r3) + stocfhnlh %r1,2(%r3) + stocfhhe %r1,2(%r3) + stocfhnl %r1,2(%r3) + stocfhnm %r1,2(%r3) + stocfhle %r1,2(%r3) + stocfhnh %r1,2(%r3) + stocfhnp %r1,2(%r3) + stocfhno %r1,2(%r3) + |