diff options
Diffstat (limited to 'llvm/lib/Target/X86')
20 files changed, 290 insertions, 256 deletions
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h index f13254ffa54..29b6efb922e 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h +++ b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -352,9 +352,15 @@ namespace X86II { MRMSrcMemCC = 36, /// MRMXm - This form is used for instructions that use the Mod/RM byte + /// to specify a memory source, but doesn't use the middle field. And has + /// a condition code. + /// + MRMXmCC = 38, + + /// MRMXm - This form is used for instructions that use the Mod/RM byte /// to specify a memory source, but doesn't use the middle field. /// - MRMXm = 39, // Instruction that uses Mod/RM but not the middle field. + MRMXm = 39, // Next, instructions that operate on a memory r/m operand... MRM0m = 40, MRM1m = 41, MRM2m = 42, MRM3m = 43, // Format /0 /1 /2 /3 @@ -385,10 +391,16 @@ namespace X86II { /// MRMSrcRegCC = 52, + /// MRMXrCC - This form is used for instructions that use the Mod/RM byte + /// to specify a register source, but doesn't use the middle field. And has + /// a condition code. + /// + MRMXrCC = 54, + /// MRMXr - This form is used for instructions that use the Mod/RM byte /// to specify a register source, but doesn't use the middle field. /// - MRMXr = 55, // Instruction that uses Mod/RM but not the middle field. + MRMXr = 55, // Instructions that operate on a register r/m operand... 
MRM0r = 56, MRM1r = 57, MRM2r = 58, MRM3r = 59, // Format /0 /1 /2 /3 @@ -779,12 +791,14 @@ namespace X86II { case X86II::MRMSrcReg4VOp3: case X86II::MRMSrcRegOp4: case X86II::MRMSrcRegCC: + case X86II::MRMXrCC: case X86II::MRMXr: case X86II::MRM0r: case X86II::MRM1r: case X86II::MRM2r: case X86II::MRM3r: case X86II::MRM4r: case X86II::MRM5r: case X86II::MRM6r: case X86II::MRM7r: return -1; + case X86II::MRMXmCC: case X86II::MRMXm: case X86II::MRM0m: case X86II::MRM1m: case X86II::MRM2m: case X86II::MRM3m: diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 2f3cbcfc4c8..05e19a3db1c 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -1081,7 +1081,7 @@ uint8_t X86MCCodeEmitter::DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags, CurOp += X86::AddrNumOperands; REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R break; - case X86II::MRMXm: + case X86II::MRMXmCC: case X86II::MRMXm: case X86II::MRM0m: case X86II::MRM1m: case X86II::MRM2m: case X86II::MRM3m: case X86II::MRM4m: case X86II::MRM5m: @@ -1089,7 +1089,7 @@ uint8_t X86MCCodeEmitter::DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags, REX |= isREXExtendedReg(MI, MemOperand+X86::AddrBaseReg) << 0; // REX.B REX |= isREXExtendedReg(MI, MemOperand+X86::AddrIndexReg) << 1; // REX.X break; - case X86II::MRMXr: + case X86II::MRMXrCC: case X86II::MRMXr: case X86II::MRM0r: case X86II::MRM1r: case X86II::MRM2r: case X86II::MRM3r: case X86II::MRM4r: case X86II::MRM5r: @@ -1506,6 +1506,15 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS, break; } + case X86II::MRMXrCC: { + unsigned RegOp = CurOp++; + + unsigned CC = MI.getOperand(CurOp++).getImm(); + EmitByte(BaseOpcode + CC, CurByte, OS); + EmitRegModRMByte(MI.getOperand(RegOp), 0, CurByte, OS); + break; + } + case X86II::MRMXr: case X86II::MRM0r: case X86II::MRM1r: case X86II::MRM2r: case X86II::MRM3r: @@ 
-1521,6 +1530,17 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS, CurByte, OS); break; + case X86II::MRMXmCC: { + unsigned FirstMemOp = CurOp; + CurOp = FirstMemOp + X86::AddrNumOperands; + + unsigned CC = MI.getOperand(CurOp++).getImm(); + EmitByte(BaseOpcode + CC, CurByte, OS); + + emitMemModRMByte(MI, FirstMemOp, 0, TSFlags, Rex, CurByte, OS, Fixups, STI); + break; + } + case X86II::MRMXm: case X86II::MRM0m: case X86II::MRM1m: case X86II::MRM2m: case X86II::MRM3m: diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp index bf1a6c6e9e0..9edb2bfa4ef 100644 --- a/llvm/lib/Target/X86/X86FastISel.cpp +++ b/llvm/lib/Target/X86/X86FastISel.cpp @@ -1480,8 +1480,8 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) { // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction. static const uint16_t SETFOpcTable[2][3] = { - { X86::SETEr, X86::SETNPr, X86::AND8rr }, - { X86::SETNEr, X86::SETPr, X86::OR8rr } + { X86::COND_E, X86::COND_NP, X86::AND8rr }, + { X86::COND_NE, X86::COND_P, X86::OR8rr } }; const uint16_t *SETFOpc = nullptr; switch (Predicate) { @@ -1497,10 +1497,10 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) { unsigned FlagReg1 = createResultReg(&X86::GR8RegClass); unsigned FlagReg2 = createResultReg(&X86::GR8RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[0]), - FlagReg1); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[1]), - FlagReg2); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr), + FlagReg1).addImm(SETFOpc[0]); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr), + FlagReg2).addImm(SETFOpc[1]); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[2]), ResultReg).addReg(FlagReg1).addReg(FlagReg2); updateValueMap(I, ResultReg); @@ -1511,7 +1511,6 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) { bool SwapArgs; std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate); 
assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code."); - unsigned Opc = X86::getSETFromCond(CC); if (SwapArgs) std::swap(LHS, RHS); @@ -1520,7 +1519,8 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) { if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc())) return false; - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr), + ResultReg).addImm(CC); updateValueMap(I, ResultReg); return true; } @@ -2047,8 +2047,8 @@ bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) { // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction. static const uint16_t SETFOpcTable[2][3] = { - { X86::SETNPr, X86::SETEr , X86::TEST8rr }, - { X86::SETPr, X86::SETNEr, X86::OR8rr } + { X86::COND_NP, X86::COND_E, X86::TEST8rr }, + { X86::COND_P, X86::COND_NE, X86::OR8rr } }; const uint16_t *SETFOpc = nullptr; switch (Predicate) { @@ -2080,10 +2080,10 @@ bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) { if (SETFOpc) { unsigned FlagReg1 = createResultReg(&X86::GR8RegClass); unsigned FlagReg2 = createResultReg(&X86::GR8RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[0]), - FlagReg1); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[1]), - FlagReg2); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr), + FlagReg1).addImm(SETFOpc[0]); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr), + FlagReg2).addImm(SETFOpc[1]); auto const &II = TII.get(SETFOpc[2]); if (II.getNumDefs()) { unsigned TmpReg = createResultReg(&X86::GR8RegClass); @@ -2897,21 +2897,21 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { isCommutativeIntrinsic(II)) std::swap(LHS, RHS); - unsigned BaseOpc, CondOpc; + unsigned BaseOpc, CondCode; switch (II->getIntrinsicID()) { default: llvm_unreachable("Unexpected intrinsic!"); case 
Intrinsic::sadd_with_overflow: - BaseOpc = ISD::ADD; CondOpc = X86::SETOr; break; + BaseOpc = ISD::ADD; CondCode = X86::COND_O; break; case Intrinsic::uadd_with_overflow: - BaseOpc = ISD::ADD; CondOpc = X86::SETBr; break; + BaseOpc = ISD::ADD; CondCode = X86::COND_B; break; case Intrinsic::ssub_with_overflow: - BaseOpc = ISD::SUB; CondOpc = X86::SETOr; break; + BaseOpc = ISD::SUB; CondCode = X86::COND_O; break; case Intrinsic::usub_with_overflow: - BaseOpc = ISD::SUB; CondOpc = X86::SETBr; break; + BaseOpc = ISD::SUB; CondCode = X86::COND_B; break; case Intrinsic::smul_with_overflow: - BaseOpc = X86ISD::SMUL; CondOpc = X86::SETOr; break; + BaseOpc = X86ISD::SMUL; CondCode = X86::COND_O; break; case Intrinsic::umul_with_overflow: - BaseOpc = X86ISD::UMUL; CondOpc = X86::SETOr; break; + BaseOpc = X86ISD::UMUL; CondCode = X86::COND_O; break; } unsigned LHSReg = getRegForValue(LHS); @@ -2928,7 +2928,7 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { }; if (CI->isOne() && (BaseOpc == ISD::ADD || BaseOpc == ISD::SUB) && - CondOpc == X86::SETOr) { + CondCode == X86::COND_O) { // We can use INC/DEC. ResultReg = createResultReg(TLI.getRegClassFor(VT)); bool IsDec = BaseOpc == ISD::SUB; @@ -2987,8 +2987,8 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { // Assign to a GPR since the overflow return value is lowered to a SETcc. 
unsigned ResultReg2 = createResultReg(&X86::GR8RegClass); assert((ResultReg+1) == ResultReg2 && "Nonconsecutive result registers."); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CondOpc), - ResultReg2); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr), + ResultReg2).addImm(CondCode); updateValueMap(II, ResultReg, 2); return true; diff --git a/llvm/lib/Target/X86/X86FixupSetCC.cpp b/llvm/lib/Target/X86/X86FixupSetCC.cpp index 5bfad71ec05..e2d4d1ede6f 100644 --- a/llvm/lib/Target/X86/X86FixupSetCC.cpp +++ b/llvm/lib/Target/X86/X86FixupSetCC.cpp @@ -67,30 +67,6 @@ char X86FixupSetCCPass::ID = 0; FunctionPass *llvm::createX86FixupSetCC() { return new X86FixupSetCCPass(); } -bool X86FixupSetCCPass::isSetCCr(unsigned Opcode) { - switch (Opcode) { - default: - return false; - case X86::SETOr: - case X86::SETNOr: - case X86::SETBr: - case X86::SETAEr: - case X86::SETEr: - case X86::SETNEr: - case X86::SETBEr: - case X86::SETAr: - case X86::SETSr: - case X86::SETNSr: - case X86::SETPr: - case X86::SETNPr: - case X86::SETLr: - case X86::SETGEr: - case X86::SETLEr: - case X86::SETGr: - return true; - } -} - // We expect the instruction *immediately* before the setcc to imp-def // EFLAGS (because of scheduling glue). To make this less brittle w.r.t // scheduling, look backwards until we hit the beginning of the @@ -128,7 +104,7 @@ bool X86FixupSetCCPass::runOnMachineFunction(MachineFunction &MF) { // Find a setcc that is used by a zext. // This doesn't have to be the only use, the transformation is safe // regardless. 
- if (!isSetCCr(MI.getOpcode())) + if (MI.getOpcode() != X86::SETCCr) continue; MachineInstr *ZExt = nullptr; diff --git a/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp b/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp index 52ae70a60dc..6cf01c988d7 100644 --- a/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp +++ b/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp @@ -601,8 +601,7 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) { // Otherwise we can just rewrite in-place. if (X86::getCondFromCMov(MI) != X86::COND_INVALID) { rewriteCMov(*TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs); - } else if (X86::getCondFromSETOpc(MI.getOpcode()) != - X86::COND_INVALID) { + } else if (X86::getCondFromSETCC(MI) != X86::COND_INVALID) { rewriteSetCC(*TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs); } else if (MI.getOpcode() == TargetOpcode::COPY) { rewriteCopy(MI, *FlagUse, CopyDefI); @@ -729,7 +728,7 @@ CondRegArray X86FlagsCopyLoweringPass::collectCondsInRegs( // Scan backwards across the range of instructions with live EFLAGS. 
for (MachineInstr &MI : llvm::reverse(llvm::make_range(MBB.begin(), TestPos))) { - X86::CondCode Cond = X86::getCondFromSETOpc(MI.getOpcode()); + X86::CondCode Cond = X86::getCondFromSETCC(MI); if (Cond != X86::COND_INVALID && !MI.mayStore() && MI.getOperand(0).isReg() && TRI->isVirtualRegister(MI.getOperand(0).getReg())) { assert(MI.getOperand(0).isDef() && @@ -750,7 +749,7 @@ unsigned X86FlagsCopyLoweringPass::promoteCondToReg( DebugLoc TestLoc, X86::CondCode Cond) { unsigned Reg = MRI->createVirtualRegister(PromoteRC); auto SetI = BuildMI(TestMBB, TestPos, TestLoc, - TII->get(X86::getSETFromCond(Cond)), Reg); + TII->get(X86::SETCCr), Reg).addImm(Cond); (void)SetI; LLVM_DEBUG(dbgs() << " save cond: "; SetI->dump()); ++NumSetCCsInserted; @@ -1023,7 +1022,7 @@ void X86FlagsCopyLoweringPass::rewriteSetCC(MachineBasicBlock &TestMBB, MachineInstr &SetCCI, MachineOperand &FlagUse, CondRegArray &CondRegs) { - X86::CondCode Cond = X86::getCondFromSETOpc(SetCCI.getOpcode()); + X86::CondCode Cond = X86::getCondFromSETCC(SetCCI); // Note that we can't usefully rewrite this to the inverse without complex // analysis of the users of the setCC. Largely we rely on duplicates which // could have been avoided already being avoided here. 
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index eeffc4d7080..273ed6b0fa6 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -2326,11 +2326,14 @@ static X86::CondCode getCondFromNode(SDNode *N) { X86::CondCode CC = X86::COND_INVALID; if (CC == X86::COND_INVALID) CC = X86::getCondFromBranchOpc(N->getMachineOpcode()); - if (CC == X86::COND_INVALID) - CC = X86::getCondFromSETOpc(N->getMachineOpcode()); if (CC == X86::COND_INVALID) { unsigned Opc = N->getMachineOpcode(); - if (Opc == X86::CMOV16rr || Opc == X86::CMOV32rr || Opc == X86::CMOV64rr) + if (Opc == X86::SETCCr) + CC = static_cast<X86::CondCode>(N->getConstantOperandVal(0)); + else if (Opc == X86::SETCCm) + CC = static_cast<X86::CondCode>(N->getConstantOperandVal(5)); + else if (Opc == X86::CMOV16rr || Opc == X86::CMOV32rr || + Opc == X86::CMOV64rr) CC = static_cast<X86::CondCode>(N->getConstantOperandVal(2)); else if (Opc == X86::CMOV16rm || Opc == X86::CMOV32rm || Opc == X86::CMOV64rm) diff --git a/llvm/lib/Target/X86/X86InstrCMovSetCC.td b/llvm/lib/Target/X86/X86InstrCMovSetCC.td index 2c41169bf67..099f6aa8d8b 100644 --- a/llvm/lib/Target/X86/X86InstrCMovSetCC.td +++ b/llvm/lib/Target/X86/X86InstrCMovSetCC.td @@ -55,69 +55,52 @@ let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst", } // Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst" } // isCodeGenOnly = 1, ForceDisassemble = 1 -multiclass CMOV_Aliases<string Name, int CC> { - def : InstAlias<Name#"{w}\t{$src, $dst|$dst, $src}", +// SetCC instructions. 
+let Uses = [EFLAGS], isCodeGenOnly = 1, ForceDisassemble = 1 in { + def SETCCr : I<0x90, MRMXrCC, (outs GR8:$dst), (ins ccode:$cond), + "set${cond}\t$dst", + [(set GR8:$dst, (X86setcc imm:$cond, EFLAGS))]>, + TB, Sched<[WriteSETCC]>; + def SETCCm : I<0x90, MRMXmCC, (outs), (ins i8mem:$dst, ccode:$cond), + "set${cond}\t$dst", + [(store (X86setcc imm:$cond, EFLAGS), addr:$dst)]>, + TB, Sched<[WriteSETCCStore]>; +} // Uses = [EFLAGS] + +multiclass CMOV_SETCC_Aliases<string Cond, int CC> { + def : InstAlias<"cmov"#Cond#"{w}\t{$src, $dst|$dst, $src}", (CMOV16rr GR16:$dst, GR16:$src, CC), 0>; - def : InstAlias<Name#"{w}\t{$src, $dst|$dst, $src}", + def : InstAlias<"cmov"#Cond#"{w}\t{$src, $dst|$dst, $src}", (CMOV16rm GR16:$dst, i16mem:$src, CC), 0>; - def : InstAlias<Name#"{l}\t{$src, $dst|$dst, $src}", + def : InstAlias<"cmov"#Cond#"{l}\t{$src, $dst|$dst, $src}", (CMOV32rr GR32:$dst, GR32:$src, CC), 0>; - def : InstAlias<Name#"{l}\t{$src, $dst|$dst, $src}", + def : InstAlias<"cmov"#Cond#"{l}\t{$src, $dst|$dst, $src}", (CMOV32rm GR32:$dst, i32mem:$src, CC), 0>; - def : InstAlias<Name#"{q}\t{$src, $dst|$dst, $src}", + def : InstAlias<"cmov"#Cond#"{q}\t{$src, $dst|$dst, $src}", (CMOV64rr GR64:$dst, GR64:$src, CC), 0>; - def : InstAlias<Name#"{q}\t{$src, $dst|$dst, $src}", + def : InstAlias<"cmov"#Cond#"{q}\t{$src, $dst|$dst, $src}", (CMOV64rm GR64:$dst, i64mem:$src, CC), 0>; -} -defm : CMOV_Aliases<"cmovo" , 0>; -defm : CMOV_Aliases<"cmovno", 1>; -defm : CMOV_Aliases<"cmovb" , 2>; -defm : CMOV_Aliases<"cmovae", 3>; -defm : CMOV_Aliases<"cmove" , 4>; -defm : CMOV_Aliases<"cmovne", 5>; -defm : CMOV_Aliases<"cmovbe", 6>; -defm : CMOV_Aliases<"cmova" , 7>; -defm : CMOV_Aliases<"cmovs" , 8>; -defm : CMOV_Aliases<"cmovns", 9>; -defm : CMOV_Aliases<"cmovp" , 10>; -defm : CMOV_Aliases<"cmovnp", 11>; -defm : CMOV_Aliases<"cmovl" , 12>; -defm : CMOV_Aliases<"cmovge", 13>; -defm : CMOV_Aliases<"cmovle", 14>; -defm : CMOV_Aliases<"cmovg" , 15>; - - -// SetCC instructions. 
-multiclass SETCC<bits<8> opc, string Mnemonic, PatLeaf OpNode> { - let Uses = [EFLAGS] in { - def r : I<opc, MRMXr, (outs GR8:$dst), (ins), - !strconcat(Mnemonic, "\t$dst"), - [(set GR8:$dst, (X86setcc OpNode, EFLAGS))]>, - TB, Sched<[WriteSETCC]>; - def m : I<opc, MRMXm, (outs), (ins i8mem:$dst), - !strconcat(Mnemonic, "\t$dst"), - [(store (X86setcc OpNode, EFLAGS), addr:$dst)]>, - TB, Sched<[WriteSETCCStore]>; - } // Uses = [EFLAGS] + def : InstAlias<"set"#Cond#"\t$dst", (SETCCr GR8:$dst, CC), 0>; + def : InstAlias<"set"#Cond#"\t$dst", (SETCCm i8mem:$dst, CC), 0>; } -defm SETO : SETCC<0x90, "seto", X86_COND_O>; // is overflow bit set -defm SETNO : SETCC<0x91, "setno", X86_COND_NO>; // is overflow bit not set -defm SETB : SETCC<0x92, "setb", X86_COND_B>; // unsigned less than -defm SETAE : SETCC<0x93, "setae", X86_COND_AE>; // unsigned greater or equal -defm SETE : SETCC<0x94, "sete", X86_COND_E>; // equal to -defm SETNE : SETCC<0x95, "setne", X86_COND_NE>; // not equal to -defm SETBE : SETCC<0x96, "setbe", X86_COND_BE>; // unsigned less than or equal -defm SETA : SETCC<0x97, "seta", X86_COND_A>; // unsigned greater than -defm SETS : SETCC<0x98, "sets", X86_COND_S>; // is signed bit set -defm SETNS : SETCC<0x99, "setns", X86_COND_NS>; // is not signed -defm SETP : SETCC<0x9A, "setp", X86_COND_P>; // is parity bit set -defm SETNP : SETCC<0x9B, "setnp", X86_COND_NP>; // is parity bit not set -defm SETL : SETCC<0x9C, "setl", X86_COND_L>; // signed less than -defm SETGE : SETCC<0x9D, "setge", X86_COND_GE>; // signed greater or equal -defm SETLE : SETCC<0x9E, "setle", X86_COND_LE>; // signed less than or equal -defm SETG : SETCC<0x9F, "setg", X86_COND_G>; // signed greater than +defm : CMOV_SETCC_Aliases<"o" , 0>; +defm : CMOV_SETCC_Aliases<"no", 1>; +defm : CMOV_SETCC_Aliases<"b" , 2>; +defm : CMOV_SETCC_Aliases<"ae", 3>; +defm : CMOV_SETCC_Aliases<"e" , 4>; +defm : CMOV_SETCC_Aliases<"ne", 5>; +defm : CMOV_SETCC_Aliases<"be", 6>; +defm : CMOV_SETCC_Aliases<"a" , 7>; 
+defm : CMOV_SETCC_Aliases<"s" , 8>; +defm : CMOV_SETCC_Aliases<"ns", 9>; +defm : CMOV_SETCC_Aliases<"p" , 10>; +defm : CMOV_SETCC_Aliases<"np", 11>; +defm : CMOV_SETCC_Aliases<"l" , 12>; +defm : CMOV_SETCC_Aliases<"ge", 13>; +defm : CMOV_SETCC_Aliases<"le", 14>; +defm : CMOV_SETCC_Aliases<"g" , 15>; // SALC is an undocumented instruction. Information for this instruction can be found // here http://www.rcollins.org/secrets/opcodes/SALC.html diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td index 03494f3689f..1fcc33e8724 100644 --- a/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/llvm/lib/Target/X86/X86InstrCompiler.td @@ -354,7 +354,7 @@ def : Pat<(i64 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), // this happens, it is great. However, if we are left with an 8-bit sbb and an // and, we might as well just match it as a setb. def : Pat<(and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1), - (SETBr)>; + (SETCCr (i8 2))>; // Patterns to give priority when both inputs are zero so that we don't use // an immediate for the RHS. 
diff --git a/llvm/lib/Target/X86/X86InstrFoldTables.cpp b/llvm/lib/Target/X86/X86InstrFoldTables.cpp index e0be42b4d4a..59e62da55f2 100644 --- a/llvm/lib/Target/X86/X86InstrFoldTables.cpp +++ b/llvm/lib/Target/X86/X86InstrFoldTables.cpp @@ -322,22 +322,7 @@ static const X86MemoryFoldTableEntry MemoryFoldTable0[] = { { X86::PUSH16r, X86::PUSH16rmm, TB_FOLDED_LOAD }, { X86::PUSH32r, X86::PUSH32rmm, TB_FOLDED_LOAD }, { X86::PUSH64r, X86::PUSH64rmm, TB_FOLDED_LOAD }, - { X86::SETAEr, X86::SETAEm, TB_FOLDED_STORE }, - { X86::SETAr, X86::SETAm, TB_FOLDED_STORE }, - { X86::SETBEr, X86::SETBEm, TB_FOLDED_STORE }, - { X86::SETBr, X86::SETBm, TB_FOLDED_STORE }, - { X86::SETEr, X86::SETEm, TB_FOLDED_STORE }, - { X86::SETGEr, X86::SETGEm, TB_FOLDED_STORE }, - { X86::SETGr, X86::SETGm, TB_FOLDED_STORE }, - { X86::SETLEr, X86::SETLEm, TB_FOLDED_STORE }, - { X86::SETLr, X86::SETLm, TB_FOLDED_STORE }, - { X86::SETNEr, X86::SETNEm, TB_FOLDED_STORE }, - { X86::SETNOr, X86::SETNOm, TB_FOLDED_STORE }, - { X86::SETNPr, X86::SETNPm, TB_FOLDED_STORE }, - { X86::SETNSr, X86::SETNSm, TB_FOLDED_STORE }, - { X86::SETOr, X86::SETOm, TB_FOLDED_STORE }, - { X86::SETPr, X86::SETPm, TB_FOLDED_STORE }, - { X86::SETSr, X86::SETSm, TB_FOLDED_STORE }, + { X86::SETCCr, X86::SETCCm, TB_FOLDED_STORE }, { X86::TAILJMPr, X86::TAILJMPm, TB_FOLDED_LOAD }, { X86::TAILJMPr64, X86::TAILJMPm64, TB_FOLDED_LOAD }, { X86::TAILJMPr64_REX, X86::TAILJMPm64_REX, TB_FOLDED_LOAD }, diff --git a/llvm/lib/Target/X86/X86InstrFormats.td b/llvm/lib/Target/X86/X86InstrFormats.td index 5b25101b33d..4bb6008ad6f 100644 --- a/llvm/lib/Target/X86/X86InstrFormats.td +++ b/llvm/lib/Target/X86/X86InstrFormats.td @@ -31,6 +31,7 @@ def MRMSrcMem : Format<33>; def MRMSrcMem4VOp3 : Format<34>; def MRMSrcMemOp4 : Format<35>; def MRMSrcMemCC : Format<36>; +def MRMXmCC: Format<38>; def MRMXm : Format<39>; def MRM0m : Format<40>; def MRM1m : Format<41>; def MRM2m : Format<42>; def MRM3m : Format<43>; def MRM4m : Format<44>; def MRM5m : 
Format<45>; @@ -40,6 +41,7 @@ def MRMSrcReg : Format<49>; def MRMSrcReg4VOp3 : Format<50>; def MRMSrcRegOp4 : Format<51>; def MRMSrcRegCC : Format<52>; +def MRMXrCC: Format<54>; def MRMXr : Format<55>; def MRM0r : Format<56>; def MRM1r : Format<57>; def MRM2r : Format<58>; def MRM3r : Format<59>; def MRM4r : Format<60>; def MRM5r : Format<61>; diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 911b6b0ec5a..ecb8a40b738 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -2001,26 +2001,13 @@ X86::CondCode X86::getCondFromBranchOpc(unsigned BrOpc) { } } -/// Return condition code of a SET opcode. -X86::CondCode X86::getCondFromSETOpc(unsigned Opc) { - switch (Opc) { +/// Return condition code of a SETCC opcode. +X86::CondCode X86::getCondFromSETCC(const MachineInstr &MI) { + switch (MI.getOpcode()) { default: return X86::COND_INVALID; - case X86::SETAr: case X86::SETAm: return X86::COND_A; - case X86::SETAEr: case X86::SETAEm: return X86::COND_AE; - case X86::SETBr: case X86::SETBm: return X86::COND_B; - case X86::SETBEr: case X86::SETBEm: return X86::COND_BE; - case X86::SETEr: case X86::SETEm: return X86::COND_E; - case X86::SETGr: case X86::SETGm: return X86::COND_G; - case X86::SETGEr: case X86::SETGEm: return X86::COND_GE; - case X86::SETLr: case X86::SETLm: return X86::COND_L; - case X86::SETLEr: case X86::SETLEm: return X86::COND_LE; - case X86::SETNEr: case X86::SETNEm: return X86::COND_NE; - case X86::SETNOr: case X86::SETNOm: return X86::COND_NO; - case X86::SETNPr: case X86::SETNPm: return X86::COND_NP; - case X86::SETNSr: case X86::SETNSm: return X86::COND_NS; - case X86::SETOr: case X86::SETOm: return X86::COND_O; - case X86::SETPr: case X86::SETPm: return X86::COND_P; - case X86::SETSr: case X86::SETSm: return X86::COND_S; + case X86::SETCCr: case X86::SETCCm: + return static_cast<X86::CondCode>( + MI.getOperand(MI.getDesc().getNumOperands() - 1).getImm()); } } @@ 
-2139,30 +2126,9 @@ X86::getX86ConditionCode(CmpInst::Predicate Predicate) { return std::make_pair(CC, NeedSwap); } -/// Return a set opcode for the given condition and -/// whether it has memory operand. -unsigned X86::getSETFromCond(CondCode CC, bool HasMemoryOperand) { - static const uint16_t Opc[16][2] = { - { X86::SETOr, X86::SETOm }, - { X86::SETNOr, X86::SETNOm }, - { X86::SETBr, X86::SETBm }, - { X86::SETAEr, X86::SETAEm }, - { X86::SETEr, X86::SETEm }, - { X86::SETNEr, X86::SETNEm }, - { X86::SETBEr, X86::SETBEm }, - { X86::SETAr, X86::SETAm }, - { X86::SETSr, X86::SETSm }, - { X86::SETNSr, X86::SETNSm }, - { X86::SETPr, X86::SETPm }, - { X86::SETNPr, X86::SETNPm }, - { X86::SETLr, X86::SETLm }, - { X86::SETGEr, X86::SETGEm }, - { X86::SETLEr, X86::SETLEm }, - { X86::SETGr, X86::SETGm }, - }; - - assert(CC <= LAST_VALID_COND && "Can only handle standard cond codes"); - return Opc[CC][HasMemoryOperand ? 1 : 0]; +/// Return the setcc opcode: SETCCm if it has a memory operand, else SETCCr. +unsigned X86::getSETOpc(bool HasMemoryOperand) { + return HasMemoryOperand ? X86::SETCCm : X86::SETCCr; } /// Return a cmov opcode for the given register size in bytes, and operand type. @@ -3555,7 +3521,7 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, // If we are done with the basic block, we need to check whether EFLAGS is // live-out. bool IsSafe = false; - SmallVector<std::pair<MachineInstr*, unsigned /*NewOpc*/>, 4> OpsToUpdate; + SmallVector<std::pair<MachineInstr*, X86::CondCode>, 4> OpsToUpdate; MachineBasicBlock::iterator E = CmpInstr.getParent()->end(); for (++I; I != E; ++I) { const MachineInstr &Instr = *I; @@ -3572,16 +3538,13 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, // EFLAGS is used by this instruction. X86::CondCode OldCC = X86::COND_INVALID; - bool OpcIsSET = false; if (IsCmpZero || IsSwapped) { // We decode the condition code from opcode. 
if (Instr.isBranch()) OldCC = X86::getCondFromBranchOpc(Instr.getOpcode()); else { - OldCC = X86::getCondFromSETOpc(Instr.getOpcode()); - if (OldCC != X86::COND_INVALID) - OpcIsSET = true; - else + OldCC = X86::getCondFromSETCC(Instr); + if (OldCC == X86::COND_INVALID) OldCC = X86::getCondFromCMov(Instr); } if (OldCC == X86::COND_INVALID) return false; @@ -3627,21 +3590,10 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, } if ((ShouldUpdateCC || IsSwapped) && ReplacementCC != OldCC) { - // Synthesize the new opcode. - bool HasMemoryOperand = Instr.hasOneMemOperand(); - unsigned NewOpc; - if (Instr.isBranch()) - NewOpc = GetCondBranchFromCond(ReplacementCC); - else if(OpcIsSET) - NewOpc = getSETFromCond(ReplacementCC, HasMemoryOperand); - else { - NewOpc = ReplacementCC; - } - // Push the MachineInstr to OpsToUpdate. // If it is safe to remove CmpInstr, the condition code of these // instructions will be modified. - OpsToUpdate.push_back(std::make_pair(&*I, NewOpc)); + OpsToUpdate.push_back(std::make_pair(&*I, ReplacementCC)); } if (ModifyEFLAGS || Instr.killsRegister(X86::EFLAGS, TRI)) { // It is safe to remove CmpInstr if EFLAGS is updated again or killed. @@ -3696,11 +3648,11 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, // Modify the condition code of instructions in OpsToUpdate. 
for (auto &Op : OpsToUpdate) { - if (X86::getCondFromCMov(*Op.first) != X86::COND_INVALID) + if (Op.first->isBranch()) + Op.first->setDesc(get(GetCondBranchFromCond(Op.second))); + else Op.first->getOperand(Op.first->getDesc().getNumOperands() - 1) .setImm(Op.second); - else - Op.first->setDesc(get(Op.second)); } return true; } diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h index b0e83527247..2203cd908c9 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.h +++ b/llvm/lib/Target/X86/X86InstrInfo.h @@ -42,9 +42,8 @@ unsigned GetCondBranchFromCond(CondCode CC); /// the instruction operands should be swaped to match the condition code. std::pair<CondCode, bool> getX86ConditionCode(CmpInst::Predicate Predicate); -/// Return a set opcode for the given condition and whether it has -/// a memory operand. -unsigned getSETFromCond(CondCode CC, bool HasMemoryOperand = false); +/// Return a setcc opcode based on whether it has a memory operand. +unsigned getSETOpc(bool HasMemoryOperand = false); /// Return a cmov opcode for the given register size in bytes, and operand type. unsigned getCMovOpcode(unsigned RegBytes, bool HasMemoryOperand = false); @@ -53,7 +52,7 @@ unsigned getCMovOpcode(unsigned RegBytes, bool HasMemoryOperand = false); CondCode getCondFromBranchOpc(unsigned Opc); // Turn setCC opcode into condition code. -CondCode getCondFromSETOpc(unsigned Opc); +CondCode getCondFromSETCC(const MachineInstr &MI); // Turn CMov opcode into condition code. 
CondCode getCondFromCMov(const MachineInstr &MI); diff --git a/llvm/lib/Target/X86/X86InstructionSelector.cpp b/llvm/lib/Target/X86/X86InstructionSelector.cpp index 90ae6bb4df5..c105b4e63d9 100644 --- a/llvm/lib/Target/X86/X86InstructionSelector.cpp +++ b/llvm/lib/Target/X86/X86InstructionSelector.cpp @@ -947,7 +947,6 @@ bool X86InstructionSelector::selectCmp(MachineInstr &I, bool SwapArgs; std::tie(CC, SwapArgs) = X86::getX86ConditionCode( (CmpInst::Predicate)I.getOperand(1).getPredicate()); - unsigned OpSet = X86::getSETFromCond(CC); unsigned LHS = I.getOperand(2).getReg(); unsigned RHS = I.getOperand(3).getReg(); @@ -981,7 +980,7 @@ bool X86InstructionSelector::selectCmp(MachineInstr &I, .addReg(RHS); MachineInstr &SetInst = *BuildMI(*I.getParent(), I, I.getDebugLoc(), - TII.get(OpSet), I.getOperand(0).getReg()); + TII.get(X86::SETCCr), I.getOperand(0).getReg()).addImm(CC); constrainSelectedInstRegOperands(CmpInst, TII, TRI, RBI); constrainSelectedInstRegOperands(SetInst, TII, TRI, RBI); @@ -1002,8 +1001,8 @@ bool X86InstructionSelector::selectFCmp(MachineInstr &I, // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction. 
static const uint16_t SETFOpcTable[2][3] = { - {X86::SETEr, X86::SETNPr, X86::AND8rr}, - {X86::SETNEr, X86::SETPr, X86::OR8rr}}; + {X86::COND_E, X86::COND_NP, X86::AND8rr}, + {X86::COND_NE, X86::COND_P, X86::OR8rr}}; const uint16_t *SETFOpc = nullptr; switch (Predicate) { default: @@ -1043,9 +1042,9 @@ bool X86InstructionSelector::selectFCmp(MachineInstr &I, unsigned FlagReg1 = MRI.createVirtualRegister(&X86::GR8RegClass); unsigned FlagReg2 = MRI.createVirtualRegister(&X86::GR8RegClass); MachineInstr &Set1 = *BuildMI(*I.getParent(), I, I.getDebugLoc(), - TII.get(SETFOpc[0]), FlagReg1); + TII.get(X86::SETCCr), FlagReg1).addImm(SETFOpc[0]); MachineInstr &Set2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(), - TII.get(SETFOpc[1]), FlagReg2); + TII.get(X86::SETCCr), FlagReg2).addImm(SETFOpc[1]); MachineInstr &Set3 = *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SETFOpc[2]), ResultReg) .addReg(FlagReg1) @@ -1063,7 +1062,6 @@ bool X86InstructionSelector::selectFCmp(MachineInstr &I, bool SwapArgs; std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate); assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code."); - unsigned Opc = X86::getSETFromCond(CC); if (SwapArgs) std::swap(LhsReg, RhsReg); @@ -1075,7 +1073,7 @@ bool X86InstructionSelector::selectFCmp(MachineInstr &I, .addReg(RhsReg); MachineInstr &Set = - *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Opc), ResultReg); + *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::SETCCr), ResultReg).addImm(CC); constrainSelectedInstRegOperands(CmpInst, TII, TRI, RBI); constrainSelectedInstRegOperands(Set, TII, TRI, RBI); I.eraseFromParent(); diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index 4e76a8e2a6e..cd4358dc227 100644 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -736,7 +736,6 @@ def: InstRW<[BWWriteResGroup20], (instrs CWD, ADC16i16, SBB16i16, ADC32i32, SBB32i32, ADC64i32, SBB64i32)>; 
-def: InstRW<[BWWriteResGroup20], (instregex "SET(A|BE)r")>; def BWWriteResGroup22 : SchedWriteRes<[BWPort4,BWPort6,BWPort237]> { let Latency = 2; @@ -815,7 +814,6 @@ def BWWriteResGroup38 : SchedWriteRes<[BWPort4,BWPort237,BWPort06,BWPort0156]> { let ResourceCycles = [1,1,1,1]; } def: InstRW<[BWWriteResGroup38], (instrs CALL64pcrel32)>; -def: InstRW<[BWWriteResGroup38], (instregex "SET(A|BE)m")>; def BWWriteResGroup39 : SchedWriteRes<[BWPort0,BWPort1]> { let Latency = 4; @@ -1627,4 +1625,30 @@ def BWCMOVA_CMOVBErm : SchedWriteVariant<[ def : InstRW<[BWCMOVA_CMOVBErr], (instrs CMOV16rr, CMOV32rr, CMOV64rr)>; def : InstRW<[BWCMOVA_CMOVBErm], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>; +// SETCCs that use both Z and C flag require an extra uop. +def BWWriteSETA_SETBEr : SchedWriteRes<[BWPort06,BWPort0156]> { + let Latency = 2; + let ResourceCycles = [1,1]; + let NumMicroOps = 2; +} + +def BWWriteSETA_SETBEm : SchedWriteRes<[BWPort4,BWPort237,BWPort06,BWPort0156]> { + let Latency = 3; + let ResourceCycles = [1,1,1,1]; + let NumMicroOps = 4; +} + +def BWSETA_SETBErr : SchedWriteVariant<[ + SchedVar<MCSchedPredicate<IsSETAr_Or_SETBEr>, [BWWriteSETA_SETBEr]>, + SchedVar<NoSchedPred, [WriteSETCC]> +]>; + +def BWSETA_SETBErm : SchedWriteVariant<[ + SchedVar<MCSchedPredicate<IsSETAm_Or_SETBEm>, [BWWriteSETA_SETBEm]>, + SchedVar<NoSchedPred, [WriteSETCCStore]> +]>; + +def : InstRW<[BWSETA_SETBErr], (instrs SETCCr)>; +def : InstRW<[BWSETA_SETBErm], (instrs SETCCm)>; + } // SchedModel diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index a8855f0f39a..1592a1169da 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -1126,7 +1126,6 @@ def HWWriteResGroup35 : SchedWriteRes<[HWPort06,HWPort0156]> { let ResourceCycles = [1,1]; } def: InstRW<[HWWriteResGroup35], (instrs CWD, JCXZ, JECXZ, JRCXZ)>; -def: InstRW<[HWWriteResGroup35], (instregex "SET(A|BE)r")>; def HWWriteResGroup36_2 : 
SchedWriteRes<[HWPort5,HWPort23]> { let Latency = 7; @@ -1172,7 +1171,6 @@ def HWWriteResGroup45 : SchedWriteRes<[HWPort4,HWPort237,HWPort06,HWPort0156]> { let ResourceCycles = [1,1,1,1]; } def: InstRW<[HWWriteResGroup45], (instrs CALL64pcrel32)>; -def: InstRW<[HWWriteResGroup45], (instregex "SET(A|BE)m")>; def HWWriteResGroup46 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort06]> { let Latency = 8; @@ -1911,4 +1909,30 @@ def HWCMOVA_CMOVBErm : SchedWriteVariant<[ def : InstRW<[HWCMOVA_CMOVBErr], (instrs CMOV16rr, CMOV32rr, CMOV64rr)>; def : InstRW<[HWCMOVA_CMOVBErm], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>; +// SETCCs that use both Z and C flag require an extra uop. +def HWWriteSETA_SETBEr : SchedWriteRes<[HWPort06,HWPort0156]> { + let Latency = 2; + let ResourceCycles = [1,1]; + let NumMicroOps = 2; +} + +def HWWriteSETA_SETBEm : SchedWriteRes<[HWPort4,HWPort237,HWPort06,HWPort0156]> { + let Latency = 3; + let ResourceCycles = [1,1,1,1]; + let NumMicroOps = 4; +} + +def HWSETA_SETBErr : SchedWriteVariant<[ + SchedVar<MCSchedPredicate<IsSETAr_Or_SETBEr>, [HWWriteSETA_SETBEr]>, + SchedVar<NoSchedPred, [WriteSETCC]> +]>; + +def HWSETA_SETBErm : SchedWriteVariant<[ + SchedVar<MCSchedPredicate<IsSETAm_Or_SETBEm>, [HWWriteSETA_SETBEm]>, + SchedVar<NoSchedPred, [WriteSETCCStore]> +]>; + +def : InstRW<[HWSETA_SETBErr], (instrs SETCCr)>; +def : InstRW<[HWSETA_SETBErm], (instrs SETCCm)>; + } // SchedModel diff --git a/llvm/lib/Target/X86/X86SchedPredicates.td b/llvm/lib/Target/X86/X86SchedPredicates.td index c1e5ad0834d..41bd776648f 100644 --- a/llvm/lib/Target/X86/X86SchedPredicates.td +++ b/llvm/lib/Target/X86/X86SchedPredicates.td @@ -72,3 +72,15 @@ def IsCMOVArm_Or_CMOVBErm : CheckAny<[ CheckImmOperand_s<7, "X86::COND_A">, CheckImmOperand_s<7, "X86::COND_BE"> ]>; + +// A predicate to check for COND_A and COND_BE SETCCs which have an extra uop +// on recent Intel CPUs. 
+def IsSETAr_Or_SETBEr : CheckAny<[ + CheckImmOperand_s<1, "X86::COND_A">, + CheckImmOperand_s<1, "X86::COND_BE"> +]>; + +def IsSETAm_Or_SETBEm : CheckAny<[ + CheckImmOperand_s<5, "X86::COND_A">, + CheckImmOperand_s<5, "X86::COND_BE"> +]>; diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td index 234f3dcb552..842d67b5c82 100644 --- a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -615,13 +615,6 @@ def: InstRW<[SBWriteResGroup5], (instrs MMX_PABSBrr, MMX_PSIGNDrr, MMX_PSIGNWrr)>; -def SBWriteResGroup9 : SchedWriteRes<[SBPort05]> { - let Latency = 2; - let NumMicroOps = 2; - let ResourceCycles = [2]; -} -def: InstRW<[SBWriteResGroup9], (instregex "SET(A|BE)r")>; - def SBWriteResGroup11 : SchedWriteRes<[SBPort015]> { let Latency = 2; let NumMicroOps = 2; @@ -772,13 +765,6 @@ def SBWriteResGroup41 : SchedWriteRes<[SBPort5,SBPort015]> { } def: InstRW<[SBWriteResGroup41], (instrs FNINIT)>; -def SBWriteResGroup43 : SchedWriteRes<[SBPort4,SBPort23,SBPort05]> { - let Latency = 3; - let NumMicroOps = 4; - let ResourceCycles = [1,1,2]; -} -def: InstRW<[SBWriteResGroup43], (instregex "SET(A|BE)m")>; - def SBWriteResGroup45 : SchedWriteRes<[SBPort0,SBPort4,SBPort23,SBPort15]> { let Latency = 5; let NumMicroOps = 4; @@ -1198,4 +1184,30 @@ def SBCMOVA_CMOVBErm : SchedWriteVariant<[ def : InstRW<[SBCMOVA_CMOVBErr], (instrs CMOV16rr, CMOV32rr, CMOV64rr)>; def : InstRW<[SBCMOVA_CMOVBErm], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>; +// SETCCs that use both Z and C flag require an extra uop. 
+def SBWriteSETA_SETBEr : SchedWriteRes<[SBPort05]> { + let Latency = 2; + let ResourceCycles = [2]; + let NumMicroOps = 2; +} + +def SBWriteSETA_SETBEm : SchedWriteRes<[SBPort4,SBPort23,SBPort05]> { + let Latency = 3; + let ResourceCycles = [1,1,2]; + let NumMicroOps = 4; +} + +def SBSETA_SETBErr : SchedWriteVariant<[ + SchedVar<MCSchedPredicate<IsSETAr_Or_SETBEr>, [SBWriteSETA_SETBEr]>, + SchedVar<NoSchedPred, [WriteSETCC]> +]>; + +def SBSETA_SETBErm : SchedWriteVariant<[ + SchedVar<MCSchedPredicate<IsSETAm_Or_SETBEm>, [SBWriteSETA_SETBEm]>, + SchedVar<NoSchedPred, [WriteSETCCStore]> +]>; + +def : InstRW<[SBSETA_SETBErr], (instrs SETCCr)>; +def : InstRW<[SBSETA_SETBErm], (instrs SETCCm)>; + } // SchedModel diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td index 87dc88f482f..1119fd3fc11 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -698,13 +698,6 @@ def SKLWriteResGroup14 : SchedWriteRes<[SKLPort05]> { def: InstRW<[SKLWriteResGroup14], (instrs FDECSTP, MMX_MOVDQ2Qrr)>; -def SKLWriteResGroup15 : SchedWriteRes<[SKLPort06]> { - let Latency = 2; - let NumMicroOps = 2; - let ResourceCycles = [2]; -} -def: InstRW<[SKLWriteResGroup15], (instregex "SET(A|BE)r")>; - def SKLWriteResGroup17 : SchedWriteRes<[SKLPort0156]> { let Latency = 2; let NumMicroOps = 2; @@ -840,13 +833,6 @@ def SKLWriteResGroup43 : SchedWriteRes<[SKLPort0,SKLPort4,SKLPort237]> { } def: InstRW<[SKLWriteResGroup43], (instrs FNSTSWm)>; -def SKLWriteResGroup44 : SchedWriteRes<[SKLPort4,SKLPort237,SKLPort06]> { - let Latency = 3; - let NumMicroOps = 4; - let ResourceCycles = [1,1,2]; -} -def: InstRW<[SKLWriteResGroup44], (instregex "SET(A|BE)m")>; - def SKLWriteResGroup45 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort237,SKLPort0156]> { let Latency = 3; let NumMicroOps = 4; @@ -1782,4 +1768,30 @@ def SKLCMOVA_CMOVBErm : SchedWriteVariant<[ def : InstRW<[SKLCMOVA_CMOVBErr], (instrs CMOV16rr, 
CMOV32rr, CMOV64rr)>; def : InstRW<[SKLCMOVA_CMOVBErm], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>; +// SETCCs that use both Z and C flag require an extra uop. +def SKLWriteSETA_SETBEr : SchedWriteRes<[SKLPort06]> { + let Latency = 2; + let ResourceCycles = [2]; + let NumMicroOps = 2; +} + +def SKLWriteSETA_SETBEm : SchedWriteRes<[SKLPort4,SKLPort237,SKLPort06]> { + let Latency = 3; + let ResourceCycles = [1,1,2]; + let NumMicroOps = 4; +} + +def SKLSETA_SETBErr : SchedWriteVariant<[ + SchedVar<MCSchedPredicate<IsSETAr_Or_SETBEr>, [SKLWriteSETA_SETBEr]>, + SchedVar<NoSchedPred, [WriteSETCC]> +]>; + +def SKLSETA_SETBErm : SchedWriteVariant<[ + SchedVar<MCSchedPredicate<IsSETAm_Or_SETBEm>, [SKLWriteSETA_SETBEm]>, + SchedVar<NoSchedPred, [WriteSETCCStore]> +]>; + +def : InstRW<[SKLSETA_SETBErr], (instrs SETCCr)>; +def : InstRW<[SKLSETA_SETBErm], (instrs SETCCm)>; + } // SchedModel diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td index b532e7ae817..e3456073de3 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -722,13 +722,6 @@ def SKXWriteResGroup14 : SchedWriteRes<[SKXPort05]> { def: InstRW<[SKXWriteResGroup14], (instrs FDECSTP, MMX_MOVDQ2Qrr)>; -def SKXWriteResGroup15 : SchedWriteRes<[SKXPort06]> { - let Latency = 2; - let NumMicroOps = 2; - let ResourceCycles = [2]; -} -def: InstRW<[SKXWriteResGroup15], (instregex "SET(A|BE)r")>; - def SKXWriteResGroup17 : SchedWriteRes<[SKXPort0156]> { let Latency = 2; let NumMicroOps = 2; @@ -901,13 +894,6 @@ def SKXWriteResGroup45 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort237]> { } def: InstRW<[SKXWriteResGroup45], (instrs FNSTSWm)>; -def SKXWriteResGroup46 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort06]> { - let Latency = 3; - let NumMicroOps = 4; - let ResourceCycles = [1,1,2]; -} -def: InstRW<[SKXWriteResGroup46], (instregex "SET(A|BE)m")>; - def SKXWriteResGroup47 : 
SchedWriteRes<[SKXPort4,SKXPort6,SKXPort237,SKXPort0156]> { let Latency = 3; let NumMicroOps = 4; @@ -2498,4 +2484,30 @@ def SKXCMOVA_CMOVBErm : SchedWriteVariant<[ def : InstRW<[SKXCMOVA_CMOVBErr], (instrs CMOV16rr, CMOV32rr, CMOV64rr)>; def : InstRW<[SKXCMOVA_CMOVBErm], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>; +// SETCCs that use both Z and C flag require an extra uop. +def SKXWriteSETA_SETBEr : SchedWriteRes<[SKXPort06]> { + let Latency = 2; + let ResourceCycles = [2]; + let NumMicroOps = 2; +} + +def SKXWriteSETA_SETBEm : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort06]> { + let Latency = 3; + let ResourceCycles = [1,1,2]; + let NumMicroOps = 4; +} + +def SKXSETA_SETBErr : SchedWriteVariant<[ + SchedVar<MCSchedPredicate<IsSETAr_Or_SETBEr>, [SKXWriteSETA_SETBEr]>, + SchedVar<NoSchedPred, [WriteSETCC]> +]>; + +def SKXSETA_SETBErm : SchedWriteVariant<[ + SchedVar<MCSchedPredicate<IsSETAm_Or_SETBEm>, [SKXWriteSETA_SETBEm]>, + SchedVar<NoSchedPred, [WriteSETCCStore]> +]>; + +def : InstRW<[SKXSETA_SETBErr], (instrs SETCCr)>; +def : InstRW<[SKXSETA_SETBErm], (instrs SETCCm)>; + } // SchedModel diff --git a/llvm/lib/Target/X86/X86ScheduleBdVer2.td b/llvm/lib/Target/X86/X86ScheduleBdVer2.td index 6e11ac25172..36f4ad37e6e 100644 --- a/llvm/lib/Target/X86/X86ScheduleBdVer2.td +++ b/llvm/lib/Target/X86/X86ScheduleBdVer2.td @@ -472,8 +472,15 @@ def PdWriteSETGEmSETGmSETLEmSETLm : SchedWriteRes<[PdEX01]> { let ResourceCycles = [2]; let NumMicroOps = 2; } -def : InstRW<[PdWriteSETGEmSETGmSETLEmSETLm], (instrs SETGEm, SETGm, - SETLEm, SETLm)>; + +def PdSETGEmSETGmSETLEmSETLm : SchedWriteVariant<[ + SchedVar<MCSchedPredicate<CheckImmOperand_s<5, "X86::COND_GE">>, [PdWriteSETGEmSETGmSETLEmSETLm]>, + SchedVar<MCSchedPredicate<CheckImmOperand_s<5, "X86::COND_G">>, [PdWriteSETGEmSETGmSETLEmSETLm]>, + SchedVar<MCSchedPredicate<CheckImmOperand_s<5, "X86::COND_LE">>, [PdWriteSETGEmSETGmSETLEmSETLm]>, + SchedVar<MCSchedPredicate<CheckImmOperand_s<5, "X86::COND_L">>, 
[PdWriteSETGEmSETGmSETLEmSETLm]>, + SchedVar<NoSchedPred, [WriteSETCCStore]> +]>; +def : InstRW<[PdSETGEmSETGmSETLEmSETLm], (instrs SETCCm)>; defm : PdWriteRes<WriteLAHFSAHF, [PdEX01], 2, [], 2>; |