diff options
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 103 | ||||
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstrInfo.h | 3 | ||||
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64SchedM1.td | 37 |
3 files changed, 97 insertions, 46 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 07a4f8c2c36..e9db4fa324c 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -673,8 +673,9 @@ static bool canBeExpandedToORR(const MachineInstr &MI, unsigned BitSize) { bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const { if (!Subtarget.hasCustomCheapAsMoveHandling()) return MI.isAsCheapAsAMove(); - - unsigned Imm; + if (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 && + isExynosShiftLeftFast(MI)) + return true; switch (MI.getOpcode()) { default: @@ -685,17 +686,7 @@ bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const { case AArch64::ADDXri: case AArch64::SUBWri: case AArch64::SUBXri: - return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 || - MI.getOperand(3).getImm() == 0); - - // add/sub on register with shift - case AArch64::ADDWrs: - case AArch64::ADDXrs: - case AArch64::SUBWrs: - case AArch64::SUBXrs: - Imm = MI.getOperand(3).getImm(); - return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 && - AArch64_AM::getArithShiftValue(Imm) < 4); + return (MI.getOperand(3).getImm() == 0); // logical ops on immediate case AArch64::ANDWri: @@ -721,24 +712,6 @@ bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const { case AArch64::ORRXrr: return true; - // logical ops on register with shift - case AArch64::ANDWrs: - case AArch64::ANDXrs: - case AArch64::BICWrs: - case AArch64::BICXrs: - case AArch64::EONWrs: - case AArch64::EONXrs: - case AArch64::EORWrs: - case AArch64::EORXrs: - case AArch64::ORNWrs: - case AArch64::ORNXrs: - case AArch64::ORRWrs: - case AArch64::ORRXrs: - Imm = MI.getOperand(3).getImm(); - return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 && - AArch64_AM::getShiftValue(Imm) < 4 && - AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL); - // If MOVi32imm or MOVi64imm can be expanded into 
ORRWri or // ORRXri, it is as cheap as MOV case AArch64::MOVi32imm: @@ -761,6 +734,74 @@ bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const { llvm_unreachable("Unknown opcode to check as cheap as a move!"); } +bool AArch64InstrInfo::isExynosShiftLeftFast(const MachineInstr &MI) const { + unsigned Imm, Shift; + + switch (MI.getOpcode()) { + default: + return false; + + // WriteI + case AArch64::ADDSWri: + case AArch64::ADDSXri: + case AArch64::ADDWri: + case AArch64::ADDXri: + case AArch64::SUBSWri: + case AArch64::SUBSXri: + case AArch64::SUBWri: + case AArch64::SUBXri: + return true; + + // WriteISReg + case AArch64::ADDSWrs: + case AArch64::ADDSXrs: + case AArch64::ADDWrs: + case AArch64::ADDXrs: + case AArch64::ANDSWrs: + case AArch64::ANDSXrs: + case AArch64::ANDWrs: + case AArch64::ANDXrs: + case AArch64::BICSWrs: + case AArch64::BICSXrs: + case AArch64::BICWrs: + case AArch64::BICXrs: + case AArch64::EONWrs: + case AArch64::EONXrs: + case AArch64::EORWrs: + case AArch64::EORXrs: + case AArch64::ORNWrs: + case AArch64::ORNXrs: + case AArch64::ORRWrs: + case AArch64::ORRXrs: + case AArch64::SUBSWrs: + case AArch64::SUBSXrs: + case AArch64::SUBWrs: + case AArch64::SUBXrs: + Imm = MI.getOperand(3).getImm(); + Shift = AArch64_AM::getShiftValue(Imm); + return (Shift == 0 || + (Shift <= 3 && AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL)); + + // WriteIEReg: operand 3 is decoded as an arithmetic-extend immediate, so both fields use the getArith* helpers. + case AArch64::ADDSWrx: + case AArch64::ADDSXrx: + case AArch64::ADDSXrx64: + case AArch64::ADDWrx: + case AArch64::ADDXrx: + case AArch64::ADDXrx64: + case AArch64::SUBSWrx: + case AArch64::SUBSXrx: + case AArch64::SUBSXrx64: + case AArch64::SUBWrx: + case AArch64::SUBXrx: + case AArch64::SUBXrx64: + Imm = MI.getOperand(3).getImm(); + Shift = AArch64_AM::getArithShiftValue(Imm); + return (Shift == 0 || + (Shift <= 3 && AArch64_AM::getArithExtendType(Imm) == AArch64_AM::UXTX)); + } +} + bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) const { switch (MI.getOpcode()) { default: 
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h index faf832657fd..689a24b2f24 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -367,6 +367,9 @@ public: insertOutlinedCall(Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, MachineFunction &MF, unsigned CallClass) const override; + /// Returns true if the instruction has a shift left that can be executed + /// more efficiently. + bool isExynosShiftLeftFast(const MachineInstr &MI) const; /// Returns true if the instruction has a shift by immediate that can be /// executed in one cycle less. bool isFalkorShiftExtFast(const MachineInstr &MI) const; diff --git a/llvm/lib/Target/AArch64/AArch64SchedM1.td b/llvm/lib/Target/AArch64/AArch64SchedM1.td index 708ccdf3d02..0051136ba81 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedM1.td +++ b/llvm/lib/Target/AArch64/AArch64SchedM1.td @@ -62,16 +62,25 @@ let SchedModel = ExynosM1Model in { let SchedModel = ExynosM1Model in { //===----------------------------------------------------------------------===// -// Coarse scheduling model for the Exynos-M1. +// Predicates. + +def M1ShiftLeftFastPred : SchedPredicate<[{TII->isExynosShiftLeftFast(*MI)}]>; + +//===----------------------------------------------------------------------===// +// Coarse scheduling model. 
def M1WriteA1 : SchedWriteRes<[M1UnitALU]> { let Latency = 1; } def M1WriteA2 : SchedWriteRes<[M1UnitALU]> { let Latency = 2; } +def M1WriteAA : SchedWriteRes<[M1UnitALU]> { let Latency = 2; + let ResourceCycles = [2]; } +def M1WriteAX : SchedWriteVariant<[SchedVar<M1ShiftLeftFastPred, [M1WriteA1]>, + SchedVar<NoSchedPred, [M1WriteAA]>]>; def M1WriteC1 : SchedWriteRes<[M1UnitC]> { let Latency = 1; } def M1WriteC2 : SchedWriteRes<[M1UnitC]> { let Latency = 2; } -def M1WriteB1 : SchedWriteRes<[M1UnitB]> { let Latency = 1; } +def M1WriteB1 : SchedWriteRes<[M1UnitB]> { let Latency = 1; } -def M1WriteL5 : SchedWriteRes<[M1UnitL]> { let Latency = 5; } +def M1WriteL5 : SchedWriteRes<[M1UnitL]> { let Latency = 5; } def M1WriteLX : SchedWriteVariant<[SchedVar<ScaledIdxPred, [M1WriteL5, M1WriteA1]>, SchedVar<NoSchedPred, [M1WriteL5]>]>; @@ -85,7 +94,6 @@ def M1WriteSX : SchedWriteVariant<[SchedVar<ScaledIdxPred, [M1WriteS2, def M1ReadAdrBase : SchedReadVariant<[SchedVar<ScaledIdxPred, [ReadDefault]>, SchedVar<NoSchedPred, [ReadDefault]>]>; -def : SchedAlias<ReadAdrBase, M1ReadAdrBase>; // Branch instructions. // NOTE: Unconditional direct branches actually take neither cycles nor units. @@ -94,7 +102,6 @@ def : WriteRes<WriteBrReg, [M1UnitC]> { let Latency = 1; } // Arithmetic and logical integer instructions. def : WriteRes<WriteI, [M1UnitALU]> { let Latency = 1; } -// TODO: Shift over 3 and some extensions take 2 cycles. def : WriteRes<WriteISReg, [M1UnitALU]> { let Latency = 1; } def : WriteRes<WriteIEReg, [M1UnitALU]> { let Latency = 1; } def : WriteRes<WriteIS, [M1UnitALU]> { let Latency = 1; } @@ -110,7 +117,6 @@ def : WriteRes<WriteID64, [M1UnitC, M1UnitD]> { let Latency = 21; let ResourceCycles = [1, 21]; } // TODO: Long multiplication take 5 cycles and also the ALU. -// TODO: Multiplication with accumulation can be advanced. def : WriteRes<WriteIM32, [M1UnitC]> { let Latency = 3; } // TODO: 64-bit multiplication has a throughput of 1/2. 
def : WriteRes<WriteIM64, [M1UnitC]> { let Latency = 4; } @@ -119,8 +125,10 @@ def : WriteRes<WriteIM64, [M1UnitC]> { let Latency = 4; } def : WriteRes<WriteExtr, [M1UnitALU, M1UnitALU]> { let Latency = 2; } +// Addressing modes. // TODO: The latency for the post or pre register is 1 cycle. def : WriteRes<WriteAdr, []> { let Latency = 0; } +def : SchedAlias<ReadAdrBase, M1ReadAdrBase>; // Load instructions. def : WriteRes<WriteLD, [M1UnitL]> { let Latency = 4; } @@ -164,12 +172,10 @@ def : WriteRes<WriteSys, []> { let Latency = 1; } // Generic fast forwarding. // TODO: Add FP register forwarding rules. - def : ReadAdvance<ReadI, 0>; def : ReadAdvance<ReadISReg, 0>; def : ReadAdvance<ReadIEReg, 0>; def : ReadAdvance<ReadIM, 0>; -// Integer multiply-accumulate. // TODO: The forwarding for WriteIM64 saves actually 3 cycles. def : ReadAdvance<ReadIMA, 2, [WriteIM32, WriteIM64]>; def : ReadAdvance<ReadID, 0>; @@ -178,7 +184,7 @@ def : ReadAdvance<ReadAdrBase, 0>; def : ReadAdvance<ReadVLD, 0>; //===----------------------------------------------------------------------===// -// Finer scheduling model for the Exynos-M1. +// Finer scheduling model. def M1WriteNEONA : SchedWriteRes<[M1UnitNALU, M1UnitNALU, @@ -287,7 +293,6 @@ def M1WriteVLDN : SchedWriteRes<[M1UnitL, M1UnitL, M1UnitL]> { let Latency = 14; let ResourceCycles = [7]; } - def M1WriteVSTA : WriteSequence<[WriteVST], 2>; def M1WriteVSTB : WriteSequence<[WriteVST], 3>; def M1WriteVSTC : WriteSequence<[WriteVST], 4>; @@ -340,7 +345,6 @@ def M1WriteVSTI : SchedWriteRes<[M1UnitNALU, // Branch instructions def : InstRW<[M1WriteB1], (instrs Bcc)>; -// NOTE: Conditional branch and link adds a B uop. def : InstRW<[M1WriteA1], (instrs BL)>; // NOTE: Indirect branch and link with LR adds an ALU uop. def : InstRW<[M1WriteA1, @@ -351,6 +355,7 @@ def : InstRW<[M1WriteC1, // Arithmetic and logical integer instructions. 
def : InstRW<[M1WriteA1], (instrs COPY)>; +def : InstRW<[M1WriteAX], (instregex ".+r[sx](64)?$")>; // Divide and multiply instructions. @@ -413,10 +418,12 @@ def : InstRW<[M1WriteNMISC4], (instregex "^ML[AS]v")>; def : InstRW<[M1WriteNMISC4], (instregex "^(S|U|SQD|SQRD)ML[AS][HL]v")>; def : InstRW<[M1WriteNMISC4], (instregex "^(S|U|SQD)MULLv")>; def : InstRW<[M1WriteNAL13], (instregex "^(S|SR|U|UR)SRAv")>; -def : InstRW<[M1WriteNALU1], (instregex "^[SU]?SH(L|LL|R)2?v")>; -def : InstRW<[M1WriteNALU1], (instregex "^S[LR]Iv")>; -def : InstRW<[M1WriteNAL13], (instregex "^[SU]?(Q|QR|R)?SHR(N|U|UN)?2?v")>; -def : InstRW<[M1WriteNAL13], (instregex "^[SU](Q|QR|R)SHLU?v")>; +def : InstRW<[M1WriteNALU1], (instregex "^SHL[dv]")>; +def : InstRW<[M1WriteNALU1], (instregex "^[SU]SH[LR][dv]")>; +def : InstRW<[M1WriteNALU1], (instregex "^S[LR]I[dv]")>; +def : InstRW<[M1WriteNAL13], (instregex "^(([SU]Q)?R)?SHRU?N[bhsv]")>; +def : InstRW<[M1WriteNAL13], (instregex "^[SU]RSH[LR][dv]")>; +def : InstRW<[M1WriteNAL13], (instregex "^[SU]QR?SHLU?[bdhsv]")>; // ASIMD FP instructions. def : InstRW<[M1WriteNALU1], (instregex "^F(ABS|NEG)v")>; |

