diff options
Diffstat (limited to 'llvm/lib/Target/AArch64/AArch64InstrInfo.cpp')
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 110 |
1 files changed, 90 insertions, 20 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 714007f8aba..15d908d0797 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -4198,6 +4198,40 @@ static MachineInstr *genFusedMultiplyAcc( FMAInstKind::Accumulator); } +/// genNeg - Helper to generate an intermediate negation of the second operand +/// of Root +static Register genNeg(MachineFunction &MF, MachineRegisterInfo &MRI, + const TargetInstrInfo *TII, MachineInstr &Root, + SmallVectorImpl<MachineInstr *> &InsInstrs, + DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, + unsigned MnegOpc, const TargetRegisterClass *RC) { + Register NewVR = MRI.createVirtualRegister(RC); + MachineInstrBuilder MIB = + BuildMI(MF, Root.getDebugLoc(), TII->get(MnegOpc), NewVR) + .add(Root.getOperand(2)); + InsInstrs.push_back(MIB); + + assert(InstrIdxForVirtReg.empty()); + InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); + + return NewVR; +} + +/// genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate +/// instructions with an additional negation of the accumulator +static MachineInstr *genFusedMultiplyAccNeg( + MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, + MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs, + DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd, + unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) { + assert(IdxMulOpd == 1); + + Register NewVR = + genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC); + return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC, + FMAInstKind::Accumulator, &NewVR); +} + /// genFusedMultiplyIdx - Helper to generate fused multiply accumulate /// instructions. /// @@ -4210,6 +4244,22 @@ static MachineInstr *genFusedMultiplyIdx( FMAInstKind::Indexed); } +/// genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate +/// instructions with an additional negation of the accumulator +static MachineInstr *genFusedMultiplyIdxNeg( + MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, + MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs, + DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd, + unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) { + assert(IdxMulOpd == 1); + + Register NewVR = + genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC); + + return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC, + FMAInstKind::Indexed, &NewVR); +} + /// genMaddR - Generate madd instruction and combine mul and add using /// an extra virtual register /// Example - an ADD intermediate needs to be stored in a register: @@ -4512,9 +4562,11 @@ void AArch64InstrInfo::genAlternativeCodeSequence( break; case MachineCombinerPattern::MULSUBv8i8_OP1: - Opc = AArch64::MLSv8i8; + Opc = AArch64::MLAv8i8; RC = &AArch64::FPR64RegClass; - MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs, + InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i8, + RC); break; case MachineCombinerPattern::MULSUBv8i8_OP2: Opc = AArch64::MLSv8i8; @@ -4522,9 +4574,11 @@ void AArch64InstrInfo::genAlternativeCodeSequence( MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); break; case MachineCombinerPattern::MULSUBv16i8_OP1: - Opc = AArch64::MLSv16i8; + Opc = AArch64::MLAv16i8; RC = &AArch64::FPR128RegClass; - MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs, + InstrIdxForVirtReg, 1, Opc, AArch64::NEGv16i8, + RC); break; case MachineCombinerPattern::MULSUBv16i8_OP2: Opc = AArch64::MLSv16i8; @@ -4532,9 +4586,11 @@ void AArch64InstrInfo::genAlternativeCodeSequence( MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); break; case MachineCombinerPattern::MULSUBv4i16_OP1: - Opc = AArch64::MLSv4i16; + Opc = AArch64::MLAv4i16; RC = &AArch64::FPR64RegClass; - MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs, + InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16, + RC); break; case MachineCombinerPattern::MULSUBv4i16_OP2: Opc = AArch64::MLSv4i16; @@ -4542,9 +4598,11 @@ void AArch64InstrInfo::genAlternativeCodeSequence( MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); break; case MachineCombinerPattern::MULSUBv8i16_OP1: - Opc = AArch64::MLSv8i16; + Opc = AArch64::MLAv8i16; RC = &AArch64::FPR128RegClass; - MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs, + InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16, + RC); break; case MachineCombinerPattern::MULSUBv8i16_OP2: Opc = AArch64::MLSv8i16; @@ -4552,9 +4610,11 @@ void AArch64InstrInfo::genAlternativeCodeSequence( MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); break; case MachineCombinerPattern::MULSUBv2i32_OP1: - Opc = AArch64::MLSv2i32; + Opc = AArch64::MLAv2i32; RC = &AArch64::FPR64RegClass; - MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs, + InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32, + RC); break; case MachineCombinerPattern::MULSUBv2i32_OP2: Opc = AArch64::MLSv2i32; @@ -4562,9 +4622,11 @@ void AArch64InstrInfo::genAlternativeCodeSequence( MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); break; case MachineCombinerPattern::MULSUBv4i32_OP1: - Opc = AArch64::MLSv4i32; + Opc = AArch64::MLAv4i32; RC = &AArch64::FPR128RegClass; - MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs, + InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32, + RC); break; case MachineCombinerPattern::MULSUBv4i32_OP2: Opc = AArch64::MLSv4i32; @@ -4614,9 +4676,11 @@ void AArch64InstrInfo::genAlternativeCodeSequence( break; case MachineCombinerPattern::MULSUBv4i16_indexed_OP1: - Opc = AArch64::MLSv4i16_indexed; + Opc = AArch64::MLAv4i16_indexed; RC = &AArch64::FPR64RegClass; - MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs, + InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16, + RC); break; case MachineCombinerPattern::MULSUBv4i16_indexed_OP2: Opc = AArch64::MLSv4i16_indexed; @@ -4624,9 +4688,11 @@ void AArch64InstrInfo::genAlternativeCodeSequence( MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); break; case MachineCombinerPattern::MULSUBv8i16_indexed_OP1: - Opc = AArch64::MLSv8i16_indexed; + Opc = AArch64::MLAv8i16_indexed; RC = &AArch64::FPR128RegClass; - MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs, + InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16, + RC); break; case MachineCombinerPattern::MULSUBv8i16_indexed_OP2: Opc = AArch64::MLSv8i16_indexed; @@ -4634,9 +4700,11 @@ void AArch64InstrInfo::genAlternativeCodeSequence( MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); break; case MachineCombinerPattern::MULSUBv2i32_indexed_OP1: - Opc = AArch64::MLSv2i32_indexed; + Opc = AArch64::MLAv2i32_indexed; RC = &AArch64::FPR64RegClass; - MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs, + InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32, + RC); break; case MachineCombinerPattern::MULSUBv2i32_indexed_OP2: Opc = AArch64::MLSv2i32_indexed; @@ -4644,9 +4712,11 @@ void AArch64InstrInfo::genAlternativeCodeSequence( MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); break; case MachineCombinerPattern::MULSUBv4i32_indexed_OP1: - Opc = AArch64::MLSv4i32_indexed; + Opc = AArch64::MLAv4i32_indexed; RC = &AArch64::FPR128RegClass; - MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs, + InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32, + RC); break; case MachineCombinerPattern::MULSUBv4i32_indexed_OP2: Opc = AArch64::MLSv4i32_indexed; |