Diffstat (limited to 'llvm/lib/Target/AArch64/AArch64InstrInfo.cpp')
 llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 110 ++++++++++++++++++-------
 1 file changed, 90 insertions(+), 20 deletions(-)
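
The change below switches the MULSUB*_OP1 machine-combiner patterns (where the MUL is the first operand of the SUB, i.e. mul(b, c) - a) from emitting a single MLS to emitting a NEG of the subtracted operand followed by an MLA. MLS subtracts the product from its accumulator, so it matches the _OP2 forms (a - mul(b, c)) but not the _OP1 forms; negating the accumulator first and then accumulating with MLA gives the intended result. The snippet that follows is not part of the commit: it is a minimal standalone C++ sketch of the per-lane identity the new sequence relies on, using 8-bit lanes as in MLAv8i8/NEGv8i8, with arbitrary array values.

    // Standalone sketch (not part of the commit): demonstrates the identity the
    // new MULSUB*_OP1 lowering relies on, using 8-bit lanes as in MLAv8i8/NEGv8i8.
    // For each lane: mul(b, c) - a  ==  neg(a) + mul(b, c)   (mod 2^8)
    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    int main() {
      const uint8_t A[8] = {1, 200, 7, 0, 255, 42, 99, 128};
      const uint8_t B[8] = {3, 17, 250, 9, 2, 77, 5, 64};
      const uint8_t C[8] = {10, 4, 6, 123, 8, 3, 201, 2};

      for (int i = 0; i < 8; ++i) {
        uint8_t Mul = static_cast<uint8_t>(B[i] * C[i]);
        // What the source pattern (MULSUBv8i8_OP1) computes: mul - a.
        uint8_t Sub = static_cast<uint8_t>(Mul - A[i]);
        // What the rewritten sequence computes: NEGv8i8, then MLAv8i8.
        uint8_t Neg = static_cast<uint8_t>(-A[i]);
        uint8_t Mla = static_cast<uint8_t>(Neg + Mul);
        assert(Sub == Mla && "neg+mla must match mul-sub per lane");
      }
      std::puts("mul(b,c) - a == neg(a) + mul(b,c) for all lanes");
      return 0;
    }

The _OP2 cases are left on MLS in every hunk below, which is why only the _OP1 arms of the switch change.
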
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 714007f8aba..15d908d0797 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -4198,6 +4198,40 @@ static MachineInstr *genFusedMultiplyAcc(
FMAInstKind::Accumulator);
}
+/// genNeg - Helper to generate an intermediate negation of the second operand
+/// of Root
+static Register genNeg(MachineFunction &MF, MachineRegisterInfo &MRI,
+ const TargetInstrInfo *TII, MachineInstr &Root,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
+ unsigned MnegOpc, const TargetRegisterClass *RC) {
+ Register NewVR = MRI.createVirtualRegister(RC);
+ MachineInstrBuilder MIB =
+ BuildMI(MF, Root.getDebugLoc(), TII->get(MnegOpc), NewVR)
+ .add(Root.getOperand(2));
+ InsInstrs.push_back(MIB);
+
+ assert(InstrIdxForVirtReg.empty());
+ InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
+
+ return NewVR;
+}
+
+/// genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate
+/// instructions with an additional negation of the accumulator
+static MachineInstr *genFusedMultiplyAccNeg(
+ MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
+ MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
+ unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
+ assert(IdxMulOpd == 1);
+
+ Register NewVR =
+ genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
+ return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
+ FMAInstKind::Accumulator, &NewVR);
+}
+
/// genFusedMultiplyIdx - Helper to generate fused multiply accumulate
/// instructions.
///
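
A note on the bookkeeping in genNeg above: the NEG is the first instruction pushed into InsInstrs, so InstrIdxForVirtReg maps the new virtual register to index 0 (hence the assert that the map is still empty). The sketch below is a simplified, hypothetical model — not MachineCombiner's actual code — of how such a map lets a later consumer resolve an operand that is defined by another newly built instruction rather than by anything already in the block.

    // Simplified, hypothetical model (not MachineCombiner's actual code) of the
    // bookkeeping genNeg performs: the NEG is pushed into InsInstrs first, so its
    // position is 0, and InstrIdxForVirtReg maps the new virtual register to that
    // position. A later query for an operand can then tell whether the operand is
    // defined by another newly built instruction and, if so, which one.
    #include <cassert>
    #include <map>
    #include <string>
    #include <vector>

    int main() {
      std::vector<std::string> InsInstrs;              // newly built instructions, in order
      std::map<unsigned, unsigned> InstrIdxForVirtReg; // new vreg -> index into InsInstrs

      const unsigned NewVR = 100;                      // hypothetical virtual register number

      // genNeg: build the NEG first and remember where it lives.
      InsInstrs.push_back("NEGv8i8 %100, %a");
      InstrIdxForVirtReg.insert({NewVR, 0});

      // genFusedMultiplyAccNeg: the MLA that accumulates into %100 comes second.
      InsInstrs.push_back("MLAv8i8 %dst, %100, %b, %c");

      // A consumer (e.g. the combiner's cost/depth computation) can resolve %100
      // back to the NEG instead of searching the existing instruction trace.
      auto It = InstrIdxForVirtReg.find(NewVR);
      assert(It != InstrIdxForVirtReg.end() && It->second == 0);
      assert(InsInstrs[It->second].rfind("NEG", 0) == 0);
      return 0;
    }
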
@@ -4210,6 +4244,22 @@ static MachineInstr *genFusedMultiplyIdx(
FMAInstKind::Indexed);
}
+/// genFusedMultiplyIdxNeg - Helper to generate indexed fused multiply
+/// accumulate instructions with an additional negation of the accumulator
+static MachineInstr *genFusedMultiplyIdxNeg(
+ MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
+ MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
+ unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
+ assert(IdxMulOpd == 1);
+
+ Register NewVR =
+ genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
+
+ return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
+ FMAInstKind::Indexed, &NewVR);
+}
+
/// genMaddR - Generate madd instruction and combine mul and add using
/// an extra virtual register
/// Example - an ADD intermediate needs to be stored in a register:
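
The indexed helper added in this hunk mirrors genFusedMultiplyAccNeg for the by-element MLA forms, where every lane of the result uses a single lane of the second multiplicand; the rewrite itself is unchanged. Below is a standalone sketch (not from the commit) of the same identity for the indexed case, with 16-bit lanes as in MLAv4i16_indexed/NEGv4i16; the lane index and values are arbitrary.

    // Standalone sketch (not from the commit): the indexed (by-element) form uses
    // one lane of the second multiplicand for every lane of the result, but the
    // rewrite is the same as the non-indexed case: mul - a == neg(a) + mul, here
    // with 16-bit lanes as in MLAv4i16_indexed / NEGv4i16.
    #include <cassert>
    #include <cstdint>

    int main() {
      const uint16_t A[4] = {1, 60000, 7, 32768};
      const uint16_t B[4] = {3, 1234, 50000, 9};
      const uint16_t C[4] = {10, 20, 30, 40};
      const int Lane = 2; // hypothetical lane index baked into the indexed MLA

      for (int i = 0; i < 4; ++i) {
        uint16_t Mul = static_cast<uint16_t>(static_cast<uint32_t>(B[i]) * C[Lane]);
        uint16_t Sub = static_cast<uint16_t>(Mul - A[i]); // MULSUBv4i16_indexed_OP1
        uint16_t Mla = static_cast<uint16_t>(static_cast<uint16_t>(-A[i]) + Mul); // NEG + indexed MLA
        assert(Sub == Mla);
      }
      return 0;
    }
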
@@ -4512,9 +4562,11 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
break;
case MachineCombinerPattern::MULSUBv8i8_OP1:
- Opc = AArch64::MLSv8i8;
+ Opc = AArch64::MLAv8i8;
RC = &AArch64::FPR64RegClass;
- MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
+ MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
+ InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i8,
+ RC);
break;
case MachineCombinerPattern::MULSUBv8i8_OP2:
Opc = AArch64::MLSv8i8;
@@ -4522,9 +4574,11 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case MachineCombinerPattern::MULSUBv16i8_OP1:
- Opc = AArch64::MLSv16i8;
+ Opc = AArch64::MLAv16i8;
RC = &AArch64::FPR128RegClass;
- MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
+ MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
+ InstrIdxForVirtReg, 1, Opc, AArch64::NEGv16i8,
+ RC);
break;
case MachineCombinerPattern::MULSUBv16i8_OP2:
Opc = AArch64::MLSv16i8;
@@ -4532,9 +4586,11 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case MachineCombinerPattern::MULSUBv4i16_OP1:
- Opc = AArch64::MLSv4i16;
+ Opc = AArch64::MLAv4i16;
RC = &AArch64::FPR64RegClass;
- MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
+ MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
+ InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
+ RC);
break;
case MachineCombinerPattern::MULSUBv4i16_OP2:
Opc = AArch64::MLSv4i16;
@@ -4542,9 +4598,11 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case MachineCombinerPattern::MULSUBv8i16_OP1:
- Opc = AArch64::MLSv8i16;
+ Opc = AArch64::MLAv8i16;
RC = &AArch64::FPR128RegClass;
- MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
+ MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
+ InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
+ RC);
break;
case MachineCombinerPattern::MULSUBv8i16_OP2:
Opc = AArch64::MLSv8i16;
@@ -4552,9 +4610,11 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case MachineCombinerPattern::MULSUBv2i32_OP1:
- Opc = AArch64::MLSv2i32;
+ Opc = AArch64::MLAv2i32;
RC = &AArch64::FPR64RegClass;
- MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
+ MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
+ InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
+ RC);
break;
case MachineCombinerPattern::MULSUBv2i32_OP2:
Opc = AArch64::MLSv2i32;
@@ -4562,9 +4622,11 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case MachineCombinerPattern::MULSUBv4i32_OP1:
- Opc = AArch64::MLSv4i32;
+ Opc = AArch64::MLAv4i32;
RC = &AArch64::FPR128RegClass;
- MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
+ MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
+ InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
+ RC);
break;
case MachineCombinerPattern::MULSUBv4i32_OP2:
Opc = AArch64::MLSv4i32;
@@ -4614,9 +4676,11 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
break;
case MachineCombinerPattern::MULSUBv4i16_indexed_OP1:
- Opc = AArch64::MLSv4i16_indexed;
+ Opc = AArch64::MLAv4i16_indexed;
RC = &AArch64::FPR64RegClass;
- MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
+ MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
+ InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
+ RC);
break;
case MachineCombinerPattern::MULSUBv4i16_indexed_OP2:
Opc = AArch64::MLSv4i16_indexed;
@@ -4624,9 +4688,11 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case MachineCombinerPattern::MULSUBv8i16_indexed_OP1:
- Opc = AArch64::MLSv8i16_indexed;
+ Opc = AArch64::MLAv8i16_indexed;
RC = &AArch64::FPR128RegClass;
- MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
+ MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
+ InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
+ RC);
break;
case MachineCombinerPattern::MULSUBv8i16_indexed_OP2:
Opc = AArch64::MLSv8i16_indexed;
@@ -4634,9 +4700,11 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case MachineCombinerPattern::MULSUBv2i32_indexed_OP1:
- Opc = AArch64::MLSv2i32_indexed;
+ Opc = AArch64::MLAv2i32_indexed;
RC = &AArch64::FPR64RegClass;
- MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
+ MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
+ InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
+ RC);
break;
case MachineCombinerPattern::MULSUBv2i32_indexed_OP2:
Opc = AArch64::MLSv2i32_indexed;
@@ -4644,9 +4712,11 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case MachineCombinerPattern::MULSUBv4i32_indexed_OP1:
- Opc = AArch64::MLSv4i32_indexed;
+ Opc = AArch64::MLAv4i32_indexed;
RC = &AArch64::FPR128RegClass;
- MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
+ MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
+ InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
+ RC);
break;
case MachineCombinerPattern::MULSUBv4i32_indexed_OP2:
Opc = AArch64::MLSv4i32_indexed;