path: root/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
author     Florian Hahn <florian.hahn@arm.com>  2017-12-06 22:48:36 +0000
committer  Florian Hahn <florian.hahn@arm.com>  2017-12-06 22:48:36 +0000
commit     5d6a4e43bac17b047d0e3fb60a56db621385f265 (patch)
tree       af855040ced223e0358d658d1247e15064181adb /llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
parent     a502ee73c49b2c314e8340b07f38f183fc6b19df (diff)
[AArch64] Add patterns to replace fsub fmul with fma fneg.
Summary: This patch adds MachineCombiner patterns for transforming
(fsub (fmul x y) z) into (fma x y (fneg z)). This has lower latency on
microarchitectures where fneg is cheap.

Patch based on work by George Steed.

Reviewers: rengolin, joelkevinjones, joel_k_jones, evandro, efriedma

Reviewed By: evandro

Subscribers: aemerson, javed.absar, llvm-commits, kristof.beyls

Differential Revision: https://reviews.llvm.org/D40306

llvm-svn: 319980
Diffstat (limited to 'llvm/lib/Target/AArch64/AArch64InstrInfo.cpp')
-rw-r--r--  llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 105
1 file changed, 102 insertions(+), 3 deletions(-)
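To see the rewrite at the source level: the new patterns fire when a vector multiply feeds the first operand of a vector subtract. A minimal sketch, assuming an AArch64 NEON target; the commented instruction sequences illustrate the intent and are not output captured from this patch:

#include <arm_neon.h>

// Computes x * y - z. Selected naively this becomes
//     fmul v0.2s, v0.2s, v1.2s
//     fsub v0.2s, v0.2s, v2.2s
// FMLS cannot express this form directly (it computes acc - a*b,
// i.e. z - x*y), so when the multiply feeds operand 1 of the
// subtract, the patterns added here let the MachineCombiner emit
//     fneg v2.2s, v2.2s
//     fmla v2.2s, v0.2s, v1.2s
// instead. On microarchitectures where fneg is cheap, the fused
// form shortens the dependency chain.
float32x2_t mul_sub(float32x2_t x, float32x2_t y, float32x2_t z) {
  return vsub_f32(vmul_f32(x, y), z);
}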
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index bc3c0a4a60e..dacb19330c1 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -3672,6 +3672,15 @@ static bool getFMAPatterns(MachineInstr &Root,
}
break;
case AArch64::FSUBv2f32:
+ if (canCombineWithFMUL(MBB, Root.getOperand(1),
+ AArch64::FMULv2i32_indexed)) {
+ Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP1);
+ Found = true;
+ } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
+ AArch64::FMULv2f32)) {
+ Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP1);
+ Found = true;
+ }
if (canCombineWithFMUL(MBB, Root.getOperand(2),
AArch64::FMULv2i32_indexed)) {
Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP2);
@@ -3683,6 +3692,15 @@ static bool getFMAPatterns(MachineInstr &Root,
}
break;
case AArch64::FSUBv2f64:
+ if (canCombineWithFMUL(MBB, Root.getOperand(1),
+ AArch64::FMULv2i64_indexed)) {
+ Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP1);
+ Found = true;
+ } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
+ AArch64::FMULv2f64)) {
+ Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP1);
+ Found = true;
+ }
if (canCombineWithFMUL(MBB, Root.getOperand(2),
AArch64::FMULv2i64_indexed)) {
Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP2);
@@ -3694,6 +3712,15 @@ static bool getFMAPatterns(MachineInstr &Root,
}
break;
case AArch64::FSUBv4f32:
+ if (canCombineWithFMUL(MBB, Root.getOperand(1),
+ AArch64::FMULv4i32_indexed)) {
+ Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP1);
+ Found = true;
+ } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
+ AArch64::FMULv4f32)) {
+ Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP1);
+ Found = true;
+ }
if (canCombineWithFMUL(MBB, Root.getOperand(2),
AArch64::FMULv4i32_indexed)) {
Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP2);
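The operand-1 checks added above mirror the pre-existing operand-2 checks, preferring the indexed FMUL form when both would match. canCombineWithFMUL itself is outside this hunk; judging from how it is used, it verifies that the operand can absorb the multiply. A simplified restatement of that precondition follows; the helper name and exact checks are assumptions for illustration, not the file's actual code:

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

// Hypothetical restatement: the operand must be a virtual register
// whose single (non-debug) use is here and whose defining instruction
// is the expected FMUL opcode in the same basic block, so the multiply
// can be folded into the fused instruction and deleted afterwards.
static bool canFoldMulOperand(MachineBasicBlock &MBB,
                              const MachineOperand &MO, unsigned MulOpc) {
  if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
    return false;
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  MachineInstr *Def = MRI.getUniqueVRegDef(MO.getReg());
  return Def && Def->getParent() == &MBB && Def->getOpcode() == MulOpc &&
         MRI.hasOneNonDBGUse(MO.getReg());
}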
@@ -3790,12 +3817,15 @@ enum class FMAInstKind { Default, Indexed, Accumulator };
/// \param MaddOpc the opcode of the f|madd instruction
/// \param RC Register class of operands
/// \param kind of fma instruction (addressing mode) to be generated
+/// \param ReplacedAddend is the result register from the instruction
+/// replacing the non-combined operand, if any.
static MachineInstr *
genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
const TargetInstrInfo *TII, MachineInstr &Root,
SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
unsigned MaddOpc, const TargetRegisterClass *RC,
- FMAInstKind kind = FMAInstKind::Default) {
+ FMAInstKind kind = FMAInstKind::Default,
+ const unsigned *ReplacedAddend = nullptr) {
assert(IdxMulOpd == 1 || IdxMulOpd == 2);
unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
@@ -3805,8 +3835,17 @@ genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
bool Src0IsKill = MUL->getOperand(1).isKill();
unsigned SrcReg1 = MUL->getOperand(2).getReg();
bool Src1IsKill = MUL->getOperand(2).isKill();
- unsigned SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
- bool Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
+
+ unsigned SrcReg2;
+ bool Src2IsKill;
+ if (ReplacedAddend) {
+ // If we just generated a new addend, we must be its only use.
+ SrcReg2 = *ReplacedAddend;
+ Src2IsKill = true;
+ } else {
+ SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
+ Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
+ }
if (TargetRegisterInfo::isVirtualRegister(ResultReg))
MRI.constrainRegClass(ResultReg, RC);
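The new ReplacedAddend parameter exists so the FMLS*_OP1 cases in the next hunk can substitute a freshly generated FNEG result for the original addend; since that FNEG is created solely for this purpose, the fused instruction is its only user and the register can be marked killed. Condensed from the v2f32 case below (bookkeeping such as InsInstrs and InstrIdxForVirtReg updates omitted), the call shape is:

// Materialize -z into a fresh vreg, then pass its address so it
// becomes the accumulator operand of the generated FMLA.
unsigned NewVR = MRI.createVirtualRegister(&AArch64::FPR64RegClass);
BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f32), NewVR)
    .add(Root.getOperand(2));               // NewVR = fneg z
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, /*IdxMulOpd=*/1,
                       AArch64::FMLAv2f32, &AArch64::FPR64RegClass,
                       FMAInstKind::Accumulator, &NewVR);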
@@ -4326,6 +4365,66 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
FMAInstKind::Accumulator);
}
break;
+ case MachineCombinerPattern::FMLSv2f32_OP1:
+ case MachineCombinerPattern::FMLSv2i32_indexed_OP1: {
+ RC = &AArch64::FPR64RegClass;
+ unsigned NewVR = MRI.createVirtualRegister(RC);
+ MachineInstrBuilder MIB1 =
+ BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f32), NewVR)
+ .add(Root.getOperand(2));
+ InsInstrs.push_back(MIB1);
+ InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
+ if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP1) {
+ Opc = AArch64::FMLAv2i32_indexed;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
+ FMAInstKind::Indexed, &NewVR);
+ } else {
+ Opc = AArch64::FMLAv2f32;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
+ FMAInstKind::Accumulator, &NewVR);
+ }
+ break;
+ }
+ case MachineCombinerPattern::FMLSv4f32_OP1:
+ case MachineCombinerPattern::FMLSv4i32_indexed_OP1: {
+ RC = &AArch64::FPR128RegClass;
+ unsigned NewVR = MRI.createVirtualRegister(RC);
+ MachineInstrBuilder MIB1 =
+ BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f32), NewVR)
+ .add(Root.getOperand(2));
+ InsInstrs.push_back(MIB1);
+ InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
+ if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP1) {
+ Opc = AArch64::FMLAv4i32_indexed;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
+ FMAInstKind::Indexed, &NewVR);
+ } else {
+ Opc = AArch64::FMLAv4f32;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
+ FMAInstKind::Accumulator, &NewVR);
+ }
+ break;
+ }
+ case MachineCombinerPattern::FMLSv2f64_OP1:
+ case MachineCombinerPattern::FMLSv2i64_indexed_OP1: {
+ RC = &AArch64::FPR128RegClass;
+ unsigned NewVR = MRI.createVirtualRegister(RC);
+ MachineInstrBuilder MIB1 =
+ BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f64), NewVR)
+ .add(Root.getOperand(2));
+ InsInstrs.push_back(MIB1);
+ InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
+ if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP1) {
+ Opc = AArch64::FMLAv2i64_indexed;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
+ FMAInstKind::Indexed, &NewVR);
+ } else {
+ Opc = AArch64::FMLAv2f64;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
+ FMAInstKind::Accumulator, &NewVR);
+ }
+ break;
+ }
} // end switch (Pattern)
// Record MUL and ADD/SUB for deletion
DelInstrs.push_back(MUL);