diff options
-rw-r--r-- | llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp | 40 | ||||
-rw-r--r-- | llvm/lib/Target/ARM/ARMISelLowering.cpp | 93 | ||||
-rw-r--r-- | llvm/lib/Target/ARM/ARMISelLowering.h | 1 | ||||
-rw-r--r-- | llvm/lib/Target/ARM/ARMInstrInfo.td | 6 | ||||
-rw-r--r-- | llvm/lib/Target/ARM/ARMInstrThumb2.td | 3 | ||||
-rw-r--r-- | llvm/test/CodeGen/ARM/longMAC.ll | 29 |
6 files changed, 162 insertions, 10 deletions
diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index a6087d663a3..7360210ff99 100644 --- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -2939,7 +2939,47 @@ void ARMDAGToDAGISel::Select(SDNode *N) { return; } } + case ARMISD::UMAAL: { + unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL; + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), + N->getOperand(2), N->getOperand(3), + getAL(CurDAG, dl), + CurDAG->getRegister(0, MVT::i32) }; + ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops)); + return; + } case ARMISD::UMLAL:{ + // UMAAL is similar to UMLAL but it adds two 32-bit values to the + // 64-bit multiplication result. + if (Subtarget->hasV6Ops() && N->getOperand(2).getOpcode() == ARMISD::ADDC && + N->getOperand(3).getOpcode() == ARMISD::ADDE) { + + SDValue Addc = N->getOperand(2); + SDValue Adde = N->getOperand(3); + + if (Adde.getOperand(2).getNode() == Addc.getNode()) { + + ConstantSDNode *Op0 = dyn_cast<ConstantSDNode>(Adde.getOperand(0)); + ConstantSDNode *Op1 = dyn_cast<ConstantSDNode>(Adde.getOperand(1)); + + if (Op0 && Op1 && Op0->getZExtValue() == 0 && Op1->getZExtValue() == 0) + { + // Select UMAAL instead: UMAAL RdLo, RdHi, Rn, Rm + // RdLo = one operand to be added, lower 32-bits of res + // RdHi = other operand to be added, upper 32-bits of res + // Rn = first multiply operand + // Rm = second multiply operand + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), + Addc.getOperand(0), Addc.getOperand(1), + getAL(CurDAG, dl), + CurDAG->getRegister(0, MVT::i32) }; + unsigned opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL; + CurDAG->SelectNodeTo(N, opc, MVT::i32, MVT::i32, Ops); + return; + } + } + } + if (Subtarget->isThumb()) { SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), N->getOperand(3), getAL(CurDAG, dl), diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 18a583ffc7b..26b4a1c98df 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -1212,6 +1212,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::VTBL2: return "ARMISD::VTBL2"; case ARMISD::VMULLs: return "ARMISD::VMULLs"; case ARMISD::VMULLu: return "ARMISD::VMULLu"; + case ARMISD::UMAAL: return "ARMISD::UMAAL"; case ARMISD::UMLAL: return "ARMISD::UMLAL"; case ARMISD::SMLAL: return "ARMISD::SMLAL"; case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR"; @@ -8686,11 +8687,6 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { - if (Subtarget->isThumb1Only()) return SDValue(); - - // Only perform the checks after legalize when the pattern is available. - if (DCI.isBeforeLegalize()) return SDValue(); - // Look for multiply add opportunities. // The pattern is a ISD::UMUL_LOHI followed by two add nodes, where // each add nodes consumes a value from ISD::UMUL_LOHI and there is @@ -8818,14 +8814,97 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, return resNode; } +static SDValue AddCombineTo64bitUMAAL(SDNode *AddcNode, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { + // UMAAL is similar to UMLAL except that it adds two unsigned values. + // While trying to combine for the other MLAL nodes, first search for the + // chance to use UMAAL. Check if Addc uses another addc node which can first + // be combined into a UMLAL. The other pattern is AddcNode being combined + // into an UMLAL and then using another addc is handled in ISelDAGToDAG. + + if (!Subtarget->hasV6Ops()) + return AddCombineTo64bitMLAL(AddcNode, DCI, Subtarget); + + SDNode *PrevAddc = nullptr; + if (AddcNode->getOperand(0).getOpcode() == ISD::ADDC) + PrevAddc = AddcNode->getOperand(0).getNode(); + else if (AddcNode->getOperand(1).getOpcode() == ISD::ADDC) + PrevAddc = AddcNode->getOperand(1).getNode(); + + // If there's no addc chains, just return a search for any MLAL. + if (PrevAddc == nullptr) + return AddCombineTo64bitMLAL(AddcNode, DCI, Subtarget); + + // Try to convert the addc operand to an MLAL and if that fails try to + // combine AddcNode. + SDValue MLAL = AddCombineTo64bitMLAL(PrevAddc, DCI, Subtarget); + if (MLAL != SDValue(PrevAddc, 0)) + return AddCombineTo64bitMLAL(AddcNode, DCI, Subtarget); + + // Find the converted UMAAL or quit if it doesn't exist. + SDNode *UmlalNode = nullptr; + SDValue AddHi; + if (AddcNode->getOperand(0).getOpcode() == ARMISD::UMLAL) { + UmlalNode = AddcNode->getOperand(0).getNode(); + AddHi = AddcNode->getOperand(1); + } else if (AddcNode->getOperand(1).getOpcode() == ARMISD::UMLAL) { + UmlalNode = AddcNode->getOperand(1).getNode(); + AddHi = AddcNode->getOperand(0); + } else { + return SDValue(); + } + + // The ADDC should be glued to an ADDE node, which uses the same UMLAL as + // the ADDC as well as Zero. + auto *Zero = dyn_cast<ConstantSDNode>(UmlalNode->getOperand(3)); + + if (!Zero || Zero->getZExtValue() != 0) + return SDValue(); + + // Check that we have a glued ADDC node. + if (AddcNode->getValueType(1) != MVT::Glue) + return SDValue(); + + // Look for the glued ADDE. + SDNode* AddeNode = AddcNode->getGluedUser(); + if (!AddeNode) + return SDValue(); + + if ((AddeNode->getOperand(0).getNode() == Zero && + AddeNode->getOperand(1).getNode() == UmlalNode) || + (AddeNode->getOperand(0).getNode() == UmlalNode && + AddeNode->getOperand(1).getNode() == Zero)) { + + SelectionDAG &DAG = DCI.DAG; + SDValue Ops[] = { UmlalNode->getOperand(0), UmlalNode->getOperand(1), + UmlalNode->getOperand(2), AddHi }; + SDValue UMAAL = DAG.getNode(ARMISD::UMAAL, SDLoc(AddcNode), + DAG.getVTList(MVT::i32, MVT::i32), Ops); + + // Replace the ADDs' nodes uses by the UMAAL node's values. + DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), SDValue(UMAAL.getNode(), 1)); + DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), SDValue(UMAAL.getNode(), 0)); + + // Return original node to notify the driver to stop replacing. + return SDValue(AddcNode, 0); + } + return SDValue(); +} + /// PerformADDCCombine - Target-specific dag combine transform from -/// ISD::ADDC, ISD::ADDE, and ISD::MUL_LOHI to MLAL. +/// ISD::ADDC, ISD::ADDE, and ISD::MUL_LOHI to MLAL or +/// ISD::ADDC, ISD::ADDE and ARMISD::UMLAL to ARMISD::UMAAL static SDValue PerformADDCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { - return AddCombineTo64bitMLAL(N, DCI, Subtarget); + if (Subtarget->isThumb1Only()) return SDValue(); + + // Only perform the checks after legalize when the pattern is available. + if (DCI.isBeforeLegalize()) return SDValue(); + return AddCombineTo64bitUMAAL(N, DCI, Subtarget); } /// PerformADDCombineWithOperands - Try DAG combinations for an ADD with diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h index 85b820c9f6c..4cdc6182040 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -163,6 +163,7 @@ namespace llvm { UMLAL, // 64bit Unsigned Accumulate Multiply SMLAL, // 64bit Signed Accumulate Multiply + UMAAL, // 64-bit Unsigned Accumulate Accumulate Multiply // Operands of the standard BUILD_VECTOR node are not legalized, which // is fine if BUILD_VECTORs are always lowered to shuffles or other diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td index fa18ecd2764..b57f3e84d3b 100644 --- a/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -95,6 +95,7 @@ def SDT_ARM64bitmlal : SDTypeProfile<2,4, [ SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<4, i32>, SDTCisVT<5, i32> ] >; def ARMUmlal : SDNode<"ARMISD::UMLAL", SDT_ARM64bitmlal>; def ARMSmlal : SDNode<"ARMISD::SMLAL", SDT_ARM64bitmlal>; +def ARMUmaal : SDNode<"ARMISD::UMAAL", SDT_ARM64bitmlal>; // Node definitions. def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>; @@ -3950,9 +3951,10 @@ def UMLAL : AsMla1I64<0b0000101, (outs GPR:$RdLo, GPR:$RdHi), RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>; def UMAAL : AMul1I <0b0000010, (outs GPR:$RdLo, GPR:$RdHi), - (ins GPR:$Rn, GPR:$Rm), IIC_iMAC64, + (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), + IIC_iMAC64, "umaal", "\t$RdLo, $RdHi, $Rn, $Rm", []>, - Requires<[IsARM, HasV6]> { + RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]> { bits<4> RdLo; bits<4> RdHi; bits<4> Rm; diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td index c642ad90c2d..22aca239565 100644 --- a/llvm/lib/Target/ARM/ARMInstrThumb2.td +++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -2593,8 +2593,9 @@ def t2UMLAL : T2MlaLong<0b110, 0b0000, def t2UMAAL : T2MulLong<0b110, 0b0110, (outs rGPR:$RdLo, rGPR:$RdHi), - (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64, + (ins rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi), IIC_iMAC64, "umaal", "\t$RdLo, $RdHi, $Rn, $Rm", []>, + RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsThumb2, HasDSP]>; } // hasSideEffects diff --git a/llvm/test/CodeGen/ARM/longMAC.ll b/llvm/test/CodeGen/ARM/longMAC.ll index 3f30fd40b7e..cc2d745aae8 100644 --- a/llvm/test/CodeGen/ARM/longMAC.ll +++ b/llvm/test/CodeGen/ARM/longMAC.ll @@ -116,3 +116,32 @@ define i64 @MACLongTest8(i64 %acc, i32 %lhs, i32 %rhs) { ret i64 %add } +define i64 @MACLongTest9(i32 %lhs, i32 %rhs, i32 %lo, i32 %hi) { +;CHECK-LABEL: MACLongTest9: +;CHECK-V7-LE:umaal +;CHECK-V7-BE:umaal +;CHECK-NOT:umaal + %conv = zext i32 %lhs to i64 + %conv1 = zext i32 %rhs to i64 + %mul = mul nuw i64 %conv1, %conv + %conv2 = zext i32 %lo to i64 + %add = add i64 %mul, %conv2 + %conv3 = zext i32 %hi to i64 + %add2 = add i64 %add, %conv3 + ret i64 %add2 +} + +define i64 @MACLongTest10(i32 %lhs, i32 %rhs, i32 %lo, i32 %hi) { +;CHECK-LABEL: MACLongTest10: +;CHECK-V7-LE:umaal +;CHECK-V7-BE:umaal +;CHECK-NOT:umaal + %conv = zext i32 %lhs to i64 + %conv1 = zext i32 %rhs to i64 + %mul = mul nuw i64 %conv1, %conv + %conv2 = zext i32 %lo to i64 + %conv3 = zext i32 %hi to i64 + %add = add i64 %conv2, %conv3 + %add2 = add i64 %add, %mul + ret i64 %add2 +} |