diff options
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 44 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/mul-i256.ll | 27 |
2 files changed, 70 insertions, 1 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 22ab8497f4c..b69f1c99935 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -2133,7 +2133,49 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N,
     LC = RTLIB::MUL_I64;
   else if (VT == MVT::i128)
     LC = RTLIB::MUL_I128;
-  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported MUL!");
+
+  if (LC == RTLIB::UNKNOWN_LIBCALL) {
+    // We'll expand the multiplication by brute force because we have no other
+    // options. This is a trivially-generalized version of the code from
+    // Hacker's Delight (itself derived from Knuth's Algorithm M from section
+    // 4.3.1).
+    SDValue Mask =
+      DAG.getConstant(APInt::getLowBitsSet(NVT.getSizeInBits(),
+                                           NVT.getSizeInBits() >> 1), dl, NVT);
+    SDValue LLL = DAG.getNode(ISD::AND, dl, NVT, LL, Mask);
+    SDValue RLL = DAG.getNode(ISD::AND, dl, NVT, RL, Mask);
+    // T = LLL * RLL is the lowest partial product; TL is its low half.
+    SDValue T = DAG.getNode(ISD::MUL, dl, NVT, LLL, RLL);
+    SDValue TL = DAG.getNode(ISD::AND, dl, NVT, T, Mask);
+    // TH, LLH and RLH are the high halves of T, LL and RL respectively.
+    SDValue Shift =
+      DAG.getConstant(NVT.getSizeInBits() >> 1, dl,
+                      TLI.getShiftAmountTy(NVT, DAG.getDataLayout()));
+    SDValue TH = DAG.getNode(ISD::SRL, dl, NVT, T, Shift);
+    SDValue LLH = DAG.getNode(ISD::SRL, dl, NVT, LL, Shift);
+    SDValue RLH = DAG.getNode(ISD::SRL, dl, NVT, RL, Shift);
+    // U = LLH * RLL + TH: first cross product plus the carry out of T.
+    SDValue U = DAG.getNode(ISD::ADD, dl, NVT,
+                            DAG.getNode(ISD::MUL, dl, NVT, LLH, RLL), TH);
+    SDValue UL = DAG.getNode(ISD::AND, dl, NVT, U, Mask);
+    SDValue UH = DAG.getNode(ISD::SRL, dl, NVT, U, Shift);
+    // V = LLL * RLH + UL: second cross product plus the low half of U.
+    SDValue V = DAG.getNode(ISD::ADD, dl, NVT,
+                            DAG.getNode(ISD::MUL, dl, NVT, LLL, RLH), UL);
+    SDValue VH = DAG.getNode(ISD::SRL, dl, NVT, V, Shift);
+    // LL * RL: high word W = LLH*RLH + UH + VH, low word Lo = TL + (V << half).
+    SDValue W = DAG.getNode(ISD::ADD, dl, NVT,
+                            DAG.getNode(ISD::MUL, dl, NVT, LLH, RLH),
+                            DAG.getNode(ISD::ADD, dl, NVT, UH, VH));
+    Lo = DAG.getNode(ISD::ADD, dl, NVT, TL,
+                     DAG.getNode(ISD::SHL, dl, NVT, V, Shift));
+    // The cross terms RH * LL and RL * LH only contribute to the high word.
+    Hi = DAG.getNode(ISD::ADD, dl, NVT, W,
+                     DAG.getNode(ISD::ADD, dl, NVT,
+                                 DAG.getNode(ISD::MUL, dl, NVT, RH, LL),
+                                 DAG.getNode(ISD::MUL, dl, NVT, RL, LH)));
+    return;
+  }
 
   SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
   SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, true/*irrelevant*/, dl).first,
diff --git a/llvm/test/CodeGen/X86/mul-i256.ll b/llvm/test/CodeGen/X86/mul-i256.ll
new file mode 100644
index 00000000000..8b8b10aa179
--- /dev/null
+++ b/llvm/test/CodeGen/X86/mul-i256.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @test(i256* %a, i256* %b, i256* %out) #0 {
+entry:
+  %av = load i256, i256* %a
+  %bv = load i256, i256* %b
+  %r = mul i256 %av, %bv
+  store i256 %r, i256* %out
+  ret void
+}
+
+; CHECK-LABEL: @test
+; There is a lot of inter-register motion, and so matching the instruction
+; sequence will be fragile. The half-width partial products should each lower
+; to one widening mulq; only the two truncated cross products (RH*LL and
+; RL*LH) should need imulq, two apiece, so there should be 4 imulq in total.
+; CHECK: imulq
+; CHECK: imulq
+; CHECK: imulq
+; CHECK: imulq
+; CHECK-NOT: imulq
+; CHECK: retq
+
+attributes #0 = { norecurse nounwind uwtable "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" }
+
|