diff options
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 44 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/mul-i256.ll | 27 | 
2 files changed, 70 insertions, 1 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 22ab8497f4c..b69f1c99935 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -2133,7 +2133,49 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N,      LC = RTLIB::MUL_I64;    else if (VT == MVT::i128)      LC = RTLIB::MUL_I128; -  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported MUL!"); + +  if (LC == RTLIB::UNKNOWN_LIBCALL) { +    // We'll expand the multiplication by brute force because we have no other +    // options. This is a trivially-generalized version of the code from +    // Hacker's Delight (itself derived from Knuth's Algorithm M from section +    // 4.3.1). +    SDValue Mask = +      DAG.getConstant(APInt::getLowBitsSet(NVT.getSizeInBits(), +                                           NVT.getSizeInBits() >> 1), dl, NVT); +    SDValue LLL = DAG.getNode(ISD::AND, dl, NVT, LL, Mask); +    SDValue RLL = DAG.getNode(ISD::AND, dl, NVT, RL, Mask); + +    SDValue T = DAG.getNode(ISD::MUL, dl, NVT, LLL, RLL); +    SDValue TL = DAG.getNode(ISD::AND, dl, NVT, T, Mask); + +    SDValue Shift = +      DAG.getConstant(NVT.getSizeInBits() >> 1, dl, +                      TLI.getShiftAmountTy(NVT, DAG.getDataLayout())); +    SDValue TH = DAG.getNode(ISD::SRL, dl, NVT, T, Shift); +    SDValue LLH = DAG.getNode(ISD::SRL, dl, NVT, LL, Shift); +    SDValue RLH = DAG.getNode(ISD::SRL, dl, NVT, RL, Shift); + +    SDValue U = DAG.getNode(ISD::ADD, dl, NVT, +                            DAG.getNode(ISD::MUL, dl, NVT, LLH, RLL), TL); +    SDValue UL = DAG.getNode(ISD::AND, dl, NVT, U, Mask); +    SDValue UH = DAG.getNode(ISD::SRL, dl, NVT, U, Shift); + +    SDValue V = DAG.getNode(ISD::ADD, dl, NVT, +                            DAG.getNode(ISD::MUL, dl, NVT, LLL, RLH), UL); +    SDValue VH = DAG.getNode(ISD::SRL, dl, NVT, V, Shift); + +    SDValue W = DAG.getNode(ISD::ADD, dl, NVT, +                            DAG.getNode(ISD::MUL, dl, NVT, LL, RL), +                            DAG.getNode(ISD::ADD, dl, NVT, UH, VH)); +    Lo = DAG.getNode(ISD::ADD, dl, NVT, TH, +                     DAG.getNode(ISD::SHL, dl, NVT, V, Shift)); + +    Hi = DAG.getNode(ISD::ADD, dl, NVT, W, +                     DAG.getNode(ISD::ADD, dl, NVT, +                                 DAG.getNode(ISD::MUL, dl, NVT, RH, LL),  +                                 DAG.getNode(ISD::MUL, dl, NVT, RL, LH))); +    return; +  }    SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };    SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, true/*irrelevant*/, dl).first, diff --git a/llvm/test/CodeGen/X86/mul-i256.ll b/llvm/test/CodeGen/X86/mul-i256.ll new file mode 100644 index 00000000000..8b8b10aa179 --- /dev/null +++ b/llvm/test/CodeGen/X86/mul-i256.ll @@ -0,0 +1,27 @@ +; RUN: llc < %s | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @test(i256* %a, i256* %b, i256* %out) #0 { +entry: +  %av = load i256, i256* %a +  %bv = load i256, i256* %b +  %r = mul i256 %av, %bv +  store i256 %r, i256* %out +  ret void +} + +; CHECK-LABEL: @test +; There is a lot of inter-register motion, and so matching the instruction +; sequence will be fragile. There should be 6 underlying multiplications. +; CHECK: imulq +; CHECK: imulq +; CHECK: imulq +; CHECK: imulq +; CHECK: imulq +; CHECK: imulq +; CHECK-NOT: imulq +; CHECK: retq + +attributes #0 = { norecurse nounwind uwtable "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" } +  | 

