summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp44
-rw-r--r--llvm/test/CodeGen/X86/mul-i256.ll27
2 files changed, 70 insertions, 1 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 22ab8497f4c..b69f1c99935 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -2133,7 +2133,49 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N,
LC = RTLIB::MUL_I64;
else if (VT == MVT::i128)
LC = RTLIB::MUL_I128;
- assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported MUL!");
+
+ if (LC == RTLIB::UNKNOWN_LIBCALL) {
+ // We'll expand the multiplication by brute force because we have no other
+ // options. This is a trivially-generalized version of the code from
+ // Hacker's Delight (itself derived from Knuth's Algorithm M from section
+ // 4.3.1).
+ SDValue Mask =
+ DAG.getConstant(APInt::getLowBitsSet(NVT.getSizeInBits(),
+ NVT.getSizeInBits() >> 1), dl, NVT);
+ SDValue LLL = DAG.getNode(ISD::AND, dl, NVT, LL, Mask);
+ SDValue RLL = DAG.getNode(ISD::AND, dl, NVT, RL, Mask);
+
+ SDValue T = DAG.getNode(ISD::MUL, dl, NVT, LLL, RLL);
+ SDValue TL = DAG.getNode(ISD::AND, dl, NVT, T, Mask);
+
+ SDValue Shift =
+ DAG.getConstant(NVT.getSizeInBits() >> 1, dl,
+ TLI.getShiftAmountTy(NVT, DAG.getDataLayout()));
+ SDValue TH = DAG.getNode(ISD::SRL, dl, NVT, T, Shift);
+ SDValue LLH = DAG.getNode(ISD::SRL, dl, NVT, LL, Shift);
+ SDValue RLH = DAG.getNode(ISD::SRL, dl, NVT, RL, Shift);
+
+ SDValue U = DAG.getNode(ISD::ADD, dl, NVT,
+ DAG.getNode(ISD::MUL, dl, NVT, LLH, RLL), TL);
+ SDValue UL = DAG.getNode(ISD::AND, dl, NVT, U, Mask);
+ SDValue UH = DAG.getNode(ISD::SRL, dl, NVT, U, Shift);
+
+ SDValue V = DAG.getNode(ISD::ADD, dl, NVT,
+ DAG.getNode(ISD::MUL, dl, NVT, LLL, RLH), UL);
+ SDValue VH = DAG.getNode(ISD::SRL, dl, NVT, V, Shift);
+
+ SDValue W = DAG.getNode(ISD::ADD, dl, NVT,
+ DAG.getNode(ISD::MUL, dl, NVT, LL, RL),
+ DAG.getNode(ISD::ADD, dl, NVT, UH, VH));
+ Lo = DAG.getNode(ISD::ADD, dl, NVT, TH,
+ DAG.getNode(ISD::SHL, dl, NVT, V, Shift));
+
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, W,
+ DAG.getNode(ISD::ADD, dl, NVT,
+ DAG.getNode(ISD::MUL, dl, NVT, RH, LL),
+ DAG.getNode(ISD::MUL, dl, NVT, RL, LH)));
+ return;
+ }
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, true/*irrelevant*/, dl).first,
diff --git a/llvm/test/CodeGen/X86/mul-i256.ll b/llvm/test/CodeGen/X86/mul-i256.ll
new file mode 100644
index 00000000000..8b8b10aa179
--- /dev/null
+++ b/llvm/test/CodeGen/X86/mul-i256.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @test(i256* %a, i256* %b, i256* %out) #0 {
+entry:
+ %av = load i256, i256* %a
+ %bv = load i256, i256* %b
+ %r = mul i256 %av, %bv
+ store i256 %r, i256* %out
+ ret void
+}
+
+; CHECK-LABEL: @test
+; There is a lot of inter-register motion, and so matching the instruction
+; sequence will be fragile. There should be 6 underlying multiplications.
+; CHECK: imulq
+; CHECK: imulq
+; CHECK: imulq
+; CHECK: imulq
+; CHECK: imulq
+; CHECK: imulq
+; CHECK-NOT: imulq
+; CHECK: retq
+
+attributes #0 = { norecurse nounwind uwtable "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" }
+
OpenPOWER on IntegriCloud