summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
authorSanjay Patel <spatel@rotateright.com>2018-09-19 15:57:40 +0000
committerSanjay Patel <spatel@rotateright.com>2018-09-19 15:57:40 +0000
commit4fd2e2a4980d3a0512524b8352669ef4aa9258a9 (patch)
treee8a4697b5a408a2abaaf3d996544de30bf18f4a0 /llvm/lib/Target
parentbd810dbd276a8d8150862b0ede16ea200c89546d (diff)
downloadbcm5719-llvm-4fd2e2a4980d3a0512524b8352669ef4aa9258a9.tar.gz
bcm5719-llvm-4fd2e2a4980d3a0512524b8352669ef4aa9258a9.zip
[DAGCombiner][x86] add transform/hook to decompose integer multiply into shift/add
This is an alternative to D37896. I don't see a way to decompose multiplies generically without a target hook to tell us when it's profitable. ARM and AArch64 may be able to remove some duplicate code that overlaps with this transform. As a first step, we're only getting the most clear wins on the vector examples requested in PR34474: https://bugs.llvm.org/show_bug.cgi?id=34474 As noted in the code comment, it's likely that the x86 constraints are tighter than necessary, but it may not always be a win to replace a pmullw/pmulld. Differential Revision: https://reviews.llvm.org/D52195 llvm-svn: 342554
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp17
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.h2
2 files changed, 19 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index ce1dd6bfc5f..ff35748a750 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -4722,6 +4722,23 @@ bool X86TargetLowering::convertSelectOfConstantsToMath(EVT VT) const {
return true;
}
+bool X86TargetLowering::decomposeMulByConstant(EVT VT, SDValue C) const {
+ // Return true if a VT multiply by splat constant C should become shl + add/sub.
+ // TODO: Scalars use custom code; generic combining could make that unnecessary.
+ APInt MulC; // Passed to isConstantSplatVector() below, then tested in the final return.
+ if (!ISD::isConstantSplatVector(C.getNode(), MulC))
+ return false; // C was not accepted as a constant splat vector.
+
+ // If vector multiply is legal, assume that's faster than shl + add/sub.
+ // TODO: Multiply is a complex op with higher latency and lower throughput in
+ // most implementations, so this check could be loosened based on type
+ // and/or a CPU attribute.
+ if (isOperationLegal(ISD::MUL, VT))
+ return false; // Keep the legal multiply as-is.
+
+ return (MulC + 1).isPowerOf2() || (MulC - 1).isPowerOf2(); // True when MulC is 2^N - 1 or 2^N + 1.
+}
+
bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
unsigned Index) const {
if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index caed0219962..9a3726e566e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1034,6 +1034,8 @@ namespace llvm {
bool convertSelectOfConstantsToMath(EVT VT) const override;
+ bool decomposeMulByConstant(EVT VT, SDValue C) const override;
+
/// Return true if EXTRACT_SUBVECTOR is cheap for this result type
/// with this index.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
OpenPOWER on IntegriCloud