[X86][SSE] Improve lowering of vXi64 multiplies

As mentioned on PR30845, we were performing our vXi64 multiplication as:

  AloBlo = pmuludq(a, b);
  AloBhi = pmuludq(a, psrlqi(b, 32));
  AhiBlo = pmuludq(psrlqi(a, 32), b);
  return AloBlo + psllqi(AloBhi, 32) + psllqi(AhiBlo, 32);

when we could avoid one of the upper shifts with:

  AloBlo = pmuludq(a, b);
  AloBhi = pmuludq(a, psrlqi(b, 32));
  AhiBlo = pmuludq(psrlqi(a, 32), b);
  return AloBlo + psllqi(AloBhi + AhiBlo, 32);

This matches the lowering on gcc/icc.

Differential Revision: https://reviews.llvm.org/D27756

llvm-svn: 290267
author: Simon Pilgrim <llvm-dev@redking.me.uk> 2016-12-21 20:00:10 +0000
committer: Simon Pilgrim <llvm-dev@redking.me.uk> 2016-12-21 20:00:10 +0000
commit: 081abbb164cceea0ff5b70d1557f2cf31198f5b9 (patch)
tree: 9b712808d6be653cbd6b052d85fa932a9714d3c7 /llvm/lib/Target/X86/X86TargetTransformInfo.cpp
parent: b0761a0c1ba8ec77d3704d2450d481bc25e60a9d (diff)
download: bcm5719-llvm-081abbb164cceea0ff5b70d1557f2cf31198f5b9.tar.gz
bcm5719-llvm-081abbb164cceea0ff5b70d1557f2cf31198f5b9.zip
1 files changed, 8 insertions, 8 deletions
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 6b5b5a1528e..db563c08632 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -551,11 +551,11 @@ int X86TTIImpl::getArithmeticInstrCost(
     { ISD::SUB,     MVT::v4i64,    4 },
     { ISD::ADD,     MVT::v4i64,    4 },
     // A v4i64 multiply is custom lowered as two split v2i64 vectors that then
-    // are lowered as a series of long multiplies(3), shifts(4) and adds(2)
+    // are lowered as a series of long multiplies(3), shifts(3) and adds(2)
     // Because we believe v4i64 to be a legal type, we must also include the
-    // split factor of two in the cost table. Therefore, the cost here is 18
-    // instead of 9.
-    { ISD::MUL,     MVT::v4i64,    18 },
+    // split factor of two in the cost table. Therefore, the cost here is 16
+    // instead of 8.
+    { ISD::MUL,     MVT::v4i64,    16 },
   };
 
   // Look for AVX1 lowering tricks.
@@ -569,10 +569,10 @@ int X86TTIImpl::getArithmeticInstrCost(
   // Custom lowering of vectors.
   static const CostTblEntry CustomLowered[] = {
     // A v2i64/v4i64 and multiply is custom lowered as a series of long
-    // multiplies(3), shifts(4) and adds(2).
-    { ISD::MUL,     MVT::v2i64,    9 },
-    { ISD::MUL,     MVT::v4i64,    9 },
-    { ISD::MUL,     MVT::v8i64,    9 }
+    // multiplies(3), shifts(3) and adds(2).
+    { ISD::MUL,     MVT::v2i64,    8 },
+    { ISD::MUL,     MVT::v4i64,    8 },
+    { ISD::MUL,     MVT::v8i64,    8 }
   };
   if (const auto *Entry = CostTableLookup(CustomLowered, ISD, LT.second))
     return LT.first * Entry->Cost;
author	Simon Pilgrim <llvm-dev@redking.me.uk>	2016-12-21 20:00:10 +0000
committer	Simon Pilgrim <llvm-dev@redking.me.uk>	2016-12-21 20:00:10 +0000
commit	081abbb164cceea0ff5b70d1557f2cf31198f5b9 (patch)
tree	9b712808d6be653cbd6b052d85fa932a9714d3c7 /llvm/lib/Target/X86/X86TargetTransformInfo.cpp
parent	b0761a0c1ba8ec77d3704d2450d481bc25e60a9d (diff)
download	bcm5719-llvm-081abbb164cceea0ff5b70d1557f2cf31198f5b9.tar.gz bcm5719-llvm-081abbb164cceea0ff5b70d1557f2cf31198f5b9.zip