[X86][XOP] Added support for the lowering of 128-bit vector shifts to XOP shift instructions

The XOP shifts just have logical/arithmetic versions and the left/right shifts are controlled by whether the value is positive/negative. Because of this I've added new X86ISD nodes instead of trying to force them to use the existing shift nodes. Additionally Excavator cores (bdver4) support XOP and AVX2 - meaning that it should use the AVX2 shifts when it can and fall back to XOP in other cases. Differential Revision: http://reviews.llvm.org/D8690 llvm-svn: 248878
author: Simon Pilgrim <llvm-dev@redking.me.uk> 2015-09-30 08:17:50 +0000
committer: Simon Pilgrim <llvm-dev@redking.me.uk> 2015-09-30 08:17:50 +0000
commit: 3d11c994f7d85474b80409efb6e0b4916910252d (patch)
tree: 3ab63bc2d19c13e267bc224758bd7b193a9a8977 /llvm/lib/Target/X86/X86TargetTransformInfo.cpp
parent: 82d705e6d99812bd66db531414153117f50728a4 (diff)
download: bcm5719-llvm-3d11c994f7d85474b80409efb6e0b4916910252d.tar.gz
bcm5719-llvm-3d11c994f7d85474b80409efb6e0b4916910252d.zip
1 files changed, 61 insertions, 15 deletions
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 0cf3163c89d..f23057083f9 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -140,6 +140,12 @@ int X86TTIImpl::getArithmeticInstrCost(
     { ISD::SRA,     MVT::v8i64,    1 },
   };
 
+  if (ST->hasAVX512()) {
+    int Idx = CostTableLookup(AVX512CostTable, ISD, LT.second);
+    if (Idx != -1)
+      return LT.first * AVX512CostTable[Idx].Cost;
+  }
+
   static const CostTblEntry<MVT::SimpleValueType> AVX2CostTable[] = {
     // Shifts on v4i64/v8i32 on AVX2 is legal even though we declare to
     // customize them to detect the cases where shift amount is a scalar one.
@@ -153,7 +159,59 @@ int X86TTIImpl::getArithmeticInstrCost(
     { ISD::SRL,     MVT::v2i64,    1 },
     { ISD::SHL,     MVT::v4i64,    1 },
     { ISD::SRL,     MVT::v4i64,    1 },
+  };
+
+  // Look for AVX2 lowering tricks.
+  if (ST->hasAVX2()) {
+    if (ISD == ISD::SHL && LT.second == MVT::v16i16 &&
+        (Op2Info == TargetTransformInfo::OK_UniformConstantValue ||
+         Op2Info == TargetTransformInfo::OK_NonUniformConstantValue))
+      // On AVX2, a packed v16i16 shift left by a constant build_vector
+      // is lowered into a vector multiply (vpmullw).
+      return LT.first;
 
+    int Idx = CostTableLookup(AVX2CostTable, ISD, LT.second);
+    if (Idx != -1)
+      return LT.first * AVX2CostTable[Idx].Cost;
+  }
+
+  static const CostTblEntry<MVT::SimpleValueType> XOPCostTable[] = {
+    // 128bit shifts take 1cy, but right shifts require negation beforehand.
+    { ISD::SHL,     MVT::v16i8,    1 },
+    { ISD::SRL,     MVT::v16i8,    2 },
+    { ISD::SRA,     MVT::v16i8,    2 },
+    { ISD::SHL,     MVT::v8i16,    1 },
+    { ISD::SRL,     MVT::v8i16,    2 },
+    { ISD::SRA,     MVT::v8i16,    2 },
+    { ISD::SHL,     MVT::v4i32,    1 },
+    { ISD::SRL,     MVT::v4i32,    2 },
+    { ISD::SRA,     MVT::v4i32,    2 },
+    { ISD::SHL,     MVT::v2i64,    1 },
+    { ISD::SRL,     MVT::v2i64,    2 },
+    { ISD::SRA,     MVT::v2i64,    2 },
+    // 256bit shifts require splitting if AVX2 didn't catch them above.
+    { ISD::SHL,     MVT::v32i8,    2 },
+    { ISD::SRL,     MVT::v32i8,    4 },
+    { ISD::SRA,     MVT::v32i8,    4 },
+    { ISD::SHL,     MVT::v16i16,   2 },
+    { ISD::SRL,     MVT::v16i16,   4 },
+    { ISD::SRA,     MVT::v16i16,   4 },
+    { ISD::SHL,     MVT::v8i32,    2 },
+    { ISD::SRL,     MVT::v8i32,    4 },
+    { ISD::SRA,     MVT::v8i32,    4 },
+    { ISD::SHL,     MVT::v4i64,    2 },
+    { ISD::SRL,     MVT::v4i64,    4 },
+    { ISD::SRA,     MVT::v4i64,    4 },
+  };
+
+  // Look for XOP lowering tricks.
+  if (ST->hasXOP()) {
+    int Idx = CostTableLookup(XOPCostTable, ISD, LT.second);
+    if (Idx != -1)
+      return LT.first * XOPCostTable[Idx].Cost;
+  }
+
+  static const CostTblEntry<MVT::SimpleValueType> AVX2CustomCostTable[] = {
     { ISD::SHL,  MVT::v32i8,      11 }, // vpblendvb sequence.
     { ISD::SHL,  MVT::v16i16,     10 }, // extend/vpsrlvd/pack sequence.
 
@@ -176,23 +234,11 @@ int X86TTIImpl::getArithmeticInstrCost(
     { ISD::UDIV,  MVT::v4i64,  4*20 },
   };
 
-  if (ST->hasAVX512()) {
-    int Idx = CostTableLookup(AVX512CostTable, ISD, LT.second);
-    if (Idx != -1)
-      return LT.first * AVX512CostTable[Idx].Cost;
-  }
-  // Look for AVX2 lowering tricks.
+  // Look for AVX2 lowering tricks for custom cases.
   if (ST->hasAVX2()) {
-    if (ISD == ISD::SHL && LT.second == MVT::v16i16 &&
-        (Op2Info == TargetTransformInfo::OK_UniformConstantValue ||
-         Op2Info == TargetTransformInfo::OK_NonUniformConstantValue))
-      // On AVX2, a packed v16i16 shift left by a constant build_vector
-      // is lowered into a vector multiply (vpmullw).
-      return LT.first;
-
-    int Idx = CostTableLookup(AVX2CostTable, ISD, LT.second);
+    int Idx = CostTableLookup(AVX2CustomCostTable, ISD, LT.second);
     if (Idx != -1)
-      return LT.first * AVX2CostTable[Idx].Cost;
+      return LT.first * AVX2CustomCostTable[Idx].Cost;
   }
 
   static const CostTblEntry<MVT::SimpleValueType>
author	Simon Pilgrim <llvm-dev@redking.me.uk>	2015-09-30 08:17:50 +0000
committer	Simon Pilgrim <llvm-dev@redking.me.uk>	2015-09-30 08:17:50 +0000
commit	3d11c994f7d85474b80409efb6e0b4916910252d (patch)
tree	3ab63bc2d19c13e267bc224758bd7b193a9a8977 /llvm/lib/Target/X86/X86TargetTransformInfo.cpp
parent	82d705e6d99812bd66db531414153117f50728a4 (diff)
download	bcm5719-llvm-3d11c994f7d85474b80409efb6e0b4916910252d.tar.gz bcm5719-llvm-3d11c994f7d85474b80409efb6e0b4916910252d.zip