[Vectorizer] Add a new 'OperandValueKind' in TargetTransformInfo called

'OK_NonUniformConstValue' to identify operands which are constants but not constant splats. The cost model now allows returning 'OK_NonUniformConstValue' for non splat operands that are instances of ConstantVector or ConstantDataVector. With this change, targets are now able to compute different costs for instructions with non-uniform constant operands. For example, On X86 the cost of a vector shift may vary depending on whether the second operand is a uniform or non-uniform constant. This patch applies the following changes: - The cost model computation now takes into account non-uniform constants; - The cost of vector shift instructions has been improved in X86TargetTransformInfo analysis pass; - BBVectorize, SLPVectorizer and LoopVectorize now know how to distinguish between non-uniform and uniform constant operands. Added a new test to verify that the output of opt '-cost-model -analyze' is valid in the following configurations: SSE2, SSE4.1, AVX, AVX2. llvm-svn: 201272
author: Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> 2014-02-12 23:43:47 +0000
committer: Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> 2014-02-12 23:43:47 +0000
commit: b7882b3bd11b46cca650e318bb7b921a1bfc8e21 (patch)
tree: 97a7fdb6b7e8ffe0337e7c9260cc03cb39f3c1b9 /llvm/lib/Target/X86/X86TargetTransformInfo.cpp
parent: 386d566395d90b01ddddf3d0ab219097cca8c37c (diff)
download: bcm5719-llvm-b7882b3bd11b46cca650e318bb7b921a1bfc8e21.tar.gz
bcm5719-llvm-b7882b3bd11b46cca650e318bb7b921a1bfc8e21.zip
1 files changed, 33 insertions, 2 deletions
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 207a7685c59..d50bab99ff3 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -225,6 +225,13 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
 
   // Look for AVX2 lowering tricks.
   if (ST->hasAVX2()) {
+    if (ISD == ISD::SHL && LT.second == MVT::v16i16 &&
+        (Op2Info == TargetTransformInfo::OK_UniformConstantValue ||
+         Op2Info == TargetTransformInfo::OK_NonUniformConstantValue))
+      // On AVX2, a packed v16i16 shift left by a constant build_vector
+      // is lowered into a vector multiply (vpmullw).
+      return LT.first;
+
     int Idx = CostTableLookup(AVX2CostTable, ISD, LT.second);
     if (Idx != -1)
       return LT.first * AVX2CostTable[Idx].Cost;
@@ -257,6 +264,20 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
       return LT.first * SSE2UniformConstCostTable[Idx].Cost;
   }
 
+  if (ISD == ISD::SHL &&
+      Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) {
+    EVT VT = LT.second;
+    if ((VT == MVT::v8i16 && ST->hasSSE2()) ||
+        (VT == MVT::v4i32 && ST->hasSSE41()))
+      // Vector shift left by non uniform constant can be lowered
+      // into vector multiply (pmullw/pmulld).
+      return LT.first;
+    if (VT == MVT::v4i32 && ST->hasSSE2())
+      // A vector shift left by non uniform constant is converted
+      // into a vector multiply; the new multiply is eventually
+      // lowered into a sequence of shuffles and 2 x pmuludq.
+      ISD = ISD::MUL;
+  }
 
   static const CostTblEntry<MVT::SimpleValueType> SSE2CostTable[] = {
     // We don't correctly identify costs of casts because they are marked as
@@ -271,6 +292,7 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
     { ISD::SHL,  MVT::v8i16,  8*10 }, // Scalarized.
     { ISD::SHL,  MVT::v4i32,  2*5 }, // We optimized this using mul.
     { ISD::SHL,  MVT::v2i64,  2*10 }, // Scalarized.
+    { ISD::SHL,  MVT::v4i64,  4*10 }, // Scalarized. 
 
     { ISD::SRL,  MVT::v16i8,  16*10 }, // Scalarized.
     { ISD::SRL,  MVT::v8i16,  8*10 }, // Scalarized.
@@ -308,6 +330,7 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
     // We don't have to scalarize unsupported ops. We can issue two half-sized
     // operations and we only need to extract the upper YMM half.
     // Two ops + 1 extract + 1 insert = 4.
+    { ISD::MUL,     MVT::v16i16,   4 },
     { ISD::MUL,     MVT::v8i32,    4 },
     { ISD::SUB,     MVT::v8i32,    4 },
     { ISD::ADD,     MVT::v8i32,    4 },
@@ -323,7 +346,15 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
 
   // Look for AVX1 lowering tricks.
   if (ST->hasAVX() && !ST->hasAVX2()) {
-    int Idx = CostTableLookup(AVX1CostTable, ISD, LT.second);
+    EVT VT = LT.second;
+
+    // v16i16 and v8i32 shifts by non-uniform constants are lowered into a
+    // sequence of extract + two vector multiply + insert.
+    if (ISD == ISD::SHL && (VT == MVT::v8i32 || VT == MVT::v16i16) &&
+        Op2Info == TargetTransformInfo::OK_NonUniformConstantValue)
+      ISD = ISD::MUL;
+
+    int Idx = CostTableLookup(AVX1CostTable, ISD, VT);
     if (Idx != -1)
       return LT.first * AVX1CostTable[Idx].Cost;
   }
@@ -343,7 +374,7 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
   // 2x pmuludq, 2x shuffle.
   if (ISD == ISD::MUL && LT.second == MVT::v4i32 && ST->hasSSE2() &&
       !ST->hasSSE41())
-    return 6;
+    return LT.first * 6;
 
   // Fallback to the default implementation.
   return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Op1Info,
author	Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>	2014-02-12 23:43:47 +0000
committer	Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>	2014-02-12 23:43:47 +0000
commit	b7882b3bd11b46cca650e318bb7b921a1bfc8e21 (patch)
tree	97a7fdb6b7e8ffe0337e7c9260cc03cb39f3c1b9 /llvm/lib/Target/X86/X86TargetTransformInfo.cpp
parent	386d566395d90b01ddddf3d0ab219097cca8c37c (diff)
download	bcm5719-llvm-b7882b3bd11b46cca650e318bb7b921a1bfc8e21.tar.gz bcm5719-llvm-b7882b3bd11b46cca650e318bb7b921a1bfc8e21.zip