[CostModel][X86] Reordered AVX1 arithmetic cost LUT into descending target order. NFCI.

llvm-svn: 291352
author: Simon Pilgrim <llvm-dev@redking.me.uk> 2017-01-07 17:03:51 +0000
committer: Simon Pilgrim <llvm-dev@redking.me.uk> 2017-01-07 17:03:51 +0000
commit: 100eae1ee06ee97daac0cecf5189e9cfc2b6f11b (patch)
tree: 827da153268b53d2252265796579fea2d483c8f1 /llvm/lib/Target/X86
parent: 32f8d560ddd7ed27098ab5c77fa22aa51806cf6a (diff)
download: bcm5719-llvm-100eae1ee06ee97daac0cecf5189e9cfc2b6f11b.tar.gz
bcm5719-llvm-100eae1ee06ee97daac0cecf5189e9cfc2b6f11b.zip
1 files changed, 27 insertions, 27 deletions
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 9167c09b43f..7c3739719bc 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -440,6 +440,33 @@ int X86TTIImpl::getArithmeticInstrCost(
       ISD = ISD::MUL;
   }
 
+  static const CostTblEntry AVX1CostTable[] = {
+    // We don't have to scalarize unsupported ops. We can issue two half-sized
+    // operations and we only need to extract the upper YMM half.
+    // Two ops + 1 extract + 1 insert = 4.
+    { ISD::MUL,     MVT::v16i16,   4 },
+    { ISD::MUL,     MVT::v8i32,    4 },
+    { ISD::SUB,     MVT::v32i8,    4 },
+    { ISD::ADD,     MVT::v32i8,    4 },
+    { ISD::SUB,     MVT::v16i16,   4 },
+    { ISD::ADD,     MVT::v16i16,   4 },
+    { ISD::SUB,     MVT::v8i32,    4 },
+    { ISD::ADD,     MVT::v8i32,    4 },
+    { ISD::SUB,     MVT::v4i64,    4 },
+    { ISD::ADD,     MVT::v4i64,    4 },
+
+    // A v4i64 multiply is custom lowered as two split v2i64 vectors that then
+    // are lowered as a series of long multiplies(3), shifts(3) and adds(2).
+    // Because we believe v4i64 to be a legal type, we must also include the
+    // extract+insert in the cost table. Therefore, the cost here is 18
+    // (2 x 8 ops + 1 extract + 1 insert) instead of 8.
+    { ISD::MUL,     MVT::v4i64,    18 },
+  };
+
+  if (ST->hasAVX() && !ST->hasAVX2())
+    if (const auto *Entry = CostTableLookup(AVX1CostTable, ISD, LT.second))
+      return LT.first * Entry->Cost;
+
   static const CostTblEntry SSE42CostTable[] = {
     { ISD::FDIV,  MVT::f32,   14 }, // Nehalem from http://www.agner.org/
     { ISD::FDIV,  MVT::v4f32, 14 }, // Nehalem from http://www.agner.org/
@@ -529,33 +556,6 @@ int X86TTIImpl::getArithmeticInstrCost(
     if (const auto *Entry = CostTableLookup(SSE2CostTable, ISD, LT.second))
       return LT.first * Entry->Cost;
 
-  static const CostTblEntry AVX1CostTable[] = {
-    // We don't have to scalarize unsupported ops. We can issue two half-sized
-    // operations and we only need to extract the upper YMM half.
-    // Two ops + 1 extract + 1 insert = 4.
-    { ISD::MUL,     MVT::v16i16,   4 },
-    { ISD::MUL,     MVT::v8i32,    4 },
-    { ISD::SUB,     MVT::v32i8,    4 },
-    { ISD::ADD,     MVT::v32i8,    4 },
-    { ISD::SUB,     MVT::v16i16,   4 },
-    { ISD::ADD,     MVT::v16i16,   4 },
-    { ISD::SUB,     MVT::v8i32,    4 },
-    { ISD::ADD,     MVT::v8i32,    4 },
-    { ISD::SUB,     MVT::v4i64,    4 },
-    { ISD::ADD,     MVT::v4i64,    4 },
-    // A v4i64 multiply is custom lowered as two split v2i64 vectors that then
-    // are lowered as a series of long multiplies(3), shifts(3) and adds(2)
-    // Because we believe v4i64 to be a legal type, we must also include the
-    // extract+insert in the cost table. Therefore, the cost here is 18
-    // instead of 8.
-    { ISD::MUL,     MVT::v4i64,    18 },
-  };
-
-  // Look for AVX1 lowering tricks.
-  if (ST->hasAVX() && !ST->hasAVX2())
-    if (const auto *Entry = CostTableLookup(AVX1CostTable, ISD, LT.second))
-      return LT.first * Entry->Cost;
-
   static const CostTblEntry SSE1CostTable[] = {
     { ISD::FDIV, MVT::f32,   17 }, // Pentium III from http://www.agner.org/
     { ISD::FDIV, MVT::v4f32, 34 }, // Pentium III from http://www.agner.org/
author	Simon Pilgrim <llvm-dev@redking.me.uk>	2017-01-07 17:03:51 +0000
committer	Simon Pilgrim <llvm-dev@redking.me.uk>	2017-01-07 17:03:51 +0000
commit	100eae1ee06ee97daac0cecf5189e9cfc2b6f11b (patch)
tree	827da153268b53d2252265796579fea2d483c8f1 /llvm/lib/Target/X86
parent	32f8d560ddd7ed27098ab5c77fa22aa51806cf6a (diff)
download	bcm5719-llvm-100eae1ee06ee97daac0cecf5189e9cfc2b6f11b.tar.gz bcm5719-llvm-100eae1ee06ee97daac0cecf5189e9cfc2b6f11b.zip