Recommit r367901 "[X86] Enable -x86-experimental-vector-widening-legalization by default."

The assert that caused this to be reverted should be fixed now. Original commit message: This patch changes our defualt legalization behavior for 16, 32, and 64 bit vectors with i8/i16/i32/i64 scalar types from promotion to widening. For example, v8i8 will now be widened to v16i8 instead of promoted to v8i16. This keeps the elements widths the same and pads with undef elements. We believe this is a better legalization strategy. But it carries some issues due to the fragmented vector ISA. For example, i8 shifts and multiplies get widened and then later have to be promoted/split into vXi16 vectors. This has the potential to cause regressions so we wanted to get it in early in the 10.0 cycle so we have plenty of time to address them. Next steps will be to merge tests that explicitly test the command line option. And then we can remove the option and its associated code. llvm-svn: 368183
author: Craig Topper <craig.topper@intel.com> 2019-08-07 16:24:26 +0000
committer: Craig Topper <craig.topper@intel.com> 2019-08-07 16:24:26 +0000
commit: 8b5f2ab2a40c60518d7de4a76879572247fac540 (patch)
tree: 69b5aa240bc06dfd88fd390ded1ed5c64a98bd60 /llvm/lib/Target/X86/X86TargetTransformInfo.cpp
parent: 9f75418e1a7edf8f4951e8d86d42bcf97ca64671 (diff)
download: bcm5719-llvm-8b5f2ab2a40c60518d7de4a76879572247fac540.tar.gz
bcm5719-llvm-8b5f2ab2a40c60518d7de4a76879572247fac540.zip
1 files changed, 45 insertions, 9 deletions
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 3dc59aeb263..0077ff70c56 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -887,7 +887,7 @@ int X86TTIImpl::getArithmeticInstrCost(
 int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
                                Type *SubTp) {
   // 64-bit packed float vectors (v2f32) are widened to type v4f32.
-  // 64-bit packed integer vectors (v2i32) are promoted to type v2i64.
+  // 64-bit packed integer vectors (v2i32) are widened to type v4i32.
   std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
 
   // Treat Transpose as 2-op shuffles - there's no difference in lowering.
@@ -2425,14 +2425,6 @@ int X86TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
 
 int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
                                            bool IsPairwise) {
-
-  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
-
-  MVT MTy = LT.second;
-
-  int ISD = TLI->InstructionOpcodeToISD(Opcode);
-  assert(ISD && "Invalid opcode");
-
   // We use the Intel Architecture Code Analyzer(IACA) to measure the throughput
   // and make it as the cost.
 
@@ -2440,7 +2432,10 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
     { ISD::FADD,  MVT::v2f64,   2 },
     { ISD::FADD,  MVT::v4f32,   4 },
     { ISD::ADD,   MVT::v2i64,   2 },      // The data reported by the IACA tool is "1.6".
+    { ISD::ADD,   MVT::v2i32,   2 }, // FIXME: chosen to be less than v4i32.
     { ISD::ADD,   MVT::v4i32,   3 },      // The data reported by the IACA tool is "3.5".
+    { ISD::ADD,   MVT::v2i16,   3 }, // FIXME: chosen to be less than v4i16
+    { ISD::ADD,   MVT::v4i16,   4 }, // FIXME: chosen to be less than v8i16
     { ISD::ADD,   MVT::v8i16,   5 },
   };
 
@@ -2449,8 +2444,11 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
     { ISD::FADD,  MVT::v4f64,   5 },
     { ISD::FADD,  MVT::v8f32,   7 },
     { ISD::ADD,   MVT::v2i64,   1 },      // The data reported by the IACA tool is "1.5".
+    { ISD::ADD,   MVT::v2i32,   2 }, // FIXME: chosen to be less than v4i32
     { ISD::ADD,   MVT::v4i32,   3 },      // The data reported by the IACA tool is "3.5".
     { ISD::ADD,   MVT::v4i64,   5 },      // The data reported by the IACA tool is "4.8".
+    { ISD::ADD,   MVT::v2i16,   3 }, // FIXME: chosen to be less than v4i16
+    { ISD::ADD,   MVT::v4i16,   4 }, // FIXME: chosen to be less than v8i16
     { ISD::ADD,   MVT::v8i16,   5 },
     { ISD::ADD,   MVT::v8i32,   5 },
   };
@@ -2459,7 +2457,10 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
     { ISD::FADD,  MVT::v2f64,   2 },
     { ISD::FADD,  MVT::v4f32,   4 },
     { ISD::ADD,   MVT::v2i64,   2 },      // The data reported by the IACA tool is "1.6".
+    { ISD::ADD,   MVT::v2i32,   2 }, // FIXME: chosen to be less than v4i32
     { ISD::ADD,   MVT::v4i32,   3 },      // The data reported by the IACA tool is "3.3".
+    { ISD::ADD,   MVT::v2i16,   2 },      // The data reported by the IACA tool is "4.3".
+    { ISD::ADD,   MVT::v4i16,   3 },      // The data reported by the IACA tool is "4.3".
     { ISD::ADD,   MVT::v8i16,   4 },      // The data reported by the IACA tool is "4.3".
   };
 
@@ -2468,12 +2469,47 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
     { ISD::FADD,  MVT::v4f64,   3 },
     { ISD::FADD,  MVT::v8f32,   4 },
     { ISD::ADD,   MVT::v2i64,   1 },      // The data reported by the IACA tool is "1.5".
+    { ISD::ADD,   MVT::v2i32,   2 }, // FIXME: chosen to be less than v4i32
     { ISD::ADD,   MVT::v4i32,   3 },      // The data reported by the IACA tool is "2.8".
     { ISD::ADD,   MVT::v4i64,   3 },
+    { ISD::ADD,   MVT::v2i16,   2 },      // The data reported by the IACA tool is "4.3".
+    { ISD::ADD,   MVT::v4i16,   3 },      // The data reported by the IACA tool is "4.3".
     { ISD::ADD,   MVT::v8i16,   4 },
     { ISD::ADD,   MVT::v8i32,   5 },
   };
 
+  int ISD = TLI->InstructionOpcodeToISD(Opcode);
+  assert(ISD && "Invalid opcode");
+
+  // Before legalizing the type, give a chance to look up illegal narrow types
+  // in the table.
+  // FIXME: Is there a better way to do this?
+  EVT VT = TLI->getValueType(DL, ValTy);
+  if (VT.isSimple()) {
+    MVT MTy = VT.getSimpleVT();
+    if (IsPairwise) {
+      if (ST->hasAVX())
+        if (const auto *Entry = CostTableLookup(AVX1CostTblPairWise, ISD, MTy))
+          return Entry->Cost;
+
+      if (ST->hasSSE42())
+        if (const auto *Entry = CostTableLookup(SSE42CostTblPairWise, ISD, MTy))
+          return Entry->Cost;
+    } else {
+      if (ST->hasAVX())
+        if (const auto *Entry = CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy))
+          return Entry->Cost;
+
+      if (ST->hasSSE42())
+        if (const auto *Entry = CostTableLookup(SSE42CostTblNoPairWise, ISD, MTy))
+          return Entry->Cost;
+    }
+  }
+
+  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
+
+  MVT MTy = LT.second;
+
   if (IsPairwise) {
     if (ST->hasAVX())
       if (const auto *Entry = CostTableLookup(AVX1CostTblPairWise, ISD, MTy))
author	Craig Topper <craig.topper@intel.com>	2019-08-07 16:24:26 +0000
committer	Craig Topper <craig.topper@intel.com>	2019-08-07 16:24:26 +0000
commit	8b5f2ab2a40c60518d7de4a76879572247fac540 (patch)
tree	69b5aa240bc06dfd88fd390ded1ed5c64a98bd60 /llvm/lib/Target/X86/X86TargetTransformInfo.cpp
parent	9f75418e1a7edf8f4951e8d86d42bcf97ca64671 (diff)
download	bcm5719-llvm-8b5f2ab2a40c60518d7de4a76879572247fac540.tar.gz bcm5719-llvm-8b5f2ab2a40c60518d7de4a76879572247fac540.zip