diff options
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 46 | 
1 files changed, 23 insertions, 23 deletions
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index a7164ec8ba5..7df72609184 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -261,18 +261,18 @@ unsigned X86TTIImpl::getArithmeticInstrCost(      { ISD::SHL,  MVT::v4i32,   2*5 }, // We optimized this using mul.      { ISD::SHL,  MVT::v2i64,  2*10 }, // Scalarized.      { ISD::SHL,  MVT::v4i64,  4*10 }, // Scalarized. -
 -    { ISD::SRL,  MVT::v16i8,    26 }, // cmpgtb sequence.
 -    { ISD::SRL,  MVT::v8i16,    32 }, // cmpgtb sequence.
 -    { ISD::SRL,  MVT::v4i32,    16 }, // Shift each lane + blend.
 -    { ISD::SRL,  MVT::v2i64,  2*10 }, // Scalarized.
 -
 -    { ISD::SRA,  MVT::v16i8,    54 }, // unpacked cmpgtb sequence.
 -    { ISD::SRA,  MVT::v8i16,    32 }, // cmpgtb sequence.
 -    { ISD::SRA,  MVT::v4i32,    16 }, // Shift each lane + blend.
 -    { ISD::SRA,  MVT::v2i64,  2*10 }, // Scalarized.
 -
 -    // It is not a good idea to vectorize division. We have to scalarize it and
 + +    { ISD::SRL,  MVT::v16i8,    26 }, // cmpgtb sequence. +    { ISD::SRL,  MVT::v8i16,    32 }, // cmpgtb sequence. +    { ISD::SRL,  MVT::v4i32,    16 }, // Shift each lane + blend. +    { ISD::SRL,  MVT::v2i64,  2*10 }, // Scalarized. + +    { ISD::SRA,  MVT::v16i8,    54 }, // unpacked cmpgtb sequence. +    { ISD::SRA,  MVT::v8i16,    32 }, // cmpgtb sequence. +    { ISD::SRA,  MVT::v4i32,    16 }, // Shift each lane + blend. +    { ISD::SRA,  MVT::v2i64,  2*10 }, // Scalarized. + +    // It is not a good idea to vectorize division. We have to scalarize it and      // in the process we will often end up having to spilling regular      // registers. The overhead of division is going to dominate most kernels      // anyways so try hard to prevent vectorization of division - it is @@ -1117,17 +1117,17 @@ unsigned X86TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,    }    return X86TTIImpl::getIntImmCost(Imm, Ty);  } -
 -bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy, int Consecutive) {
 -  int DataWidth = DataTy->getPrimitiveSizeInBits();
 -  
 -  // Todo: AVX512 allows gather/scatter, works with strided and random as well
 -  if ((DataWidth < 32) || (Consecutive == 0))
 -    return false;
 -  if (ST->hasAVX512() || ST->hasAVX2()) 
 -    return true;
 -  return false;
 -}
 + +bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy, int Consecutive) { +  int DataWidth = DataTy->getPrimitiveSizeInBits(); + +  // Todo: AVX512 allows gather/scatter, works with strided and random as well +  if ((DataWidth < 32) || (Consecutive == 0)) +    return false; +  if (ST->hasAVX512() || ST->hasAVX2()) +    return true; +  return false; +}  bool X86TTIImpl::isLegalMaskedStore(Type *DataType, int Consecutive) {    return isLegalMaskedLoad(DataType, Consecutive);  | 

