diff options
-rw-r--r-- | llvm/include/llvm/CodeGen/BasicTTIImpl.h | 44 | ||||
-rw-r--r-- | llvm/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll | 42 |
2 files changed, 0 insertions, 86 deletions
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index ecf20dae461..ded19c5b26f 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -542,50 +542,6 @@ public: unsigned Cost = static_cast<T *>(this)->getMemoryOpCost( Opcode, VecTy, Alignment, AddressSpace); - // Legalize the vector type, and get the legalized and unlegalized type - // sizes. - MVT VecTyLT = getTLI()->getTypeLegalizationCost(DL, VecTy).second; - unsigned VecTySize = DL.getTypeStoreSize(VecTy); - unsigned VecTyLTSize = VecTyLT.getStoreSize(); - - // Return the ceiling of dividing A by B. - auto ceil = [](unsigned A, unsigned B) { return (A + B - 1) / B; }; - - // Scale the cost of the memory operation by the fraction of legalized - // instructions that will actually be used. We shouldn't account for the - // cost of dead instructions since they will be removed. - // - // E.g., An interleaved load of factor 8: - // %vec = load <16 x i64>, <16 x i64>* %ptr - // %v0 = shufflevector %vec, undef, <0, 8> - // - // If <16 x i64> is legalized to 8 v2i64 loads, only 2 of the loads will be - // used (those corresponding to elements [0:1] and [8:9] of the unlegalized - // type). The other loads are unused. - // - // We only scale the cost of loads since interleaved store groups aren't - // allowed to have gaps. - if (Opcode == Instruction::Load && VecTySize > VecTyLTSize) { - - // The number of loads of a legal type it will take to represent a load - // of the unlegalized vector type. - unsigned NumLegalInsts = ceil(VecTySize, VecTyLTSize); - - // The number of elements of the unlegalized type that correspond to a - // single legal instruction. - unsigned NumEltsPerLegalInst = ceil(NumElts, NumLegalInsts); - - // Determine which legal instructions will be used. - BitVector UsedInsts(NumLegalInsts, false); - for (unsigned Index : Indices) - for (unsigned Elt = 0; Elt < NumSubElts; ++Elt) - UsedInsts.set((Index + Elt * Factor) / NumEltsPerLegalInst); - - // Scale the cost of the load by the fraction of legal instructions that - // will be used. - Cost *= UsedInsts.count() / NumLegalInsts; - } - // Then plus the cost of interleave operation. if (Opcode == Instruction::Load) { // The interleave cost is similar to extract sub vectors' elements diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll index df1f9c61940..a0e741a3cdb 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll @@ -14,7 +14,6 @@ entry: ; 8xi8 and 16xi8 are valid i8 vector types, so the cost of the interleaved ; access group is 2. -; CHECK: LV: Checking a loop in "test_byte_interleaved_cost" ; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction: %tmp = load i8, i8* %arrayidx0, align 4 ; CHECK: LV: Found an estimated cost of 2 for VF 16 For instruction: %tmp = load i8, i8* %arrayidx0, align 4 @@ -38,44 +37,3 @@ for.body: ; preds = %for.body, %entry for.end: ; preds = %for.body ret void } - -%ig.factor.8 = type { double*, double, double, double, double, double, double, double } -define double @wide_interleaved_group(%ig.factor.8* %s, double %a, double %b, i32 %n) { -entry: - br label %for.body - -; Check the default cost of a strided load with a factor that is greater than -; the maximum allowed. In this test, the interleave factor would be 8, which is -; not supported. - -; CHECK: LV: Checking a loop in "wide_interleaved_group" -; CHECK: LV: Found an estimated cost of 6 for VF 2 For instruction: %1 = load double, double* %0, align 8 -; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %5 = load double, double* %4, align 8 -; CHECK: LV: Found an estimated cost of 10 for VF 2 For instruction: store double %9, double* %10, align 8 - -for.body: - %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] - %r = phi double [ 0.000000e+00, %entry ], [ %12, %for.body ] - %0 = getelementptr inbounds %ig.factor.8, %ig.factor.8* %s, i64 %i, i32 2 - %1 = load double, double* %0, align 8 - %2 = fcmp fast olt double %1, %a - %3 = select i1 %2, double 0.000000e+00, double %1 - %4 = getelementptr inbounds %ig.factor.8, %ig.factor.8* %s, i64 %i, i32 6 - %5 = load double, double* %4, align 8 - %6 = fcmp fast olt double %5, %a - %7 = select i1 %6, double 0.000000e+00, double %5 - %8 = fmul fast double %7, %b - %9 = fadd fast double %8, %3 - %10 = getelementptr inbounds %ig.factor.8, %ig.factor.8* %s, i64 %i, i32 3 - store double %9, double* %10, align 8 - %11 = fmul fast double %9, %9 - %12 = fadd fast double %11, %r - %i.next = add nuw nsw i64 %i, 1 - %13 = trunc i64 %i.next to i32 - %cond = icmp eq i32 %13, %n - br i1 %cond, label %for.exit, label %for.body - -for.exit: - %r.lcssa = phi double [ %12, %for.body ] - ret double %r.lcssa -} |