author    | Evan Cheng <evan.cheng@apple.com> | 2007-10-26 01:56:11 +0000
committer | Evan Cheng <evan.cheng@apple.com> | 2007-10-26 01:56:11 +0000
commit    | 7f3d02471d04bef0062790c0b79151a9a6fe8266 (patch)
tree      | 7cb5e4761033e60c238d53823d1e5a05c03d8a2a /llvm/lib
parent    | 1207cd6b1be6ccbe43a67877b4a28d4a7629b10d (diff)
download  | bcm5719-llvm-7f3d02471d04bef0062790c0b79151a9a6fe8266.tar.gz, bcm5719-llvm-7f3d02471d04bef0062790c0b79151a9a6fe8266.zip
Loosen up IV reuse to allow reuse of the same stride but a larger type when truncating from the larger type to the smaller type is free.
For example, it turns this loop:
LBB1_1: # entry.bb_crit_edge
        xorl    %ecx, %ecx
        xorw    %dx, %dx
        movw    %dx, %si
LBB1_2: # bb
        movl    L_X$non_lazy_ptr, %edi
        movw    %si, (%edi)
        movl    L_Y$non_lazy_ptr, %edi
        movw    %dx, (%edi)
        addw    $4, %dx
        incw    %si
        incl    %ecx
        cmpl    %eax, %ecx
        jne     LBB1_2 # bb
into
LBB1_1: # entry.bb_crit_edge
        xorl    %ecx, %ecx
        xorw    %dx, %dx
LBB1_2: # bb
        movl    L_X$non_lazy_ptr, %esi
        movw    %cx, (%esi)
        movl    L_Y$non_lazy_ptr, %esi
        movw    %dx, (%esi)
        addw    $4, %dx
        incl    %ecx
        cmpl    %eax, %ecx
        jne     LBB1_2 # bb
llvm-svn: 43375
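For context, the loop being compiled here is the existing foo() example from llvm/lib/Target/X86/README.txt. The sketch below is only illustrative; the declared types of X and Y are an assumption (16-bit globals are what introduce the narrower i16 induction value alongside the i32 trip counter):

// Sketch of the source loop behind the assembly above, based on the foo()
// example already in llvm/lib/Target/X86/README.txt. The global types are
// assumed here, not taken from the commit.
short X, Y;

void foo(int N) {
  for (int i = 0; i < N; i++) {
    X = i;      // i truncated to 16 bits -- free on x86 (reuse the i32 counter register)
    Y = i * 4;  // second induction value with stride 4, also stored as 16 bits
  }
}

With this patch, LSR no longer needs a separate i16 IV for the X store: the i32 counter can be reused because truncating i32 to i16 is free on x86.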
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/X86/README.txt                    | 32
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp           |  7
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.h             |  5
-rw-r--r-- | llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp | 38
4 files changed, 46 insertions, 36 deletions
diff --git a/llvm/lib/Target/X86/README.txt b/llvm/lib/Target/X86/README.txt
index ccd15be4e6b..41b38d84c8a 100644
--- a/llvm/lib/Target/X86/README.txt
+++ b/llvm/lib/Target/X86/README.txt
@@ -339,20 +339,18 @@ void foo(int N) { for (i = 0; i < N; i++) { X = i; Y = i*4; } }
-LBB1_1: #bb.preheader
-        xorl    %ecx, %ecx
-        xorw    %dx, %dx
-LBB1_2: #bb
-        movl    L_X$non_lazy_ptr, %esi
-        movw    %dx, (%esi)
-        movw    %dx, %si
-        shlw    $2, %si
-        movl    L_Y$non_lazy_ptr, %edi
-        movw    %si, (%edi)
-        incl    %ecx
-        incw    %dx
-        cmpl    %eax, %ecx
-        jne     LBB1_2 #bb
+LBB1_1: # entry.bb_crit_edge
+        xorl    %ecx, %ecx
+        xorw    %dx, %dx
+LBB1_2: # bb
+        movl    L_X$non_lazy_ptr, %esi
+        movw    %cx, (%esi)
+        movl    L_Y$non_lazy_ptr, %esi
+        movw    %dx, (%esi)
+        addw    $4, %dx
+        incl    %ecx
+        cmpl    %eax, %ecx
+        jne     LBB1_2 # bb
 
 vs.
 
@@ -367,11 +365,7 @@ L4:
         cmpl    %edx, %edi
         jne     L4
 
-There are 3 issues:
-
-1. Lack of post regalloc LICM.
-2. LSR unable to reused IV for a different type (i16 vs. i32) even though
-   the cast would be free.
+This is due to the lack of post regalloc LICM.
 
 //===---------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 4d528abb6b4..172aa5338b9 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -5122,6 +5122,13 @@ bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM,
 }
 
+bool X86TargetLowering::isTruncateFree(const Type *Ty1, const Type *Ty2) const {
+  if (!Ty1->isInteger() || !Ty2->isInteger())
+    return false;
+  return Ty1->getPrimitiveSizeInBits() > Ty2->getPrimitiveSizeInBits();
+}
+
+
 /// isShuffleMaskLegal - Targets can use this to indicate that they only
 /// support *some* VECTOR_SHUFFLE operations, those with specific masks.
 /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 7123adaad27..b68de5a6753 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -359,6 +359,11 @@ namespace llvm {
     /// by AM is legal for this target, for a load/store of the specified type.
     virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const;
 
+    /// isTruncateFree - Return true if it's free to truncate a value of
+    /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in
+    /// register EAX to i16 by referencing its sub-register AX.
+    virtual bool isTruncateFree(const Type *Ty1, const Type *Ty2) const;
+
     /// isShuffleMaskLegal - Targets can use this to indicate that they only
     /// support *some* VECTOR_SHUFFLE operations, those with specific masks.
     /// By default, if a target supports the VECTOR_SHUFFLE node, all mask
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index fbe3171f81f..d81ea2b7994 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -178,7 +178,7 @@ private:
   bool FindIVForUser(ICmpInst *Cond, IVStrideUse *&CondUse,
                      const SCEVHandle *&CondStride);
   bool RequiresTypeConversion(const Type *Ty, const Type *NewTy);
-  unsigned CheckForIVReuse(bool, const SCEVHandle&,
+  unsigned CheckForIVReuse(bool, bool, const SCEVHandle&,
                            IVExpr&, const Type*,
                            const std::vector<BasedUser>& UsersToProcess);
   bool ValidStride(bool, int64_t,
@@ -980,15 +980,17 @@ bool LoopStrengthReduce::ValidStride(bool HasBaseReg,
 
 /// RequiresTypeConversion - Returns true if converting Ty to NewTy is not
 /// a nop.
-bool LoopStrengthReduce::RequiresTypeConversion(const Type *Ty,
-                                                const Type *NewTy) {
-  if (Ty == NewTy)
+bool LoopStrengthReduce::RequiresTypeConversion(const Type *Ty1,
+                                                const Type *Ty2) {
+  if (Ty1 == Ty2)
     return false;
-  return (!Ty->canLosslesslyBitCastTo(NewTy) &&
-          !(isa<PointerType>(NewTy) &&
-            Ty->canLosslesslyBitCastTo(UIntPtrTy)) &&
-          !(isa<PointerType>(Ty) &&
-            NewTy->canLosslesslyBitCastTo(UIntPtrTy)));
+  if (TLI && TLI->isTruncateFree(Ty1, Ty2))
+    return false;
+  return (!Ty1->canLosslesslyBitCastTo(Ty2) &&
+          !(isa<PointerType>(Ty2) &&
+            Ty1->canLosslesslyBitCastTo(UIntPtrTy)) &&
+          !(isa<PointerType>(Ty1) &&
+            Ty2->canLosslesslyBitCastTo(UIntPtrTy)));
 }
 
 /// CheckForIVReuse - Returns the multiple if the stride is the multiple
@@ -997,20 +999,23 @@ bool LoopStrengthReduce::RequiresTypeConversion(const Type *Ty,
 /// this stride to be rewritten as prev iv * factor. It returns 0 if no
 /// reuse is possible.
 unsigned LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
+                                             bool AllUsesAreAddresses,
                                              const SCEVHandle &Stride,
                                              IVExpr &IV, const Type *Ty,
                                        const std::vector<BasedUser>& UsersToProcess) {
   if (SCEVConstant *SC = dyn_cast<SCEVConstant>(Stride)) {
     int64_t SInt = SC->getValue()->getSExtValue();
-    if (SInt == 1) return 0;
-
     for (std::map<SCEVHandle, IVsOfOneStride>::iterator SI= IVsByStride.begin(),
            SE = IVsByStride.end(); SI != SE; ++SI) {
       int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
-      if (SInt != -SSInt &&
+      if (SI->first != Stride &&
           (unsigned(abs(SInt)) < SSInt || (SInt % SSInt) != 0))
         continue;
       int64_t Scale = SInt / SSInt;
+      // When scale is 1, we don't need to worry about whether the
+      // multiplication can be folded into the addressing mode.
+      if (!AllUsesAreAddresses && Scale != 1)
+        continue;
       // Check that this stride is valid for all the types used for loads and
       // stores; if it can be used for some and not others, we might as well use
       // the original stride everywhere, since we have to create the IV for it
@@ -1021,7 +1026,7 @@ unsigned LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
         // FIXME: Only handle base == 0 for now.
         // Only reuse previous IV if it would not require a type conversion.
         if (isZero(II->Base) &&
-            !RequiresTypeConversion(II->Base->getType(),Ty)) {
+            !RequiresTypeConversion(II->Base->getType(), Ty)) {
           IV = *II;
           return Scale;
         }
@@ -1183,10 +1188,9 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride,
                                      SE->getIntegerSCEV(0, Type::Int32Ty),
                                      0, 0);
   unsigned RewriteFactor = 0;
-  if (AllUsesAreAddresses)
-    RewriteFactor = CheckForIVReuse(HaveCommonExprs, Stride, ReuseIV,
-                                    CommonExprs->getType(),
-                                    UsersToProcess);
+  RewriteFactor = CheckForIVReuse(HaveCommonExprs, AllUsesAreAddresses,
+                                  Stride, ReuseIV, CommonExprs->getType(),
+                                  UsersToProcess);
   if (RewriteFactor != 0) {
     DOUT << "BASED ON IV of STRIDE " << *ReuseIV.Stride
          << " and BASE " << *ReuseIV.Base << " :\n";
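To summarize the reuse rule after this patch, here is a small self-contained sketch of the decision CheckForIVReuse now makes for two constant strides. It is not LLVM code; canReuseIV and its parameters are hypothetical stand-ins for the corresponding pieces of LoopStrengthReduce::CheckForIVReuse, RequiresTypeConversion, and TargetLowering::isTruncateFree, and the lossless-bitcast cases are omitted.

// Standalone sketch (assumed names, not the actual LLVM API) of the new
// IV-reuse decision for two constant strides.
#include <cstdint>
#include <cstdlib>

bool canReuseIV(int64_t SInt,              // stride of the use being rewritten
                int64_t SSInt,             // stride of an existing IV
                bool AllUsesAreAddresses,  // every use folds into an address
                bool SameType,             // IV and use already have the same type
                bool TruncIsFree) {        // target reports the truncate as free
  // The existing IV's stride must evenly divide the new stride.
  if (SSInt == 0 || std::abs(SInt) < std::abs(SSInt) || SInt % SSInt != 0)
    return false;
  int64_t Scale = SInt / SSInt;
  // New in this patch: a scale other than 1 is only usable when it can be
  // folded into an addressing mode, so then all uses must be addresses.
  if (!AllUsesAreAddresses && Scale != 1)
    return false;
  // Also new: a type mismatch no longer blocks reuse when the target says
  // truncating from the wider IV type to the narrower use type is free
  // (e.g. i32 in EAX to i16 via its sub-register AX on x86).
  return SameType || TruncIsFree;
}

In the motivating loop above, the i16 IV with stride 1 satisfies this test against the i32 counter (Scale == 1, truncation free on x86), so the separate %si induction variable disappears.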