author    | Evan Cheng <evan.cheng@apple.com> | 2007-10-26 01:56:11 +0000
committer | Evan Cheng <evan.cheng@apple.com> | 2007-10-26 01:56:11 +0000
commit    | 7f3d02471d04bef0062790c0b79151a9a6fe8266 (patch)
tree      | 7cb5e4761033e60c238d53823d1e5a05c03d8a2a /llvm/lib
parent    | 1207cd6b1be6ccbe43a67877b4a28d4a7629b10d (diff)
download  | bcm5719-llvm-7f3d02471d04bef0062790c0b79151a9a6fe8266.tar.gz, bcm5719-llvm-7f3d02471d04bef0062790c0b79151a9a6fe8266.zip
Loosen up IV reuse to allow reuse of the same stride but a larger type when truncating from the larger type to the smaller type is free.
For example, it turns this loop:
LBB1_1: # entry.bb_crit_edge
        xorl    %ecx, %ecx
        xorw    %dx, %dx
        movw    %dx, %si
LBB1_2: # bb
        movl    L_X$non_lazy_ptr, %edi
        movw    %si, (%edi)
        movl    L_Y$non_lazy_ptr, %edi
        movw    %dx, (%edi)
        addw    $4, %dx
        incw    %si
        incl    %ecx
        cmpl    %eax, %ecx
        jne     LBB1_2 # bb
into
LBB1_1: # entry.bb_crit_edge
        xorl    %ecx, %ecx
        xorw    %dx, %dx
LBB1_2: # bb
        movl    L_X$non_lazy_ptr, %esi
        movw    %cx, (%esi)
        movl    L_Y$non_lazy_ptr, %esi
        movw    %dx, (%esi)
        addw    $4, %dx
        incl    %ecx
        cmpl    %eax, %ecx
        jne     LBB1_2 # bb
llvm-svn: 43375
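For context, the loop being compiled here is the existing foo() example from llvm/lib/Target/X86/README.txt. The sketch below is only illustrative; the declared types of X and Y are an assumption (16-bit globals are what introduce the narrower i16 induction value alongside the i32 trip counter):

// Sketch of the source loop behind the assembly above, based on the foo()
// example already in llvm/lib/Target/X86/README.txt. The global types are
// assumed here, not taken from the commit.
short X, Y;

void foo(int N) {
  for (int i = 0; i < N; i++) {
    X = i;      // i truncated to 16 bits -- free on x86 (reuse the i32 counter register)
    Y = i * 4;  // second induction value with stride 4, also stored as 16 bits
  }
}

With this patch, LSR no longer needs a separate i16 IV for the X store: the i32 counter can be reused because truncating i32 to i16 is free on x86.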
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/X86/README.txt                    | 32
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp           |  7
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.h             |  5
-rw-r--r-- | llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp | 38
4 files changed, 46 insertions, 36 deletions
diff --git a/llvm/lib/Target/X86/README.txt b/llvm/lib/Target/X86/README.txt
index ccd15be4e6b..41b38d84c8a 100644
--- a/llvm/lib/Target/X86/README.txt
+++ b/llvm/lib/Target/X86/README.txt
@@ -339,20 +339,18 @@ void foo(int N) { for (i = 0; i < N; i++) { X = i; Y = i*4; } }
-LBB1_1: #bb.preheader
-        xorl    %ecx, %ecx
-        xorw    %dx, %dx
-LBB1_2: #bb
-        movl    L_X$non_lazy_ptr, %esi
-        movw    %dx, (%esi)
-        movw    %dx, %si
-        shlw    $2, %si
-        movl    L_Y$non_lazy_ptr, %edi
-        movw    %si, (%edi)
-        incl    %ecx
-        incw    %dx
-        cmpl    %eax, %ecx
-        jne     LBB1_2 #bb
+LBB1_1: # entry.bb_crit_edge
+        xorl    %ecx, %ecx
+        xorw    %dx, %dx
+LBB1_2: # bb
+        movl    L_X$non_lazy_ptr, %esi
+        movw    %cx, (%esi)
+        movl    L_Y$non_lazy_ptr, %esi
+        movw    %dx, (%esi)
+        addw    $4, %dx
+        incl    %ecx
+        cmpl    %eax, %ecx
+        jne     LBB1_2 # bb
 
 vs.
 
@@ -367,11 +365,7 @@ L4:
         cmpl    %edx, %edi
         jne     L4
 
-There are 3 issues:
-
-1. Lack of post regalloc LICM.
-2. LSR unable to reused IV for a different type (i16 vs. i32) even though
-   the cast would be free.
+This is due to the lack of post regalloc LICM.
 
 //===---------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 4d528abb6b4..172aa5338b9 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -5122,6 +5122,13 @@ bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM,
 }
 
+bool X86TargetLowering::isTruncateFree(const Type *Ty1, const Type *Ty2) const {
+  if (!Ty1->isInteger() || !Ty2->isInteger())
+    return false;
+  return Ty1->getPrimitiveSizeInBits() > Ty2->getPrimitiveSizeInBits();
+}
+
+
 /// isShuffleMaskLegal - Targets can use this to indicate that they only
 /// support *some* VECTOR_SHUFFLE operations, those with specific masks.
 /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 7123adaad27..b68de5a6753 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -359,6 +359,11 @@ namespace llvm {
     /// by AM is legal for this target, for a load/store of the specified type.
     virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const;
 
+    /// isTruncateFree - Return true if it's free to truncate a value of
+    /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in
+    /// register EAX to i16 by referencing its sub-register AX.
+    virtual bool isTruncateFree(const Type *Ty1, const Type *Ty2) const;
+
     /// isShuffleMaskLegal - Targets can use this to indicate that they only
     /// support *some* VECTOR_SHUFFLE operations, those with specific masks.
     /// By default, if a target supports the VECTOR_SHUFFLE node, all mask
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index fbe3171f81f..d81ea2b7994 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -178,7 +178,7 @@ private:
   bool FindIVForUser(ICmpInst *Cond, IVStrideUse *&CondUse,
                      const SCEVHandle *&CondStride);
   bool RequiresTypeConversion(const Type *Ty, const Type *NewTy);
-  unsigned CheckForIVReuse(bool, const SCEVHandle&,
+  unsigned CheckForIVReuse(bool, bool, const SCEVHandle&,
                            IVExpr&, const Type*,
                            const std::vector<BasedUser>& UsersToProcess);
   bool ValidStride(bool, int64_t,
@@ -980,15 +980,17 @@ bool LoopStrengthReduce::ValidStride(bool HasBaseReg,
 
 /// RequiresTypeConversion - Returns true if converting Ty to NewTy is not
 /// a nop.
-bool LoopStrengthReduce::RequiresTypeConversion(const Type *Ty,
-                                                const Type *NewTy) {
-  if (Ty == NewTy)
+bool LoopStrengthReduce::RequiresTypeConversion(const Type *Ty1,
+                                                const Type *Ty2) {
+  if (Ty1 == Ty2)
     return false;
-  return (!Ty->canLosslesslyBitCastTo(NewTy) &&
-          !(isa<PointerType>(NewTy) &&
-            Ty->canLosslesslyBitCastTo(UIntPtrTy)) &&
-          !(isa<PointerType>(Ty) &&
-            NewTy->canLosslesslyBitCastTo(UIntPtrTy)));
+  if (TLI && TLI->isTruncateFree(Ty1, Ty2))
+    return false;
+  return (!Ty1->canLosslesslyBitCastTo(Ty2) &&
+          !(isa<PointerType>(Ty2) &&
+            Ty1->canLosslesslyBitCastTo(UIntPtrTy)) &&
+          !(isa<PointerType>(Ty1) &&
+            Ty2->canLosslesslyBitCastTo(UIntPtrTy)));
 }
 
 /// CheckForIVReuse - Returns the multiple if the stride is the multiple
@@ -997,20 +999,23 @@ bool LoopStrengthReduce::RequiresTypeConversion(const Type *Ty,
 /// this stride to be rewritten as prev iv * factor. It returns 0 if no
 /// reuse is possible.
 unsigned LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
+                                             bool AllUsesAreAddresses,
                                              const SCEVHandle &Stride,
                                              IVExpr &IV, const Type *Ty,
                                        const std::vector<BasedUser>& UsersToProcess) {
   if (SCEVConstant *SC = dyn_cast<SCEVConstant>(Stride)) {
     int64_t SInt = SC->getValue()->getSExtValue();
-    if (SInt == 1) return 0;
-
     for (std::map<SCEVHandle, IVsOfOneStride>::iterator SI= IVsByStride.begin(),
            SE = IVsByStride.end(); SI != SE; ++SI) {
       int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
-      if (SInt != -SSInt &&
+      if (SI->first != Stride &&
           (unsigned(abs(SInt)) < SSInt || (SInt % SSInt) != 0))
         continue;
       int64_t Scale = SInt / SSInt;
+      // When scale is 1, we don't need to worry about whether the
+      // multiplication can be folded into the addressing mode.
+      if (!AllUsesAreAddresses && Scale != 1)
+        continue;
       // Check that this stride is valid for all the types used for loads and
       // stores; if it can be used for some and not others, we might as well use
       // the original stride everywhere, since we have to create the IV for it
@@ -1021,7 +1026,7 @@ unsigned LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
         // FIXME: Only handle base == 0 for now.
         // Only reuse previous IV if it would not require a type conversion.
         if (isZero(II->Base) &&
-            !RequiresTypeConversion(II->Base->getType(),Ty)) {
+            !RequiresTypeConversion(II->Base->getType(), Ty)) {
           IV = *II;
           return Scale;
         }
@@ -1183,10 +1188,9 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride,
                                      SE->getIntegerSCEV(0, Type::Int32Ty),
                                      0, 0);
   unsigned RewriteFactor = 0;
-  if (AllUsesAreAddresses)
-    RewriteFactor = CheckForIVReuse(HaveCommonExprs, Stride, ReuseIV,
-                                    CommonExprs->getType(),
-                                    UsersToProcess);
+  RewriteFactor = CheckForIVReuse(HaveCommonExprs, AllUsesAreAddresses,
+                                  Stride, ReuseIV, CommonExprs->getType(),
+                                  UsersToProcess);
   if (RewriteFactor != 0) {
     DOUT << "BASED ON IV of STRIDE " << *ReuseIV.Stride
          << " and BASE " << *ReuseIV.Base << " :\n";
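To summarize the reuse rule after this patch, here is a small self-contained sketch of the decision CheckForIVReuse now makes for two constant strides. It is not LLVM code; canReuseIV and its parameters are hypothetical stand-ins for the corresponding pieces of LoopStrengthReduce::CheckForIVReuse, RequiresTypeConversion, and TargetLowering::isTruncateFree, and the lossless-bitcast cases are omitted.

// Standalone sketch (assumed names, not the actual LLVM API) of the new
// IV-reuse decision for two constant strides.
#include <cstdint>
#include <cstdlib>

bool canReuseIV(int64_t SInt,              // stride of the use being rewritten
                int64_t SSInt,             // stride of an existing IV
                bool AllUsesAreAddresses,  // every use folds into an address
                bool SameType,             // IV and use already have the same type
                bool TruncIsFree) {        // target reports the truncate as free
  // The existing IV's stride must evenly divide the new stride.
  if (SSInt == 0 || std::abs(SInt) < std::abs(SSInt) || SInt % SSInt != 0)
    return false;
  int64_t Scale = SInt / SSInt;
  // New in this patch: a scale other than 1 is only usable when it can be
  // folded into an addressing mode, so then all uses must be addresses.
  if (!AllUsesAreAddresses && Scale != 1)
    return false;
  // Also new: a type mismatch no longer blocks reuse when the target says
  // truncating from the wider IV type to the narrower use type is free
  // (e.g. i32 in EAX to i16 via its sub-register AX on x86).
  return SameType || TruncIsFree;
}

In the motivating loop above, the i16 IV with stride 1 satisfies this test against the i32 counter (Scale == 1, truncation free on x86), so the separate %si induction variable disappears.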