diff options
| author | Jonas Paulsson <paulsson@linux.vnet.ibm.com> | 2016-08-17 13:24:19 +0000 |
|---|---|---|
| committer | Jonas Paulsson <paulsson@linux.vnet.ibm.com> | 2016-08-17 13:24:19 +0000 |
| commit | 7a79422536d8ca8779fef9e78911def1d7eaf6f9 (patch) | |
| tree | bc4492cac01e82649e517ab96769d5c178778107 /llvm/test/CodeGen/SystemZ/loop-01.ll | |
| parent | a086b9fd15161fb50043b0921be4b2e11018b0cb (diff) | |
| download | bcm5719-llvm-7a79422536d8ca8779fef9e78911def1d7eaf6f9.tar.gz bcm5719-llvm-7a79422536d8ca8779fef9e78911def1d7eaf6f9.zip | |
[LoopStrenghtReduce] Refactoring and addition of a new target cost function.
Refactored so that a LSRUse owns its fixups, as oppsed to letting the
LSRInstance own them. This makes it easier to rate formulas for
LSRUses, since the fixups are available directly. The Offsets vector
has been removed since it was no longer necessary.
New target hook isFoldableMemAccessOffset(), which is used during formula
rating.
For SystemZ, this is useful to express that loads and stores with
float or vector types with a big/negative offset should be avoided in
loops. Without this, LSR will generate a lot of negative offsets that
would require extra instructions for loading the address.
Updated tests:
test/CodeGen/SystemZ/loop-01.ll
Reviewed by: Quentin Colombet and Ulrich Weigand.
https://reviews.llvm.org/D19152
llvm-svn: 278927
Diffstat (limited to 'llvm/test/CodeGen/SystemZ/loop-01.ll')
| -rw-r--r-- | llvm/test/CodeGen/SystemZ/loop-01.ll | 117 |
1 files changed, 117 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/SystemZ/loop-01.ll b/llvm/test/CodeGen/SystemZ/loop-01.ll index b51c96d52e3..321be4b8e62 100644 --- a/llvm/test/CodeGen/SystemZ/loop-01.ll +++ b/llvm/test/CodeGen/SystemZ/loop-01.ll @@ -1,6 +1,8 @@ ; Test loop tuning. ; ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \ +; RUN: | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-Z13 ; Test that strength reduction is applied to addresses with a scale factor, ; but that indexed addressing can still be used. @@ -122,3 +124,118 @@ loop.next: exit: ret void } + +; Test that negative offsets are avoided for loads of floating point. +%s.float = type { float, float, float } +define void @f5(%s.float* nocapture %a, + %s.float* nocapture readonly %b, + i32 zeroext %S) { +; CHECK-Z13-LABEL: f5: +; CHECK-Z13-NOT: -{{[0-9]+}}(%r + +entry: + %cmp9 = icmp eq i32 %S, 0 + br i1 %cmp9, label %for.cond.cleanup, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.cond.cleanup.loopexit: ; preds = %for.body + br label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry + ret void + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] + %a1 = getelementptr inbounds %s.float, %s.float* %b, i64 %indvars.iv, i32 0 + %tmp = load float, float* %a1, align 4 + %b4 = getelementptr inbounds %s.float, %s.float* %b, i64 %indvars.iv, i32 1 + %tmp1 = load float, float* %b4, align 4 + %add = fadd float %tmp, %tmp1 + %c = getelementptr inbounds %s.float, %s.float* %b, i64 %indvars.iv, i32 2 + %tmp2 = load float, float* %c, align 4 + %add7 = fadd float %add, %tmp2 + %a10 = getelementptr inbounds %s.float, %s.float* %a, i64 %indvars.iv, i32 0 + store float %add7, float* %a10, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %S + br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body +} + +; Test that negative offsets are avoided for loads of double. +%s.double = type { double, double, double } +define void @f6(%s.double* nocapture %a, + %s.double* nocapture readonly %b, + i32 zeroext %S) { +; CHECK-Z13-LABEL: f6: +; CHECK-Z13-NOT: -{{[0-9]+}}(%r +entry: + %cmp9 = icmp eq i32 %S, 0 + br i1 %cmp9, label %for.cond.cleanup, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.cond.cleanup.loopexit: ; preds = %for.body + br label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry + ret void + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] + %a1 = getelementptr inbounds %s.double, %s.double* %b, i64 %indvars.iv, i32 0 + %tmp = load double, double* %a1, align 4 + %b4 = getelementptr inbounds %s.double, %s.double* %b, i64 %indvars.iv, i32 1 + %tmp1 = load double, double* %b4, align 4 + %add = fadd double %tmp, %tmp1 + %c = getelementptr inbounds %s.double, %s.double* %b, i64 %indvars.iv, i32 2 + %tmp2 = load double, double* %c, align 4 + %add7 = fadd double %add, %tmp2 + %a10 = getelementptr inbounds %s.double, %s.double* %a, i64 %indvars.iv, i32 0 + store double %add7, double* %a10, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %S + br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body +} + +; Test that negative offsets are avoided for memory accesses of vector type. +%s.vec = type { <4 x i32>, <4 x i32>, <4 x i32> } +define void @f7(%s.vec* nocapture %a, + %s.vec* nocapture readonly %b, + i32 zeroext %S) { +; CHECK-Z13-LABEL: f7: +; CHECK-Z13-NOT: -{{[0-9]+}}(%r +entry: + %cmp9 = icmp eq i32 %S, 0 + br i1 %cmp9, label %for.cond.cleanup, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.cond.cleanup.loopexit: ; preds = %for.body + br label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry + ret void + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] + %a1 = getelementptr inbounds %s.vec, %s.vec* %b, i64 %indvars.iv, i32 0 + %tmp = load <4 x i32>, <4 x i32>* %a1, align 4 + %b4 = getelementptr inbounds %s.vec, %s.vec* %b, i64 %indvars.iv, i32 1 + %tmp1 = load <4 x i32>, <4 x i32>* %b4, align 4 + %add = add <4 x i32> %tmp1, %tmp + %c = getelementptr inbounds %s.vec, %s.vec* %b, i64 %indvars.iv, i32 2 + %tmp2 = load <4 x i32>, <4 x i32>* %c, align 4 + %add7 = add <4 x i32> %add, %tmp2 + %a10 = getelementptr inbounds %s.vec, %s.vec* %a, i64 %indvars.iv, i32 0 + store <4 x i32> %add7, <4 x i32>* %a10, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %S + br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body +} |

