diff options
| author | Jonas Paulsson <paulsson@linux.vnet.ibm.com> | 2017-07-21 11:59:37 +0000 |
|---|---|---|
| committer | Jonas Paulsson <paulsson@linux.vnet.ibm.com> | 2017-07-21 11:59:37 +0000 |
| commit | 024e319489daa7fe28e0d639502f2f141fb5a146 (patch) | |
| tree | c838b5343c4d602232ba351c8cb5ea4e40474798 /llvm/test/CodeGen | |
| parent | 7d2b15a7ab5607ed5f3afd85426fdd37cb0bec70 (diff) | |
| download | bcm5719-llvm-024e319489daa7fe28e0d639502f2f141fb5a146.tar.gz bcm5719-llvm-024e319489daa7fe28e0d639502f2f141fb5a146.zip | |
[SystemZ, LoopStrengthReduce]
This patch makes LSR generate better code for SystemZ in the cases of memory
intrinsics, Load->Store pairs, or comparisons of an immediate with memory.
In order to achieve this, the following common code changes were made:
* New TTI hook: LSRWithInstrQueries(), which defaults to false. Controls whether
LSR should do instruction-based addressing evaluations by calling
isLegalAddressingMode() with the Instruction pointers.
* In LoopStrengthReduce: handle address operands of memset, memmove and memcpy
as address uses, and call isFoldableMemAccessOffset() for any LSRUse::Address,
not just loads or stores.
SystemZ changes:
* isLSRCostLess() implemented with Insns first, and without ImmCost.
* New function supportedAddressingMode() that is a helper for TTI methods
looking at Instructions passed via pointers.
Review: Ulrich Weigand, Quentin Colombet
https://reviews.llvm.org/D35262
https://reviews.llvm.org/D35049
llvm-svn: 308729
Diffstat (limited to 'llvm/test/CodeGen')
| -rw-r--r-- | llvm/test/CodeGen/SystemZ/dag-combine-01.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/SystemZ/loop-01.ll | 83 |
2 files changed, 83 insertions, 2 deletions
diff --git a/llvm/test/CodeGen/SystemZ/dag-combine-01.ll b/llvm/test/CodeGen/SystemZ/dag-combine-01.ll index a56a118dada..019421cfdfe 100644 --- a/llvm/test/CodeGen/SystemZ/dag-combine-01.ll +++ b/llvm/test/CodeGen/SystemZ/dag-combine-01.ll @@ -40,7 +40,7 @@ for.body.3.lr.ph.i: ; preds = %for.body.3.lr.ph.i. for.body.3.i: ; preds = %for.body.3.i, %for.body.3.lr.ph.i ; CHECK-LABEL: .LBB0_5: ; CHECK-NOT: stfh %r{{.*}}, 0(%r{{.*}}) -; CHECK: lg %r{{.*}}, -4(%r{{.*}}) +; CHECK: lg %r{{.*}}, 8(%r{{.*}}) ; Overlapping load should go before the store %indvars.iv.i = phi i64 [ 0, %for.body.3.lr.ph.i ], [ %indvars.iv.next.i, %for.body.3.i ] %3 = shl nsw i64 %indvars.iv.i, 6 diff --git a/llvm/test/CodeGen/SystemZ/loop-01.ll b/llvm/test/CodeGen/SystemZ/loop-01.ll index 321be4b8e62..79afc7f4198 100644 --- a/llvm/test/CodeGen/SystemZ/loop-01.ll +++ b/llvm/test/CodeGen/SystemZ/loop-01.ll @@ -9,7 +9,7 @@ define void @f1(i32 *%dest, i32 %a) { ; CHECK-LABEL: f1: ; CHECK-NOT: sllg -; CHECK: st %r3, 0({{%r[1-5],%r[1-5]}}) +; CHECK: st %r3, 400({{%r[1-5],%r[1-5]}}) ; CHECK: br %r14 entry: br label %loop @@ -239,3 +239,84 @@ for.body: ; preds = %for.body.preheader, %for.body %exitcond = icmp eq i32 %lftr.wideiv, %S br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body } + +; Test that a memcpy loop does not get a lot of lays before each mvc (D12 and no index-reg). 
+%0 = type { %1, %2* } +%1 = type { %2*, %2* } +%2 = type <{ %3, i32, [4 x i8] }> +%3 = type { i16*, i16*, i16* } + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) #0 + +define void @f8() { +; CHECK-Z13-LABEL: f8: +; CHECK-Z13: mvc +; CHECK-Z13-NEXT: mvc +; CHECK-Z13-NEXT: mvc +; CHECK-Z13-NEXT: mvc + +bb: + %tmp = load %0*, %0** undef, align 8 + br i1 undef, label %bb2, label %bb1 + +bb1: ; preds = %bb + br label %bb2 + +bb2: ; preds = %bb1, %bb + %tmp3 = phi %0* [ %tmp, %bb ], [ undef, %bb1 ] + %tmp4 = phi %0* [ undef, %bb ], [ undef, %bb1 ] + br label %bb5 + +bb5: ; preds = %bb5, %bb2 + %tmp6 = phi %0* [ %tmp21, %bb5 ], [ %tmp3, %bb2 ] + %tmp7 = phi %0* [ %tmp20, %bb5 ], [ %tmp4, %bb2 ] + %tmp8 = getelementptr inbounds %0, %0* %tmp7, i64 -1 + %tmp9 = getelementptr inbounds %0, %0* %tmp6, i64 -1 + %tmp10 = bitcast %0* %tmp9 to i8* + %tmp11 = bitcast %0* %tmp8 to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp10, i8* %tmp11, i64 24, i32 8, i1 false) + %tmp12 = getelementptr inbounds %0, %0* %tmp7, i64 -2 + %tmp13 = getelementptr inbounds %0, %0* %tmp6, i64 -2 + %tmp14 = bitcast %0* %tmp13 to i8* + %tmp15 = bitcast %0* %tmp12 to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp14, i8* %tmp15, i64 24, i32 8, i1 false) + %tmp16 = getelementptr inbounds %0, %0* %tmp7, i64 -3 + %tmp17 = getelementptr inbounds %0, %0* %tmp6, i64 -3 + %tmp18 = bitcast %0* %tmp17 to i8* + %tmp19 = bitcast %0* %tmp16 to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp18, i8* %tmp19, i64 24, i32 8, i1 false) + %tmp20 = getelementptr inbounds %0, %0* %tmp7, i64 -4 + %tmp21 = getelementptr inbounds %0, %0* %tmp6, i64 -4 + %tmp22 = bitcast %0* %tmp21 to i8* + %tmp23 = bitcast %0* %tmp20 to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp22, i8* %tmp23, i64 24, i32 8, i1 false) + br label %bb5 +} + +; Test that a chsi does not need an aghik inside the loop (no index reg) +define void @f9() { +; 
CHECK-Z13-LABEL: f9: +; CHECK-Z13: # =>This Inner Loop Header: Depth=1 +; CHECK-Z13-NOT: aghik +; CHECK-Z13: chsi + +entry: + br label %for.body.i63 + +for.body.i63: ; preds = %for.inc.i, %entry + %indvars.iv155.i = phi i64 [ 0, %entry ], [ %indvars.iv.next156.i.3, %for.inc.i ] + %arrayidx.i62 = getelementptr inbounds i32, i32* undef, i64 %indvars.iv155.i + %tmp = load i32, i32* %arrayidx.i62, align 4 + %cmp9.i = icmp eq i32 %tmp, 0 + br i1 %cmp9.i, label %for.inc.i, label %if.then10.i + +if.then10.i: ; preds = %for.body.i63 + unreachable + +for.inc.i: ; preds = %for.body.i63 + %indvars.iv.next156.i = or i64 %indvars.iv155.i, 1 + %arrayidx.i62.1 = getelementptr inbounds i32, i32* undef, i64 %indvars.iv.next156.i + %tmp1 = load i32, i32* %arrayidx.i62.1, align 4 + %indvars.iv.next156.i.3 = add nsw i64 %indvars.iv155.i, 4 + br label %for.body.i63 +} |

