diff options
| author | Jonas Paulsson <paulsson@linux.vnet.ibm.com> | 2017-07-21 11:59:37 +0000 |
|---|---|---|
| committer | Jonas Paulsson <paulsson@linux.vnet.ibm.com> | 2017-07-21 11:59:37 +0000 |
| commit | 024e319489daa7fe28e0d639502f2f141fb5a146 (patch) | |
| tree | c838b5343c4d602232ba351c8cb5ea4e40474798 /llvm/test/CodeGen | |
| parent | 7d2b15a7ab5607ed5f3afd85426fdd37cb0bec70 (diff) | |
| download | bcm5719-llvm-024e319489daa7fe28e0d639502f2f141fb5a146.tar.gz bcm5719-llvm-024e319489daa7fe28e0d639502f2f141fb5a146.zip | |
[SystemZ, LoopStrengthReduce]
This patch makes LSR generate better code for SystemZ in the cases of memory
intrinsics, Load->Store pairs, or comparisons of an immediate with memory.
In order to achieve this, the following common code changes were made:
* New TTI hook: LSRWithInstrQueries(), which defaults to false. Controls whether
LSR should do instruction-based addressing evaluations by calling
isLegalAddressingMode() with the Instruction pointers.
* In LoopStrengthReduce: handle address operands of memset, memmove and memcpy
as address uses, and call isFoldableMemAccessOffset() for any LSRUse::Address,
not just loads or stores.
SystemZ changes:
* isLSRCostLess() implemented with Insns first, and without ImmCost.
* New function supportedAddressingMode() that is a helper for TTI methods
looking at Instructions passed via pointers.
Review: Ulrich Weigand, Quentin Colombet
https://reviews.llvm.org/D35262
https://reviews.llvm.org/D35049
llvm-svn: 308729
Diffstat (limited to 'llvm/test/CodeGen')
| -rw-r--r-- | llvm/test/CodeGen/SystemZ/dag-combine-01.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/SystemZ/loop-01.ll | 83 |
2 files changed, 83 insertions, 2 deletions
diff --git a/llvm/test/CodeGen/SystemZ/dag-combine-01.ll b/llvm/test/CodeGen/SystemZ/dag-combine-01.ll index a56a118dada..019421cfdfe 100644 --- a/llvm/test/CodeGen/SystemZ/dag-combine-01.ll +++ b/llvm/test/CodeGen/SystemZ/dag-combine-01.ll @@ -40,7 +40,7 @@ for.body.3.lr.ph.i: ; preds = %for.body.3.lr.ph.i. for.body.3.i: ; preds = %for.body.3.i, %for.body.3.lr.ph.i ; CHECK-LABEL: .LBB0_5: ; CHECK-NOT: stfh %r{{.*}}, 0(%r{{.*}}) -; CHECK: lg %r{{.*}}, -4(%r{{.*}}) +; CHECK: lg %r{{.*}}, 8(%r{{.*}}) ; Overlapping load should go before the store %indvars.iv.i = phi i64 [ 0, %for.body.3.lr.ph.i ], [ %indvars.iv.next.i, %for.body.3.i ] %3 = shl nsw i64 %indvars.iv.i, 6 diff --git a/llvm/test/CodeGen/SystemZ/loop-01.ll b/llvm/test/CodeGen/SystemZ/loop-01.ll index 321be4b8e62..79afc7f4198 100644 --- a/llvm/test/CodeGen/SystemZ/loop-01.ll +++ b/llvm/test/CodeGen/SystemZ/loop-01.ll @@ -9,7 +9,7 @@ define void @f1(i32 *%dest, i32 %a) { ; CHECK-LABEL: f1: ; CHECK-NOT: sllg -; CHECK: st %r3, 0({{%r[1-5],%r[1-5]}}) +; CHECK: st %r3, 400({{%r[1-5],%r[1-5]}}) ; CHECK: br %r14 entry: br label %loop @@ -239,3 +239,84 @@ for.body: ; preds = %for.body.preheader, %for.body %exitcond = icmp eq i32 %lftr.wideiv, %S br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body } + +; Test that a memcpy loop does not get a lot of lays before each mvc (D12 and no index-reg). 
+%0 = type { %1, %2* } +%1 = type { %2*, %2* } +%2 = type <{ %3, i32, [4 x i8] }> +%3 = type { i16*, i16*, i16* } + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) #0 + +define void @f8() { +; CHECK-Z13-LABEL: f8: +; CHECK-Z13: mvc +; CHECK-Z13-NEXT: mvc +; CHECK-Z13-NEXT: mvc +; CHECK-Z13-NEXT: mvc + +bb: + %tmp = load %0*, %0** undef, align 8 + br i1 undef, label %bb2, label %bb1 + +bb1: ; preds = %bb + br label %bb2 + +bb2: ; preds = %bb1, %bb + %tmp3 = phi %0* [ %tmp, %bb ], [ undef, %bb1 ] + %tmp4 = phi %0* [ undef, %bb ], [ undef, %bb1 ] + br label %bb5 + +bb5: ; preds = %bb5, %bb2 + %tmp6 = phi %0* [ %tmp21, %bb5 ], [ %tmp3, %bb2 ] + %tmp7 = phi %0* [ %tmp20, %bb5 ], [ %tmp4, %bb2 ] + %tmp8 = getelementptr inbounds %0, %0* %tmp7, i64 -1 + %tmp9 = getelementptr inbounds %0, %0* %tmp6, i64 -1 + %tmp10 = bitcast %0* %tmp9 to i8* + %tmp11 = bitcast %0* %tmp8 to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp10, i8* %tmp11, i64 24, i32 8, i1 false) + %tmp12 = getelementptr inbounds %0, %0* %tmp7, i64 -2 + %tmp13 = getelementptr inbounds %0, %0* %tmp6, i64 -2 + %tmp14 = bitcast %0* %tmp13 to i8* + %tmp15 = bitcast %0* %tmp12 to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp14, i8* %tmp15, i64 24, i32 8, i1 false) + %tmp16 = getelementptr inbounds %0, %0* %tmp7, i64 -3 + %tmp17 = getelementptr inbounds %0, %0* %tmp6, i64 -3 + %tmp18 = bitcast %0* %tmp17 to i8* + %tmp19 = bitcast %0* %tmp16 to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp18, i8* %tmp19, i64 24, i32 8, i1 false) + %tmp20 = getelementptr inbounds %0, %0* %tmp7, i64 -4 + %tmp21 = getelementptr inbounds %0, %0* %tmp6, i64 -4 + %tmp22 = bitcast %0* %tmp21 to i8* + %tmp23 = bitcast %0* %tmp20 to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp22, i8* %tmp23, i64 24, i32 8, i1 false) + br label %bb5 +} + +; Test that a chsi does not need an aghik inside the loop (no index reg) +define void @f9() { +; 
CHECK-Z13-LABEL: f9: +; CHECK-Z13: # =>This Inner Loop Header: Depth=1 +; CHECK-Z13-NOT: aghik +; CHECK-Z13: chsi + +entry: + br label %for.body.i63 + +for.body.i63: ; preds = %for.inc.i, %entry + %indvars.iv155.i = phi i64 [ 0, %entry ], [ %indvars.iv.next156.i.3, %for.inc.i ] + %arrayidx.i62 = getelementptr inbounds i32, i32* undef, i64 %indvars.iv155.i + %tmp = load i32, i32* %arrayidx.i62, align 4 + %cmp9.i = icmp eq i32 %tmp, 0 + br i1 %cmp9.i, label %for.inc.i, label %if.then10.i + +if.then10.i: ; preds = %for.body.i63 + unreachable + +for.inc.i: ; preds = %for.body.i63 + %indvars.iv.next156.i = or i64 %indvars.iv155.i, 1 + %arrayidx.i62.1 = getelementptr inbounds i32, i32* undef, i64 %indvars.iv.next156.i + %tmp1 = load i32, i32* %arrayidx.i62.1, align 4 + %indvars.iv.next156.i.3 = add nsw i64 %indvars.iv155.i, 4 + br label %for.body.i63 +} |

