summaryrefslogtreecommitdiffstats
path: root/llvm/test
diff options
context:
space:
mode:
authorNadav Rotem <nrotem@apple.com>2012-09-30 06:24:14 +0000
committerNadav Rotem <nrotem@apple.com>2012-09-30 06:24:14 +0000
commit45715b25f79ebdff1a187284d5c9830fd69e09b6 (patch)
tree5a2d705d9870c2d0d6f1d58fd7f18a8b474c0537 /llvm/test
parentdda435bcefac9d6c126e2d7d0411751ac31fa428 (diff)
downloadbcm5719-llvm-45715b25f79ebdff1a187284d5c9830fd69e09b6.tar.gz
bcm5719-llvm-45715b25f79ebdff1a187284d5c9830fd69e09b6.zip
A DAGCombine optimization for merging consecutive stores. This optimization is not profitable in many cases
because moden processos can store multiple values in parallel, and preparing the consecutive store requires some work. We only handle these cases: 1. Consecutive stores where the values and consecutive loads. For example: int a = p->a; int b = p->b; q->a = a; q->b = b; 2. Consecutive stores where the values are constants. Foe example: q->a = 4; q->b = 5; llvm-svn: 164910
Diffstat (limited to 'llvm/test')
-rw-r--r--llvm/test/CodeGen/X86/MergeConsecutiveStores.ll150
-rw-r--r--llvm/test/CodeGen/X86/loop-strength-reduce-2.ll10
-rw-r--r--llvm/test/CodeGen/X86/loop-strength-reduce-3.ll5
-rw-r--r--llvm/test/CodeGen/X86/loop-strength-reduce.ll5
4 files changed, 158 insertions, 12 deletions
diff --git a/llvm/test/CodeGen/X86/MergeConsecutiveStores.ll b/llvm/test/CodeGen/X86/MergeConsecutiveStores.ll
new file mode 100644
index 00000000000..435f38c8adb
--- /dev/null
+++ b/llvm/test/CodeGen/X86/MergeConsecutiveStores.ll
@@ -0,0 +1,150 @@
+; RUN: llc -march=x86-64 -mcpu=corei7 < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+%struct.A = type { i8, i8, i8, i8, i8, i8, i8, i8 }
+
+@a = common global [10000 x %struct.A] zeroinitializer, align 8
+
+; Move all of the constants using a single vector store.
+; CHECK: merge_const_store
+; CHECK: movq %xmm0
+; CHECK: ret
+define void @merge_const_store(i32 %count, %struct.A* nocapture %p) nounwind uwtable noinline ssp {
+ %1 = icmp sgt i32 %count, 0
+ br i1 %1, label %.lr.ph, label %._crit_edge
+.lr.ph:
+ %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]
+ %.01 = phi %struct.A* [ %11, %.lr.ph ], [ %p, %0 ]
+ %2 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
+ store i8 1, i8* %2, align 1
+ %3 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
+ store i8 2, i8* %3, align 1
+ %4 = getelementptr inbounds %struct.A* %.01, i64 0, i32 2
+ store i8 3, i8* %4, align 1
+ %5 = getelementptr inbounds %struct.A* %.01, i64 0, i32 3
+ store i8 4, i8* %5, align 1
+ %6 = getelementptr inbounds %struct.A* %.01, i64 0, i32 4
+ store i8 5, i8* %6, align 1
+ %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 5
+ store i8 6, i8* %7, align 1
+ %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 6
+ store i8 7, i8* %8, align 1
+ %9 = getelementptr inbounds %struct.A* %.01, i64 0, i32 7
+ store i8 8, i8* %9, align 1
+ %10 = add nsw i32 %i.02, 1
+ %11 = getelementptr inbounds %struct.A* %.01, i64 1
+ %exitcond = icmp eq i32 %10, %count
+ br i1 %exitcond, label %._crit_edge, label %.lr.ph
+._crit_edge:
+ ret void
+}
+
+; Move the first 4 constants as a single vector. Move the rest as scalars.
+; CHECK: merge_nonconst_store
+; CHECK: movd %xmm0
+; CHECK: movb
+; CHECK: movb
+; CHECK: movb
+; CHECK: movb
+; CHECK: ret
+define void @merge_nonconst_store(i32 %count, i8 %zz, %struct.A* nocapture %p) nounwind uwtable noinline ssp {
+ %1 = icmp sgt i32 %count, 0
+ br i1 %1, label %.lr.ph, label %._crit_edge
+.lr.ph:
+ %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]
+ %.01 = phi %struct.A* [ %11, %.lr.ph ], [ %p, %0 ]
+ %2 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
+ store i8 1, i8* %2, align 1
+ %3 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
+ store i8 2, i8* %3, align 1
+ %4 = getelementptr inbounds %struct.A* %.01, i64 0, i32 2
+ store i8 3, i8* %4, align 1
+ %5 = getelementptr inbounds %struct.A* %.01, i64 0, i32 3
+ store i8 4, i8* %5, align 1
+ %6 = getelementptr inbounds %struct.A* %.01, i64 0, i32 4
+ store i8 %zz, i8* %6, align 1 ; <----------- Not a const;
+ %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 5
+ store i8 6, i8* %7, align 1
+ %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 6
+ store i8 7, i8* %8, align 1
+ %9 = getelementptr inbounds %struct.A* %.01, i64 0, i32 7
+ store i8 8, i8* %9, align 1
+ %10 = add nsw i32 %i.02, 1
+ %11 = getelementptr inbounds %struct.A* %.01, i64 1
+ %exitcond = icmp eq i32 %10, %count
+ br i1 %exitcond, label %._crit_edge, label %.lr.ph
+._crit_edge:
+ ret void
+}
+
+
+;CHECK: merge_loads
+; load:
+;CHECK: movw
+; store:
+;CHECK: movw
+;CHECK: ret
+define void @merge_loads(i32 %count, %struct.A* noalias nocapture %q, %struct.A* noalias nocapture %p) nounwind uwtable noinline ssp {
+ %1 = icmp sgt i32 %count, 0
+ br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph: ; preds = %0
+ %2 = getelementptr inbounds %struct.A* %q, i64 0, i32 0
+ %3 = getelementptr inbounds %struct.A* %q, i64 0, i32 1
+ br label %4
+
+; <label>:4 ; preds = %4, %.lr.ph
+ %i.02 = phi i32 [ 0, %.lr.ph ], [ %9, %4 ]
+ %.01 = phi %struct.A* [ %p, %.lr.ph ], [ %10, %4 ]
+ %5 = load i8* %2, align 1
+ %6 = load i8* %3, align 1
+ %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
+ store i8 %5, i8* %7, align 1
+ %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
+ store i8 %6, i8* %8, align 1
+ %9 = add nsw i32 %i.02, 1
+ %10 = getelementptr inbounds %struct.A* %.01, i64 1
+ %exitcond = icmp eq i32 %9, %count
+ br i1 %exitcond, label %._crit_edge, label %4
+
+._crit_edge: ; preds = %4, %0
+ ret void
+}
+
+; The loads and the stores are interleved. Can't merge them.
+;CHECK: no_merge_loads
+;CHECK: movb
+;CHECK: movb
+;CHECK: movb
+;CHECK: movb
+;CHECK: ret
+define void @no_merge_loads(i32 %count, %struct.A* noalias nocapture %q, %struct.A* noalias nocapture %p) nounwind uwtable noinline ssp {
+ %1 = icmp sgt i32 %count, 0
+ br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph: ; preds = %0
+ %2 = getelementptr inbounds %struct.A* %q, i64 0, i32 0
+ %3 = getelementptr inbounds %struct.A* %q, i64 0, i32 1
+ br label %a4
+
+a4: ; preds = %4, %.lr.ph
+ %i.02 = phi i32 [ 0, %.lr.ph ], [ %a9, %a4 ]
+ %.01 = phi %struct.A* [ %p, %.lr.ph ], [ %a10, %a4 ]
+ %a5 = load i8* %2, align 1
+ %a7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
+ store i8 %a5, i8* %a7, align 1
+ %a8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
+ %a6 = load i8* %3, align 1
+ store i8 %a6, i8* %a8, align 1
+ %a9 = add nsw i32 %i.02, 1
+ %a10 = getelementptr inbounds %struct.A* %.01, i64 1
+ %exitcond = icmp eq i32 %a9, %count
+ br i1 %exitcond, label %._crit_edge, label %a4
+
+._crit_edge: ; preds = %4, %0
+ ret void
+}
+
+
diff --git a/llvm/test/CodeGen/X86/loop-strength-reduce-2.ll b/llvm/test/CodeGen/X86/loop-strength-reduce-2.ll
index b546462b684..b094fed2f67 100644
--- a/llvm/test/CodeGen/X86/loop-strength-reduce-2.ll
+++ b/llvm/test/CodeGen/X86/loop-strength-reduce-2.ll
@@ -1,20 +1,18 @@
-; RUN: llc < %s -march=x86 -relocation-model=pic | FileCheck %s -check-prefix=PIC
-; RUN: llc < %s -march=x86 -relocation-model=static | FileCheck %s -check-prefix=STATIC
+; RUN: llc < %s -march=x86 -mcpu=corei7 -relocation-model=pic | FileCheck %s -check-prefix=PIC
+; RUN: llc < %s -march=x86 -mcpu=corei7 -relocation-model=static | FileCheck %s -check-prefix=STATIC
;
; Make sure the common loop invariant A is hoisted up to preheader,
; since too many registers are needed to subsume it into the addressing modes.
; It's safe to sink A in when it's not pic.
; PIC: align
-; PIC: movl $4, -4([[REG:%e[a-z]+]])
-; PIC: movl $5, ([[REG]])
+; PIC: movlpd %xmm0, -4([[REG:%e[a-z]+]])
; PIC: addl $4, [[REG]]
; PIC: decl {{%e[[a-z]+}}
; PIC: jne
; STATIC: align
-; STATIC: movl $4, -4(%ecx)
-; STATIC: movl $5, (%ecx)
+; STATIC: movlpd %xmm0, -4(%ecx)
; STATIC: addl $4, %ecx
; STATIC: decl %eax
; STATIC: jne
diff --git a/llvm/test/CodeGen/X86/loop-strength-reduce-3.ll b/llvm/test/CodeGen/X86/loop-strength-reduce-3.ll
index b1c9fb9c077..e2d77f2a6fd 100644
--- a/llvm/test/CodeGen/X86/loop-strength-reduce-3.ll
+++ b/llvm/test/CodeGen/X86/loop-strength-reduce-3.ll
@@ -1,8 +1,7 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=dynamic-no-pic | FileCheck %s
+; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=corei7 -relocation-model=dynamic-no-pic | FileCheck %s
; CHECK: align
-; CHECK: movl $4, -4(%ecx)
-; CHECK: movl $5, (%ecx)
+; CHECK: movlpd %xmm0, -4(%ecx)
; CHECK: addl $4, %ecx
; CHECK: decl %eax
; CHECK: jne
diff --git a/llvm/test/CodeGen/X86/loop-strength-reduce.ll b/llvm/test/CodeGen/X86/loop-strength-reduce.ll
index 42c6ac4983d..d197451eee3 100644
--- a/llvm/test/CodeGen/X86/loop-strength-reduce.ll
+++ b/llvm/test/CodeGen/X86/loop-strength-reduce.ll
@@ -1,8 +1,7 @@
-; RUN: llc < %s -march=x86 -relocation-model=static | FileCheck %s
+; RUN: llc < %s -march=x86 -mcpu=corei7 -relocation-model=static | FileCheck %s
; CHECK: align
-; CHECK: movl $4, -4(%ecx)
-; CHECK: movl $5, (%ecx)
+; CHECK: movlpd %xmm0, -4(%ecx)
; CHECK: addl $4, %ecx
; CHECK: decl %eax
; CHECK: jne
OpenPOWER on IntegriCloud