diff options
| author | Wei Mi <wmi@google.com> | 2017-01-17 23:39:07 +0000 |
|---|---|---|
| committer | Wei Mi <wmi@google.com> | 2017-01-17 23:39:07 +0000 |
| commit | 8f4178a59efa56c1f4f211f9e023e0926b04555c (patch) | |
| tree | e8c39e98a2174b3c77df44287995926c048c4052 /llvm/test | |
| parent | 6dede18cb12d2322532ebcfb041f463caf183d01 (diff) | |
| download | bcm5719-llvm-8f4178a59efa56c1f4f211f9e023e0926b04555c.tar.gz bcm5719-llvm-8f4178a59efa56c1f4f211f9e023e0926b04555c.zip | |
[RegisterCoalescing] Remove partially redundant copy.
The patch is to solve the performance problem described in PR27827.
Register coalescing sometimes cannot remove a copy because of interference.
But if we can find a reverse copy in one of the predecessor blocks of the copy,
the copy is partially redundant, and we may partially remove it by moving
it to the predecessor block without the reverse copy.
Differential Revision: https://reviews.llvm.org/D28585
llvm-svn: 292292
Diffstat (limited to 'llvm/test')
| -rw-r--r-- | llvm/test/CodeGen/X86/pre-coalesce.ll | 51 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/pre-coalesce.mir | 122 |
2 files changed, 173 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/pre-coalesce.ll b/llvm/test/CodeGen/X86/pre-coalesce.ll new file mode 100644 index 00000000000..9cd6365453c --- /dev/null +++ b/llvm/test/CodeGen/X86/pre-coalesce.ll @@ -0,0 +1,51 @@ +; RUN: llc -regalloc=greedy -mtriple=x86_64-unknown-linux-gnu < %s -o - | FileCheck %s +; +; The test is to check no redundent mov as follows will be generated in %while.body loop. +; .LBB0_2: +; movsbl %cl, %ecx +; movl %edx, %eax ==> This movl can be promoted outside of loop. +; shll $5, %eax +; ... +; movl %eax, %edx +; jne .LBB0_2 +; +; CHECK-LABEL: foo: +; CHECK: [[L0:.LBB0_[0-9]+]]: # %while.body +; CHECK: movl %[[REGA:.*]], %[[REGB:.*]] +; CHECK-NOT: movl %[[REGB]], %[[REGA]] +; CHECK: jne [[L0]] +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +@b = common local_unnamed_addr global i8* null, align 8 +@a = common local_unnamed_addr global i32 0, align 4 + +define i32 @foo() local_unnamed_addr { +entry: + %t0 = load i8*, i8** @b, align 8 + %t1 = load i8, i8* %t0, align 1 + %cmp4 = icmp eq i8 %t1, 0 + %t2 = load i32, i32* @a, align 4 + br i1 %cmp4, label %while.end, label %while.body.preheader + +while.body.preheader: ; preds = %entry + br label %while.body + +while.body: ; preds = %while.body.preheader, %while.body + %t3 = phi i32 [ %add3, %while.body ], [ %t2, %while.body.preheader ] + %t4 = phi i8 [ %t5, %while.body ], [ %t1, %while.body.preheader ] + %conv = sext i8 %t4 to i32 + %add = mul i32 %t3, 33 + %add3 = add nsw i32 %add, %conv + store i32 %add3, i32* @a, align 4 + %t5 = load i8, i8* %t0, align 1 + %cmp = icmp eq i8 %t5, 0 + br i1 %cmp, label %while.end.loopexit, label %while.body + +while.end.loopexit: ; preds = %while.body + br label %while.end + +while.end: ; preds = %while.end.loopexit, %entry + %.lcssa = phi i32 [ %t2, %entry ], [ %add3, %while.end.loopexit ] + ret i32 %.lcssa +} diff --git a/llvm/test/CodeGen/X86/pre-coalesce.mir b/llvm/test/CodeGen/X86/pre-coalesce.mir new file mode 100644 index 
00000000000..11805fe090b --- /dev/null +++ b/llvm/test/CodeGen/X86/pre-coalesce.mir @@ -0,0 +1,122 @@ +# RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass simple-register-coalescing -o - %s | FileCheck %s +# Check there is no partial redundent copy left in the loop after register coalescing. +--- | + ; ModuleID = '<stdin>' + source_filename = "<stdin>" + target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + target triple = "x86_64-unknown-linux-gnu" + + @b = common local_unnamed_addr global i8* null, align 8 + @a = common local_unnamed_addr global i32 0, align 4 + + define i32 @foo() local_unnamed_addr { + entry: + %t0 = load i8*, i8** @b, align 8 + %t1 = load i8, i8* %t0, align 1 + %cmp4 = icmp eq i8 %t1, 0 + %t2 = load i32, i32* @a, align 4 + br i1 %cmp4, label %while.end, label %while.body.preheader + + while.body.preheader: ; preds = %entry + br label %while.body + + while.body: ; preds = %while.body, %while.body.preheader + %t3 = phi i32 [ %add3, %while.body ], [ %t2, %while.body.preheader ] + %t4 = phi i8 [ %t5, %while.body ], [ %t1, %while.body.preheader ] + %conv = sext i8 %t4 to i32 + %add = mul i32 %t3, 33 + %add3 = add nsw i32 %add, %conv + store i32 %add3, i32* @a, align 4 + %t5 = load i8, i8* %t0, align 1 + %cmp = icmp eq i8 %t5, 0 + br i1 %cmp, label %while.end, label %while.body + + while.end: ; preds = %while.body, %entry + %.lcssa = phi i32 [ %t2, %entry ], [ %add3, %while.body ] + ret i32 %.lcssa + } + +... +--- +# Check A = B and B = A copies will not exist in the loop at the same time. 
+# CHECK: name: foo +# CHECK: [[L1:bb.3.while.body]]: +# CHECK: %[[REGA:.*]] = COPY %[[REGB:.*]] +# CHECK-NOT: %[[REGB]] = COPY %[[REGA]] +# CHECK: JNE_1 %[[L1]] + +name: foo +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: gr64 } + - { id: 1, class: gr8 } + - { id: 2, class: gr32 } + - { id: 3, class: gr32 } + - { id: 4, class: gr8 } + - { id: 5, class: gr32 } + - { id: 6, class: gr8 } + - { id: 7, class: gr32 } + - { id: 8, class: gr32 } + - { id: 9, class: gr32 } + - { id: 10, class: gr32 } + - { id: 11, class: gr32 } + - { id: 12, class: gr8 } + - { id: 13, class: gr32 } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +body: | + bb.0.entry: + successors: %bb.4(0x30000000), %bb.1.while.body.preheader(0x50000000) + + %0 = MOV64rm %rip, 1, _, @b, _ :: (dereferenceable load 8 from @b) + %12 = MOV8rm %0, 1, _, 0, _ :: (load 1 from %ir.t0) + TEST8rr %12, %12, implicit-def %eflags + %11 = MOV32rm %rip, 1, _, @a, _ :: (dereferenceable load 4 from @a) + JNE_1 %bb.1.while.body.preheader, implicit killed %eflags + + bb.4: + successors: %bb.3.while.end(0x80000000) + + %10 = COPY %11 + JMP_1 %bb.3.while.end + + bb.1.while.body.preheader: + successors: %bb.2.while.body(0x80000000) + + bb.2.while.body: + successors: %bb.3.while.end(0x04000000), %bb.2.while.body(0x7c000000) + + %8 = MOVSX32rr8 %12 + %10 = COPY %11 + %10 = SHL32ri %10, 5, implicit-def dead %eflags + %10 = ADD32rr %10, %11, implicit-def dead %eflags + %10 = ADD32rr %10, %8, implicit-def dead %eflags + MOV32mr %rip, 1, _, @a, _, %10 :: (store 4 into @a) + %12 = MOV8rm %0, 1, _, 0, _ :: (load 1 from %ir.t0) + TEST8rr %12, %12, implicit-def 
%eflags + %11 = COPY %10 + JNE_1 %bb.2.while.body, implicit killed %eflags + JMP_1 %bb.3.while.end + + bb.3.while.end: + %eax = COPY %10 + RET 0, killed %eax + +... |

