[RegisterCoalescing] Remove partially redundant copy.

This patch addresses the performance problem described in PR27827. Register coalescing sometimes cannot remove a copy because of interference. However, if a reverse copy can be found in one of the predecessor blocks of the copy, the copy is partially redundant, and we may remove it partially by moving it into the predecessor block that does not contain the reverse copy. Differential Revision: https://reviews.llvm.org/D28585 llvm-svn: 292292
author: Wei Mi <wmi@google.com> 2017-01-17 23:39:07 +0000
committer: Wei Mi <wmi@google.com> 2017-01-17 23:39:07 +0000
commit: 8f4178a59efa56c1f4f211f9e023e0926b04555c (patch)
tree: e8c39e98a2174b3c77df44287995926c048c4052 /llvm/test
parent: 6dede18cb12d2322532ebcfb041f463caf183d01 (diff)
download: bcm5719-llvm-8f4178a59efa56c1f4f211f9e023e0926b04555c.tar.gz
bcm5719-llvm-8f4178a59efa56c1f4f211f9e023e0926b04555c.zip
2 files changed, 173 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/pre-coalesce.ll b/llvm/test/CodeGen/X86/pre-coalesce.ll
new file mode 100644
index 00000000000..9cd6365453c
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pre-coalesce.ll
@@ -0,0 +1,51 @@
+; RUN: llc -regalloc=greedy -mtriple=x86_64-unknown-linux-gnu  < %s -o - | FileCheck %s
+;
+; This test checks that no redundant mov like the following is generated in the %while.body loop.
+;  .LBB0_2:
+;    movsbl	%cl, %ecx
+;    movl	%edx, %eax   ==> This movl can be promoted outside of loop.
+;    shll	$5, %eax
+;    ...
+;    movl	%eax, %edx
+;    jne     .LBB0_2
+;
+; CHECK-LABEL: foo:
+; CHECK: [[L0:.LBB0_[0-9]+]]: # %while.body
+; CHECK: movl %[[REGA:.*]], %[[REGB:.*]]
+; CHECK-NOT: movl %[[REGB]], %[[REGA]]
+; CHECK: jne [[L0]]
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+@b = common local_unnamed_addr global i8* null, align 8
+@a = common local_unnamed_addr global i32 0, align 4
+
+define i32 @foo() local_unnamed_addr { ; test input: a loop that folds a byte into an i32 accumulator kept in @a
+entry:
+  %t0 = load i8*, i8** @b, align 8 ; pointer currently stored in @b
+  %t1 = load i8, i8* %t0, align 1 ; first byte read through that pointer
+  %cmp4 = icmp eq i8 %t1, 0
+  %t2 = load i32, i32* @a, align 4 ; initial accumulator value
+  br i1 %cmp4, label %while.end, label %while.body.preheader ; zero byte: bypass the loop
+
+while.body.preheader:                             ; preds = %entry
+  br label %while.body
+
+while.body:                                       ; preds = %while.body.preheader, %while.body
+  %t3 = phi i32 [ %add3, %while.body ], [ %t2, %while.body.preheader ] ; loop-carried accumulator
+  %t4 = phi i8 [ %t5, %while.body ], [ %t1, %while.body.preheader ] ; byte to fold in during this iteration
+  %conv = sext i8 %t4 to i32
+  %add = mul i32 %t3, 33 ; despite the name this is a multiply: accumulator * 33
+  %add3 = add nsw i32 %add, %conv ; new accumulator = old * 33 + sign-extended byte
+  store i32 %add3, i32* @a, align 4 ; written back to @a on every iteration
+  %t5 = load i8, i8* %t0, align 1 ; re-read through the same pointer %t0
+  %cmp = icmp eq i8 %t5, 0
+  br i1 %cmp, label %while.end.loopexit, label %while.body ; leave the loop once the byte read is zero
+
+while.end.loopexit:                               ; preds = %while.body
+  br label %while.end
+
+while.end:                                        ; preds = %while.end.loopexit, %entry
+  %.lcssa = phi i32 [ %t2, %entry ], [ %add3, %while.end.loopexit ] ; %t2 if the loop was bypassed, else the last %add3
+  ret i32 %.lcssa
+}
diff --git a/llvm/test/CodeGen/X86/pre-coalesce.mir b/llvm/test/CodeGen/X86/pre-coalesce.mir
new file mode 100644
index 00000000000..11805fe090b
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pre-coalesce.mir
@@ -0,0 +1,122 @@
+# RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass simple-register-coalescing -o - %s | FileCheck %s
+# Check that no partially redundant copy is left in the loop after register coalescing.
+--- |
+  ; ModuleID = '<stdin>'
+  source_filename = "<stdin>"
+  target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+  target triple = "x86_64-unknown-linux-gnu"
+  
+  @b = common local_unnamed_addr global i8* null, align 8
+  @a = common local_unnamed_addr global i32 0, align 4
+  
+  define i32 @foo() local_unnamed_addr { ; IR that the machine-code body of this test refers to via %ir.* names
+  entry:
+    %t0 = load i8*, i8** @b, align 8 ; pointer currently stored in @b
+    %t1 = load i8, i8* %t0, align 1 ; first byte read through that pointer
+    %cmp4 = icmp eq i8 %t1, 0
+    %t2 = load i32, i32* @a, align 4 ; initial accumulator value
+    br i1 %cmp4, label %while.end, label %while.body.preheader ; zero byte: bypass the loop
+  
+  while.body.preheader:                             ; preds = %entry
+    br label %while.body
+  
+  while.body:                                       ; preds = %while.body, %while.body.preheader
+    %t3 = phi i32 [ %add3, %while.body ], [ %t2, %while.body.preheader ] ; loop-carried accumulator
+    %t4 = phi i8 [ %t5, %while.body ], [ %t1, %while.body.preheader ] ; byte to fold in during this iteration
+    %conv = sext i8 %t4 to i32
+    %add = mul i32 %t3, 33 ; despite the name this is a multiply: accumulator * 33
+    %add3 = add nsw i32 %add, %conv ; new accumulator = old * 33 + sign-extended byte
+    store i32 %add3, i32* @a, align 4 ; written back to @a on every iteration
+    %t5 = load i8, i8* %t0, align 1 ; re-read through the same pointer %t0
+    %cmp = icmp eq i8 %t5, 0
+    br i1 %cmp, label %while.end, label %while.body ; exits straight to %while.end (no separate loop-exit block here)
+  
+  while.end:                                        ; preds = %while.body, %entry
+    %.lcssa = phi i32 [ %t2, %entry ], [ %add3, %while.body ] ; %t2 if the loop was bypassed, else the last %add3
+    ret i32 %.lcssa
+  }
+
+...
+---
+# Check A = B and B = A copies will not exist in the loop at the same time.
+# CHECK: name: foo
+# CHECK: [[L1:bb.3.while.body]]:
+# CHECK: %[[REGA:.*]] = COPY %[[REGB:.*]]
+# CHECK-NOT: %[[REGB]] = COPY %[[REGA]]
+# CHECK: JNE_1 %[[L1]]
+
+name:            foo
+alignment:       4
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+registers:       
+  - { id: 0, class: gr64 }
+  - { id: 1, class: gr8 }
+  - { id: 2, class: gr32 }
+  - { id: 3, class: gr32 }
+  - { id: 4, class: gr8 }
+  - { id: 5, class: gr32 }
+  - { id: 6, class: gr8 }
+  - { id: 7, class: gr32 }
+  - { id: 8, class: gr32 }
+  - { id: 9, class: gr32 }
+  - { id: 10, class: gr32 }
+  - { id: 11, class: gr32 }
+  - { id: 12, class: gr8 }
+  - { id: 13, class: gr32 }
+frameInfo:       
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    0
+  adjustsStack:    false
+  hasCalls:        false
+  maxCallFrameSize: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+body:             |
+  bb.0.entry:
+    successors: %bb.4(0x30000000), %bb.1.while.body.preheader(0x50000000)
+  
+    %0 = MOV64rm %rip, 1, _, @b, _ :: (dereferenceable load 8 from @b)
+    %12 = MOV8rm %0, 1, _, 0, _ :: (load 1 from %ir.t0)
+    TEST8rr %12, %12, implicit-def %eflags
+    %11 = MOV32rm %rip, 1, _, @a, _ :: (dereferenceable load 4 from @a)
+    JNE_1 %bb.1.while.body.preheader, implicit killed %eflags
+  
+  bb.4: ; path that bypasses the loop
+    successors: %bb.3.while.end(0x80000000)
+  
+    %10 = COPY %11 ; %10 is what bb.3 returns, so the loaded value is forwarded here
+    JMP_1 %bb.3.while.end
+  
+  bb.1.while.body.preheader: ; empty block; the only predecessor of the loop besides the loop itself
+    successors: %bb.2.while.body(0x80000000)
+
+  bb.2.while.body: ; lists itself as a successor, i.e. it is also its own predecessor
+    successors: %bb.3.while.end(0x04000000), %bb.2.while.body(0x7c000000)
+  
+    %8 = MOVSX32rr8 %12
+    %10 = COPY %11 ; copy under test: its reverse is the last copy in this same block
+    %10 = SHL32ri %10, 5, implicit-def dead %eflags ; %10 = %11 << 5
+    %10 = ADD32rr %10, %11, implicit-def dead %eflags ; %10 = (%11 << 5) + %11
+    %10 = ADD32rr %10, %8, implicit-def dead %eflags ; plus the sign-extended byte in %8
+    MOV32mr %rip, 1, _, @a, _, %10 :: (store 4 into @a)
+    %12 = MOV8rm %0, 1, _, 0, _ :: (load 1 from %ir.t0)
+    TEST8rr %12, %12, implicit-def %eflags
+    %11 = COPY %10 ; reverse of the copy at the top of the loop
+    JNE_1 %bb.2.while.body, implicit killed %eflags
+    JMP_1 %bb.3.while.end
+  
+  bb.3.while.end:
+    %eax = COPY %10
+    RET 0, killed %eax
+
+...
author	Wei Mi <wmi@google.com>	2017-01-17 23:39:07 +0000
committer	Wei Mi <wmi@google.com>	2017-01-17 23:39:07 +0000
commit	8f4178a59efa56c1f4f211f9e023e0926b04555c (patch)
tree	e8c39e98a2174b3c77df44287995926c048c4052 /llvm/test
parent	6dede18cb12d2322532ebcfb041f463caf183d01 (diff)
download	bcm5719-llvm-8f4178a59efa56c1f4f211f9e023e0926b04555c.tar.gz bcm5719-llvm-8f4178a59efa56c1f4f211f9e023e0926b04555c.zip