summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/include/llvm/CodeGen/MachineRegisterInfo.h 7
-rw-r--r-- llvm/lib/CodeGen/MachineRegisterInfo.cpp 7
-rw-r--r-- llvm/lib/CodeGen/PeepholeOptimizer.cpp 6
-rw-r--r-- llvm/test/CodeGen/X86/addr-mode-matcher-2.ll 3
-rw-r--r-- llvm/test/CodeGen/X86/or-branch.ll 3
-rw-r--r-- llvm/test/CodeGen/X86/peephole-fold-testrr.mir 88
-rw-r--r-- llvm/test/CodeGen/X86/sibcall.ll 6
-rw-r--r-- llvm/test/CodeGen/X86/x86-shrink-wrapping.ll 6
8 files changed, 110 insertions, 16 deletions
diff --git a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
index eb69da936d3..b5deed1f501 100644
--- a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
@@ -561,9 +561,14 @@ public:
}
/// hasOneNonDBGUse - Return true if there is exactly one non-Debug
- /// instruction using the specified register.
+ /// use of the specified register.
bool hasOneNonDBGUse(unsigned RegNo) const;
+ /// hasOneNonDBGUser - Return true if there is exactly one non-Debug
+ /// instruction using the specified register. Said instruction may have
+ /// multiple uses.
+ bool hasOneNonDBGUser(unsigned RegNo) const;
+
/// replaceRegWith - Replace all instances of FromReg with ToReg in the
/// machine function. This is like llvm-level X->replaceAllUsesWith(Y),
/// except that it also changes any definitions of the register as well.
diff --git a/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/llvm/lib/CodeGen/MachineRegisterInfo.cpp
index be4b13bcef2..f0fd0405d69 100644
--- a/llvm/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/llvm/lib/CodeGen/MachineRegisterInfo.cpp
@@ -423,6 +423,13 @@ bool MachineRegisterInfo::hasOneNonDBGUse(unsigned RegNo) const {
return ++UI == use_nodbg_end();
}
+bool MachineRegisterInfo::hasOneNonDBGUser(unsigned RegNo) const {
+ use_instr_nodbg_iterator UI = use_instr_nodbg_begin(RegNo);
+ if (UI == use_instr_nodbg_end())
+ return false; // No non-debug instruction uses RegNo.
+ return ++UI == use_instr_nodbg_end(); // One user iff nothing follows it.
+}
+
/// clearKillFlags - Iterate over all the uses of the given register and
/// clear the kill flag from the MachineOperand. This function is used by
/// optimization passes which extend register lifetimes and need only
diff --git a/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/llvm/lib/CodeGen/PeepholeOptimizer.cpp
index a3f1b83b157..38512e5a1a7 100644
--- a/llvm/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -1306,7 +1306,7 @@ bool PeepholeOptimizer::optimizeUncoalescableCopy(
/// Check whether MI is a candidate for folding into a later instruction.
/// We only fold loads to virtual registers and the virtual register defined
-/// has a single use.
+/// has a single user.
bool PeepholeOptimizer::isLoadFoldable(
MachineInstr &MI, SmallSet<unsigned, 16> &FoldAsLoadDefCandidates) {
if (!MI.canFoldAsLoad() || !MI.mayLoad())
@@ -1316,12 +1316,12 @@ bool PeepholeOptimizer::isLoadFoldable(
return false;
unsigned Reg = MI.getOperand(0).getReg();
- // To reduce compilation time, we check MRI->hasOneNonDBGUse when inserting
+ // To reduce compilation time, we check MRI->hasOneNonDBGUser when inserting
// loads. It should be checked when processing uses of the load, since
// uses can be removed during peephole.
if (!MI.getOperand(0).getSubReg() &&
TargetRegisterInfo::isVirtualRegister(Reg) &&
- MRI->hasOneNonDBGUse(Reg)) {
+ MRI->hasOneNonDBGUser(Reg)) {
FoldAsLoadDefCandidates.insert(Reg);
return true;
}
diff --git a/llvm/test/CodeGen/X86/addr-mode-matcher-2.ll b/llvm/test/CodeGen/X86/addr-mode-matcher-2.ll
index 1b32ba40c66..c29f7e81158 100644
--- a/llvm/test/CodeGen/X86/addr-mode-matcher-2.ll
+++ b/llvm/test/CodeGen/X86/addr-mode-matcher-2.ll
@@ -24,8 +24,7 @@
define void @foo(i1 zeroext, i32) nounwind {
; X86-LABEL: foo:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-NEXT: testb %al, %al
+; X86-NEXT: cmpb $0, {{[0-9]+}}(%esp)
; X86-NEXT: je .LBB0_1
; X86-NEXT: # %bb.3:
; X86-NEXT: retl
diff --git a/llvm/test/CodeGen/X86/or-branch.ll b/llvm/test/CodeGen/X86/or-branch.ll
index a1a29cf744f..8341e378990 100644
--- a/llvm/test/CodeGen/X86/or-branch.ll
+++ b/llvm/test/CodeGen/X86/or-branch.ll
@@ -8,8 +8,7 @@ define void @foo(i32 %X, i32 %Y, i32 %Z) nounwind {
; JUMP2-NEXT: cmpl $5, {{[0-9]+}}(%esp)
; JUMP2-NEXT: jl .LBB0_3
; JUMP2-NEXT: # %bb.1: # %entry
-; JUMP2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; JUMP2-NEXT: testl %eax, %eax
+; JUMP2-NEXT: cmpl $0, {{[0-9]+}}(%esp)
; JUMP2-NEXT: je .LBB0_3
; JUMP2-NEXT: # %bb.2: # %UnifiedReturnBlock
; JUMP2-NEXT: retl
diff --git a/llvm/test/CodeGen/X86/peephole-fold-testrr.mir b/llvm/test/CodeGen/X86/peephole-fold-testrr.mir
new file mode 100644
index 00000000000..1594f52cb0c
--- /dev/null
+++ b/llvm/test/CodeGen/X86/peephole-fold-testrr.mir
@@ -0,0 +1,88 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -run-pass=peephole-opt -mtriple=x86_64-- %s -o - | FileCheck %s
+
+--- |
+ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+ target triple = "x86_64-unknown-linux-gnu"
+
+ define i32 @atomic(i8** %arg) {
+ %load = load atomic i8*, i8** %arg unordered, align 8
+ %cmp = icmp eq i8* %load, null
+ %zext = zext i1 %cmp to i32
+ ret i32 %zext
+ }
+
+ define i32 @nonatomic_unoptimized(i8** %arg) {
+ %load = load i8*, i8** %arg, align 8
+ %cmp = icmp eq i8* %load, null
+ %zext = zext i1 %cmp to i32
+ ret i32 %zext
+ }
+
+...
+---
+name: atomic
+alignment: 4
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gr64 }
+ - { id: 1, class: gr64 }
+ - { id: 2, class: gr8 }
+ - { id: 3, class: gr32 }
+liveins:
+ - { reg: '$rdi', virtual-reg: '%0' }
+machineFunctionInfo: {}
+body: |
+ bb.0 (%ir-block.0):
+ liveins: $rdi
+
+ ; CHECK-LABEL: name: atomic
+ ; CHECK: liveins: $rdi
+ ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rdi
+ ; CHECK: CMP64mi8 [[COPY]], 1, $noreg, 0, $noreg, 0, implicit-def $eflags :: (load unordered 8 from %ir.arg)
+ ; CHECK: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 4, implicit $eflags
+ ; CHECK: [[MOVZX32rr8_:%[0-9]+]]:gr32 = MOVZX32rr8 killed [[SETCCr]]
+ ; CHECK: $eax = COPY [[MOVZX32rr8_]]
+ ; CHECK: RET 0, $eax
+ %0:gr64 = COPY $rdi
+ %1:gr64 = MOV64rm %0, 1, $noreg, 0, $noreg :: (load unordered 8 from %ir.arg)
+ TEST64rr %1, %1, implicit-def $eflags
+ %2:gr8 = SETCCr 4, implicit $eflags
+ %3:gr32 = MOVZX32rr8 killed %2
+ $eax = COPY %3
+ RET 0, $eax
+
+...
+---
+name: nonatomic_unoptimized
+alignment: 4
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gr64 }
+ - { id: 1, class: gr64 }
+ - { id: 2, class: gr8 }
+ - { id: 3, class: gr32 }
+liveins:
+ - { reg: '$rdi', virtual-reg: '%0' }
+machineFunctionInfo: {}
+body: |
+ bb.0 (%ir-block.0):
+ liveins: $rdi
+
+ ; CHECK-LABEL: name: nonatomic_unoptimized
+ ; CHECK: liveins: $rdi
+ ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rdi
+ ; CHECK: CMP64mi8 [[COPY]], 1, $noreg, 0, $noreg, 0, implicit-def $eflags :: (load 8 from %ir.arg)
+ ; CHECK: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 4, implicit $eflags
+ ; CHECK: [[MOVZX32rr8_:%[0-9]+]]:gr32 = MOVZX32rr8 killed [[SETCCr]]
+ ; CHECK: $eax = COPY [[MOVZX32rr8_]]
+ ; CHECK: RET 0, $eax
+ %0:gr64 = COPY $rdi
+ %1:gr64 = MOV64rm %0, 1, $noreg, 0, $noreg :: (load 8 from %ir.arg)
+ TEST64rr %1, %1, implicit-def $eflags
+ %2:gr8 = SETCCr 4, implicit $eflags
+ %3:gr32 = MOVZX32rr8 killed %2
+ $eax = COPY %3
+ RET 0, $eax
+
+...
diff --git a/llvm/test/CodeGen/X86/sibcall.ll b/llvm/test/CodeGen/X86/sibcall.ll
index 6902460fd51..98e17b1aa60 100644
--- a/llvm/test/CodeGen/X86/sibcall.ll
+++ b/llvm/test/CodeGen/X86/sibcall.ll
@@ -264,8 +264,7 @@ declare i32 @foo4()
define i32 @t11(i32 %x, i32 %y, i32 %z.0, i32 %z.1, i32 %z.2) nounwind ssp {
; X86-LABEL: t11:
; X86: # %bb.0: # %entry
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: testl %eax, %eax
+; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
; X86-NEXT: je .LBB11_1
; X86-NEXT: # %bb.2: # %bb
; X86-NEXT: jmp foo5 # TAILCALL
@@ -311,8 +310,7 @@ declare i32 @foo5(i32, i32, i32, i32, i32)
define i32 @t12(i32 %x, i32 %y, %struct.t* byval align 4 %z) nounwind ssp {
; X86-LABEL: t12:
; X86: # %bb.0: # %entry
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: testl %eax, %eax
+; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
; X86-NEXT: je .LBB12_1
; X86-NEXT: # %bb.2: # %bb
; X86-NEXT: jmp foo6 # TAILCALL
diff --git a/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll b/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll
index 77dd0753c27..8c514b3b2b7 100644
--- a/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll
+++ b/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll
@@ -1386,8 +1386,7 @@ define i32 @irreducibleCFG() #4 {
; ENABLE-NEXT: jmp LBB16_1
; ENABLE-NEXT: LBB16_2: ## %split
; ENABLE-NEXT: movq _irreducibleCFGb@{{.*}}(%rip), %rax
-; ENABLE-NEXT: movl (%rax), %eax
-; ENABLE-NEXT: testl %eax, %eax
+; ENABLE-NEXT: cmpl $0, (%rax)
; ENABLE-NEXT: je LBB16_3
; ENABLE-NEXT: ## %bb.4: ## %for.body4.i
; ENABLE-NEXT: movq _irreducibleCFGa@{{.*}}(%rip), %rax
@@ -1430,8 +1429,7 @@ define i32 @irreducibleCFG() #4 {
; DISABLE-NEXT: jmp LBB16_1
; DISABLE-NEXT: LBB16_2: ## %split
; DISABLE-NEXT: movq _irreducibleCFGb@{{.*}}(%rip), %rax
-; DISABLE-NEXT: movl (%rax), %eax
-; DISABLE-NEXT: testl %eax, %eax
+; DISABLE-NEXT: cmpl $0, (%rax)
; DISABLE-NEXT: je LBB16_3
; DISABLE-NEXT: ## %bb.4: ## %for.body4.i
; DISABLE-NEXT: movq _irreducibleCFGa@{{.*}}(%rip), %rax
OpenPOWER on IntegriCloud