summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMichael Kuperstein <michael.m.kuperstein@intel.com>2015-08-11 08:19:43 +0000
committerMichael Kuperstein <michael.m.kuperstein@intel.com>2015-08-11 08:19:43 +0000
commit82814f63c035de29b37485d14749f4ddfb7d10d5 (patch)
treeb98ec3c7ee4c332c889cff207e3209aecda397a7
parentc186ac7aeab6ae2f37f365bea99a02d49074c26f (diff)
downloadbcm5719-llvm-82814f63c035de29b37485d14749f4ddfb7d10d5.tar.gz
bcm5719-llvm-82814f63c035de29b37485d14749f4ddfb7d10d5.zip
Allow PeepholeOptimizer to fold a few more cases
The condition for clearing the folding candidate list was clamped together with the "uninteresting instruction" condition. This is too conservative, e.g. we don't need to clear the list when encountering an IMPLICIT_DEF. Differential Revision: http://reviews.llvm.org/D11591 llvm-svn: 244577
-rw-r--r--llvm/lib/CodeGen/PeepholeOptimizer.cpp9
-rw-r--r--llvm/test/CodeGen/X86/avx-cvt.ll3
-rw-r--r--llvm/test/CodeGen/X86/shift-bmi2.ll20
3 files changed, 14 insertions, 18 deletions
diff --git a/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/llvm/lib/CodeGen/PeepholeOptimizer.cpp
index 21d603b19ef..3dc73bac5b8 100644
--- a/llvm/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -1236,14 +1236,13 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
// If there exists an instruction which belongs to the following
// categories, we will discard the load candidates.
+ if (MI->mayStore() || MI->isCall() || MI->hasUnmodeledSideEffects())
+ FoldAsLoadDefCandidates.clear();
+
if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() ||
MI->isKill() || MI->isInlineAsm() ||
- MI->hasUnmodeledSideEffects()) {
- FoldAsLoadDefCandidates.clear();
+ MI->hasUnmodeledSideEffects())
continue;
- }
- if (MI->mayStore() || MI->isCall())
- FoldAsLoadDefCandidates.clear();
if ((isUncoalescableCopy(*MI) &&
optimizeUncoalescableCopy(MI, LocalMIs)) ||
diff --git a/llvm/test/CodeGen/X86/avx-cvt.ll b/llvm/test/CodeGen/X86/avx-cvt.ll
index 6df3e5324c1..2f039862b36 100644
--- a/llvm/test/CodeGen/X86/avx-cvt.ll
+++ b/llvm/test/CodeGen/X86/avx-cvt.ll
@@ -113,8 +113,7 @@ define float @funcD(i64* nocapture %e) nounwind uwtable readonly ssp {
define void @fpext() nounwind uwtable {
; CHECK-LABEL: fpext:
; CHECK: # BB#0:
-; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vcvtss2sd -{{[0-9]+}}(%rsp), %xmm0, %xmm0
; CHECK-NEXT: vmovsd %xmm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: retq
%f = alloca float, align 4
diff --git a/llvm/test/CodeGen/X86/shift-bmi2.ll b/llvm/test/CodeGen/X86/shift-bmi2.ll
index 63b6ec55fac..fdeddffdfb0 100644
--- a/llvm/test/CodeGen/X86/shift-bmi2.ll
+++ b/llvm/test/CodeGen/X86/shift-bmi2.ll
@@ -30,11 +30,10 @@ entry:
%x = load i32, i32* %p
%shl = shl i32 %x, %shamt
; BMI2: shl32p
-; Source order scheduling prevents folding, rdar:14208996.
-; BMI2: shlxl %{{.+}}, %{{.+}}, %{{.+}}
+; BMI2: shlxl %{{.+}}, ({{.+}}), %{{.+}}
; BMI2: ret
; BMI264: shl32p
-; BMI264: shlxl %{{.+}}, %{{.+}}, %{{.+}}
+; BMI264: shlxl %{{.+}}, ({{.+}}), %{{.+}}
; BMI264: ret
ret i32 %shl
}
@@ -75,7 +74,7 @@ entry:
%x = load i64, i64* %p
%shl = shl i64 %x, %shamt
; BMI264: shl64p
-; BMI264: shlxq %{{.+}}, %{{.+}}, %{{.+}}
+; BMI264: shlxq %{{.+}}, ({{.+}}), %{{.+}}
; BMI264: ret
ret i64 %shl
}
@@ -107,11 +106,10 @@ entry:
%x = load i32, i32* %p
%shl = lshr i32 %x, %shamt
; BMI2: lshr32p
-; Source order scheduling prevents folding, rdar:14208996.
-; BMI2: shrxl %{{.+}}, %{{.+}}, %{{.+}}
+; BMI2: shrxl %{{.+}}, ({{.+}}), %{{.+}}
; BMI2: ret
; BMI264: lshr32p
-; BMI264: shrxl %{{.+}}, %{{.+}}, %{{.+}}
+; BMI264: shrxl %{{.+}}, ({{.+}}), %{{.+}}
; BMI264: ret
ret i32 %shl
}
@@ -130,7 +128,7 @@ entry:
%x = load i64, i64* %p
%shl = lshr i64 %x, %shamt
; BMI264: lshr64p
-; BMI264: shrxq %{{.+}}, %{{.+}}, %{{.+}}
+; BMI264: shrxq %{{.+}}, ({{.+}}), %{{.+}}
; BMI264: ret
ret i64 %shl
}
@@ -153,10 +151,10 @@ entry:
%shl = ashr i32 %x, %shamt
; BMI2: ashr32p
; Source order scheduling prevents folding, rdar:14208996.
-; BMI2: sarxl %{{.+}}, %{{.+}}, %{{.+}}
+; BMI2: sarxl %{{.+}}, ({{.+}}), %{{.+}}
; BMI2: ret
; BMI264: ashr32p
-; BMI264: sarxl %{{.+}}, %{{.+}}, %{{.+}}
+; BMI264: sarxl %{{.+}}, ({{.+}}), %{{.+}}
; BMI264: ret
ret i32 %shl
}
@@ -175,7 +173,7 @@ entry:
%x = load i64, i64* %p
%shl = ashr i64 %x, %shamt
; BMI264: ashr64p
-; BMI264: sarxq %{{.+}}, %{{.+}}, %{{.+}}
+; BMI264: sarxq %{{.+}}, ({{.+}}), %{{.+}}
; BMI264: ret
ret i64 %shl
}
OpenPOWER on IntegriCloud