summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2017-09-05 13:40:29 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2017-09-05 13:40:29 +0000
commit49f9ba37d8b676ae39c050ef6d02d3d904685a53 (patch)
tree56bcdb6640a22e5873fe878781205b24729afd7d
parentbd7c45e7a827ed546912b8d7b02d80e820eb3e7b (diff)
downloadbcm5719-llvm-49f9ba37d8b676ae39c050ef6d02d3d904685a53.tar.gz
bcm5719-llvm-49f9ba37d8b676ae39c050ef6d02d3d904685a53.zip
[X86] Limit store merge size when implicitfloat is enabled (PR34421)
As suggested by @niravd : https://bugs.llvm.org/show_bug.cgi?id=34421#c2 Differential Revision: https://reviews.llvm.org/D37464 llvm-svn: 312534
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp14
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.h3
-rw-r--r--llvm/test/CodeGen/X86/pr34421.ll40
3 files changed, 57 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 023e539250c..81eeead911d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -4611,6 +4611,20 @@ bool X86TargetLowering::isCheapToSpeculateCtlz() const {
return Subtarget.hasLZCNT();
}
+bool X86TargetLowering::canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
+ const SelectionDAG &DAG) const {
+ // When the function carries the noimplicitfloat attribute, only allow merged
+ // stores up to the native integer width (64/32 bits), never 128-bit values.
+ bool NoFloat = DAG.getMachineFunction().getFunction()->hasFnAttribute(
+ Attribute::NoImplicitFloat);
+
+ if (NoFloat) {
+ unsigned MaxIntSize = Subtarget.is64Bit() ? 64 : 32;
+ return (MemVT.getSizeInBits() <= MaxIntSize);
+ }
+ return true;
+}
+
bool X86TargetLowering::isCtlzFast() const {
return Subtarget.hasFastLZCNT();
}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 53cd8ca5361..663b9532338 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -814,6 +814,9 @@ namespace llvm {
bool mergeStoresAfterLegalization() const override { return true; }
+ bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
+ const SelectionDAG &DAG) const override;
+
bool isCheapToSpeculateCttz() const override;
bool isCheapToSpeculateCtlz() const override;
diff --git a/llvm/test/CodeGen/X86/pr34421.ll b/llvm/test/CodeGen/X86/pr34421.ll
new file mode 100644
index 00000000000..5db8b4c601e
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr34421.ll
@@ -0,0 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-apple-macosx10.13.0 | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-apple-macosx10.13.0 | FileCheck %s --check-prefix=X64
+
+define void @thread_selfcounts() noimplicitfloat noredzone nounwind {
+; X86-LABEL: thread_selfcounts:
+; X86: ## BB#0: ## %entry
+; X86-NEXT: subl $44, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, (%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: ## -- End function
+;
+; X64-LABEL: thread_selfcounts:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: subq $40, %rsp
+; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx
+; X64-NEXT: movq %rax, (%rsp)
+; X64-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
+; X64-NEXT: ## -- End function
+entry:
+ %counts = alloca [2 x i64], align 16
+ %thread_counts = alloca [3 x i64], align 16
+ %arraydecay = getelementptr inbounds [3 x i64], [3 x i64]* %thread_counts, i64 0, i64 0
+ %0 = load i64, i64* %arraydecay, align 16
+ %arrayidx3 = getelementptr inbounds [2 x i64], [2 x i64]* %counts, i64 0, i64 0
+ store i64 %0, i64* %arrayidx3, align 16
+ %arrayidx6 = getelementptr inbounds [3 x i64], [3 x i64]* %thread_counts, i64 0, i64 1
+ %1 = load i64, i64* %arrayidx6, align 8
+ %arrayidx10 = getelementptr inbounds [2 x i64], [2 x i64]* %counts, i64 0, i64 1
+ store i64 %1, i64* %arrayidx10, align 8
+ unreachable
+}
+
OpenPOWER on IntegriCloud