summaryrefslogtreecommitdiffstats
path: root/meta-phosphor/common/recipes-core
diff options
context:
space:
mode:
authorAndrew Geissler <andrewg@us.ibm.com>2017-09-07 14:32:45 -0500
committerBrad Bishop <bradleyb@fuzziesquirrel.com>2017-09-26 00:38:31 +0000
commit1b3762aeea3f6a41c95a0f94a674121ea6118cc5 (patch)
treea48b8f737f50751572a0fadbaff0c9f88eac6d0a /meta-phosphor/common/recipes-core
parent7b2b5df7399d546f53d0ad10ddbd3f273bced373 (diff)
downloadtalos-openbmc-1b3762aeea3f6a41c95a0f94a674121ea6118cc5.tar.gz
talos-openbmc-1b3762aeea3f6a41c95a0f94a674121ea6118cc5.zip
Set default restart policy for phosphor services
Reduce number of service restarts and increase wait time between restarts. - Change the StartLimitBurst to 3. Five just seems excessive for our services in openbmc. In all fail scenarios seen so far (other than with phosphor-hwmon), either restarting once does the job or restarting all 5 times does not help and we just end up hitting the 5 limit anyway. - Change the RestartSec from 100ms to 1s. When a service hits a failure, our new debug collection service kicks in. When a core file is involved, it's been found that generating 5 core files within ~500ms puts a huge strain on the BMC. Also, if the BMC is going to get a fix on a restart of a service, the more time the better (think retries on device driver scenarios). Change-Id: I0e5bb3f32022cdb72d00f1a861a69653ef937cf1 Signed-off-by: Andrew Geissler <andrewg@us.ibm.com>
Diffstat (limited to 'meta-phosphor/common/recipes-core')
-rw-r--r--meta-phosphor/common/recipes-core/systemd/systemd/service-restart-policy.conf20
-rw-r--r--meta-phosphor/common/recipes-core/systemd/systemd_%.bbappend3
2 files changed, 23 insertions, 0 deletions
diff --git a/meta-phosphor/common/recipes-core/systemd/systemd/service-restart-policy.conf b/meta-phosphor/common/recipes-core/systemd/systemd/service-restart-policy.conf
new file mode 100644
index 000000000..0f950733d
--- /dev/null
+++ b/meta-phosphor/common/recipes-core/systemd/systemd/service-restart-policy.conf
@@ -0,0 +1,20 @@
+# This file overrides some defaults for systemd
+#
+# - Change the RestartSec from 100ms to 1s.
+# When a service hits a failure, our new debug collection service kicks
+# in. When a core file is involved, it's been found that generating 5 core
+# files within ~500ms puts a huge strain on the BMC. Also, if the BMC is
+# going to get a fix on a restart of a service, the more time the better
+# (think retries on device driver scenarios).
+#
+# - Change the StartLimitBurst to 3
+# Five just seems excessive for our services in openbmc. In all fail
+# scenarios seen so far (other than with phosphor-hwmon), either
+# restarting once does the job or restarting all 5 times does not help
+# and we just end up hitting the 5 limit anyway.
+#
+# See systemd-system.conf(5) for details on the conf files
+
+[Manager]
+DefaultRestartSec=1s
+DefaultStartLimitBurst=3
diff --git a/meta-phosphor/common/recipes-core/systemd/systemd_%.bbappend b/meta-phosphor/common/recipes-core/systemd/systemd_%.bbappend
index 74a196d36..1003e9ead 100644
--- a/meta-phosphor/common/recipes-core/systemd/systemd_%.bbappend
+++ b/meta-phosphor/common/recipes-core/systemd/systemd_%.bbappend
@@ -6,6 +6,7 @@ PACKAGECONFIG_remove = "machined hibernate ldconfig binfmt backlight localed \
quotacheck kdbus ima smack polkit logind bootchart utmp"
FILESEXTRAPATHS_append := "${THISDIR}/${PN}:"
SRC_URI += "file://default.network"
+SRC_URI += "file://service-restart-policy.conf"
SRC_URI += "file://0001-Export-message_append_cmdline.patch"
SRC_URI += "file://0002-systemd-Make-pam-compile-shared-library.patch"
SRC_URI += "file://0003-basic-Factor-out-string-checking-from-name_to_prefix.patch"
@@ -18,11 +19,13 @@ SRC_URI += "file://0007-journal-Add-Synchronize-dbus-method.patch"
RRECOMMENDS_${PN} += "obmc-targets"
FILES_${PN} += "${libdir}/systemd/network/default.network"
+FILES_${PN} += "${libdir}/systemd/system.conf.d/service-restart-policy.conf"
EXTRA_OECONF += " --disable-hwdb"
do_install_append() {
install -m 644 ${WORKDIR}/default.network ${D}${libdir}/systemd/network/
+ install -m 644 -D ${WORKDIR}/service-restart-policy.conf ${D}${libdir}/systemd/system.conf.d/service-restart-policy.conf
#TODO Remove after this issue is resolved
#https://github.com/openbmc/openbmc/issues/152
OpenPOWER on IntegriCloud