summary | refs | log | tree | commit | diff | stats
path: root/meta-openbmc-machines/meta-openpower/meta-ibm/meta-witherspoon
diff options
context:
space:
mode:
author: Matt Spinler <spinler@us.ibm.com> 2017-06-08 12:46:22 -0500
committer: Patrick Williams <patrick@stwcx.xyz> 2017-07-07 16:12:18 +0000
commit: a49ed3db6a46350ccb348d79caa7d4903b2ab3a8 (patch)
tree: d8fb63373403e58ac27dcaeb81674a5cba34f958 /meta-openbmc-machines/meta-openpower/meta-ibm/meta-witherspoon
parent: 65ab69b6a64ea954e447deb23c27e33f4745eb6a (diff)
download: talos-openbmc-a49ed3db6a46350ccb348d79caa7d4903b2ab3a8.tar.gz
          talos-openbmc-a49ed3db6a46350ccb348d79caa7d4903b2ab3a8.zip
Monitor fan apps for watchdog protection
This overrides the fan-control and fan-monitor service files to call the fan-watchdog-monitor service on failure. The watchdog monitor service will start the obmc-fan-watchdog-takeover target to expire the fan watchdog when these apps have exceeded their retry limits.

Note that fan-presence-tach and fan-control-init do not need watchdog protection because they are in the poweron path and will fail the poweron if they fail.

Resolves openbmc/openbmc#1688

Change-Id: I807da09f60213104163833114a8240f5fae9053a
Signed-off-by: Matt Spinler <spinler@us.ibm.com>
Diffstat (limited to 'meta-openbmc-machines/meta-openpower/meta-ibm/meta-witherspoon')
-rw-r--r-- meta-openbmc-machines/meta-openpower/meta-ibm/meta-witherspoon/recipes-phosphor/fans/phosphor-fan%.bbappend | 3
-rw-r--r-- meta-openbmc-machines/meta-openpower/meta-ibm/meta-witherspoon/recipes-phosphor/fans/phosphor-fan%/fan-watchdog-monitor.conf | 5
-rw-r--r-- meta-openbmc-machines/meta-openpower/meta-ibm/meta-witherspoon/recipes-phosphor/fans/witherspoon-fan-watchdog.bb | 4
-rw-r--r-- meta-openbmc-machines/meta-openpower/meta-ibm/meta-witherspoon/recipes-phosphor/fans/witherspoon-fan-watchdog/witherspoon-fan-watchdog-monitor@.service | 9
4 files changed, 20 insertions, 1 deletions
diff --git a/meta-openbmc-machines/meta-openpower/meta-ibm/meta-witherspoon/recipes-phosphor/fans/phosphor-fan%.bbappend b/meta-openbmc-machines/meta-openpower/meta-ibm/meta-witherspoon/recipes-phosphor/fans/phosphor-fan%.bbappend
index b9fc6631b..edcd8f2f3 100644
--- a/meta-openbmc-machines/meta-openpower/meta-ibm/meta-witherspoon/recipes-phosphor/fans/phosphor-fan%.bbappend
+++ b/meta-openbmc-machines/meta-openpower/meta-ibm/meta-witherspoon/recipes-phosphor/fans/phosphor-fan%.bbappend
@@ -22,3 +22,6 @@ COOLING_ENV_FMT = "obmc/phosphor-fan/phosphor-cooling-type-{0}.conf"
SYSTEMD_ENVIRONMENT_FILE_phosphor-cooling-type += "${@compose_list(d, 'COOLING_ENV_FMT', 'OBMC_CHASSIS_INSTANCES')}"
+#These 2 services are protected by the watchdog
+SYSTEMD_OVERRIDE_phosphor-fan-control += "fan-watchdog-monitor.conf:phosphor-fan-control@0.service.d/fan-watchdog-monitor.conf"
+SYSTEMD_OVERRIDE_phosphor-fan-monitor += "fan-watchdog-monitor.conf:phosphor-fan-monitor@0.service.d/fan-watchdog-monitor.conf"
diff --git a/meta-openbmc-machines/meta-openpower/meta-ibm/meta-witherspoon/recipes-phosphor/fans/phosphor-fan%/fan-watchdog-monitor.conf b/meta-openbmc-machines/meta-openpower/meta-ibm/meta-witherspoon/recipes-phosphor/fans/phosphor-fan%/fan-watchdog-monitor.conf
new file mode 100644
index 000000000..023e61a64
--- /dev/null
+++ b/meta-openbmc-machines/meta-openpower/meta-ibm/meta-witherspoon/recipes-phosphor/fans/phosphor-fan%/fan-watchdog-monitor.conf
@@ -0,0 +1,5 @@
+[Unit]
+#These overrides allow the fan watchdog to take over when this service dies
+OnFailure=witherspoon-fan-watchdog-monitor@%n.service
+StartLimitIntervalSec=5
+StartLimitBurst=3
diff --git a/meta-openbmc-machines/meta-openpower/meta-ibm/meta-witherspoon/recipes-phosphor/fans/witherspoon-fan-watchdog.bb b/meta-openbmc-machines/meta-openpower/meta-ibm/meta-witherspoon/recipes-phosphor/fans/witherspoon-fan-watchdog.bb
index cebeefda6..a74293ff8 100644
--- a/meta-openbmc-machines/meta-openpower/meta-ibm/meta-witherspoon/recipes-phosphor/fans/witherspoon-fan-watchdog.bb
+++ b/meta-openbmc-machines/meta-openpower/meta-ibm/meta-witherspoon/recipes-phosphor/fans/witherspoon-fan-watchdog.bb
@@ -10,6 +10,8 @@ RESET_SERVICE = "witherspoon-reset-fan-watchdog.service"
TGTFMT = "obmc-chassis-poweron@0.target"
RESET_FMT = "../${RESET_SERVICE}:${TGTFMT}.requires/${RESET_SERVICE}"
-SYSTEMD_SERVICE_${PN} += "${RESET_SERVICE}"
+MONITOR_SERVICE = "witherspoon-fan-watchdog-monitor@.service"
+
+SYSTEMD_SERVICE_${PN} += "${RESET_SERVICE} ${MONITOR_SERVICE}"
SYSTEMD_LINK_${PN} += "${RESET_FMT}"
SYSTEMD_ENVIRONMENT_FILE_${PN} += "obmc/witherspoon-fan-watchdog/witherspoon-reset-fan-watchdog.conf"
diff --git a/meta-openbmc-machines/meta-openpower/meta-ibm/meta-witherspoon/recipes-phosphor/fans/witherspoon-fan-watchdog/witherspoon-fan-watchdog-monitor@.service b/meta-openbmc-machines/meta-openpower/meta-ibm/meta-witherspoon/recipes-phosphor/fans/witherspoon-fan-watchdog/witherspoon-fan-watchdog-monitor@.service
new file mode 100644
index 000000000..9eb9697e0
--- /dev/null
+++ b/meta-openbmc-machines/meta-openpower/meta-ibm/meta-witherspoon/recipes-phosphor/fans/witherspoon-fan-watchdog/witherspoon-fan-watchdog-monitor@.service
@@ -0,0 +1,9 @@
+[Unit]
+Description=Fan Watchdog Failure Monitor
+
+#This can get called every time a process dies, so ensure it's never limited
+StartLimitIntervalSec=0
+
+[Service]
+Type=oneshot
+ExecStart=/usr/sbin/phosphor-unit-failure-monitor --source %i --target obmc-fan-watchdog-takeover.target --action start
OpenPOWER on IntegriCloud