diff options
author | Matt Spinler <spinler@us.ibm.com> | 2017-06-08 12:46:22 -0500 |
---|---|---|
committer | Patrick Williams <patrick@stwcx.xyz> | 2017-07-07 16:12:18 +0000 |
commit | a49ed3db6a46350ccb348d79caa7d4903b2ab3a8 (patch) | |
tree | d8fb63373403e58ac27dcaeb81674a5cba34f958 /meta-openbmc-machines/meta-openpower | |
parent | 65ab69b6a64ea954e447deb23c27e33f4745eb6a (diff) | |
download | talos-openbmc-a49ed3db6a46350ccb348d79caa7d4903b2ab3a8.tar.gz talos-openbmc-a49ed3db6a46350ccb348d79caa7d4903b2ab3a8.zip |
Monitor fan apps for watchdog protection
This overrides the fan-control and fan-monitor service files
to call the fan-watchdog-monitor service when they fail.
The watchdog monitor service will start the obmc-fan-watchdog-takeover
target to expire the fan watchdog when these apps have exceeded
their retry limits.
Note that fan-presence-tach and fan-control-init do not need watchdog
protection because they are in the poweron path and will fail the
poweron if they fail.
Resolves openbmc/openbmc#1688
Change-Id: I807da09f60213104163833114a8240f5fae9053a
Signed-off-by: Matt Spinler <spinler@us.ibm.com>
Diffstat (limited to 'meta-openbmc-machines/meta-openpower')
4 files changed, 20 insertions, 1 deletion
diff --git a/meta-openbmc-machines/meta-openpower/meta-ibm/meta-witherspoon/recipes-phosphor/fans/phosphor-fan%.bbappend b/meta-openbmc-machines/meta-openpower/meta-ibm/meta-witherspoon/recipes-phosphor/fans/phosphor-fan%.bbappend index b9fc6631b..edcd8f2f3 100644 --- a/meta-openbmc-machines/meta-openpower/meta-ibm/meta-witherspoon/recipes-phosphor/fans/phosphor-fan%.bbappend +++ b/meta-openbmc-machines/meta-openpower/meta-ibm/meta-witherspoon/recipes-phosphor/fans/phosphor-fan%.bbappend @@ -22,3 +22,6 @@ COOLING_ENV_FMT = "obmc/phosphor-fan/phosphor-cooling-type-{0}.conf" SYSTEMD_ENVIRONMENT_FILE_phosphor-cooling-type += "${@compose_list(d, 'COOLING_ENV_FMT', 'OBMC_CHASSIS_INSTANCES')}" +#These 2 services are protected by the watchdog +SYSTEMD_OVERRIDE_phosphor-fan-control += "fan-watchdog-monitor.conf:phosphor-fan-control@0.service.d/fan-watchdog-monitor.conf" +SYSTEMD_OVERRIDE_phosphor-fan-monitor += "fan-watchdog-monitor.conf:phosphor-fan-monitor@0.service.d/fan-watchdog-monitor.conf" diff --git a/meta-openbmc-machines/meta-openpower/meta-ibm/meta-witherspoon/recipes-phosphor/fans/phosphor-fan%/fan-watchdog-monitor.conf b/meta-openbmc-machines/meta-openpower/meta-ibm/meta-witherspoon/recipes-phosphor/fans/phosphor-fan%/fan-watchdog-monitor.conf new file mode 100644 index 000000000..023e61a64 --- /dev/null +++ b/meta-openbmc-machines/meta-openpower/meta-ibm/meta-witherspoon/recipes-phosphor/fans/phosphor-fan%/fan-watchdog-monitor.conf @@ -0,0 +1,5 @@ +[Unit] +#These overrides allow the fan watchdog to take over when this service dies +OnFailure=witherspoon-fan-watchdog-monitor@%n.service +StartLimitIntervalSec=5 +StartLimitBurst=3 diff --git a/meta-openbmc-machines/meta-openpower/meta-ibm/meta-witherspoon/recipes-phosphor/fans/witherspoon-fan-watchdog.bb b/meta-openbmc-machines/meta-openpower/meta-ibm/meta-witherspoon/recipes-phosphor/fans/witherspoon-fan-watchdog.bb index cebeefda6..a74293ff8 100644 --- 
a/meta-openbmc-machines/meta-openpower/meta-ibm/meta-witherspoon/recipes-phosphor/fans/witherspoon-fan-watchdog.bb +++ b/meta-openbmc-machines/meta-openpower/meta-ibm/meta-witherspoon/recipes-phosphor/fans/witherspoon-fan-watchdog.bb @@ -10,6 +10,8 @@ RESET_SERVICE = "witherspoon-reset-fan-watchdog.service" TGTFMT = "obmc-chassis-poweron@0.target" RESET_FMT = "../${RESET_SERVICE}:${TGTFMT}.requires/${RESET_SERVICE}" -SYSTEMD_SERVICE_${PN} += "${RESET_SERVICE}" +MONITOR_SERVICE = "witherspoon-fan-watchdog-monitor@.service" + +SYSTEMD_SERVICE_${PN} += "${RESET_SERVICE} ${MONITOR_SERVICE}" SYSTEMD_LINK_${PN} += "${RESET_FMT}" SYSTEMD_ENVIRONMENT_FILE_${PN} += "obmc/witherspoon-fan-watchdog/witherspoon-reset-fan-watchdog.conf" diff --git a/meta-openbmc-machines/meta-openpower/meta-ibm/meta-witherspoon/recipes-phosphor/fans/witherspoon-fan-watchdog/witherspoon-fan-watchdog-monitor@.service b/meta-openbmc-machines/meta-openpower/meta-ibm/meta-witherspoon/recipes-phosphor/fans/witherspoon-fan-watchdog/witherspoon-fan-watchdog-monitor@.service new file mode 100644 index 000000000..9eb9697e0 --- /dev/null +++ b/meta-openbmc-machines/meta-openpower/meta-ibm/meta-witherspoon/recipes-phosphor/fans/witherspoon-fan-watchdog/witherspoon-fan-watchdog-monitor@.service @@ -0,0 +1,9 @@ +[Unit] +Description=Fan Watchdog Failure Monitor + +#This can get called every time a process dies, so ensure it's never limited +StartLimitIntervalSec=0 + +[Service] +Type=oneshot +ExecStart=/usr/sbin/phosphor-unit-failure-monitor --source %i --target obmc-fan-watchdog-takeover.target --action start |