summaryrefslogtreecommitdiffstats
path: root/occ_errors.cpp
diff options
context:
space:
mode:
author Andrew Geissler <geissonator@yahoo.com> 2018-01-17 11:07:01 -0800
committer Brad Bishop <bradleyb@fuzziesquirrel.com> 2018-01-24 02:53:51 +0000
commit 1111087e39e61fb220b987542643ad9bf0a100c7 (patch)
tree bf1f8254a5f8a386b1b9bf447e052f2f22369dcd /occ_errors.cpp
parent 777cee935865a7e2dcce4eb09b9834cf032d85f9 (diff)
downloadopenpower-occ-control-1111087e39e61fb220b987542643ad9bf0a100c7.zip
openpower-occ-control-1111087e39e61fb220b987542643ad9bf0a100c7.tar.gz
Retry OCC read operations
OCC communication can fail intermittently. Attempt each read up to 3 times (pausing 100 ms between attempts) before declaring a failure and exiting the openpower-occ-control application. Resolves openbmc/openbmc#2805 Change-Id: I34dca5bc3c19a1f88975b427bdb6a683b41dbcb7 Signed-off-by: Andrew Geissler <geissonator@yahoo.com>
Diffstat (limited to 'occ_errors.cpp')
-rw-r--r--occ_errors.cpp39
1 files changed, 26 insertions, 13 deletions
diff --git a/occ_errors.cpp b/occ_errors.cpp
index ee41db9..f4a5f69 100644
--- a/occ_errors.cpp
+++ b/occ_errors.cpp
@@ -129,24 +129,37 @@ void Error::analyzeEvent()
std::string Error::readFile(int len) const
{
auto data = std::make_unique<char[]>(len+1);
+ auto retries = 3;
+ auto delay = std::chrono::milliseconds{100};
- // This file get created soon after binding. A value of 0 is
- // deemed success and anything else is a Failure
- // Since all the sysfs files would have size of 4096, if we read 0
- // bytes -or- value '0', then it just means we are fine
- auto r = read(fd, data.get(), len);
- if (r < 0)
+ // OCC / FSI have intermittent issues so retry all reads
+ while (true)
{
- elog<ReadFailure>(
- phosphor::logging::org::open_power::OCC::Device::
- ReadFailure::CALLOUT_ERRNO(errno),
- phosphor::logging::org::open_power::OCC::Device::
- ReadFailure::CALLOUT_DEVICE_PATH(file.c_str()));
+ // This file get created soon after binding. A value of 0 is
+ // deemed success and anything else is a Failure
+ // Since all the sysfs files would have size of 4096, if we read 0
+ // bytes -or- value '0', then it just means we are fine
+ auto r = read(fd, data.get(), len);
+ if (r < 0)
+ {
+ retries--;
+ if (retries == 0)
+ {
+ elog<ReadFailure>(
+ phosphor::logging::org::open_power::OCC::Device::
+ ReadFailure::CALLOUT_ERRNO(errno),
+ phosphor::logging::org::open_power::OCC::Device::
+ ReadFailure::CALLOUT_DEVICE_PATH(file.c_str()));
+ break;
+ }
+ std::this_thread::sleep_for(delay);
+ continue;
+ }
+ break;
}
-
// Need to seek to START, else the poll returns immediately telling
// there is data to be read
- r = lseek(fd, 0, SEEK_SET);
+ auto r = lseek(fd, 0, SEEK_SET);
if (r < 0)
{
log<level::ERR>("Failure seeking error file to START");
OpenPOWER on IntegriCloud