From e4b0b77180adc4c2f63e83abbf849f09fd399055 Mon Sep 17 00:00:00 2001 From: Nagaraju Goruganti Date: Thu, 30 Nov 2017 02:12:45 -0600 Subject: Roll errors after reaching Cap size Fixes openbmc/openbmc#2687 - The cap on errors exists to deal with small flashes. Right now the cap is 100 for high severity errors and 10 for info (and below) severity errors. - The previous policy was to stop logging new errors once the cap was reached for high severity errors. - Based on feedback from field engineers, it's better to keep logging new errors: external tools will typically read and store errors periodically, so we won't lose them. The new policy is therefore to roll errors, i.e. erase the oldest high severity error to make room for the new one. Change-Id: I18b509d81c076fcb80667300cbfda050027bb422 Signed-off-by: Nagaraju Goruganti --- log_manager.cpp | 45 +++++++++++++++++++++++---------------------- log_manager.hpp | 16 ++++------------ 2 files changed, 27 insertions(+), 34 deletions(-) diff --git a/log_manager.cpp b/log_manager.cpp index 0d8e753..cc5e187 100644 --- a/log_manager.cpp +++ b/log_manager.cpp @@ -28,7 +28,6 @@ namespace internal void Manager::commit(uint64_t transactionId, std::string errMsg) { auto reqLevel = level::ERR; // Default to ERR - size_t realErrCnt = entries.size() - infoErrors.size(); auto levelmap = g_errLevelMap.find(errMsg); if (levelmap != g_errLevelMap.end()) @@ -38,17 +37,9 @@ void Manager::commit(uint64_t transactionId, std::string errMsg) if (static_cast(reqLevel) < Entry::sevLowerLimit) { - if (capped) + if (realErrors.size() >= ERROR_CAP) { - return; - } - if (realErrCnt >= ERROR_CAP) - { - log("Reached error cap, Ignoring error", - entry("SIZE=%d", realErrCnt), - entry("ERROR_CAP=%d", ERROR_CAP)); - capped = true; - return; + erase(realErrors.front()); } } else @@ -171,6 +162,10 @@ void Manager::commit(uint64_t transactionId, std::string errMsg) { infoErrors.push_back(entryId); } + else + { + realErrors.push_back(entryId); + } auto ms = std::chrono::duration_cast( std::chrono::system_clock::now().time_since_epoch()).count(); auto 
objPath = std::string(OBJ_ENTRY) + '/' + @@ -223,13 +218,22 @@ void Manager::erase(uint32_t entryId) fs::path errorPath(ERRLOG_PERSIST_PATH); errorPath /= std::to_string(entryId); fs::remove(errorPath); - if (entry->second->severity() >= Entry::sevLowerLimit) + + auto removeId = [](std::list& ids , uint32_t id) { - auto it = std::find(infoErrors.begin(), infoErrors.end(), entryId); - if (it != infoErrors.end()) + auto it = std::find(ids.begin(), ids.end(), id); + if (it != ids.end()) { - infoErrors.erase(it); + ids.erase(it); } + }; + if (entry->second->severity() >= Entry::sevLowerLimit) + { + removeId(infoErrors, entryId); + } + else + { + removeId(realErrors, entryId); } entries.erase(entry); } @@ -238,13 +242,6 @@ void Manager::erase(uint32_t entryId) logging::log("Invalid entry ID to delete", logging::entry("ID=%d", entryId)); } - - size_t realErrCnt = entries.size() - infoErrors.size(); - - if (realErrCnt < ERROR_CAP) - { - capped = false; - } } void Manager::restore() @@ -280,6 +277,10 @@ void Manager::restore() { infoErrors.push_back(idNum); } + else + { + realErrors.push_back(idNum); + } entries.insert(std::make_pair(idNum, std::move(e))); errorIds.push_back(idNum); diff --git a/log_manager.hpp b/log_manager.hpp index 399a243..b165ff3 100644 --- a/log_manager.hpp +++ b/log_manager.hpp @@ -54,8 +54,7 @@ class Manager : public details::ServerObject Manager(sdbusplus::bus::bus& bus, const char* objPath) : details::ServerObject(bus, objPath), busLog(bus), - entryId(0), - capped(false) {}; + entryId(0){}; /* * @fn commit() @@ -112,21 +111,14 @@ class Manager : public details::ServerObject /** @brief Persistent map of Entry dbus objects and their ID */ std::map> entries; + /** @brief List of error ids for high severity errors */ + std::list realErrors; + /** @brief List of error ids for Info(and below) severity */ std::list infoErrors; /** @brief Id of last error log entry */ uint32_t entryId; - - /** - * @brief Flag to log error for the first time when error 
cap is - * reached. - * @details Flag used to log error message for the first time when the - * error cap value is reached. It is reset when user delete's error - * entries and total entries existing is less than the error cap - * value. - */ - bool capped; }; } //namespace internal -- cgit v1.2.1