diff options
Diffstat (limited to 'ecomp-portal-BE-common/src/main/java/org/openecomp/portalapp/portal/listener/HealthMonitor.java')
-rw-r--r-- | ecomp-portal-BE-common/src/main/java/org/openecomp/portalapp/portal/listener/HealthMonitor.java | 76 |
1 files changed, 41 insertions, 35 deletions
diff --git a/ecomp-portal-BE-common/src/main/java/org/openecomp/portalapp/portal/listener/HealthMonitor.java b/ecomp-portal-BE-common/src/main/java/org/openecomp/portalapp/portal/listener/HealthMonitor.java index 9de7a4c9..f9a6b905 100644 --- a/ecomp-portal-BE-common/src/main/java/org/openecomp/portalapp/portal/listener/HealthMonitor.java +++ b/ecomp-portal-BE-common/src/main/java/org/openecomp/portalapp/portal/listener/HealthMonitor.java @@ -19,7 +19,6 @@ */ package org.openecomp.portalapp.portal.listener; -import java.util.Date; import java.util.List; import javax.annotation.PostConstruct; @@ -28,11 +27,9 @@ import javax.annotation.PreDestroy; import org.hibernate.Query; import org.hibernate.Session; import org.hibernate.SessionFactory; -import org.openecomp.portalapp.portal.domain.SharedContext; import org.openecomp.portalapp.portal.logging.aop.EPMetricsLog; import org.openecomp.portalapp.portal.logging.format.EPAppMessagesEnum; import org.openecomp.portalapp.portal.logging.logic.EPLogUtil; -import org.openecomp.portalapp.portal.service.SharedContextService; import org.openecomp.portalapp.portal.ueb.EPUebHelper; import org.openecomp.portalapp.portal.utils.EPCommonSystemProperties; import org.openecomp.portalsdk.core.logging.logic.EELFLoggerDelegate; @@ -47,29 +44,29 @@ import org.springframework.transaction.annotation.Transactional; @EPMetricsLog public class HealthMonitor { + private EELFLoggerDelegate logger = EELFLoggerDelegate.getLogger(HealthMonitor.class); + @Autowired private SessionFactory sessionFactory; @Autowired private EPUebHelper epUebHelper; - @Autowired - private SharedContextService sharedContextService; - private static boolean databaseUp; private static boolean uebUp; private static boolean frontEndUp; private static boolean backEndUp; private static boolean dbClusterStatusOk; private static boolean dbPermissionsOk; + + /** + * Read directly by external classes. + */ public static boolean isSuspended = false; private Thread healthMonitorThread; - private EELFLoggerDelegate logger = EELFLoggerDelegate.getLogger(HealthMonitor.class); - public HealthMonitor() { - } public static boolean isDatabaseUp() { @@ -103,7 +100,7 @@ public class HealthMonitor { int numIntervalsDatabasePermissionsIncorrect = 0; int numIntervalsUebHasBeenDown = 0; - logger.debug(EELFLoggerDelegate.debugLogger, "monitorEPHealth started"); + logger.debug(EELFLoggerDelegate.debugLogger, "monitorEPHealth thread started"); long sleepInterval = (Long .valueOf(SystemProperties.getProperty(EPCommonSystemProperties.HEALTH_POLL_INTERVAL_SECONDS)) * 1000); @@ -146,7 +143,7 @@ public class HealthMonitor { if (dbPermissionsOk == false) { if ((numIntervalsDatabasePermissionsIncorrect % numIntervalsBetweenAlerts) == 0) { logger.debug(EELFLoggerDelegate.debugLogger, - "monitorEPHealth: database permissions not correct, logging to error log to trigger alert."); + "monitorEPHealth: database permissions incorrect, logging to error log to trigger alert."); EPLogUtil.logEcompError(logger, EPAppMessagesEnum.BeHealthCheckMySqlError); numIntervalsDatabasePermissionsIncorrect++; } else { @@ -162,8 +159,10 @@ public class HealthMonitor { // uebUp = this.checkIfUebUp(); if (uebUp == false) { + if ((numIntervalsUebHasBeenDown % numIntervalsBetweenAlerts) == 0) { - logger.debug(EELFLoggerDelegate.debugLogger, "UEB down, logging to error log to trigger alert"); + logger.debug(EELFLoggerDelegate.debugLogger, + "monitorEPHealth: UEB down, logging to error log to trigger alert"); // Write a Log entry that will generate an alert EPLogUtil.logEcompError(logger, EPAppMessagesEnum.BeHealthCheckUebClusterError); numIntervalsUebHasBeenDown++; @@ -189,15 +188,18 @@ public class HealthMonitor { // could return information in the json content of a health check. // + // + // Get DB status. If down, signal alert once every X intervals. + // if (Thread.interrupted()) { - logger.debug(EELFLoggerDelegate.debugLogger, "monitorEPHealth: interrupted, leaving loop"); + logger.info(EELFLoggerDelegate.errorLogger, "monitorEPHealth: thread interrupted"); break; } try { Thread.sleep(sleepInterval); } catch (InterruptedException e) { - logger.error(EELFLoggerDelegate.errorLogger, "monitorEPHealth interrupted", e); + logger.error(EELFLoggerDelegate.errorLogger, "monitorEPHealth: sleep interrupted", e); Thread.currentThread().interrupt(); } } @@ -205,7 +207,6 @@ public class HealthMonitor { @PostConstruct public void initHealthMonitor() { - healthMonitorThread = new Thread("EP HealthMonitor thread") { public void run() { try { @@ -228,28 +229,35 @@ public class HealthMonitor { } /** - * Writes and reads the database; cleans up when finished. + * This routine checks whether the database can be read. In June 2017 we + * experimented with checking if the database can be WRITTEN. Writes failed + * with some regularity in a MariaDB Galera cluster, and in that + * environment, the resulting alerts in the log triggered a health monitor + * cron job to shut down the Tomcat instance. The root cause of the cluster + * write failures was not determined. * - * @return True on success; false otherwise. + * @return true if the database can be read. */ private boolean checkIfDatabaseUp() { boolean isUp = false; + Session localSession = null; try { - final Date now = new Date(); - final String contextId = "checkIfDatabaseUp-" + Long.toString(now.getTime()); - final String key = "checkIfDatabaseUp-key"; - final String value = "checkIfDatabaseUp-value"; - sharedContextService.addSharedContext(contextId, key, value); - SharedContext sc = sharedContextService.getSharedContext(contextId, key); - if (sc == null || sc.getCvalue() == null || !value.equals(sc.getCvalue())) - throw new Exception("Failed to retrieve shared context"); - int removed = sharedContextService.deleteSharedContexts(contextId); - if (removed != 1) - throw new Exception("Failed to delete shared context"); - isUp = true; + localSession = sessionFactory.openSession(); + if (localSession != null) { + String sql = "select app_name from fn_app where app_id=1"; + Query query = localSession.createSQLQuery(sql); + @SuppressWarnings("unchecked") + List<String> queryList = query.list(); + if (queryList != null) { + isUp = true; + } + } } catch (Exception e) { - logger.error(EELFLoggerDelegate.errorLogger, "checkIfDatabaseUp failed", e); + logger.debug(EELFLoggerDelegate.debugLogger, "checkIfDatabaseUp failed", e); isUp = false; + } finally { + if (localSession != null) + localSession.close(); } return isUp; } @@ -273,8 +281,7 @@ public class HealthMonitor { } catch (Exception e) { logger.error(EELFLoggerDelegate.errorLogger, "checkClusterStatus failed", e); if ((e.getCause() != null) && (e.getCause().getMessage() != null)) { - logger.error(EELFLoggerDelegate.errorLogger, - "checkClusterStatus() exception cause", e.getCause()); + logger.error(EELFLoggerDelegate.errorLogger, "checkClusterStatus failure cause", e.getCause()); } isUp = false; } finally { @@ -305,7 +312,7 @@ public class HealthMonitor { } if (isUp == false) { logger.error(EELFLoggerDelegate.errorLogger, - "checkDatabaseAndPermissions() returning false. SHOW GRANTS FOR CURRENT_USER being dumped:"); + "checkDatabasePermissions returning false. SHOW GRANTS FOR CURRENT_USER being dumped:"); for (String str : grantsList) { logger.error(EELFLoggerDelegate.errorLogger, "grants output item = [" + str + "]"); } @@ -314,8 +321,7 @@ public class HealthMonitor { } catch (Exception e) { logger.error(EELFLoggerDelegate.errorLogger, "checkDatabasePermissions failed", e); if ((e.getCause() != null) && (e.getCause().getMessage() != null)) { - logger.error(EELFLoggerDelegate.errorLogger, - "checkDatabasePermissions() exception msg = ", e.getCause()); + logger.error(EELFLoggerDelegate.errorLogger, "checkDatabasePermissions failure cause", e.getCause()); } isUp = false; } finally { |