aboutsummaryrefslogtreecommitdiffstats
path: root/integrity-monitor/src/main/java/org/openecomp/policy/common/im/IntegrityMonitor.java
diff options
context:
space:
mode:
authorGuo Ruijing <ruijing.guo@intel.com>2017-07-28 08:21:14 +0000
committerPamela Dragosh <pdragosh@research.att.com>2017-07-31 08:31:07 -0400
commitf8a620d1ff2b0d33b08a22279058f3e0253bdde1 (patch)
tree9092df7a6c57a46d36988245f60db0c29a7b1404 /integrity-monitor/src/main/java/org/openecomp/policy/common/im/IntegrityMonitor.java
parente7bd0934d383c811b0b7302c42991aa1d61941c4 (diff)
[POLICY-71] replace openecomp for policy-common
Change-Id: I3241f5d1f0234043b4dff718eda1ffdc48052276 Signed-off-by: Guo Ruijing <ruijing.guo@intel.com> Signed-off-by: Pamela Dragosh <pdragosh@research.att.com>
Diffstat (limited to 'integrity-monitor/src/main/java/org/openecomp/policy/common/im/IntegrityMonitor.java')
-rw-r--r--integrity-monitor/src/main/java/org/openecomp/policy/common/im/IntegrityMonitor.java1566
1 files changed, 0 insertions, 1566 deletions
diff --git a/integrity-monitor/src/main/java/org/openecomp/policy/common/im/IntegrityMonitor.java b/integrity-monitor/src/main/java/org/openecomp/policy/common/im/IntegrityMonitor.java
deleted file mode 100644
index 4e573d8c..00000000
--- a/integrity-monitor/src/main/java/org/openecomp/policy/common/im/IntegrityMonitor.java
+++ /dev/null
@@ -1,1566 +0,0 @@
-/*-
- * ============LICENSE_START=======================================================
- * Integrity Monitor
- * ================================================================================
- * Copyright (C) 2017 AT&T Intellectual Property. All rights reserved.
- * ================================================================================
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- * ============LICENSE_END=========================================================
- */
-
-package org.openecomp.policy.common.im;
-
-import java.net.InetAddress;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Date;
-import java.util.List;
-import java.util.Properties;
-import java.util.concurrent.TimeUnit;
-
-import javax.management.JMX;
-import javax.management.MBeanServerConnection;
-import javax.persistence.EntityManager;
-import javax.persistence.EntityManagerFactory;
-import javax.persistence.EntityTransaction;
-import javax.persistence.FlushModeType;
-import javax.persistence.LockModeType;
-import javax.persistence.Persistence;
-import javax.persistence.Query;
-
-import org.openecomp.policy.common.im.jpa.ForwardProgressEntity;
-import org.openecomp.policy.common.im.jpa.ResourceRegistrationEntity;
-import org.openecomp.policy.common.im.jpa.StateManagementEntity;
-import org.openecomp.policy.common.logging.flexlogger.FlexLogger;
-import org.openecomp.policy.common.logging.flexlogger.Logger;
-//import org.apache.log4j.Logger;
-import org.openecomp.policy.common.im.jmx.ComponentAdmin;
-import org.openecomp.policy.common.im.jmx.ComponentAdminMBean;
-import org.openecomp.policy.common.im.jmx.JmxAgentConnection;
-
-/**
- * IntegrityMonitor
- * Main class for monitoring the integrity of a resource and managing its state. State management follows
- * the X.731 ITU standard.
- */
-public class IntegrityMonitor {
- private static final Logger logger = FlexLogger.getLogger(IntegrityMonitor.class.getName());
-
- //private static final Map<String, IntegrityMonitor> imInstances = new HashMap<String, IntegrityMonitor>();
-
- // only allow one instance of IntegrityMonitor
- private static IntegrityMonitor instance = null;
-
- private static String resourceName = null;
- boolean alarmExists = false;
-
- /*
- * Error message that is written by the dependencyCheck() method. It is made available externally
- * through the evaluateSanity() method.
- */
- private String dependencyCheckErrorMsg = "";
-
-
- // The entity manager factory for JPA access
- private EntityManagerFactory emf;
- private EntityManager em;
-
- // Persistence Unit for JPA
- private static final String PERSISTENCE_UNIT = "operationalPU";
-
- private StateManagement stateManager = null;
-
- private static final int CYCLE_INTERVAL_MILLIS = 1000;
-
- // The forward progress counter is incremented as the
- // process being monitored makes forward progress
- private int fpCounter = 0;
- private int lastFpCounter = 0;
-
- // elapsed time since last FP counter check
- private long elapsedTime = 0;
-
- // elapsed time since last test transaction check
- private long elapsedTestTransTime = 0;
-
- // elapsed time since last write Fpc check
- private long elapsedWriteFpcTime = 0;
-
- // last dependency health check time. Initialize so that the periodic check starts after 60 seconds.
- // This allows time for dependents to come up.
- private long lastDependencyCheckTime = System.currentTimeMillis();
-
- // Time of the last state audit. It is initialized at the time of the IM construction
- private Date lastStateAuditTime = new Date();
-
- //Interval between state audits in ms. We leave it turned off by default so that it will only
- //be run on the nodes which we want doing the audit. In particular, we only want it to run
- //on the droolspdps
- private static long stateAuditIntervalMs = 0L;
-
- // the number of cycles since 'fpCounter' was last changed
- private int missedCycles = 0;
-
- // forward progress monitoring interval
- private static int monitorInterval = IntegrityMonitorProperties.DEFAULT_MONITOR_INTERVAL;
- // The number of periods the counter fails to increment before an alarm is raised.
- private static int failedCounterThreshold = IntegrityMonitorProperties.DEFAULT_FAILED_COUNTER_THRESHOLD;
- // test transaction interval
- private static int testTransInterval = IntegrityMonitorProperties.DEFAULT_TEST_INTERVAL;
- // write Fpc to DB interval
- private static int writeFpcInterval = IntegrityMonitorProperties.DEFAULT_WRITE_FPC_INTERVAL;
-
- // A lead subsystem will have dependency groups with resource names in the properties file.
- // For non-lead subsystems, the dependency_group property will be absent.
- private static String [] dep_groups = null;
-
- public static boolean isUnitTesting = false;
-
- // can turn on health checking of dependents via jmx test() call by setting this property to true
- private static boolean testViaJmx = false;
-
- private static String jmxFqdn = null;
-
- // this is the max interval seconds allowed without any forward progress counter updates
- private static int maxFpcUpdateInterval = IntegrityMonitorProperties.DEFAULT_MAX_FPC_UPDATE_INTERVAL;
-
- // Node types
- private enum NodeType {
- pdp_xacml,
- pdp_drools,
- pap,
- pap_admin,
- logparser,
- brms_gateway,
- astra_gateway,
- elk_server,
- pypdp
-
- }
-
- private static String site_name;
- private static String node_type;
- private Date refreshStateAuditLastRunDate;
- private static long refreshStateAuditIntervalMs = 600000; //run it once per 10 minutes
-
- //lock objects
- private final Object evaluateSanityLock = new Object();
- private final Object fpMonitorCycleLock = new Object();
- private final Object dependencyCheckLock = new Object();
- private final Object testTransactionLock = new Object();
- private final Object startTransactionLock = new Object();
- private final Object endTransactionLock = new Object();
- private final Object checkTestTransactionLock = new Object();
- private final Object checkWriteFpcLock = new Object();
- private static final Object getInstanceLock = new Object();
- private final Object refreshStateAuditLock = new Object();
- private final Object IMFLUSHLOCK = new Object();
-
- /**
- * Get an instance of IntegrityMonitor for a given resource name. It creates one if it does not exist.
- * Only one instance is allowed to be created per resource name.
- * @param resourceName The resource name of the resource
- * @param properties a set of properties passed in from the resource
- * @return The new instance of IntegrityMonitor
- * @throws Exception if unable to create jmx url or the constructor returns an exception
- */
- public static IntegrityMonitor getInstance(String resourceName, Properties properties) throws Exception {
- synchronized(getInstanceLock){
- logger.info("getInstance() called - resourceName=" + resourceName);
- if (resourceName == null || resourceName.isEmpty() || properties == null) {
- logger.error("Error: getIntegrityMonitorInstance() called with invalid input");
- return null;
- }
-
- if (instance == null) {
- logger.info("Creating new instance of IntegrityMonitor");
- instance = new IntegrityMonitor(resourceName, properties);
- }
- return instance;
- }
- }
-
- public static IntegrityMonitor getInstance() throws Exception{
- logger.info("getInstance() called");
- if (instance == null) {
- String msg = "No IntegrityMonitor instance exists."
- + " Please use the method IntegrityMonitor.getInstance(String resourceName, Properties properties)";
- throw new IntegrityMonitorPropertiesException(msg);
- }else{
- return instance;
- }
- }
-
- public static void deleteInstance(){
- logger.info("deleteInstance() called");
- if(isUnitTesting){
- instance=null;
- }
- logger.info("deleteInstance() exit");
- }
- /**
- * IntegrityMonitor constructor. It is invoked from the getInstance() method in
- * this class or from the constructor of a child or sub-class. A class can extend
- * the IntegrityMonitor class if there is a need to override any of the base
- * methods (ex. subsystemTest()). Only one instance is allowed to be created per
- * resource name.
- * @param resourceName The resource name of the resource
- * @param properties a set of properties passed in from the resource
- * @throws Exception if any errors are encountered in the consructor
- */
- protected IntegrityMonitor(String resourceName, Properties properties) throws Exception {
-
- // singleton check since this constructor can be called from a child or sub-class
- if (instance != null) {
- String msg = "IM object exists and only one instance allowed";
- logger.error(msg);
- throw new Exception("IntegrityMonitor constructor exception: " + msg);
- }
- instance = this;
-
- IntegrityMonitor.resourceName = resourceName;
-
- /*
- * Validate that the properties file contains all the needed properties. Throws
- * an IntegrityMonitorPropertiesException
- */
- validateProperties(properties);
-
- // construct jmx url
- String jmxUrl = getJmxUrl();
-
- //
- // Create the entity manager factory
- //
- emf = Persistence.createEntityManagerFactory(PERSISTENCE_UNIT, properties);
- //
- // Did it get created?
- //
- if (emf == null) {
- logger.error("Error creating IM entity manager factory with persistence unit: "
- + PERSISTENCE_UNIT);
- throw new Exception("Unable to create IM Entity Manager Factory");
- }
-
- // add entry to forward progress and resource registration tables in DB
-
- // Start a transaction
- em = emf.createEntityManager();
- EntityTransaction et = em.getTransaction();
-
- et.begin();
-
- try {
- // if ForwardProgress entry exists for resourceName, update it. If not found, create a new entry
- Query fquery = em.createQuery("Select f from ForwardProgressEntity f where f.resourceName=:rn");
- fquery.setParameter("rn", resourceName);
-
- @SuppressWarnings("rawtypes")
- List fpList = fquery.setLockMode(
- LockModeType.NONE).setFlushMode(FlushModeType.COMMIT).getResultList();
- ForwardProgressEntity fpx = null;
- if(!fpList.isEmpty()){
- //ignores multiple results
- fpx = (ForwardProgressEntity) fpList.get(0);
- // refresh the object from DB in case cached data was returned
- em.refresh(fpx);
- logger.info("Resource " + resourceName + " exists and will be updated - old fpc=" + fpx.getFpcCount() + ", lastUpdated=" + fpx.getLastUpdated());
- fpx.setFpcCount(fpCounter);
- }else{
- //Create a forward progress object
- logger.info("Adding resource " + resourceName + " to ForwardProgress table");
- fpx = new ForwardProgressEntity();
- }
- //update/set columns in entry
- fpx.setResourceName(resourceName);
- em.persist(fpx);
- // flush to the DB
- synchronized(IMFLUSHLOCK){
- em.flush();
- }
-
- // if ResourceRegistration entry exists for resourceName, update it. If not found, create a new entry
- Query rquery = em.createQuery("Select r from ResourceRegistrationEntity r where r.resourceName=:rn");
- rquery.setParameter("rn", resourceName);
-
- @SuppressWarnings("rawtypes")
- List rrList = rquery.setLockMode(
- LockModeType.NONE).setFlushMode(FlushModeType.COMMIT).getResultList();
- ResourceRegistrationEntity rrx = null;
- if(!rrList.isEmpty()){
- //ignores multiple results
- rrx = (ResourceRegistrationEntity) rrList.get(0);
- // refresh the object from DB in case cached data was returned
- em.refresh(rrx);
- logger.info("Resource " + resourceName + " exists and will be updated - old url=" + rrx.getResourceUrl() + ", createdDate=" + rrx.getCreatedDate());
- rrx.setLastUpdated(new Date());
- }else{
- // register resource by adding entry to table in DB
- logger.info("Adding resource " + resourceName + " to ResourceRegistration table");
- rrx = new ResourceRegistrationEntity();
- }
- //update/set columns in entry
- rrx.setResourceName(resourceName);
- rrx.setResourceUrl(jmxUrl);
- rrx.setNodeType(node_type);
- rrx.setSite(site_name);
- em.persist(rrx);
- // flush to the DB
- synchronized(IMFLUSHLOCK){
- et.commit();
- }
-
- } catch (Exception e) {
- logger.error("IntegrityMonitor constructor DB table update failed with exception: " + e);
- try {
- if (et.isActive()) {
- synchronized(IMFLUSHLOCK){
- et.rollback();
- }
- }
- } catch (Exception e1) {
- // ignore
- }
- throw e;
- }
-
- // create instance of StateMangement class and pass emf to it
- stateManager = new StateManagement(emf, resourceName);
-
- /**
- * Initialize the state and status attributes. This will maintain any Administrative state value
- * but will set the operational state = enabled, availability status = null, standby status = null.
- * The integrity monitor will set the operational state via the FPManager and the owning application
- * must set the standby status by calling promote/demote on the StateManager.
- */
- stateManager.initializeState();
-
-
- // create management bean
- try {
- new ComponentAdmin(resourceName, this, stateManager);
- } catch (Exception e) {
- logger.error("ComponentAdmin constructor exception: " + e.toString());
- }
-
- new FPManager();
-
-
- }
-
- private static String getJmxUrl() throws Exception {
-
- // get the jmx remote port and construct the JMX URL
- Properties systemProps = System.getProperties();
- String jmx_port = systemProps.getProperty("com.sun.management.jmxremote.port");
- String jmx_err_msg = "";
- if (jmx_port == null) {
- jmx_err_msg = "System property com.sun.management.jmxremote.port for JMX remote port is not set";
- logger.error(jmx_err_msg);
- throw new Exception("getJmxUrl exception: " + jmx_err_msg);
- }
-
- int port = 0;
- try {
- port = Integer.parseInt(jmx_port);
- } catch (NumberFormatException e) {
- jmx_err_msg = "JMX remote port is not a valid integer value - " + jmx_port;
- logger.error(jmx_err_msg);
- throw new Exception("getJmxUrl exception: " + jmx_err_msg);
- }
-
- try {
- if (jmxFqdn == null) {
- jmxFqdn = InetAddress.getLocalHost().getCanonicalHostName(); // get FQDN of this host
- }
- } catch (Exception e) {
- String msg = "getJmxUrl could not get hostname" + e;
- logger.error(msg);
- throw new Exception("getJmxUrl Exception: " + msg);
- }
- if (jmxFqdn == null) {
- String msg = "getJmxUrl encountered null hostname";
- logger.error(msg);
- throw new Exception("getJmxUrl error: " + msg);
- }
-
- // assemble the jmx url
- String jmx_url = "service:jmx:rmi:///jndi/rmi://" + jmxFqdn + ":" + port + "/jmxrmi";
- logger.info("IntegerityMonitor - jmx url=" + jmx_url);
-
- return jmx_url;
- }
- /**
- * evaluateSanity() is designed to be called by an external entity to evealuate the sanity
- * of the node. It checks the operational and administrative states and the standby
- * status. If the operational state is disabled, it will include the dependencyCheckErrorMsg
- * which includes information about any dependency (node) which has failed.
- */
- public void evaluateSanity() throws Exception {
- logger.debug("evaluateSanity called ....");
- synchronized(evaluateSanityLock){
-
- String error_msg = dependencyCheckErrorMsg;
- logger.debug("evaluateSanity dependencyCheckErrorMsg = " + error_msg);
-
- // check op state and throw exception if disabled
- if ((stateManager.getOpState() != null) && stateManager.getOpState().equals(StateManagement.DISABLED)) {
- String msg = "Resource " + resourceName + " operation state is disabled. " + error_msg;
- logger.debug(msg);
- throw new Exception(msg);
- }
-
- // check admin state and throw exception if locked
- if ((stateManager.getAdminState() != null) && stateManager.getAdminState().equals(StateManagement.LOCKED)) {
- String msg = "Resource " + resourceName + " is administratively locked";
- logger.debug(msg);
- throw new AdministrativeStateException("IntegrityMonitor Admin State Exception: " + msg);
- }
- // check standby state and throw exception if cold standby
- if ((stateManager.getStandbyStatus() != null) && stateManager.getStandbyStatus().equals(StateManagement.COLD_STANDBY)){
- String msg = "Resource " + resourceName + " is cold standby";
- logger.debug(msg);
- throw new StandbyStatusException("IntegrityMonitor Standby Status Exception: " + msg);
- }
-
-/*
- * This is checked in the FPManager where the state is coordinated
- if (fpcError) {
- String msg = resourceName + ": no forward progress detected";
- logger.error(msg);
- throw new ForwardProgressException(msg);
- }
-
- * Additional testing to be provided by susbsystemTest() which could be overridden
- * This has been moved to dependencyCheck where it is treated as testing of a dependency
- subsystemTest();
-*/
- }
-
- }
-
- /*
- * This method checks the forward progress counter and the state of
- * a dependency. If the dependency is unavailable or failed, an
- * error message is created which is checked when evaluateSanity interface
- * is called. If the error message is set then the evaluateSanity
- * will return an error.
- */
- public String stateCheck(String dep) {
- logger.debug("checking state of dependent resource: " + dep);
-
- String error_msg = null;
- ForwardProgressEntity forwardProgressEntity = null;
- StateManagementEntity stateManagementEntity = null;
-
- // Start a transaction
- EntityTransaction et = em.getTransaction();
- et.begin();
-
- try{
- Query query = em.createQuery("Select p from ForwardProgressEntity p where p.resourceName=:resource");
- query.setParameter("resource", dep);
-
- @SuppressWarnings("rawtypes")
- List fpList = query.setLockMode(
- LockModeType.NONE).setFlushMode(FlushModeType.COMMIT).getResultList();
-
- if (!fpList.isEmpty()) {
- // exists
- forwardProgressEntity = (ForwardProgressEntity) fpList.get(0);
- // refresh the object from DB in case cached data was returned
- em.refresh(forwardProgressEntity);
- logger.debug("Found entry in ForwardProgressEntity table for dependent Resource=" + dep);
- } else {
- error_msg = dep + ": resource not found in ForwardProgressEntity database table";
- logger.debug(error_msg);
- logger.error(error_msg);
- }
- synchronized(IMFLUSHLOCK){
- et.commit();
- }
- }
- catch(Exception ex){
- // log an error
- error_msg = dep + ": ForwardProgressEntity DB operation failed with exception: " + ex;
- logger.debug(error_msg);
- logger.error(error_msg);
- synchronized(IMFLUSHLOCK){
- if(et.isActive()){
- et.rollback();
- }
- }
- }
-
- if(error_msg==null){
- // Start a transaction
- et = em.getTransaction();
- et.begin();
- try {
- // query if StateManagement entry exists for dependent resource
- Query query = em.createQuery("Select p from StateManagementEntity p where p.resourceName=:resource");
- query.setParameter("resource", dep);
-
- @SuppressWarnings("rawtypes")
- List smList = query.setLockMode(
- LockModeType.NONE).setFlushMode(FlushModeType.COMMIT).getResultList();
- if (!smList.isEmpty()) {
- // exist
- stateManagementEntity = (StateManagementEntity) smList.get(0);
- // refresh the object from DB in case cached data was returned
- em.refresh(stateManagementEntity);
- logger.debug("Found entry in StateManagementEntity table for dependent Resource=" + dep);
- } else {
- error_msg = dep + ": resource not found in state management entity database table";
- logger.debug(error_msg);
- logger.error(error_msg);
- }
-
- synchronized(IMFLUSHLOCK){
- et.commit();
- }
- } catch (Exception e) {
- // log an error
- error_msg = dep + ": StateManagementEntity DB read failed with exception: " + e;
- logger.debug(error_msg);
- logger.error(error_msg);
- synchronized(IMFLUSHLOCK){
- if(et.isActive()){
- et.rollback();
- }
- }
- }
- }
-
- //verify that the ForwardProgress is current (check last_updated)
- if(error_msg==null){
- Date date = new Date();
- long diffMs = date.getTime() - forwardProgressEntity.getLastUpdated().getTime();
- logger.debug("IntegrityMonitor.stateCheck(): diffMs = " + diffMs);
-
- //Threshold for a stale entry
- long staleMs = failedCounterThreshold * monitorInterval * 1000;
- logger.debug("IntegrityMonitor.stateCheck(): staleMs = " + staleMs);
-
- if(diffMs > staleMs){
- //ForwardProgress is stale. Disable it
- try {
- if(!stateManagementEntity.getOpState().equals(StateManagement.DISABLED)){
- logger.debug("IntegrityMonitor.stateCheck(): Changing OpStat = disabled for " + dep);
- stateManager.disableFailed(dep);
- }
- } catch (Exception e) {
- String msg = "IntegrityMonitor.stateCheck(): Failed to diableFail dependent resource = " + dep
- + "; " + e.getMessage();
- logger.debug(msg);
- logger.error(msg);
- }
- }
- }
-
- // check operation, admin and standby states of dependent resource
- if (error_msg == null) {
- if ((stateManager.getAdminState() != null) && stateManagementEntity.getAdminState().equals(StateManagement.LOCKED)) {
- error_msg = dep + ": resource is administratively locked";
- logger.debug(error_msg);
- logger.error(error_msg);
- } else if ((stateManager.getOpState() != null) && stateManagementEntity.getOpState().equals(StateManagement.DISABLED)) {
- error_msg = dep + ": resource is operationally disabled";
- logger.debug(error_msg);
- logger.error(error_msg);
- } else if ((stateManager.getStandbyStatus() != null) && stateManagementEntity.getStandbyStatus().equals(StateManagement.COLD_STANDBY)) {
- error_msg = dep + ": resource is cold standby";
- logger.debug(error_msg);
- logger.error(error_msg);
- }
- }
-
- String returnMsg = "IntegrityMonitor.stateCheck(): returned error_msg: " + error_msg;
- logger.debug(returnMsg);
- return error_msg;
- }
-
- private String fpCheck(String dep) {
- logger.debug("checking forward progress count of dependent resource: " + dep);
-
- String error_msg = null;
-
- // check FPC count - a changing FPC count indicates the resource JVM is running
-
- // Start a transaction
- EntityTransaction et = em.getTransaction();
- et.begin();
- try {
- Query fquery = em.createQuery("Select f from ForwardProgressEntity f where f.resourceName=:rn");
- fquery.setParameter("rn", dep);
-
- @SuppressWarnings("rawtypes")
- List fpList = fquery.setLockMode(
- LockModeType.NONE).setFlushMode(FlushModeType.COMMIT).getResultList();
- ForwardProgressEntity fpx = null;
- if (!fpList.isEmpty()) {
- //ignores multiple results
- fpx = (ForwardProgressEntity) fpList.get(0);
- // refresh the object from DB in case cached data was returned
- em.refresh(fpx);
- logger.debug("Dependent resource " + dep + " - fpc=" + fpx.getFpcCount() + ", lastUpdated=" + fpx.getLastUpdated());
- long currTime = System.currentTimeMillis();
- // if dependent resource FPC has not been updated, consider it an error
- if ((currTime - fpx.getLastUpdated().getTime()) > (1000 * maxFpcUpdateInterval)) {
- error_msg = dep + ": FP count has not been updated in the last " + maxFpcUpdateInterval + " seconds";
- logger.error(error_msg);
- try {
- // create instance of StateMangement class for dependent
- StateManagement depStateManager = new StateManagement(emf, dep);
- if (depStateManager != null) {
- if(!depStateManager.getOpState().equals(StateManagement.DISABLED)){
- logger.info("Forward progress not detected for dependent resource " + dep + ". Setting dependent's state to disable failed.");
- depStateManager.disableFailed();
- }
- }
- } catch (Exception e) {
- // ignore errors
- logger.info("Update dependent state failed with exception: " + e);
- }
- }
- } else {
- // resource entry not found in FPC table
- error_msg = dep + ": resource not found in ForwardProgressEntity table in the DB";
- logger.error(error_msg);
- }
- synchronized(IMFLUSHLOCK){
- et.commit();
- }
- } catch (Exception e) {
- // log an error and continue
- error_msg = dep + ": ForwardProgressEntity DB read failed with exception: " + e;
- logger.error(error_msg);
- synchronized(IMFLUSHLOCK){
- if(et.isActive()){
- et.rollback();
- }
- }
- }
-
- return error_msg;
- }
-
- public ArrayList<ForwardProgressEntity> getAllForwardProgressEntity(){
- if(logger.isDebugEnabled()){
- logger.debug("getAllForwardProgressEntity: entry");
- }
- ArrayList<ForwardProgressEntity> fpList = new ArrayList<ForwardProgressEntity>();
- // Start a transaction
- EntityTransaction et = em.getTransaction();
- et.begin();
- try {
- Query fquery = em.createQuery("Select e from ForwardProgressEntity e");
- @SuppressWarnings("rawtypes")
- List myList = fquery.setLockMode(
- LockModeType.NONE).setFlushMode(FlushModeType.COMMIT).getResultList();
- synchronized(IMFLUSHLOCK){
- et.commit();
- }
- if(logger.isDebugEnabled()){
- logger.debug("getAllForwardProgressEntity: myList.size(): " + myList.size());
- }
- if(!myList.isEmpty()){
- for(int i = 0; i < myList.size(); i++){
- if(logger.isDebugEnabled()){
- logger.debug("getAllForwardProgressEntity: myList.get(" + i +").getResourceName()"
- + ": " + ((ForwardProgressEntity)myList.get(i)).getResourceName());
- }
- fpList.add((ForwardProgressEntity) myList.get(i));
- }
- }
- synchronized(IMFLUSHLOCK){
- if(et.isActive()){
- et.commit();
- }
- }
- } catch (Exception e) {
- // log an error and continue
- String msg = "getAllForwardProgessEntity DB read failed with exception: " + e;
- logger.error(msg);
- synchronized(IMFLUSHLOCK){
- if(et.isActive()){
- et.rollback();
- }
- }
- }
- return fpList;
- }
-
-
- private String jmxCheck(String dep) {
- logger.debug("checking health of dependent by calling test() via JMX on resource: " + dep);
-
- String error_msg = null;
-
- // get the JMX URL from the database
- String jmxUrl = null;
- // Start a transaction
- EntityTransaction et = em.getTransaction();
- et.begin();
- try {
- // query if ResourceRegistration entry exists for resourceName
- Query rquery = em.createQuery("Select r from ResourceRegistrationEntity r where r.resourceName=:rn");
- rquery.setParameter("rn", dep);
-
- @SuppressWarnings("rawtypes")
- List rrList = rquery.setLockMode(
- LockModeType.NONE).setFlushMode(FlushModeType.COMMIT).getResultList();
- ResourceRegistrationEntity rrx = null;
-
- if (!rrList.isEmpty()) {
- //ignores multiple results
- rrx = (ResourceRegistrationEntity) rrList.get(0);
- // refresh the object from DB in case cached data was returned
- em.refresh(rrx);
- jmxUrl = rrx.getResourceUrl();
- logger.debug("Dependent Resource=" + dep + ", url=" + jmxUrl + ", createdDate=" + rrx.getCreatedDate());
- } else {
- error_msg = dep + ": resource not found in ResourceRegistrationEntity table in the DB";
- logger.error(error_msg);
- }
-
- synchronized(IMFLUSHLOCK){
- et.commit();
- }
- } catch (Exception e) {
- error_msg = dep + ": ResourceRegistrationEntity DB read failed with exception: " + e;
- logger.error(error_msg);
- synchronized(IMFLUSHLOCK){
- if(et.isActive()){
- et.rollback();
- }
- }
- }
-
-
- if (jmxUrl != null) {
- JmxAgentConnection jmxAgentConnection = null;
- try {
- jmxAgentConnection = new JmxAgentConnection(jmxUrl);
- MBeanServerConnection mbeanServer = jmxAgentConnection.getMBeanConnection();
- ComponentAdminMBean admin = JMX.newMXBeanProxy(mbeanServer, ComponentAdmin.getObjectName(dep),
- ComponentAdminMBean.class);
-
- // invoke the test method via the jmx proxy
- admin.test();
- logger.debug("Dependent resource " + dep + " sanity test passed");
- } catch (Exception e) {
- error_msg = dep + ": resource sanity test failed with exception: " + e;
- logger.error(error_msg);
- // TODO: extract real error message from exception which may be nested
- } finally {
- // close the JMX connector
- if (jmxAgentConnection != null) {
- jmxAgentConnection.disconnect();
- }
- }
- }
-
- return error_msg;
- }
-
- private String dependencyCheck() {
- logger.debug("dependencyCheck: entry - checking health of dependent groups and setting resource's state");
- synchronized(dependencyCheckLock){
-
- // Start with the error message empty
- String error_msg = "";
- boolean dependencyFailure = false;
-
-
- // Check the sanity of dependents for lead subcomponents
- if (dep_groups != null && dep_groups.length > 0) {
- // check state of resources in dependency groups
- for (String group : dep_groups) {
- group = group.trim();
- if (group.isEmpty()) {
- // ignore empty group
- continue;
- }
- String [] dependencies = group.split(",");
- logger.debug("group dependencies = " + Arrays.toString(dependencies));
- int real_dep_count = 0;
- int fail_dep_count = 0;
- for (String dep : dependencies) {
- dep = dep.trim();
- if (dep.isEmpty()) {
- // ignore empty dependency
- continue;
- }
- real_dep_count++; // this is a valid dependency whose state is tracked
- String fail_msg = fpCheck(dep); // if a resource is down, its FP count will not be incremented
- if (fail_msg == null) {
- if (testViaJmx) {
- fail_msg = jmxCheck(dep);
- } else {
- fail_msg = stateCheck(dep);
- }
- }
- if (fail_msg != null) {
- fail_dep_count++;
- if (!error_msg.isEmpty()) {
- error_msg = error_msg.concat(", ");
- }
- error_msg = error_msg.concat(fail_msg);
- }
- }// end for (String dep : dependencies)
-
- // if all dependencies in a group are failed, set this resource's state to disable dependency
- if ((real_dep_count > 0) && (fail_dep_count == real_dep_count)) {
- dependencyFailure=true;
- try {
- logger.info("All dependents in group " + group + " have failed their health check. Updating this resource's state to disableDependency");
- if( !( (stateManager.getAvailStatus()).equals(StateManagement.DEPENDENCY) ||
- (stateManager.getAvailStatus()).equals(StateManagement.DEPENDENCY_FAILED) ) ){
- // Note: redundant calls are made by refreshStateAudit
- this.stateManager.disableDependency();
- }else //corruption has occurred - This will not be corrected by the refreshStateAudit
- if(!(stateManager.getOpState()).equals(StateManagement.DISABLED)){
- // Note: redundant calls are made by refreshStateAudit
- this.stateManager.disableDependency();
- }
- } catch (Exception e) {
- if (!error_msg.isEmpty()) {
- error_msg = error_msg.concat(",");
- }
- error_msg = error_msg.concat(resourceName + ": Failed to disable dependency");
- break; // break out on failure and skip checking other groups
- }
- }
- //check the next group
-
- }//end for (String group : dep_groups)
-
- /*
- * We have checked all the dependency groups. If all are ok, dependencyFailure == false
- */
- if(!dependencyFailure){
- try {
- logger.debug("All dependency groups have at least one viable member. Updating this resource's state to enableNoDependency");
- if( ( (stateManager.getAvailStatus()).equals(StateManagement.DEPENDENCY) ||
- (stateManager.getAvailStatus()).equals(StateManagement.DEPENDENCY_FAILED) ) ){
- // Note: redundant calls are made by refreshStateAudit
- this.stateManager.enableNoDependency();
- } // The refreshStateAudit will catch the case where it is disabled but availStatus != failed
- } catch (Exception e) {
- if (!error_msg.isEmpty()) {
- error_msg = error_msg.concat(",");
- }
- error_msg = error_msg.concat(resourceName + ": Failed to enable no dependency");
- }
- }
- }else{
- /*
- * This is put here to clean up when no dependency group should exist, but one was erroneously
- * added which caused the state to be disabled/dependency/coldstandby and later removed. We saw
- * this happen in the lab, but is not very likely in a production environment...but you never know.
- */
- try {
- logger.debug("There are no dependents. Updating this resource's state to enableNoDependency");
- if( ( (stateManager.getAvailStatus()).equals(StateManagement.DEPENDENCY) ||
- (stateManager.getAvailStatus()).equals(StateManagement.DEPENDENCY_FAILED) ) ){
- // Note: redundant calls are made by refreshStateAudit
- this.stateManager.enableNoDependency();
- }// The refreshStateAudit will catch the case where it is disabled but availStatus != failed
- } catch (Exception e) {
- if (!error_msg.isEmpty()) {
- error_msg = error_msg.concat(",");
- }
- error_msg = error_msg.concat(resourceName + ": Failed to enable no dependency");
- }
- }
-
- /*
- * We have checked dependency groups and if there were none, we set enableNoDependency. If there were some
- * but they are all ok, we set enableNoDependency. So, the recovery from a disabled dependency state
- * is handled above. We only need to set disableDependency if the subsystemTest fails.
- */
- try {
- //Test any subsystems that are not covered under the dependency relationship
- subsystemTest();
- }catch (Exception e){
- //This indicates a subsystemTest failure
- try {
- logger.info(resourceName + ": There has been a subsystemTest failure with error: " + e.getMessage() + " Updating this resource's state to disableDependency");
- //Capture the subsystemTest failure info
- if(!error_msg.isEmpty()){
- error_msg = error_msg.concat(",");
- }
- error_msg = error_msg.concat(resourceName + ": " + e.getMessage());
- this.stateManager.disableDependency();
- } catch (Exception ex) {
- if (!error_msg.isEmpty()) {
- error_msg = error_msg.concat(",");
- }
- error_msg = error_msg.concat("\n" + resourceName + ": Failed to disable dependency after subsystemTest failure due to: " + ex.getMessage());
- }
- }
-
- if (!error_msg.isEmpty()) {
- logger.error("Sanity failure detected in a dependent resource: " + error_msg);
-
- }
-
- dependencyCheckErrorMsg = error_msg;
- lastDependencyCheckTime = System.currentTimeMillis();
- return error_msg;
- }
- }
-
- /**
- * Execute a test transaction. It is called when the test transaction timer fires.
- * It could be overridden to provide additional test functionality. If overridden,
- * the overriding method must invoke startTransaction() and endTransaction()
- */
- public void testTransaction() {
- synchronized (testTransactionLock){
- logger.debug("testTransaction called...");
- // start Transaction - resets transaction timer and check admin state
- try {
- startTransaction();
- } catch (AdministrativeStateException e) {
- // ignore
- } catch (StandbyStatusException e) {
- // ignore
- }
-
- // TODO: add test functionality if needed
-
- // end transaction - increments local FP counter
- endTransaction();
- }
- }
-
- /**
- * Additional testing for subsystems that do not have a /test interface (for ex. 3rd party
- * processes like elk). This method would be overridden by the subsystem.
- */
- public void subsystemTest() throws Exception {
- // Testing provided by subsystem
- logger.debug("IntegrityMonitor subsystemTest() OK");
- }
-
- /**
- * Checks admin state and resets transaction timer.
- * Called by application at the start of a transaction.
- * @throws AdministrativeStateException throws admin state exception if resource is locked
- * @throws StandbyStatusException
- */
- public void startTransaction() throws AdministrativeStateException, StandbyStatusException {
-
- synchronized(startTransactionLock){
- // check admin state and throw exception if locked
- if ((stateManager.getAdminState() != null) && stateManager.getAdminState().equals(StateManagement.LOCKED)) {
- String msg = "Resource " + resourceName + " is administratively locked";
- // logger.debug(msg);
- throw new AdministrativeStateException("IntegrityMonitor Admin State Exception: " + msg);
- }
- // check standby state and throw exception if locked
-
- if ((stateManager.getStandbyStatus() != null) &&
- (stateManager.getStandbyStatus().equals(StateManagement.HOT_STANDBY) ||
- stateManager.getStandbyStatus().equals(StateManagement.COLD_STANDBY))){
- String msg = "Resource " + resourceName + " is standby";
- //logger.debug(msg);
- throw new StandbyStatusException("IntegrityMonitor Standby Status Exception: " + msg);
- }
-
- // reset transactionTimer so it will not fire
- elapsedTestTransTime = 0;
- }
- }
-
- /**
- * Increment the local forward progress counter. Called by application at the
- * end of each transaction (successful or not).
- */
- public void endTransaction() {
- synchronized(endTransactionLock){
- // increment local FPC
- fpCounter++;
- }
- }
-
- // update FP count in DB with local FP count
- private void writeFpc() throws Exception {
-
- // Start a transaction
- EntityTransaction et = em.getTransaction();
-
- if(!et.isActive()){
- et.begin();
- }
-
- try {
- // query if ForwardProgress entry exists for resourceName
- Query fquery = em.createQuery("Select f from ForwardProgressEntity f where f.resourceName=:rn");
- fquery.setParameter("rn", resourceName);
-
- @SuppressWarnings("rawtypes")
- List fpList = fquery.setLockMode(
- LockModeType.NONE).setFlushMode(FlushModeType.COMMIT).getResultList();
- ForwardProgressEntity fpx = null;
- if(!fpList.isEmpty()) {
- //ignores multiple results
- fpx = (ForwardProgressEntity) fpList.get(0);
- // refresh the object from DB in case cached data was returned
- em.refresh(fpx);
- logger.debug("Updating FP entry: Resource=" + resourceName + ", fpcCount=" + fpx.getFpcCount() +
- ", lastUpdated=" + fpx.getLastUpdated() + ", new fpcCount=" + fpCounter);
- fpx.setFpcCount(fpCounter);
- em.persist(fpx);
- // flush to the DB and commit
- synchronized(IMFLUSHLOCK){
- et.commit();
- }
- }
- else {
- // Error - FP entry does not exist
- String msg = "FP entry not found in database for resource " + resourceName;
- throw new Exception(msg);
- }
- } catch (Exception e) {
- try {
- synchronized(IMFLUSHLOCK){
- if (et.isActive()) {
- et.rollback();
- }
- }
- } catch (Exception e1) {
- // ignore
- }
- logger.error("writeFpc DB table commit failed with exception: " + e);
- throw e;
- }
- }
-
- // retrieve state manager reference
- public final StateManagement getStateManager() {
- return this.stateManager;
- }
-
- /**
- * Read and validate properties
- * @throws Exception
- */
- private static void validateProperties(Properties prop) throws IntegrityMonitorPropertiesException {
-
- if (prop.getProperty(IntegrityMonitorProperties.DB_DRIVER)== null){
- String msg = IntegrityMonitorProperties.DB_DRIVER + " property is null";
- logger.error(msg);
- throw new IntegrityMonitorPropertiesException("IntegrityMonitor Property Exception: " + msg);
- }
-
- if (prop.getProperty(IntegrityMonitorProperties.DB_URL)== null){
- String msg = IntegrityMonitorProperties.DB_URL + " property is null";
- logger.error(msg);
- throw new IntegrityMonitorPropertiesException("IntegrityMonitor Property Exception: " + msg);
- }
-
- if (prop.getProperty(IntegrityMonitorProperties.DB_USER)== null){
- String msg = IntegrityMonitorProperties.DB_USER + " property is null";
- logger.error(msg);
- throw new IntegrityMonitorPropertiesException("IntegrityMonitor Property Exception: " + msg);
- }
-
- if (prop.getProperty(IntegrityMonitorProperties.DB_PWD)== null){
- String msg = IntegrityMonitorProperties.DB_PWD + " property is null";
- logger.error(msg);
- throw new IntegrityMonitorPropertiesException("IntegrityMonitor Property Exception: " + msg);
- }
-
- if (prop.getProperty(IntegrityMonitorProperties.FP_MONITOR_INTERVAL) != null) {
- try {
- monitorInterval = Integer.parseInt(prop.getProperty(IntegrityMonitorProperties.FP_MONITOR_INTERVAL).trim());
- } catch (NumberFormatException e) {
- logger.warn("Ignored invalid property: " + IntegrityMonitorProperties.FP_MONITOR_INTERVAL);
- }
- }
-
- if (prop.getProperty(IntegrityMonitorProperties.FAILED_COUNTER_THRESHOLD) != null) {
- try {
- failedCounterThreshold = Integer.parseInt(prop.getProperty(IntegrityMonitorProperties.FAILED_COUNTER_THRESHOLD).trim());
- } catch (NumberFormatException e) {
- logger.warn("Ignored invalid property: " + IntegrityMonitorProperties.FAILED_COUNTER_THRESHOLD);
- }
- }
-
- if (prop.getProperty(IntegrityMonitorProperties.TEST_TRANS_INTERVAL) != null) {
- try {
- testTransInterval = Integer.parseInt(prop.getProperty(IntegrityMonitorProperties.TEST_TRANS_INTERVAL).trim());
- } catch (NumberFormatException e) {
- logger.warn("Ignored invalid property: " + IntegrityMonitorProperties.TEST_TRANS_INTERVAL);
- }
- }
-
- if (prop.getProperty(IntegrityMonitorProperties.WRITE_FPC_INTERVAL) != null) {
- try {
- writeFpcInterval = Integer.parseInt(prop.getProperty(IntegrityMonitorProperties.WRITE_FPC_INTERVAL).trim());
- } catch (NumberFormatException e) {
- logger.warn("Ignored invalid property: " + IntegrityMonitorProperties.WRITE_FPC_INTERVAL);
- }
- }
-
- // dependency_groups are a semi-colon separated list of groups
- // each group is a comma separated list of resource names
- // For ex. dependency_groups = site_1.pap_1,site_1.pap_2 ; site_1.pdp_1, site_1.pdp_2
- if (prop.getProperty(IntegrityMonitorProperties.DEPENDENCY_GROUPS) != null) {
- try {
- dep_groups = prop.getProperty(IntegrityMonitorProperties.DEPENDENCY_GROUPS).split(";");
- logger.info("dependency groups property = " + Arrays.toString(dep_groups));
- } catch (Exception e) {
- logger.warn("Ignored invalid property: " + IntegrityMonitorProperties.DEPENDENCY_GROUPS);
- }
- }
-
- site_name = prop.getProperty(IntegrityMonitorProperties.SITE_NAME);
- if (site_name == null) {
- String msg = IntegrityMonitorProperties.SITE_NAME + " property is null";
- logger.error(msg);
- throw new IntegrityMonitorPropertiesException("IntegrityMonitor Property Exception: " + msg);
- }else{
- site_name = site_name.trim();
- }
-
- node_type = prop.getProperty(IntegrityMonitorProperties.NODE_TYPE);
- if (node_type == null) {
- String msg = IntegrityMonitorProperties.NODE_TYPE + " property is null";
- logger.error(msg);
- throw new IntegrityMonitorPropertiesException("IntegrityMonitor Property Exception: " + msg);
- } else {
- node_type = node_type.trim();
- if (!isNodeTypeEnum(node_type)) {
- String msg = IntegrityMonitorProperties.NODE_TYPE + " property " + node_type + " is invalid";
- logger.error(msg);
- throw new IntegrityMonitorPropertiesException("IntegrityMonitor Property Exception: " + msg);
- }
- }
-
- if (prop.getProperty(IntegrityMonitorProperties.TEST_VIA_JMX) != null) {
- String jmx_test = prop.getProperty(IntegrityMonitorProperties.TEST_VIA_JMX).trim();
- testViaJmx = Boolean.parseBoolean(jmx_test);
- }
-
- if (prop.getProperty(IntegrityMonitorProperties.JMX_FQDN) != null) {
- jmxFqdn = prop.getProperty(IntegrityMonitorProperties.JMX_FQDN).trim();
- if (jmxFqdn.isEmpty()) {
- jmxFqdn = null;
- }
- }
-
-
- if (prop.getProperty(IntegrityMonitorProperties.MAX_FPC_UPDATE_INTERVAL) != null) {
- try {
- maxFpcUpdateInterval = Integer.parseInt(prop.getProperty(IntegrityMonitorProperties.MAX_FPC_UPDATE_INTERVAL).trim());
- } catch (NumberFormatException e) {
- logger.warn("Ignored invalid property: " + IntegrityMonitorProperties.MAX_FPC_UPDATE_INTERVAL);
- }
- }
-
- if (prop.getProperty(IntegrityMonitorProperties.STATE_AUDIT_INTERVAL_MS) != null){
- try{
- stateAuditIntervalMs = Long.parseLong(prop.getProperty(IntegrityMonitorProperties.STATE_AUDIT_INTERVAL_MS));
- }catch(NumberFormatException e){
- logger.warn("Ignored invalid property: " + IntegrityMonitorProperties.STATE_AUDIT_INTERVAL_MS);
- }
- }
-
- if (prop.getProperty(IntegrityMonitorProperties.REFRESH_STATE_AUDIT_INTERVAL_MS) != null){
- try{
- refreshStateAuditIntervalMs = Long.parseLong(prop.getProperty(IntegrityMonitorProperties.REFRESH_STATE_AUDIT_INTERVAL_MS));
- }catch(NumberFormatException e){
- logger.warn("Ignored invalid property: " + IntegrityMonitorProperties.REFRESH_STATE_AUDIT_INTERVAL_MS);
- }
- }
-
- return;
- }
-
- public static void updateProperties(Properties newprop) {
- if (isUnitTesting) {
- try {
- validateProperties(newprop);
- } catch (IntegrityMonitorPropertiesException e) {
- // ignore
- }
- }
- else {
- logger.info("Update integrity monitor properties not allowed");
- }
- }
-
- private static boolean isNodeTypeEnum(String nodeType) {
- for (NodeType n : NodeType.values()) {
- if (n.toString().equals(nodeType)) {
- return true;
- }
- }
- return false;
- }
-
- /**
- * Look for "Forward Progress" -- if the 'FPMonitor' is stalled
- * for too long, the operational state is changed to 'Disabled',
- * and an alarm is set. The state is restored when forward
- * progress continues.
- */
- private void fpMonitorCycle() {
- synchronized(fpMonitorCycleLock){
- // monitoring interval checks
- if (monitorInterval <= 0) {
- elapsedTime = 0;
- return; // monitoring is disabled
- }
-
- elapsedTime = elapsedTime + TimeUnit.MILLISECONDS.toSeconds(CYCLE_INTERVAL_MILLIS);
- if (elapsedTime < monitorInterval) {
- return; // monitoring interval not reached
- }
-
- elapsedTime = 0; // reset elapsed time
-
- // TODO: check if alarm exists
-
- try {
- if (fpCounter == lastFpCounter) {
- // no forward progress
- missedCycles += 1;
- if (missedCycles >= failedCounterThreshold && !alarmExists) {
- logger.info("Forward progress not detected for resource " + resourceName + ". Setting state to disable failed.");
- if(!(stateManager.getOpState()).equals(StateManagement.DISABLED)){
- // Note: The refreshStateAudit will make redundant calls
- stateManager.disableFailed();
- }// The refreshStateAudit will catch the case where opStat = disabled and availState ! failed/dependency.failed
- // TODO: raise alarm or Nagios alert
- alarmExists = true;
- }
- } else {
- // forward progress has occurred
- lastFpCounter = fpCounter;
- missedCycles = 0;
- // set op state to enabled
- logger.debug("Forward progress detected for resource " + resourceName + ". Setting state to enable not failed.");
- if(!(stateManager.getOpState()).equals(StateManagement.ENABLED)){
- // Note: The refreshStateAudit will make redundant calls
- stateManager.enableNotFailed();
- }// The refreshStateAudit will catch the case where opState=enabled and availStatus != null
-
- // TODO: clear alarm or Nagios alert
- alarmExists = false;
- }
- } catch (Exception e) {
- // log error
- logger.error("FP Monitor encountered error. ", e);
- }
- }
- }
-
- /**
- * Look for "Forward Progress" on other nodes. If they are not making forward progress,
- * check their operational state. If it is not disabled, then disable them.
- */
- public void stateAudit() {
-
- if (stateAuditIntervalMs <= 0) {
- return; // stateAudit is disabled
- }
-
- //Only run from nodes that are operational
- if(stateManager.getOpState().equals(StateManagement.DISABLED)){
- return;
- }
- if(stateManager.getAdminState().equals(StateManagement.LOCKED)){
- return;
- }
- if(!stateManager.getStandbyStatus().equals(StateManagement.NULL_VALUE) &&
- stateManager.getStandbyStatus()!= null){
- if(!stateManager.getStandbyStatus().equals(StateManagement.PROVIDING_SERVICE)){
- return;
- }
- }
-
- Date date = new Date();
- long timeSinceLastStateAudit = date.getTime() - lastStateAuditTime.getTime();
- if (timeSinceLastStateAudit < stateAuditIntervalMs){
- return;
- }
-
- // Get all entries in the forwardprogressentity table
- ArrayList<ForwardProgressEntity> fpList = getAllForwardProgressEntity();
-
- // Check if each forwardprogressentity entry is current
- for(ForwardProgressEntity fpe : fpList){
- //If the this is my ForwardProgressEntity, continue
- if(fpe.getResourceName().equals(IntegrityMonitor.resourceName)){
- continue;
- }
- //Make sure you are not getting a cached version
- em.refresh(fpe);
- long diffMs = date.getTime() - fpe.getLastUpdated().getTime();
- logger.debug("IntegrityMonitor.stateAudit(): diffMs = " + diffMs);
-
- //Threshold for a stale entry
- long staleMs = maxFpcUpdateInterval * 1000;
- logger.debug("IntegrityMonitor.stateAudit(): staleMs = " + staleMs);
-
- if(diffMs > staleMs){
- //ForwardProgress is stale. Disable it
- // Start a transaction
- EntityTransaction et = em.getTransaction();
- et.begin();
- StateManagementEntity sme = null;
- try {
- // query if StateManagement entry exists for fpe resource
- Query query = em.createQuery("Select p from StateManagementEntity p where p.resourceName=:resource");
- query.setParameter("resource", fpe.getResourceName());
-
- @SuppressWarnings("rawtypes")
- List smList = query.setLockMode(
- LockModeType.NONE).setFlushMode(FlushModeType.COMMIT).getResultList();
- if (!smList.isEmpty()) {
- // exists
- sme = (StateManagementEntity) smList.get(0);
- // refresh the object from DB in case cached data was returned
- em.refresh(sme);
- logger.debug("IntegrityMonitor.stateAudit(): Found entry in StateManagementEntity table for Resource=" + sme.getResourceName());
- } else {
- String msg = "IntegrityMonitor.stateAudit(): " + fpe.getResourceName() + ": resource not found in state management entity database table";
- logger.debug(msg);
- logger.error(msg);
- }
- synchronized(IMFLUSHLOCK){
- et.commit();
- }
- } catch (Exception e) {
- // log an error
- String msg = "IntegrityMonitor.stateAudit(): " + fpe.getResourceName() + ": StateManagementEntity DB read failed with exception: " + e;
- logger.debug(msg);
- logger.error(msg);
- synchronized(IMFLUSHLOCK){
- if(et.isActive()){
- et.rollback();
- }
- }
- }
-
- if(sme != null){
- if(!sme.getOpState().equals(StateManagement.DISABLED)){
- logger.debug("IntegrityMonitor.stateAudit(): Changing OpStat = disabled for " + sme.getResourceName());
- try {
- stateManager.disableFailed(sme.getResourceName());
- } catch (Exception e) {
- String msg = "IntegrityMonitor.stateAudit(): Failed to disable " + sme.getResourceName();
- logger.debug(msg);
- logger.error(msg);
- }
- }
- }
- }// end if(diffMs > staleMs)
- }// end for(ForwardProgressEntity fpe : fpList)
- lastStateAuditTime = date;
- }// end stateAudit()
-
- /**
- * Execute a test transaction when test transaction interval has elapsed.
- */
- private void checkTestTransaction() {
- synchronized(checkTestTransactionLock){
-
- // test transaction timer checks
- if (testTransInterval <= 0) {
- elapsedTestTransTime = 0;
- return; // test transaction is disabled
- }
-
- elapsedTestTransTime = elapsedTestTransTime + TimeUnit.MILLISECONDS.toSeconds(CYCLE_INTERVAL_MILLIS);
- if (elapsedTestTransTime < testTransInterval) {
- return; // test transaction interval not reached
- }
-
- elapsedTestTransTime = 0; // reset elapsed time
-
- // execute test transaction
- testTransaction();
- }
- }
-
- /**
- * Updates Fpc counter in database when write Fpc interval has elapsed.
- */
- private void checkWriteFpc() {
- synchronized(checkWriteFpcLock){
-
- // test transaction timer checks
- if (writeFpcInterval <= 0) {
- elapsedWriteFpcTime = 0;
- return; // write Fpc is disabled
- }
-
- elapsedWriteFpcTime = elapsedWriteFpcTime + TimeUnit.MILLISECONDS.toSeconds(CYCLE_INTERVAL_MILLIS);
- if (elapsedWriteFpcTime < writeFpcInterval) {
- return; // write Fpc interval not reached
- }
-
- elapsedWriteFpcTime = 0; // reset elapsed time
-
- // write Fpc to database
- try {
- writeFpc();
- } catch (Exception e) {
- // ignore
- }
- }
- }
-
- /**
- * Execute a dependency health check periodically which also updates this resource's state.
- */
- private void checkDependentHealth() {
- logger.debug("checkDependentHealth: entry");
-
- long currTime = System.currentTimeMillis();
- logger.debug("checkDependentHealth currTime - lastDependencyCheckTime = " + (currTime - lastDependencyCheckTime));
- if ((currTime - lastDependencyCheckTime) > (1000 * IntegrityMonitorProperties.DEFAULT_TEST_INTERVAL)) {
- // execute dependency check and update this resource's state
-
- dependencyCheck();
- }
- }
-
- /*
- * This is a simple refresh audit which is periodically run to assure that the states and status
- * attributes are aligned and notifications are sent to any listeners. It is possible for state/status
- * to get out of synch and notified systems to be out of synch due to database corruption (manual or
- * otherwise) or because a node became isolated.
- *
- * When the operation (lock/unlock) is called, it will cause a re-evaluation of the state and
- * send a notification to all registered observers.
- */
- private void refreshStateAudit(){
- if(refreshStateAuditIntervalMs <=0){
- // The audit is deactivated
- return;
- }
- synchronized(refreshStateAuditLock){
- logger.debug("refreshStateAudit: entry");
- Date now = new Date();
- long nowMs = now.getTime();
- long lastTimeMs = refreshStateAuditLastRunDate.getTime();
- logger.debug("refreshStateAudit: ms since last run = " + (nowMs - lastTimeMs));
-
- if((nowMs - lastTimeMs) > refreshStateAuditIntervalMs){
- String adminState = stateManager.getAdminState();
- logger.debug("refreshStateAudit: adminState = " + adminState);
- if(adminState.equals(StateManagement.LOCKED)){
- try {
- logger.debug("refreshStateAudit: calling lock()");
- stateManager.lock();
- } catch (Exception e) {
- logger.error("refreshStateAudit: caught unexpected exception from stateManager.lock(): " + e );
- System.out.println(new Date() + " refreshStateAudit: caught unexpected exception "
- + "from stateManager.lock()");
- e.printStackTrace();
- }
- }else{//unlocked
- try {
- logger.debug("refreshStateAudit: calling unlock()");
- stateManager.unlock();;
- } catch (Exception e) {
- logger.error("refreshStateAudit: caught unexpected exception from stateManager.unlock(): " + e );
- System.out.println(new Date() + " refreshStateAudit: caught unexpected exception "
- + "from stateManager.unlock()");
- e.printStackTrace();
- }
- }
- refreshStateAuditLastRunDate = new Date();
- logger.debug("refreshStateAudit: exit");
- }
- }
- }
-
- /**
- * The following nested class periodically performs the forward progress check,
- * checks dependencies, does a refresh state audit and runs the stateAudit.
- */
- class FPManager extends Thread {
-
- // Constructor - start FP manager thread
- FPManager() {
- // set now as the last time the refreshStateAudit ran
- IntegrityMonitor.this.refreshStateAuditLastRunDate = new Date();
- // start thread
- this.start();
- }
-
- public void run() {
- logger.info("FPManager thread running");
- while (true) {
- try {
- Thread.sleep(CYCLE_INTERVAL_MILLIS);
- } catch (InterruptedException e) {
- // The 'sleep' call was interrupted
- continue;
- }
-
- try {
- if(logger.isDebugEnabled()){
- logger.debug("FPManager calling fpMonitorCycle()");
- }
- // check forward progress timer
- IntegrityMonitor.this.fpMonitorCycle();
-
- if(logger.isDebugEnabled()){
- logger.debug("FPManager calling checkTestTransaction()");
- }
- // check test transaction timer
- IntegrityMonitor.this.checkTestTransaction();
-
- if(logger.isDebugEnabled()){
- logger.debug("FPManager calling checkWriteFpc()");
- }
- // check write Fpc timer
- IntegrityMonitor.this.checkWriteFpc();
-
- if(logger.isDebugEnabled()){
- logger.debug("FPManager calling checkDependentHealth()");
- }
- // check dependency health
- IntegrityMonitor.this.checkDependentHealth();
-
- if(logger.isDebugEnabled()){
- logger.debug("FPManager calling refreshStateAudit()");
- }
- // check if it is time to run the refreshStateAudit
- IntegrityMonitor.this.refreshStateAudit();
-
- if(logger.isDebugEnabled()){
- logger.debug("FPManager calling stateAudit()");
- }
- // check if it is time to run the stateAudit
- IntegrityMonitor.this.stateAudit();
-
- } catch (Exception e) {
- logger.debug("Ignore FPManager thread processing timer(s) exception: " + e);
- }
- }
- }
-
- }
-
-}
-