diff options
Diffstat (limited to 'feature-active-standby-management/src/main/java/org/onap/policy/drools/activestandby/PMStandbyStateChangeNotifier.java')
-rw-r--r-- | feature-active-standby-management/src/main/java/org/onap/policy/drools/activestandby/PMStandbyStateChangeNotifier.java | 345 |
1 files changed, 345 insertions, 0 deletions
/*-
 * ============LICENSE_START=======================================================
 * feature-active-standby-management
 * ================================================================================
 * Copyright (C) 2017 AT&T Intellectual Property. All rights reserved.
 * ================================================================================
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * ============LICENSE_END=========================================================
 */

package org.onap.policy.drools.activestandby;

/*
 * Per MultiSite_v1-10.ppt:
 *
 * Extends the StateChangeNotifier class and overrides the abstract handleStateChange() method to get state changes
 * and do the following:
 *
 * When the Standby Status changes (from providingservice) to hotstandby or coldstandby,
 * the Active/Standby selection algorithm must stand down if the PDP-D is currently the lead/active node
 * and allow another PDP-D to take over. It must also call lock on all engines in the engine management.
 *
 * When the Standby Status changes from (hotstandby) to coldstandby, the Active/Standby algorithm must NOT assume
 * the active/lead role.
 *
 * When the Standby Status changes (from coldstandby or providingservice) to hotstandby,
 * the Active/Standby algorithm may assume the active/lead role if the active/lead fails.
 *
 * When the Standby Status changes to providingservice (from hotstandby or coldstandby) call unlock on all
 * engines in the engine management layer.
 */
import java.util.Date;
import java.util.Timer;
import java.util.TimerTask;

import org.onap.policy.common.im.StateChangeNotifier;
import org.onap.policy.common.im.StateManagement;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.onap.policy.drools.system.PolicyEngine;

/*
 * Some background:
 *
 * Originally, there was a "StandbyStateChangeNotifier" that belonged to policy-core, and this class's
 * handleStateChange() method used to take care of invoking conn.standDownPdp(). But testing revealed that when a
 * state change to hot standby occurred from a demote() operation, first the
 * PMStandbyStateChangeNotifier.handleStateChange() method would be invoked and then the
 * StandbyStateChangeNotifier.handleStateChange() method would be invoked, and this ordering was creating the
 * following problem:
 *
 * When PMStandbyStateChangeNotifier.handleStateChange() was invoked it would take a long time to finish, because it
 * would result in SingleThreadedUebTopicSource.stop() being invoked, which can potentially do a 5 second sleep for
 * each controller being stopped. Meanwhile, as these controller stoppages and their associated sleeps were
 * occurring, the election handler would discover the demoted PDP in hotstandby (but still designated!) and promote
 * it, resulting in the standbyStatus going from hotstandby to providingservice. So then, by the time that
 * PMStandbyStateChangeNotifier.handleStateChange() finished its work and
 * StandbyStateChangeNotifier.handleStateChange() started executing, the standbyStatus was no longer hotstandby (as
 * effected by the demote), but providingservice (as reset by the election handling logic) and conn.standDownPdp()
 * would not get called!
 *
 * To fix this bug, we consolidated StandbyStateChangeNotifier and PMStandbyStateChangeNotifier, with the
 * standDownPdp() always being invoked prior to the TopicEndpoint.manager.lock(). In this way, when the election
 * handling logic is invoked during the controller stoppages, the PDP is in hotstandby and the standdown occurs.
 */

/**
 * Reacts to standby-status changes by deactivating the policy engine when the status is null, hot/cold standby
 * or unsupported, and by scheduling a delayed activation when the status becomes providing-service.
 *
 * <p>Two threads touch this object: the thread that calls {@link #handleStateChange()} and the
 * {@link Timer} thread that runs the delayed activation. The fields written by the timer thread are
 * {@code volatile} so that each thread sees the other's latest bookkeeping.
 */
public class PMStandbyStateChangeNotifier extends StateChangeNotifier {
    // get an instance of logger
    private static final Logger logger = LoggerFactory.getLogger(PMStandbyStateChangeNotifier.class);

    /** Value of the previous standby status before any state change has been processed. */
    public static final String NONE = "none";
    /** Value of the previous standby status after an unrecognized standby status was processed. */
    public static final String UNSUPPORTED = "unsupported";
    /** Value of the previous standby status after a hot or cold standby status was processed. */
    public static final String HOTSTANDBY_OR_COLDSTANDBY = "hotstandby_or_coldstandby";

    private final int pdpUpdateInterval;
    private final long waitInterval;

    // Only touched from handleStateChange(), which the caller serializes.
    private Timer delayActivateTimer;
    private long startTimeWaitingForActivationMs;

    // Written by the timer thread (DelayActivateClass) and read by handleStateChange().
    private volatile boolean isWaitingForActivation;
    private volatile boolean isNowActivating;
    private volatile String previousStandbyStatus;

    /**
     * Creates the notifier and derives the activation delay from the configured PDP update interval.
     *
     * @throws NumberFormatException if the {@code PDP_UPDATE_INTERVAL} property is not a parsable integer
     */
    public PMStandbyStateChangeNotifier() {
        pdpUpdateInterval = Integer.parseInt(
                ActiveStandbyProperties.getProperty(ActiveStandbyProperties.PDP_UPDATE_INTERVAL));
        isWaitingForActivation = false;
        startTimeWaitingForActivationMs = new Date().getTime();
        // delay the activate so the DesignatedWaiter can run twice - give it an extra 2 seconds
        waitInterval = 2 * pdpUpdateInterval + 2000;
        isNowActivating = false;
        previousStandbyStatus = PMStandbyStateChangeNotifier.NONE;
    }

    /**
     * Handles a standby-status change reported by the state management layer.
     *
     * <ul>
     * <li>null status, hot/cold standby, or an unsupported status: cancel any pending activation and
     * deactivate the policy engine;</li>
     * <li>providing-service: schedule activation of the policy engine after {@code waitInterval} ms.</li>
     * </ul>
     * A repeated notification for the status that was last processed successfully is ignored.
     */
    @Override
    public void handleStateChange() {
        /*
         * A note on synchronization: This method is not synchronized because the caller, stateManagement,
         * has synchronized all of its methods. Only one stateManagement operation can occur at a time. Thus,
         * only one handleStateChange() call will ever be made at a time.
         */
        if (logger.isDebugEnabled()) {
            logger.debug("handleStateChange: Entering, message={}, standbyStatus={}",
                    super.getMessage(), super.getStateManagement().getStandbyStatus());
        }
        String standbyStatus = super.getStateManagement().getStandbyStatus();
        String pdpId = ActiveStandbyProperties.getProperty(ActiveStandbyProperties.NODE_NAME);

        logger.debug("handleStateChange: previousStandbyStatus = {}"
                + "; standbyStatus = {}", previousStandbyStatus, standbyStatus);

        if (standbyStatus == null || standbyStatus.equals(StateManagement.NULL_VALUE)) {
            logger.debug("handleStateChange: standbyStatus is null; standing down PDP={}", pdpId);
            if (previousStandbyStatus.equals(StateManagement.NULL_VALUE)) {
                // We were just here and did this successfully
                logger.debug("handleStateChange: Is returning because standbyStatus is null"
                        + " and was previously 'null'; PDP={}", pdpId);
                return;
            }
            isWaitingForActivation = false;
            try {
                logger.debug("handleStateChange: null: cancelling delayActivationTimer.");
                if (!cancelDelayActivateTimer()) {
                    logger.debug("handleStateChange: null no delayActivationTimer existed.");
                }
                // Only want to lock the endpoints, not the controllers.
                PolicyEngine.manager.deactivate();
                // The operation was fully successful, but you cannot assign it a real null value
                // because later we might try to execute previousStandbyStatus.equals() and get
                // a null pointer exception.
                previousStandbyStatus = StateManagement.NULL_VALUE;
            } catch (Exception e) {
                logger.warn("handleStateChange: standbyStatus == null caught exception: ", e);
            }

        } else if (standbyStatus.equals(StateManagement.HOT_STANDBY)
                || standbyStatus.equals(StateManagement.COLD_STANDBY)) {
            logger.debug("handleStateChange: standbyStatus={}; standing down PDP={}", standbyStatus, pdpId);
            if (previousStandbyStatus.equals(PMStandbyStateChangeNotifier.HOTSTANDBY_OR_COLDSTANDBY)) {
                // We were just here and did this successfully
                logger.debug("handleStateChange: Is returning because standbyStatus is {}"
                        + " and was previously {}; PDP= {}", standbyStatus, previousStandbyStatus, pdpId);
                return;
            }
            isWaitingForActivation = false;
            try {
                logger.debug("handleStateChange: HOT_STNDBY || COLD_STANDBY: cancelling delayActivationTimer.");
                if (!cancelDelayActivateTimer()) {
                    logger.debug("handleStateChange: HOT_STANDBY || COLD_STANDBY no delayActivationTimer existed.");
                }
                // Only want to lock the endpoints, not the controllers.
                PolicyEngine.manager.deactivate();
                // The operation was fully successful
                previousStandbyStatus = PMStandbyStateChangeNotifier.HOTSTANDBY_OR_COLDSTANDBY;
            } catch (Exception e) {
                logger.warn("handleStateChange: standbyStatus = {} caught exception: {}",
                        standbyStatus, e.getMessage(), e);
            }

        } else if (standbyStatus.equals(StateManagement.PROVIDING_SERVICE)) {
            logger.debug("handleStateChange: standbyStatus= {} "
                    + "scheduling activation of PDP={}", standbyStatus, pdpId);
            if (previousStandbyStatus.equals(StateManagement.PROVIDING_SERVICE)) {
                // We were just here and did this successfully
                logger.debug("handleStateChange: Is returning because standbyStatus is {}"
                        + " and was previously {}; PDP={}", standbyStatus, previousStandbyStatus, pdpId);
                return;
            }
            try {
                // UnLock all the endpoints
                logger.debug("handleStateChange: standbyStatus={}; controllers must be unlocked.", standbyStatus);
                scheduleDelayedActivation();
            } catch (Exception e) {
                logger.warn("handleStateChange: PROVIDING_SERVICE standbyStatus == providingservice"
                        + " caught exception: ", e);
            }

        } else {
            logger.error("handleStateChange: Unsupported standbyStatus={}; standing down PDP={}",
                    standbyStatus, pdpId);
            if (previousStandbyStatus.equals(PMStandbyStateChangeNotifier.UNSUPPORTED)) {
                // We were just here and did this successfully
                logger.debug("handleStateChange: Is returning because standbyStatus is "
                        + "UNSUPPORTED and was previously {}; PDP={}", previousStandbyStatus, pdpId);
                return;
            }
            // Only want to lock the endpoints, not the controllers.
            isWaitingForActivation = false;
            try {
                logger.debug("handleStateChange: unsupported standbystatus: cancelling delayActivationTimer.");
                if (!cancelDelayActivateTimer()) {
                    logger.debug("handleStateChange: unsupported standbystatus: no delayActivationTimer existed.");
                }
                PolicyEngine.manager.deactivate();
                // We know the standbystatus is unsupported
                previousStandbyStatus = PMStandbyStateChangeNotifier.UNSUPPORTED;
            } catch (Exception e) {
                logger.warn("handleStateChange: Unsupported standbyStatus = {} "
                        + "caught exception: {} ", standbyStatus, e.getMessage(), e);
            }
        }
        logger.debug("handleStateChange: Exiting");
    }

    /**
     * Cancels the delayed-activation timer if one has been created.
     *
     * @return {@code true} if a timer existed and was cancelled, {@code false} if none had been created yet
     */
    private boolean cancelDelayActivateTimer() {
        if (delayActivateTimer == null) {
            return false;
        }
        delayActivateTimer.cancel();
        return true;
    }

    /**
     * Schedules the delayed activation unless one is already pending.
     *
     * <p>Only endpoints should be unlocked. Controllers have not been locked. Because, sometimes, it is
     * possible for more than one PDP-D to become active (race conditions) we need to delay the activation of
     * the topic endpoint interfaces to give the election algorithm time to resolve the conflict. The delay is
     * 2*pdpUpdateInterval+2000 ms; you could have multiple election handlers thinking they can take over.
     */
    private void scheduleDelayedActivation() {
        logger.debug("handleStateChange: PROVIDING_SERVICE isWaitingForActivation= {}", isWaitingForActivation);

        // First let's check that the timer has not died
        if (isWaitingForActivation) {
            logger.debug("handleStateChange: PROVIDING_SERVICE isWaitingForActivation = {}",
                    isWaitingForActivation);
            long waitTimeMs = new Date().getTime() - startTimeWaitingForActivationMs;
            if (waitTimeMs > 3 * waitInterval) {
                logger.debug("handleStateChange: PROVIDING_SERVICE looks like the activation wait timer may be hung,"
                        + " waitTimeMs = {} and allowable waitInterval = {}"
                        + " Checking whether it is currently in activation. isNowActivating = {}",
                        waitTimeMs, waitInterval, isNowActivating);
                // Now check that it is not currently executing an activation
                if (!isNowActivating) {
                    logger.debug("handleStateChange: PROVIDING_SERVICE looks like the activation wait timer died");
                    // This will assure the timer is cancelled and rescheduled.
                    isWaitingForActivation = false;
                }
            }
        }

        if (isWaitingForActivation) {
            logger.debug("handleStateChange: PROVIDING_SERVICE delayActivationTimer is waiting for activation.");
            return;
        }

        // Just in case there is an old timer hanging around
        logger.debug("handleStateChange: PROVIDING_SERVICE cancelling delayActivationTimer.");
        if (!cancelDelayActivateTimer()) {
            logger.debug("handleStateChange: PROVIDING_SERVICE no delayActivationTimer existed.");
        }
        delayActivateTimer = new Timer();
        // delay the activate so the DesignatedWaiter can run twice
        delayActivateTimer.schedule(new DelayActivateClass(), waitInterval);
        isWaitingForActivation = true;
        startTimeWaitingForActivationMs = new Date().getTime();
        logger.debug("handleStateChange: PROVIDING_SERVICE scheduling delayActivationTimer in {} ms", waitInterval);
    }

    /**
     * Timer task that activates the policy engine and records that providing-service was reached.
     *
     * <p>NOTE(review): {@link Timer#cancel()} does not interrupt a task that is already running, so a
     * stand-down that arrives while {@code activate()} is in progress can be followed by this task setting
     * the previous status to providing-service - confirm whether that ordering is acceptable.
     */
    private class DelayActivateClass extends TimerTask {

        @Override
        public void run() {
            isNowActivating = true;
            try {
                logger.debug("DelayActivateClass.run: entry");
                PolicyEngine.manager.activate();
                // The state change fully succeeded
                previousStandbyStatus = StateManagement.PROVIDING_SERVICE;
                logger.debug("DelayActivateClass.run.exit");
            } catch (Exception e) {
                logger.warn("DelayActivateClass.run: caught an unexpected exception "
                        + "calling PolicyEngine.manager.activate: ", e);
            } finally {
                // Reset here rather than before activate() because the activate call can take a while;
                // doing it in finally keeps the flags from sticking if activate() fails in any way.
                isWaitingForActivation = false;
                isNowActivating = false;
            }
        }
    }

    /**
     * Returns the marker for the standby status that was last processed successfully.
     *
     * @return {@link #NONE} before any change has been processed, otherwise the value recorded by the last
     *         successful stand-down or activation
     */
    public String getPreviousStandbyStatus() {
        return previousStandbyStatus;
    }
}