summaryrefslogtreecommitdiffstats
path: root/feature-active-standby-management/src/main/java/org/onap/policy/drools/activestandby/PMStandbyStateChangeNotifier.java
diff options
context:
space:
mode:
Diffstat (limited to 'feature-active-standby-management/src/main/java/org/onap/policy/drools/activestandby/PMStandbyStateChangeNotifier.java')
-rw-r--r--feature-active-standby-management/src/main/java/org/onap/policy/drools/activestandby/PMStandbyStateChangeNotifier.java345
1 files changed, 345 insertions, 0 deletions
diff --git a/feature-active-standby-management/src/main/java/org/onap/policy/drools/activestandby/PMStandbyStateChangeNotifier.java b/feature-active-standby-management/src/main/java/org/onap/policy/drools/activestandby/PMStandbyStateChangeNotifier.java
new file mode 100644
index 00000000..ce62bf89
--- /dev/null
+++ b/feature-active-standby-management/src/main/java/org/onap/policy/drools/activestandby/PMStandbyStateChangeNotifier.java
@@ -0,0 +1,345 @@
+/*-
+ * ============LICENSE_START=======================================================
+ * feature-active-standby-management
+ * ================================================================================
+ * Copyright (C) 2017 AT&T Intellectual Property. All rights reserved.
+ * ================================================================================
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ============LICENSE_END=========================================================
+ */
+
+package org.onap.policy.drools.activestandby;
+
+/*
+ * Per MultiSite_v1-10.ppt:
+ *
+ * Extends the StateChangeNotifier class and overwrites the abstract handleStateChange() method to get state changes
+ * and do the following:
+ *
+ * When the Standby Status changes (from providingservice) to hotstandby or coldstandby,
+ * the Active/Standby selection algorithm must stand down if the PDP-D is currently the lead/active node
+ * and allow another PDP-D to take over. It must also call lock on all engines in the engine management.
+ *
+ * When the Standby Status changes from (hotstandby) to coldstandby, the Active/Standby algorithm must NOT assume
+ * the active/lead role.
+ *
+ * When the Standby Status changes (from coldstandby or providingservice) to hotstandby,
+ * the Active/Standby algorithm may assume the active/lead role if the active/lead fails.
+ *
+ * When the Standby Status changes to providingservice (from hotstandby or coldstandby) call unlock on all
+ * engines in the engine management layer.
+ */
+import java.util.Date;
+import java.util.Timer;
+import java.util.TimerTask;
+
+import org.onap.policy.common.im.StateChangeNotifier;
+import org.onap.policy.common.im.StateManagement;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.onap.policy.drools.system.PolicyEngine;
+
+/*
+ * Some background:
+ *
+ * Originally, there was a "StandbyStateChangeNotifier" that belonged to policy-core, and this class's handleStateChange() method
+ * used to take care of invoking conn.standDownPdp(). But testing revealed that when a state change to hot standby occurred
+ * from a demote() operation, first the PMStandbyStateChangeNotifier.handleStateChange() method would be invoked and then the
+ * StandbyStateChangeNotifier.handleStateChange() method would be invoked, and this ordering was creating the following problem:
+ *
+ * When PMStandbyStateChangeNotifier.handleStateChange() was invoked it would take a long time to finish, because it would result
+ * in SingleThreadedUebTopicSource.stop() being invoked, which can potentially do a 5 second sleep for each controller being stopped.
+ * Meanwhile, as these controller stoppages and their associated sleeps were occurring, the election handler would discover the
+ * demoted PDP in hotstandby (but still designated!) and promote it, resulting in the standbyStatus going from hotstandby
+ * to providingservice. So then, by the time that PMStandbyStateChangeNotifier.handleStateChange() finished its work and
+ * StandbyStateChangeNotifier.handleStateChange() started executing, the standbyStatus was no longer hotstandby (as effected by
+ * the demote), but providingservice (as reset by the election handling logic) and conn.standDownPdp() would not get called!
+ *
+ * To fix this bug, we consolidated StandbyStateChangeNotifier and PMStandbyStateChangeNotifier, with the standDownPdp() always
+ * being invoked prior to the TopicEndpoint.manager.lock(). In this way, when the election handling logic is invoked
+ * during the controller stoppages, the PDP is in hotstandby and the standdown occurs.
+ *
+ */
+public class PMStandbyStateChangeNotifier extends StateChangeNotifier {
+ // get an instance of logger
+ private static final Logger logger = LoggerFactory.getLogger(PMStandbyStateChangeNotifier.class);
+ private Timer delayActivateTimer;
+ private int pdpUpdateInterval;
+ private boolean isWaitingForActivation;
+ private long startTimeWaitingForActivationMs;
+ private long waitInterval;
+ private boolean isNowActivating;
+ private String previousStandbyStatus;
+ public static String NONE = "none";
+ public static String UNSUPPORTED = "unsupported";
+ public static String HOTSTANDBY_OR_COLDSTANDBY = "hotstandby_or_coldstandby";
+
+ public PMStandbyStateChangeNotifier(){
+ pdpUpdateInterval = Integer.parseInt(ActiveStandbyProperties.getProperty(ActiveStandbyProperties.PDP_UPDATE_INTERVAL));
+ isWaitingForActivation = false;
+ startTimeWaitingForActivationMs = new Date().getTime();
+ //delay the activate so the DesignatedWaiter can run twice - give it an extra 2 seconds
+ waitInterval = 2*pdpUpdateInterval + 2000;
+ isNowActivating=false;
+ previousStandbyStatus = PMStandbyStateChangeNotifier.NONE;
+ }
+
+ @Override
+ public void handleStateChange() {
+ /*
+ * A note on synchronization: This method is not synchronized because the caller, stateManagememt,
+ * has synchronize all of its methods. Only one stateManagement operation can occur at a time. Thus,
+ * only one handleStateChange() call will ever be made at a time.
+ */
+ if(logger.isInfoEnabled()){
+ if(logger.isDebugEnabled()){
+ logger.debug("handleStateChange: Entering, message={}, standbyStatus={}",
+ super.getMessage(), super.getStateManagement().getStandbyStatus());
+ }
+ }
+ String standbyStatus = super.getStateManagement().getStandbyStatus();
+ String pdpId = ActiveStandbyProperties
+ .getProperty(ActiveStandbyProperties.NODE_NAME);
+
+ if(logger.isDebugEnabled()){
+ logger.debug("handleStateChange: previousStandbyStatus = {}"
+ + "; standbyStatus = {}", previousStandbyStatus, standbyStatus);
+ }
+
+ if (standbyStatus == null || standbyStatus.equals(StateManagement.NULL_VALUE)) {
+ if(logger.isDebugEnabled()){
+ logger.debug("handleStateChange: standbyStatus is null; standing down PDP={}", pdpId);
+ }
+ if(previousStandbyStatus.equals(StateManagement.NULL_VALUE)){
+ //We were just here and did this successfully
+ if(logger.isDebugEnabled()){
+ logger.debug("handleStateChange: Is returning because standbyStatus is null and was previously 'null'; PDP={}", pdpId);
+ }
+ return;
+ }
+ isWaitingForActivation = false;
+ try{
+ try{
+ if(logger.isDebugEnabled()){
+ logger.debug("handleStateChange: null: cancelling delayActivationTimer.");
+ }
+ delayActivateTimer.cancel();
+ }catch(Exception e){
+ if(logger.isInfoEnabled()){
+ logger.info("handleStateChange: null no delayActivationTimer existed.", e);
+ }
+ //If you end of here, there was no active timer
+ }
+ //Only want to lock the endpoints, not the controllers.
+ PolicyEngine.manager.deactivate();
+ //The operation was fully successful, but you cannot assign it a real null value
+ //because later we might try to execute previousStandbyStatus.equals() and get
+ //a null pointer exception.
+ previousStandbyStatus = StateManagement.NULL_VALUE;
+ }catch(Exception e){
+ logger.warn("handleStateChange: standbyStatus == null caught exception: ", e);
+ }
+ } else if (standbyStatus.equals(StateManagement.HOT_STANDBY) || standbyStatus.equals(StateManagement.COLD_STANDBY)) {
+ if(logger.isDebugEnabled()){
+ logger.debug("handleStateChange: standbyStatus={}; standing down PDP={}", standbyStatus, pdpId);
+ }
+ if(previousStandbyStatus.equals(PMStandbyStateChangeNotifier.HOTSTANDBY_OR_COLDSTANDBY)){
+ //We were just here and did this successfully
+ if(logger.isDebugEnabled()){
+ logger.debug("handleStateChange: Is returning because standbyStatus is {}"
+ + " and was previously {}; PDP= {}", standbyStatus, previousStandbyStatus, pdpId);
+ }
+ return;
+ }
+ isWaitingForActivation = false;
+ try{
+ try{
+ if(logger.isDebugEnabled()){
+ logger.debug("handleStateChange: HOT_STNDBY || COLD_STANDBY: cancelling delayActivationTimer.");
+ }
+ delayActivateTimer.cancel();
+ }catch(Exception e){
+ if(logger.isDebugEnabled()){
+ logger.debug("handleStateChange: HOT_STANDBY || COLD_STANDBY no delayActivationTimer existed.", e);
+ }
+ //If you end of here, there was no active timer
+ }
+ //Only want to lock the endpoints, not the controllers.
+ PolicyEngine.manager.deactivate();
+ //The operation was fully successful
+ previousStandbyStatus = PMStandbyStateChangeNotifier.HOTSTANDBY_OR_COLDSTANDBY;
+ }catch(Exception e){
+ logger.warn("handleStateChange: standbyStatus = {} caught exception: {}", standbyStatus, e.getMessage(), e);
+ }
+
+ } else if (standbyStatus.equals(StateManagement.PROVIDING_SERVICE)) {
+ if(logger.isDebugEnabled()){
+ logger.debug("handleStateChange: standbyStatus= {} "
+ + "scheduling activation of PDP={}",standbyStatus, pdpId);
+ }
+ if(previousStandbyStatus.equals(StateManagement.PROVIDING_SERVICE)){
+ //We were just here and did this successfully
+ if(logger.isDebugEnabled()){
+ logger.debug("handleStateChange: Is returning because standbyStatus is {}"
+ + "and was previously {}; PDP={}", standbyStatus, previousStandbyStatus, pdpId);
+ }
+ return;
+ }
+ try{
+ //UnLock all the endpoints
+ if(logger.isDebugEnabled()){
+ logger.debug("handleStateChange: standbyStatus={}; controllers must be unlocked.",standbyStatus );
+ }
+ /*
+ * Only endpoints should be unlocked. Controllers have not been locked.
+ * Because, sometimes, it is possible for more than one PDP-D to become active (race conditions)
+ * we need to delay the activation of the topic endpoint interfaces to give the election algorithm
+ * time to resolve the conflict.
+ */
+ if(logger.isDebugEnabled()){
+ logger.debug("handleStateChange: PROVIDING_SERVICE isWaitingForActivation= {}", isWaitingForActivation);
+ }
+
+ //Delay activation for 2*pdpUpdateInterval+2000 ms in case of an election handler conflict.
+ //You could have multiple election handlers thinking they can take over.
+
+ // First let's check that the timer has not died
+ if(isWaitingForActivation){
+ if(logger.isDebugEnabled()){
+ logger.debug("handleStateChange: PROVIDING_SERVICE isWaitingForActivation = {}", isWaitingForActivation);
+ }
+ long now = new Date().getTime();
+ long waitTimeMs = now - startTimeWaitingForActivationMs;
+ if(waitTimeMs > 3*waitInterval){
+ if(logger.isDebugEnabled()){
+ logger.debug("handleStateChange: PROVIDING_SERVICE looks like the activation wait timer may be hung,"
+ + " waitTimeMs = {} and allowable waitInterval = {}"
+ + " Checking whether it is currently in activation. isNowActivating = {}",
+ waitTimeMs, waitInterval, isNowActivating);
+ }
+ //Now check that it is not currently executing an activation
+ if(!isNowActivating){
+ if(logger.isDebugEnabled()){
+ logger.debug("handleStateChange: PROVIDING_SERVICE looks like the activation wait timer died");
+ }
+ // This will assure the timer is cancelled and rescheduled.
+ isWaitingForActivation = false;
+ }
+ }
+
+ }
+
+ if(!isWaitingForActivation){
+ try{
+ //Just in case there is an old timer hanging around
+ if(logger.isDebugEnabled()){
+ logger.debug("handleStateChange: PROVIDING_SERVICE cancelling delayActivationTimer.");
+ }
+ delayActivateTimer.cancel();
+ }catch(Exception e){
+ if(logger.isDebugEnabled()){
+ logger.debug("handleStateChange: PROVIDING_SERVICE no delayActivationTimer existed.");
+ }
+ //If you end of here, there was no active timer
+ }
+ delayActivateTimer = new Timer();
+ //delay the activate so the DesignatedWaiter can run twice
+ delayActivateTimer.schedule(new DelayActivateClass(), waitInterval);
+ isWaitingForActivation = true;
+ startTimeWaitingForActivationMs = new Date().getTime();
+ if(logger.isDebugEnabled()){
+ logger.debug("handleStateChange: PROVIDING_SERVICE scheduling delayActivationTimer in {} ms", waitInterval);
+ }
+ }else{
+ if(logger.isDebugEnabled()){
+ logger.debug("handleStateChange: PROVIDING_SERVICE delayActivationTimer is waiting for activation.");
+ }
+ }
+
+ }catch(Exception e){
+ logger.warn("handleStateChange: PROVIDING_SERVICE standbyStatus == providingservice caught exception: ", e);
+ }
+
+ } else {
+ logger.error("handleStateChange: Unsupported standbyStatus={}; standing down PDP={}", standbyStatus, pdpId);
+ if(previousStandbyStatus.equals(PMStandbyStateChangeNotifier.UNSUPPORTED)){
+ //We were just here and did this successfully
+ if(logger.isDebugEnabled()){
+ logger.debug("handleStateChange: Is returning because standbyStatus is "
+ + "UNSUPPORTED and was previously {}; PDP={}", previousStandbyStatus, pdpId);
+ }
+ return;
+ }
+ //Only want to lock the endpoints, not the controllers.
+ isWaitingForActivation = false;
+ try{
+ try{
+ if(logger.isDebugEnabled()){
+ logger.debug("handleStateChange: unsupported standbystatus: cancelling delayActivationTimer.");
+ }
+ delayActivateTimer.cancel();
+ }catch(Exception e){
+ if(logger.isDebugEnabled()){
+ logger.debug("handleStateChange: unsupported standbystatus: no delayActivationTimer existed.", e);
+ }
+ //If you end of here, there was no active timer
+ }
+ PolicyEngine.manager.deactivate();
+ //We know the standbystatus is unsupported
+ previousStandbyStatus = PMStandbyStateChangeNotifier.UNSUPPORTED;
+ }catch(Exception e){
+ logger.warn("handleStateChange: Unsupported standbyStatus = {} "
+ + "caught exception: {} ",standbyStatus, e.getMessage(), e);
+ }
+ }
+ if(logger.isDebugEnabled()){
+ logger.debug("handleStateChange: Exiting");
+ }
+ }
+
+ private class DelayActivateClass extends TimerTask{
+
+ private Object delayActivateLock = new Object();
+
+
+ @Override
+ public void run() {
+ isNowActivating = true;
+ try{
+ if(logger.isDebugEnabled()){
+ logger.debug("DelayActivateClass.run: entry");
+ }
+ synchronized(delayActivateLock){
+ PolicyEngine.manager.activate();
+ // The state change fully succeeded
+ previousStandbyStatus = StateManagement.PROVIDING_SERVICE;
+ // We want to set this to false here because the activate call can take a while
+ isWaitingForActivation = false;
+ isNowActivating = false;
+ }
+ if(logger.isDebugEnabled()){
+ logger.debug("DelayActivateClass.run.exit");
+ }
+ }catch(Exception e){
+ isWaitingForActivation = false;
+ isNowActivating = false;
+ logger.warn("DelayActivateClass.run: caught an unexpected exception "
+ + "calling PolicyEngine.manager.activate: ", e);
+ }
+ }
+ }
+
+ public String getPreviousStandbyStatus(){
+ return previousStandbyStatus;
+ }
+}