aboutsummaryrefslogtreecommitdiffstats
path: root/feature-active-standby-management/src/main/java/org/onap/policy/drools/activestandby/PmStandbyStateChangeNotifier.java
blob: 3f4ae557774463ec896142b99809eb7709d79937 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
/*
 * ============LICENSE_START=======================================================
 * feature-active-standby-management
 * ================================================================================
 * Copyright (C) 2017-2019 AT&T Intellectual Property. All rights reserved.
 * ================================================================================
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * ============LICENSE_END=========================================================
 */

package org.onap.policy.drools.activestandby;

/*
 * Per MultiSite_v1-10.ppt:
 *
 * Extends the StateChangeNotifier class and overrides the abstract handleStateChange() method to get state changes
 * and do the following:
 *
 * When the Standby Status changes (from providingservice) to hotstandby or coldstandby,
 * the Active/Standby selection algorithm must stand down if the PDP-D is currently the lead/active node
 * and allow another PDP-D to take over.  It must also call lock on all engines in the engine management.
 *
 * When the Standby Status changes from (hotstandby) to coldstandby, the Active/Standby algorithm must NOT assume
 * the active/lead role.
 *
 * When the Standby Status changes (from coldstandby or providingservice) to hotstandby,
 * the Active/Standby algorithm may assume the active/lead role if the active/lead fails.
 *
 * When the Standby Status changes to providingservice (from hotstandby or coldstandby) call unlock on all
 * engines in the engine management layer.
 */
import java.util.Date;
import java.util.Timer;
import java.util.TimerTask;

import org.onap.policy.common.im.StateChangeNotifier;
import org.onap.policy.common.im.StateManagement;
import org.onap.policy.drools.system.PolicyEngine;
import org.onap.policy.drools.system.PolicyEngineConstants;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/*
 * Some background:
 *
 * Originally, there was a "StandbyStateChangeNotifier" that belonged to policy-core, and this class's
 * handleStateChange() method used to take care of invoking conn.standDownPdp().
 *
 * But testing revealed that when a state change to hot standby
 * occurred from a demote() operation, first the PMStandbyStateChangeNotifier.handleStateChange() method
 * would be invoked and then the StandbyStateChangeNotifier.handleStateChange() method would be invoked,
 * and this ordering was creating the following problem:
 *
 * When PMStandbyStateChangeNotifier.handleStateChange() was invoked it would take a long time to finish,
 * because it would result in SingleThreadedUebTopicSource.stop() being invoked, which can potentially do a
 * 5 second sleep for each controller being stopped.
 *
 * Meanwhile, as these controller stoppages and their associated sleeps were occurring, the election handler
 * would discover the demoted PDP in hotstandby (but still designated!) and promote it, resulting in the
 * standbyStatus going from hotstandby to providingservice.  So then, by the time that
 * PMStandbyStateChangeNotifier.handleStateChange() finished its work and
 * StandbyStateChangeNotifier.handleStateChange() started executing, the standbyStatus was no longer hotstandby
 * (as effected by the demote), but providingservice (as reset by the election handling logic) and
 * conn.standDownPdp() would not get called!
 *
 * To fix this bug, we consolidated StandbyStateChangeNotifier and PMStandbyStateChangeNotifier,
 * with the standDownPdp() always
 * being invoked prior to the TopicEndpoint.manager.lock().  In this way, when the election handling logic is invoked
 * during the controller stoppages, the PDP is in hotstandby and the standdown occurs.
 *
 */
/**
 * Reacts to standby-status changes of this PDP-D by either standing the policy engine down or
 * scheduling its delayed activation.
 *
 * <p>For a null, hotstandby, coldstandby or unsupported standby status, any pending activation timer
 * is cancelled and the policy engine is deactivated. For providingservice, activation is scheduled on
 * a {@link Timer} after {@code 2 * pdpUpdateInterval + 2000} ms.
 *
 * <p>The activation itself runs on the timer's thread, so the fields shared between that thread and
 * {@link #handleStateChange()} are declared {@code volatile}.
 */
public class PmStandbyStateChangeNotifier extends StateChangeNotifier {
    // get an instance of logger
    private static final Logger logger = LoggerFactory.getLogger(PmStandbyStateChangeNotifier.class);

    /** Initial value of the previous standby status, before any state change was handled. */
    public static final String NONE = "none";
    /** Recorded as previous standby status after standing down for an unsupported status. */
    public static final String UNSUPPORTED = "unsupported";
    /** Recorded as previous standby status after standing down for hotstandby or coldstandby. */
    public static final String HOTSTANDBY_OR_COLDSTANDBY = "hotstandby_or_coldstandby";

    private Timer delayActivateTimer;
    private int pdpUpdateInterval;
    private long startTimeWaitingForActivationMs;
    private long waitInterval;

    // The three fields below are written by the timer thread (DelayActivateClass.run) and read by
    // the thread invoking handleStateChange(); volatile guarantees each side sees the other's writes.
    private volatile boolean isWaitingForActivation;
    private volatile boolean isNowActivating;
    private volatile String previousStandbyStatus;

    /**
     * Constructor.
     *
     * <p>Reads the PDP update interval from {@link ActiveStandbyProperties} and derives the activation
     * delay from it.
     *
     * @throws NumberFormatException if the configured PDP update interval is not a parsable integer
     */
    public PmStandbyStateChangeNotifier() {
        pdpUpdateInterval =
                Integer.parseInt(ActiveStandbyProperties.getProperty(ActiveStandbyProperties.PDP_UPDATE_INTERVAL));
        isWaitingForActivation = false;
        startTimeWaitingForActivationMs = new Date().getTime();
        // delay the activate so the DesignatedWaiter can run twice - give it an extra 2 seconds
        // (2L forces the multiplication into long arithmetic so a large interval cannot overflow int)
        waitInterval = 2L * pdpUpdateInterval + 2000L;
        isNowActivating = false;
        previousStandbyStatus = PmStandbyStateChangeNotifier.NONE;
    }

    /**
     * Handles a standby-status change reported by the state management.
     *
     * <p>If the status equals the one that was last handled successfully, the method returns without
     * doing anything. Exceptions raised while standing down or scheduling the activation are logged
     * and not propagated.
     */
    @Override
    public void handleStateChange() {
        /*
         * A note on synchronization: This method is not synchronized because the caller,
         * stateManagement, has synchronized all of its methods. Only one stateManagement operation
         * can occur at a time. Thus, only one handleStateChange() call will ever be made at a time.
         */
        logger.debug("handleStateChange: Entering, message={}, standbyStatus={}", super.getMessage(),
                super.getStateManagement().getStandbyStatus());
        String standbyStatus = super.getStateManagement().getStandbyStatus();
        String pdpId = ActiveStandbyProperties.getProperty(ActiveStandbyProperties.NODE_NAME);

        logger.debug("handleStateChange: previousStandbyStatus = {}; standbyStatus = {}",
                previousStandbyStatus, standbyStatus);

        if (standbyStatus == null || standbyStatus.equals(StateManagement.NULL_VALUE)) {
            logger.debug("handleStateChange: standbyStatus is null; standing down PDP={}", pdpId);
            if (previousStandbyStatus.equals(StateManagement.NULL_VALUE)) {
                // We were just here and did this successfully
                logger.debug("handleStateChange: "
                        + "Is returning because standbyStatus is null and was previously 'null'; PDP={}",
                        pdpId);
                return;
            }
            isWaitingForActivation = false;
            try {
                logger.debug("handleStateChange: null:  cancelling delayActivationTimer.");
                // The recorded value cannot be a real null because later we might try to execute
                // previousStandbyStatus.equals() and get a null pointer exception.
                standDown(StateManagement.NULL_VALUE);
            } catch (Exception e) {
                logger.warn("handleStateChange: standbyStatus == null caught exception: ", e);
            }

        } else if (standbyStatus.equals(StateManagement.HOT_STANDBY)
                || standbyStatus.equals(StateManagement.COLD_STANDBY)) {
            logger.debug("handleStateChange: standbyStatus={}; standing down PDP={}", standbyStatus, pdpId);
            if (previousStandbyStatus.equals(PmStandbyStateChangeNotifier.HOTSTANDBY_OR_COLDSTANDBY)) {
                // We were just here and did this successfully
                logger.debug("handleStateChange: Is returning because standbyStatus is {}"
                        + " and was previously {}; PDP= {}", standbyStatus, previousStandbyStatus, pdpId);
                return;
            }
            isWaitingForActivation = false;
            try {
                logger.debug("handleStateChange: HOT_STNDBY || COLD_STANDBY:  cancelling delayActivationTimer.");
                standDown(PmStandbyStateChangeNotifier.HOTSTANDBY_OR_COLDSTANDBY);
            } catch (Exception e) {
                logger.warn("handleStateChange: standbyStatus = {} caught exception: {}", standbyStatus,
                        e.getMessage(), e);
            }

        } else if (standbyStatus.equals(StateManagement.PROVIDING_SERVICE)) {
            logger.debug("handleStateChange: standbyStatus= {} scheduling activation of PDP={}", standbyStatus,
                    pdpId);
            if (previousStandbyStatus.equals(StateManagement.PROVIDING_SERVICE)) {
                // We were just here and did this successfully
                logger.debug("handleStateChange: Is returning because standbyStatus is {}"
                        + " and was previously {}; PDP={}", standbyStatus, previousStandbyStatus, pdpId);
                return;
            }
            try {
                // UnLock all the endpoints
                logger.debug("handleStateChange: standbyStatus={}; controllers must be unlocked.", standbyStatus);
                scheduleDelayedActivation();
            } catch (Exception e) {
                logger.warn("handleStateChange: PROVIDING_SERVICE standbyStatus == providingservice caught exception: ",
                        e);
            }

        } else {
            logger.error("handleStateChange: Unsupported standbyStatus={}; standing down PDP={}", standbyStatus, pdpId);
            if (previousStandbyStatus.equals(PmStandbyStateChangeNotifier.UNSUPPORTED)) {
                // We were just here and did this successfully
                logger.debug("handleStateChange: Is returning because standbyStatus is "
                        + "UNSUPPORTED and was previously {}; PDP={}", previousStandbyStatus, pdpId);
                return;
            }
            isWaitingForActivation = false;
            try {
                logger.debug("handleStateChange: unsupported standbystatus:  cancelling delayActivationTimer.");
                // We know the standbystatus is unsupported
                standDown(PmStandbyStateChangeNotifier.UNSUPPORTED);
            } catch (Exception e) {
                logger.warn("handleStateChange: Unsupported standbyStatus = {} caught exception: {} ",
                        standbyStatus, e.getMessage(), e);
            }
        }
        logger.debug("handleStateChange: Exiting");
    }

    /**
     * Cancels any pending activation, deactivates the policy engine and, only once both steps
     * succeeded, records the status that was handled.
     *
     * @param statusToRecord value stored as the previous standby status on success
     */
    private void standDown(String statusToRecord) {
        cancelTimer();
        // Only want to lock the endpoints, not the controllers.
        getPolicyEngineManager().deactivate();
        // The operation was fully successful
        previousStandbyStatus = statusToRecord;
    }

    /**
     * Schedules the delayed activation unless one is already pending; a pending activation that has
     * been waiting for more than three wait intervals and is not currently executing is treated as
     * dead and rescheduled.
     */
    private void scheduleDelayedActivation() {
        /*
         * Only endpoints should be unlocked. Controllers have not been locked. Because,
         * sometimes, it is possible for more than one PDP-D to become active (race
         * conditions) we need to delay the activation of the topic endpoint interfaces to
         * give the election algorithm time to resolve the conflict.
         */
        logger.debug("handleStateChange: PROVIDING_SERVICE isWaitingForActivation= {}",
                isWaitingForActivation);

        // Delay activation for 2*pdpUpdateInterval+2000 ms in case of an election handler
        // conflict.
        // You could have multiple election handlers thinking they can take over.

        // First let's check that the timer has not died
        if (isWaitingForActivation) {
            logger.debug("handleStateChange: PROVIDING_SERVICE isWaitingForActivation = {}",
                    isWaitingForActivation);
            long waitTimeMs = new Date().getTime() - startTimeWaitingForActivationMs;
            if (waitTimeMs > 3 * waitInterval) {
                logger.debug("handleStateChange: PROVIDING_SERVICE looks like the activation wait timer "
                        + "may be hung, waitTimeMs = {} and allowable waitInterval = {}"
                        + " Checking whether it is currently in activation. isNowActivating = {}",
                        waitTimeMs, waitInterval, isNowActivating);
                // Now check that it is not currently executing an activation
                if (!isNowActivating) {
                    logger.debug("handleStateChange: PROVIDING_SERVICE looks like the activation "
                            + "wait timer died");
                    // This will assure the timer is cancelled and rescheduled.
                    isWaitingForActivation = false;
                }
            }
        }

        if (isWaitingForActivation) {
            logger.debug("handleStateChange: PROVIDING_SERVICE delayActivationTimer is "
                    + "waiting for activation.");
            return;
        }

        // Just in case there is an old timer hanging around
        logger.debug("handleStateChange: PROVIDING_SERVICE cancelling delayActivationTimer.");
        cancelTimer();
        delayActivateTimer = makeTimer();
        // delay the activate so the DesignatedWaiter can run twice
        delayActivateTimer.schedule(new DelayActivateClass(), waitInterval);
        isWaitingForActivation = true;
        startTimeWaitingForActivationMs = new Date().getTime();
        logger.debug("handleStateChange: PROVIDING_SERVICE scheduling delayActivationTimer in {} ms",
                waitInterval);
    }

    /**
     * Cancels the activation timer, if one was ever created.
     */
    private void cancelTimer() {
        if (delayActivateTimer != null) {
            delayActivateTimer.cancel();
        }
    }

    /**
     * Timer task that activates the policy engine and records the result in the enclosing notifier.
     */
    private class DelayActivateClass extends TimerTask {

        // NOTE(review): this lock is created per task instance, so it does not serialize
        // activations started by different tasks - confirm whether that was the intent.
        private final Object delayActivateLock = new Object();

        @Override
        public void run() {
            isNowActivating = true;
            try {
                logger.debug("DelayActivateClass.run: entry");
                synchronized (delayActivateLock) {
                    getPolicyEngineManager().activate();
                    // The state change fully succeeded
                    previousStandbyStatus = StateManagement.PROVIDING_SERVICE;
                }
                logger.debug("DelayActivateClass.run.exit");
            } catch (Exception e) {
                logger.warn("DelayActivateClass.run: caught an unexpected exception "
                        + "calling PolicyEngineConstants.getManager().activate: ", e);
            } finally {
                // Reset on every exit path (including a non-Exception Throwable); otherwise
                // handleStateChange() would believe an activation is still in progress and would
                // never reschedule it. Done only here because the activate call can take a while.
                isWaitingForActivation = false;
                isNowActivating = false;
            }
        }
    }

    /**
     * Returns the marker of the last standby status that was handled successfully.
     *
     * @return {@link #NONE} initially, otherwise the value recorded by the last successful
     *         stand-down or activation
     */
    public String getPreviousStandbyStatus() {
        return previousStandbyStatus;
    }

    // these may be overridden by junit tests

    /**
     * Supplies the policy engine that is activated and deactivated by this notifier.
     *
     * @return the engine returned by {@link PolicyEngineConstants#getManager()}
     */
    protected PolicyEngine getPolicyEngineManager() {
        return PolicyEngineConstants.getManager();
    }

    /**
     * Creates the timer used to delay activation.
     *
     * @return a new {@link Timer}
     */
    protected Timer makeTimer() {
        return new Timer();
    }
}