aboutsummaryrefslogtreecommitdiffstats
path: root/feature-active-standby-management/src/main/java/org/onap/policy/drools/activestandby/PmStandbyStateChangeNotifier.java
blob: 776b70ee022a34597b6fc673b7a70a7efd9903ea (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
/*
 * ============LICENSE_START=======================================================
 * feature-active-standby-management
 * ================================================================================
 * Copyright (C) 2017-2019, 2021 AT&T Intellectual Property. All rights reserved.
 * ================================================================================
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * ============LICENSE_END=========================================================
 */

package org.onap.policy.drools.activestandby;

import java.util.Timer;
import java.util.TimerTask;
import lombok.Getter;
import org.onap.policy.common.im.MonitorTime;
import org.onap.policy.common.im.StateChangeNotifier;
import org.onap.policy.common.im.StateManagement;
import org.onap.policy.common.utils.time.CurrentTime;
import org.onap.policy.drools.system.PolicyEngine;
import org.onap.policy.drools.system.PolicyEngineConstants;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/*
 * Some background:
 *
 * Originally, there was a "StandbyStateChangeNotifier" that belonged to policy-core, and this class's
 * handleStateChange() method used to take care of invoking conn.standDownPdp().
 *
 * But testing revealed that when a state change to hot standby
 * occurred from a demote() operation, first the PMStandbyStateChangeNotifier.handleStateChange() method
 * would be invoked and then the StandbyStateChangeNotifier.handleStateChange() method would be invoked,
 * and this ordering was creating the following problem:
 *
 * When PMStandbyStateChangeNotifier.handleStateChange() was invoked it would take a long time to finish,
 * because it would result in SingleThreadedUebTopicSource.stop() being invoked, which can potentially do a
 * 5 second sleep for each controller being stopped.
 *
 * Meanwhile, as these controller stoppages and their associated sleeps were occurring, the election handler
 * would discover the demoted PDP in hotstandby (but still designated!) and promote it, resulting in the
 * standbyStatus going from hotstandby to providingservice.  So then, by the time that
 * PMStandbyStateChangeNotifier.handleStateChange() finished its work and
 * StandbyStateChangeNotifier.handleStateChange() started executing, the standbyStatus was no longer hotstandby
 * (as effected by the demote), but providingservice (as reset by the election handling logic) and
 * conn.standDownPdp() would not get called!
 *
 * To fix this bug, we consolidated StandbyStateChangeNotifier and PMStandbyStateChangeNotifier, with
 * standDownPdp() always being invoked prior to TopicEndpoint.manager.lock().  In this way, when the
 * election handling logic is invoked during the controller stoppages, the PDP is in hotstandby and
 * the standdown occurs.
 *
 */
/**
 * State-change notifier that reacts to the standby status reported by the state manager.
 *
 * <p>For a null, hot-standby, cold-standby or unsupported status the policy engine manager is
 * deactivated immediately. For the providing-service status, activation is deferred by
 * {@code waitInterval} milliseconds through a {@link Timer}.
 *
 * <p>The deferred activation runs on the timer's thread while {@link #handleStateChange()} runs on
 * the caller's thread, so the fields shared between the two are declared {@code volatile}.
 */
public class PmStandbyStateChangeNotifier extends StateChangeNotifier {
    public static final String NONE = "none";
    public static final String UNSUPPORTED = "unsupported";
    public static final String HOTSTANDBY_OR_COLDSTANDBY = "hotstandby_or_coldstandby";

    // get an instance of logger
    private static final Logger logger = LoggerFactory.getLogger(PmStandbyStateChangeNotifier.class);

    private final CurrentTime currentTime = MonitorTime.getInstance();
    private final Factory timerFactory = Factory.getInstance();

    // Timer that holds the pending delayed activation, if one has ever been scheduled.
    private Timer delayActivateTimer;

    // True from the moment an activation is scheduled until the timer task finishes (or a
    // stand-down clears it). Written by the timer thread as well, hence volatile.
    private volatile boolean isWaitingForActivation;

    // Time at which the current activation was scheduled; used to detect a dead timer.
    private long startTimeWaitingForActivationMs;

    // Delay, in milliseconds, between entering providing-service and activating.
    private final long waitInterval;

    // True while the timer task is executing. Written by the timer thread, hence volatile.
    private volatile boolean isNowActivating;

    // Marker for the last transition that completed successfully. Never null: the literal
    // markers NONE / NULL_VALUE / UNSUPPORTED / ... are used so that equals() is always safe.
    // Written by the timer thread as well, hence volatile.
    @Getter
    private volatile String previousStandbyStatus;

    /**
     * Constructor.
     *
     * <p>Reads the PDP update interval from {@link ActiveStandbyProperties} and derives the
     * activation delay from it.
     *
     * @throws NumberFormatException if the configured update interval is not a parsable integer
     */
    public PmStandbyStateChangeNotifier() {
        int pdpUpdateInterval =
            Integer.parseInt(ActiveStandbyProperties.getProperty(ActiveStandbyProperties.PDP_UPDATE_INTERVAL));
        // delay the activate so the DesignatedWaiter can run twice - give it an extra 2 seconds.
        // The arithmetic is done in long so a large interval cannot overflow int.
        waitInterval = 2L * pdpUpdateInterval + 2000L;
        isWaitingForActivation = false;
        startTimeWaitingForActivationMs = currentTime.getMillis();
        isNowActivating = false;
        previousStandbyStatus = PmStandbyStateChangeNotifier.NONE;
    }

    /**
     * Dispatches on the current standby status: stands the PDP down for a null, standby or
     * unsupported status, and schedules a delayed activation for providing-service.
     */
    @Override
    public void handleStateChange() {
        /*
         * A note on synchronization: This method is not synchronized because the caller,
         * stateManagement, has synchronized all of its methods. Only one stateManagement operation
         * can occur at a time. Thus, only one handleStateChange() call will ever be made at a time.
         */
        // Read the status once so that the value logged is the value acted upon.
        String standbyStatus = super.getStateManagement().getStandbyStatus();
        logger.debug("handleStateChange: Entering, message={}, standbyStatus={}", super.getMessage(),
                        standbyStatus);
        String pdpId = ActiveStandbyProperties.getProperty(ActiveStandbyProperties.NODE_NAME);

        logger.debug("handleStateChange: previousStandbyStatus = {}; standbyStatus = {}",
                previousStandbyStatus, standbyStatus);

        if (standbyStatus == null || standbyStatus.equals(StateManagement.NULL_VALUE)) {
            logger.debug("handleStateChange: standbyStatus is null; standing down PDP={}", pdpId);
            standDownPdpNull(pdpId);

        } else if (standbyStatus.equals(StateManagement.HOT_STANDBY)
                || standbyStatus.equals(StateManagement.COLD_STANDBY)) {
            logger.debug("handleStateChange: standbyStatus={}; standing down PDP={}", standbyStatus, pdpId);
            standDownPdp(pdpId, standbyStatus);

        } else if (standbyStatus.equals(StateManagement.PROVIDING_SERVICE)) {
            logger.debug("handleStateChange: standbyStatus= {} scheduling activation of PDP={}", standbyStatus,
                            pdpId);
            schedulePdpActivation(pdpId, standbyStatus);

        } else {
            logger.error("handleStateChange: Unsupported standbyStatus={}; standing down PDP={}", standbyStatus, pdpId);
            standDownPdpUnsupported(pdpId, standbyStatus);
        }

        logger.debug("handleStateChange: Exiting");
    }

    /**
     * Stands the PDP down because the standby status is null (or the null marker value).
     *
     * @param pdpId identifier of this PDP, used for logging only
     */
    private void standDownPdpNull(String pdpId) {
        if (previousStandbyStatus.equals(StateManagement.NULL_VALUE)) {
            // We were just here and did this successfully
            logger.debug("handleStateChange: "
                            + "Is returning because standbyStatus is null and was previously 'null'; PDP={}",
                            pdpId);
            return;
        }

        isWaitingForActivation = false;
        try {
            logger.debug("handleStateChange: null:  cancelling delayActivationTimer.");
            // The marker NULL_VALUE is recorded rather than a real null because later we
            // execute previousStandbyStatus.equals() and must not get a null pointer exception.
            cancelTimerAndDeactivate(StateManagement.NULL_VALUE);
        } catch (Exception e) {
            logger.warn("handleStateChange: standbyStatus == null caught exception: ", e);
        }
    }

    /**
     * Stands the PDP down because the standby status is hot-standby or cold-standby.
     *
     * @param pdpId identifier of this PDP, used for logging only
     * @param standbyStatus the status that triggered the stand-down, used for logging only
     */
    private void standDownPdp(String pdpId, String standbyStatus) {
        if (previousStandbyStatus.equals(PmStandbyStateChangeNotifier.HOTSTANDBY_OR_COLDSTANDBY)) {
            // We were just here and did this successfully
            logger.debug("handleStateChange: Is returning because standbyStatus is {}"
                            + " and was previously {}; PDP= {}", standbyStatus, previousStandbyStatus, pdpId);
            return;
        }

        isWaitingForActivation = false;
        try {
            logger.debug("handleStateChange: HOT_STNDBY || COLD_STANDBY:  cancelling delayActivationTimer.");
            cancelTimerAndDeactivate(PmStandbyStateChangeNotifier.HOTSTANDBY_OR_COLDSTANDBY);
        } catch (Exception e) {
            logger.warn("handleStateChange: standbyStatus = {} caught exception: {}", standbyStatus, e.getMessage(),
                    e);
        }
    }

    /**
     * Schedules the delayed activation of the PDP for the providing-service status, unless an
     * activation has already completed or one is still pending.
     *
     * @param pdpId identifier of this PDP, used for logging only
     * @param standbyStatus the status that triggered the activation, used for logging only
     */
    private void schedulePdpActivation(String pdpId, String standbyStatus) {
        if (previousStandbyStatus.equals(StateManagement.PROVIDING_SERVICE)) {
            // We were just here and did this successfully
            logger.debug("handleStateChange: Is returning because standbyStatus is {}"
                            + " and was previously {}; PDP={}", standbyStatus, previousStandbyStatus, pdpId);
            return;
        }

        try {
            // UnLock all the endpoints
            logger.debug("handleStateChange: standbyStatus={}; controllers must be unlocked.", standbyStatus);
            /*
             * Only endpoints should be unlocked. Controllers have not been locked. Because,
             * sometimes, it is possible for more than one PDP-D to become active (race
             * conditions) we need to delay the activation of the topic endpoint interfaces to
             * give the election algorithm time to resolve the conflict.
             */
            logger.debug("handleStateChange: PROVIDING_SERVICE isWaitingForActivation= {}",
                            isWaitingForActivation);

            // Delay activation for 2*pdpUpdateInterval+2000 ms in case of an election handler
            // conflict.
            // You could have multiple election handlers thinking they can take over.

            // First let's check that the timer has not died
            checkTimerStatus();

            if (isWaitingForActivation) {
                logger.debug("handleStateChange: PROVIDING_SERVICE delayActivationTimer is "
                                + "waiting for activation.");
                return;
            }

            // Just in case there is an old timer hanging around
            logger.debug("handleStateChange: PROVIDING_SERVICE cancelling delayActivationTimer.");
            cancelTimer();
            delayActivateTimer = timerFactory.makeTimer();
            // delay the activate so the DesignatedWaiter can run twice
            delayActivateTimer.schedule(new DelayActivateClass(), waitInterval);
            isWaitingForActivation = true;
            startTimeWaitingForActivationMs = currentTime.getMillis();
            logger.debug("handleStateChange: PROVIDING_SERVICE scheduling delayActivationTimer in {} ms",
                            waitInterval);

        } catch (Exception e) {
            logger.warn("handleStateChange: PROVIDING_SERVICE standbyStatus == providingservice caught exception: ",
                    e);
        }
    }

    /**
     * Detects a pending activation that has been waiting for more than three times the wait
     * interval while no activation is executing, and clears {@code isWaitingForActivation} so
     * that the caller cancels the timer and schedules a new one.
     */
    private void checkTimerStatus() {
        if (!isWaitingForActivation) {
            return;
        }

        logger.debug("handleStateChange: PROVIDING_SERVICE isWaitingForActivation = {}",
                        isWaitingForActivation);
        long now = currentTime.getMillis();
        long waitTimeMs = now - startTimeWaitingForActivationMs;
        if (waitTimeMs <= 3 * waitInterval) {
            return;
        }

        logger.debug("handleStateChange: PROVIDING_SERVICE looks like the activation wait timer "
                        + "may be hung, waitTimeMs = {} and allowable waitInterval = {}"
                        + " Checking whether it is currently in activation. isNowActivating = {}",
                        waitTimeMs, waitInterval, isNowActivating);
        // Now check that it is not currently executing an activation
        if (!isNowActivating) {
            logger.debug("handleStateChange: PROVIDING_SERVICE looks like the activation "
                            + "wait timer died");
            // This will assure the timer is cancelled and rescheduled.
            isWaitingForActivation = false;
        }
    }

    /**
     * Stands the PDP down because the standby status is not one of the recognised values.
     *
     * @param pdpId identifier of this PDP, used for logging only
     * @param standbyStatus the unrecognised status, used for logging only
     */
    private void standDownPdpUnsupported(String pdpId, String standbyStatus) {
        if (previousStandbyStatus.equals(PmStandbyStateChangeNotifier.UNSUPPORTED)) {
            // We were just here and did this successfully
            logger.debug("handleStateChange: Is returning because standbyStatus is "
                            + "UNSUPPORTED and was previously {}; PDP={}", previousStandbyStatus, pdpId);
            return;
        }

        isWaitingForActivation = false;
        try {
            logger.debug("handleStateChange: unsupported standbystatus:  cancelling delayActivationTimer.");
            // We know the standbystatus is unsupported
            cancelTimerAndDeactivate(PmStandbyStateChangeNotifier.UNSUPPORTED);
        } catch (Exception e) {
            logger.warn("handleStateChange: Unsupported standbyStatus = {} " + "caught exception: {} ",
                    standbyStatus, e.getMessage(), e);
        }
    }

    /**
     * Common stand-down sequence: cancels any pending activation timer, deactivates the policy
     * engine manager and, only if that returned normally, records the new status marker.
     *
     * @param statusToRecord marker to store in {@code previousStandbyStatus} on success
     */
    private void cancelTimerAndDeactivate(String statusToRecord) {
        cancelTimer();
        // Only want to lock the endpoints, not the controllers.
        getPolicyEngineManager().deactivate();
        // The operation was fully successful
        previousStandbyStatus = statusToRecord;
    }

    /**
     * Cancels the delayed-activation timer if one has been created.
     */
    private void cancelTimer() {
        if (delayActivateTimer != null) {
            delayActivateTimer.cancel();
        }
    }

    /**
     * Timer task that performs the deferred activation of the policy engine manager.
     */
    private class DelayActivateClass extends TimerTask {

        // NOTE(review): this lock is created per task instance, so it does not serialize
        // activations issued by different tasks - confirm whether a shared lock was intended.
        private final Object delayActivateLock = new Object();

        @Override
        public void run() {
            isNowActivating = true;
            try {
                logger.debug("DelayActivateClass.run: entry");
                synchronized (delayActivateLock) {
                    getPolicyEngineManager().activate();
                    // The state change fully succeeded
                    previousStandbyStatus = StateManagement.PROVIDING_SERVICE;
                }
                logger.debug("DelayActivateClass.run.exit");
            } catch (Exception e) {
                logger.warn("DelayActivateClass.run: caught an unexpected exception "
                        + "calling PolicyEngineConstants.getManager().activate: ", e);
            } finally {
                // Cleared here, after activate() has returned or thrown, because the activate
                // call can take a while. Using finally guarantees the flags are also reset when
                // a non-Exception Throwable escapes; otherwise checkTimerStatus() would see
                // isNowActivating == true forever and never reschedule the activation.
                isWaitingForActivation = false;
                isNowActivating = false;
            }
        }
    }

    // these may be overridden by junit tests

    /**
     * Returns the policy engine manager to activate or deactivate.
     *
     * @return the manager obtained from {@link PolicyEngineConstants#getManager()}
     */
    protected PolicyEngine getPolicyEngineManager() {
        return PolicyEngineConstants.getManager();
    }
}