From bc8c8286645f74753d175eee7ca62d989555c96c Mon Sep 17 00:00:00 2001 From: Kevin McKiou Date: Wed, 13 Dec 2017 15:26:59 -0600 Subject: Fix issues blocking election handler thread This bug tracks the AT&T bug 355533. The symptom was that drools pdps that were backing each other up were becoming stuck in a standby state. The cause was that the election handler thread was being hung by a call to PolicyEngine.manager.deactivate which shuts down the topic endpoints when the drools pdp operational state transitions to disabled. Related problems were that the election handler heartbeat was NOT blocked when the main thread was blocked and the IntegrityMonitor forward progress counter was NOT blocked from incrementing when the election handler thread was blocked. This prevented the correct failover of the drools pdp to another healthy one. This change fixes the two causes of the thread blockage, moves the election handler heartbeat to the main thread and adds an interface (AllSeemsWell) which is called when the election handler has stalled/resumed. The AllSeemsWell interface will block forward progress counter increments when ALLNOTWELL and will resume forward progress counter increments when ALLSEEMSWELL. In addition, it reduces the run time of the StandbyStateManagementTest from approximately 8 minutes to approximately 2 minutes. Since this changes classes also changed by POLICY-444, this change must be merged before POLICY-444 can be merged. 
Issue-ID: POLICY-501 Change-Id: I7b8180d11077ccf59b21b6484cb58b5522a3df8f Signed-off-by: Kevin McKiou --- .../activestandby/DroolsPdpsElectionHandler.java | 174 ++++-------- .../drools/controller/test/AllSeemsWellTest.java | 316 +++++++++++++++++++++ .../test/StandbyStateManagementTest.java | 17 +- .../feature-active-standby-management.properties | 39 +++ .../asw/feature-state-management.properties | 81 ++++++ .../feature-active-standby-management.properties | 5 +- .../resources/feature-state-management.properties | 14 +- 7 files changed, 509 insertions(+), 137 deletions(-) create mode 100644 feature-active-standby-management/src/test/java/org/onap/policy/drools/controller/test/AllSeemsWellTest.java create mode 100644 feature-active-standby-management/src/test/resources/asw/feature-active-standby-management.properties create mode 100644 feature-active-standby-management/src/test/resources/asw/feature-state-management.properties (limited to 'feature-active-standby-management/src') diff --git a/feature-active-standby-management/src/main/java/org/onap/policy/drools/activestandby/DroolsPdpsElectionHandler.java b/feature-active-standby-management/src/main/java/org/onap/policy/drools/activestandby/DroolsPdpsElectionHandler.java index e9526eb7..e7f40772 100644 --- a/feature-active-standby-management/src/main/java/org/onap/policy/drools/activestandby/DroolsPdpsElectionHandler.java +++ b/feature-active-standby-management/src/main/java/org/onap/policy/drools/activestandby/DroolsPdpsElectionHandler.java @@ -35,8 +35,6 @@ public class DroolsPdpsElectionHandler implements ThreadRunningChecker { // get an instance of logger private final static Logger logger = LoggerFactory.getLogger(DroolsPdpsElectionHandler.class); private DroolsPdpsConnector pdpsConnector; - private Object pdpsConnectorLock = new Object(); - private Object checkUpdateWorkerLock = new Object(); private Object checkWaitTimerLock = new Object(); private Object designationWaiterLock = new Object(); @@ -49,18 
+47,42 @@ public class DroolsPdpsElectionHandler implements ThreadRunningChecker { private DesignationWaiter designationWaiter; private Timer updateWorker; private Timer waitTimer; - private Date updateWorkerLastRunDate; private Date waitTimerLastRunDate; + + // The interval between checks of the DesignationWaiter to be sure it is running. private int pdpCheckInterval; + + // The interval between runs of the DesignationWaiter private int pdpUpdateInterval; + private volatile boolean isDesignated; private String pdpdNowActive; private String pdpdLastActive; + private Boolean allSeemsWell=true; + private StateManagementFeatureAPI stateManagementFeature; + private static boolean isUnitTesting = false; + public static void setIsUnitTesting(boolean val){ + isUnitTesting = val; + } + private static boolean isStalled = false; + public static void setIsStalled(boolean val){ + isStalled = val; + } + public DroolsPdpsElectionHandler(DroolsPdpsConnector pdps, DroolsPdp myPdp){ + if (pdps == null) { + logger.error("DroolsPdpsElectinHandler(): pdpsConnector==null"); + throw new IllegalArgumentException("DroolsPdpsElectinHandler(): pdpsConnector==null"); + } + if (myPdp == null){ + logger.error("DroolsPdpsElectinHandler(): droolsPdp==null"); + throw new IllegalArgumentException("DroolsPdpsElectinHandler(): DroolsPdp==null"); + } + pdpdNowActive = null; pdpdLastActive = null; this.pdpsConnector = pdps; @@ -71,14 +93,14 @@ public class DroolsPdpsElectionHandler implements ThreadRunningChecker { pdpCheckInterval = Integer.parseInt(ActiveStandbyProperties.getProperty(ActiveStandbyProperties.PDP_CHECK_INVERVAL)); }catch(Exception e){ logger.error - ("Could not get pdpCheckInterval property. Using default", e); + ("Could not get pdpCheckInterval property. 
Using default {}",pdpCheckInterval, e); } pdpUpdateInterval = 2000; try{ pdpUpdateInterval = Integer.parseInt(ActiveStandbyProperties.getProperty(ActiveStandbyProperties.PDP_UPDATE_INTERVAL)); }catch(Exception e){ logger.error - ("Could not get pdpUpdateInterval property. Using default", e); + ("Could not get pdpUpdateInterval property. Using default {} ", pdpUpdateInterval, e); } Date now = new Date(); @@ -90,11 +112,10 @@ public class DroolsPdpsElectionHandler implements ThreadRunningChecker { // This is the heartbeat updateWorker = new Timer(); - // Schedule the heartbeat to start in 100 ms and run at pdpCheckInterval ms thereafter + // Schedule the TimerUpdateClass to run at 100 ms and run at pdpCheckInterval ms thereafter // NOTE: The first run of the TimerUpdateClass results in myPdp being added to the // drools droolsPdpEntity table. updateWorker.scheduleAtFixedRate(new TimerUpdateClass(), 100, pdpCheckInterval); - updateWorkerLastRunDate = new Date(nowMs + 100); // Create the timer which will run the election algorithm waitTimer = new Timer(); @@ -151,13 +172,11 @@ public class DroolsPdpsElectionHandler implements ThreadRunningChecker { ("DesignatedWaiter.run: Entering"); } - // just here initially so code still works - if (pdpsConnector == null) { - waitTimerLastRunDate = new Date(); + //This is for testing the checkWaitTimer + if(isUnitTesting && isStalled){ if(logger.isDebugEnabled()){ - logger.debug("DesignatedWaiter.run (pdpsConnector==null) waitTimerLastRunDate = {}", waitTimerLastRunDate); + logger.debug("DesignatedWaiter.run: isUnitTesting = {} isStalled = {}", isUnitTesting, isStalled); } - return; } @@ -168,8 +187,6 @@ public class DroolsPdpsElectionHandler implements ThreadRunningChecker { ("DesignatedWaiter.run: Entering synchronized block"); } - checkUpdateWorkerTimer(); - //It is possible that multiple PDPs are designated lead. So, we will make a list of all designated //PDPs and then decide which one really should be designated at the end. 
ArrayList listOfDesignated = new ArrayList(); @@ -519,6 +536,8 @@ public class DroolsPdpsElectionHandler implements ThreadRunningChecker { if(logger.isDebugEnabled()){ logger.debug("DesignatedWaiter.run (designatedPdp == null) waitTimerLastRunDate = {}", waitTimerLastRunDate); } + myPdp.setUpdatedDate(waitTimerLastRunDate); + pdpsConnector.update(myPdp); return; @@ -580,6 +599,8 @@ public class DroolsPdpsElectionHandler implements ThreadRunningChecker { logger.debug("DesignatedWaiter.run (designatedPdp.getPdpId().equals(myPdp.getPdpId())) " + "waitTimerLastRunDate = " + waitTimerLastRunDate); } + myPdp.setUpdatedDate(waitTimerLastRunDate); + pdpsConnector.update(myPdp); return; } @@ -598,6 +619,8 @@ public class DroolsPdpsElectionHandler implements ThreadRunningChecker { } waitTimerLastRunDate = tmpDate; + myPdp.setUpdatedDate(waitTimerLastRunDate); + pdpsConnector.update(myPdp); }catch(Exception e){ logger.error("DesignatedWaiter.run caught an unexpected exception: ", e); @@ -896,92 +919,19 @@ public class DroolsPdpsElectionHandler implements ThreadRunningChecker { logger.debug("TimerUpdateClass.run: entry"); } checkWaitTimer(); - synchronized(pdpsConnectorLock){ - - myPdp.setUpdatedDate(new Date()); - /* - Redundant with DesignationWaiter and this updates the date every - cycle instead of just when the state changes. 
- if(myPdp.isDesignated()){ - myPdp.setDesignatedDate(new Date()); - } - */ - pdpsConnector.update(myPdp); - - Date tmpDate = new Date(); - if(logger.isDebugEnabled()){ - logger.debug("TimerUpdateClass.run: updateWorkerLastRunDate = {}", tmpDate); - } - - updateWorkerLastRunDate = tmpDate; - } - if(logger.isDebugEnabled()){ - logger.debug("TimerUpdateClass.run.exit"); - } }catch(Exception e){ logger.error("TimerUpdateClass.run caught an unexpected exception: ", e); } + if(logger.isDebugEnabled()){ + logger.debug("TimerUpdateClass.run.exit"); + } } } @Override public void checkThreadStatus() { - checkUpdateWorkerTimer(); checkWaitTimer(); } - private void checkUpdateWorkerTimer(){ - synchronized(checkUpdateWorkerLock){ - try{ - if(logger.isDebugEnabled()){ - logger.debug("checkUpdateWorkerTimer: entry"); - } - Date now = new Date(); - long nowMs = now.getTime(); - long updateWorkerMs = updateWorkerLastRunDate.getTime(); - //give it 2 second cushion - if((nowMs - updateWorkerMs) > pdpCheckInterval + 2000){ - logger.error("checkUpdateWorkerTimer: nowMs - updateWorkerMs = {} " - + ", exceeds pdpCheckInterval + 2000 = {} " - + "Will reschedule updateWorker timer",(nowMs - updateWorkerMs), (pdpCheckInterval + 2000)); - - try{ - updateWorker.cancel(); - // Recalculate the time because this is a synchronized section and the thread could have - // been blocked. - now = new Date(); - nowMs = now.getTime(); - updateWorker = new Timer(); - // reset the updateWorkerLastRunDate - updateWorkerLastRunDate = new Date(nowMs + 100); - //execute the first time in 100 ms - updateWorker.scheduleAtFixedRate(new TimerUpdateClass(), 100, pdpCheckInterval); - if(logger.isDebugEnabled()){ - logger.debug("checkUpdateWorkerTimer: Scheduling updateWorker timer to start in 100 ms "); - } - }catch(Exception e){ - logger.error("checkUpdateWorkerTimer: Caught unexpected Exception: ", e); - // Recalculate the time because this is a synchronized section and the thread could have - // been blocked. 
- now = new Date(); - nowMs = now.getTime(); - updateWorker = new Timer(); - updateWorkerLastRunDate = new Date(nowMs + 100); - updateWorker.scheduleAtFixedRate(new TimerUpdateClass(), 100, pdpCheckInterval); - if(logger.isDebugEnabled()){ - logger.debug("checkUpdateWorkerTimer: Attempting to schedule updateWorker timer in 100 ms"); - } - } - - } - if(logger.isDebugEnabled()){ - logger.debug("checkUpdateWorkerTimer: exit"); - } - }catch(Exception e){ - logger.error("checkUpdateWorkerTimer: caught unexpected exception: ", e); - } - } - } - private void checkWaitTimer(){ synchronized(checkWaitTimerLock){ try{ @@ -994,38 +944,24 @@ public class DroolsPdpsElectionHandler implements ThreadRunningChecker { //give it 10 times leeway if((nowMs - waitTimerMs) > 10*pdpUpdateInterval){ - logger.error("checkWaitTimer: nowMs - waitTimerMs = {}" - + ", exceeds 10* pdpUpdateInterval = {}" - + "Will reschedule waitTimer timer", (nowMs - waitTimerMs), (10*pdpUpdateInterval)); - - try{ - // Recalculate since the thread could have been stalled on the synchronize() - nowMs = (new Date()).getTime(); - // Time to the start of the next pdpUpdateInterval multiple - long startMs = getDWaiterStartMs(); - waitTimer.cancel(); - designationWaiter = new DesignationWaiter(); - waitTimer = new Timer(); - waitTimerLastRunDate = new Date(nowMs + startMs); - waitTimer.scheduleAtFixedRate(designationWaiter, startMs, pdpUpdateInterval); - if(logger.isDebugEnabled()){ - logger.debug("checkWaitTimer: Scheduling waitTimer timer to start in {} ms", startMs); - } - }catch(Exception e){ - logger.error("checkWaitTimer: Caught unexpected Exception: ", e); - // Recalculate since the thread could have been stalled on the synchronize() - nowMs = (new Date()).getTime(); - // Time to the start of the next pdpUpdateInterval multiple - long startMs = getDWaiterStartMs(); - designationWaiter = new DesignationWaiter(); - waitTimer = new Timer(); - waitTimerLastRunDate = new Date(nowMs + startMs); - 
waitTimer.scheduleAtFixedRate(designationWaiter, startMs, pdpUpdateInterval); + if(allSeemsWell){ + allSeemsWell = false; if(logger.isDebugEnabled()){ - logger.debug("checkWaitTimer: Scheduling waitTimer timer in {} ms", startMs); + logger.debug("checkWaitTimer: calling allSeemsWell with ALLNOTWELL param"); } + stateManagementFeature.allSeemsWell(this.getClass().getName(), + StateManagementFeatureAPI.ALLNOTWELL, + "DesignationWaiter/ElectionHandler has STALLED"); } - + logger.error("checkWaitTimer: nowMs - waitTimerMs = {}" + + ", exceeds 10* pdpUpdateInterval = {}" + + " DesignationWaiter is STALLED!", (nowMs - waitTimerMs), (10*pdpUpdateInterval)); + }else if(!allSeemsWell){ + allSeemsWell = true; + stateManagementFeature.allSeemsWell(this.getClass().getName(), + StateManagementFeatureAPI.ALLSEEMSWELL, + "DesignationWaiter/ElectionHandler has RESUMED"); + logger.info("DesignationWaiter/ElectionHandler has RESUMED"); } if(logger.isDebugEnabled()){ logger.debug("checkWaitTimer: exit"); diff --git a/feature-active-standby-management/src/test/java/org/onap/policy/drools/controller/test/AllSeemsWellTest.java b/feature-active-standby-management/src/test/java/org/onap/policy/drools/controller/test/AllSeemsWellTest.java new file mode 100644 index 00000000..0318bed6 --- /dev/null +++ b/feature-active-standby-management/src/test/java/org/onap/policy/drools/controller/test/AllSeemsWellTest.java @@ -0,0 +1,316 @@ +/* + * ============LICENSE_START======================================================= + * feature-active-standby-management + * ================================================================================ + * Copyright (C) 2017 AT&T Intellectual Property. All rights reserved. + * ================================================================================ + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============LICENSE_END========================================================= + */ + +package org.onap.policy.drools.controller.test; + +import static org.junit.Assert.assertTrue; + +import java.io.File; +import java.io.FileInputStream; +import java.util.Date; +import java.util.Properties; + +import javax.persistence.EntityManager; +import javax.persistence.EntityManagerFactory; +import javax.persistence.EntityTransaction; +import javax.persistence.Persistence; + +import org.apache.commons.lang3.time.DateUtils; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Ignore; +import org.junit.Test; +import org.onap.policy.common.im.StateManagement; +import org.onap.policy.drools.activestandby.ActiveStandbyFeatureAPI; +import org.onap.policy.drools.activestandby.ActiveStandbyProperties; +import org.onap.policy.drools.activestandby.DroolsPdpEntity; +import org.onap.policy.drools.activestandby.DroolsPdpImpl; +import org.onap.policy.drools.activestandby.DroolsPdpsConnector; +import org.onap.policy.drools.activestandby.DroolsPdpsElectionHandler; +import org.onap.policy.drools.activestandby.JpaDroolsPdpsConnector; +import org.onap.policy.drools.core.PolicySessionFeatureAPI; +import org.onap.policy.drools.statemanagement.StateManagementFeatureAPI; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/* + * Testing the allSeemsWell interface to verify that it correctly affects the + * operational state. 
+ */ + +public class AllSeemsWellTest { + private static final Logger logger = LoggerFactory.getLogger(AllSeemsWellTest.class); + /* + * Currently, the DroolsPdpsElectionHandler.DesignationWaiter is invoked every 1 second, starting + * at the start of the next multiple of pdpUpdateInterval, but with a minimum of 5 sec cushion + * to ensure that we wait for the DesignationWaiter to do its job, before + * checking the results. Add a few seconds for safety + */ + + long sleepTime = 10000; + + /* + * DroolsPdpsElectionHandler runs every 1 second, so it takes 10 seconds for the + * checkWaitTimer() method to time out and call allSeemsWell which then requires + * the forward progress counter to go stale which should add an additional 5 sec. + */ + + long stalledElectionHandlerSleepTime = 15000; + + /* + * As soon as the election handler successfully runs, it will resume the forward progress. + * If the election handler runs every 1 sec and test transaction is run every 1 sec and + * then fpc is written every 1 sec and then the fpc is checked every 2 sec, that could + * take a total of 5 sec to recognize the resumption of progress. So, add 1 for safety. + */ + long resumedElectionHandlerSleepTime = 6000; + + private static EntityManagerFactory emfx; + private static EntityManagerFactory emfd; + private static EntityManager emx; + private static EntityManager emd; + private static EntityTransaction et; + + private final String configDir = "src/test/resources/asw"; + + /* + * See the IntegrityMonitor.getJmxUrl() method for the rationale behind this jmx related processing. 
+ */ + + @BeforeClass + public static void setUpClass() throws Exception { + + String userDir = System.getProperty("user.dir"); + logger.debug("setUpClass: userDir={}", userDir); + System.setProperty("com.sun.management.jmxremote.port", "9980"); + System.setProperty("com.sun.management.jmxremote.authenticate","false"); + } + + @AfterClass + public static void tearDownClass() throws Exception { + } + + @Before + public void setUp() throws Exception { + //Create the data access for xacml db + Properties stateManagementProperties = new Properties(); + stateManagementProperties.load(new FileInputStream(new File( + configDir + "/feature-state-management.properties"))); + + emfx = Persistence.createEntityManagerFactory("junitXacmlPU", stateManagementProperties); + + // Create an entity manager to use the DB + emx = emfx.createEntityManager(); + + //Create the data access for drools db + Properties activeStandbyProperties = new Properties(); + activeStandbyProperties.load(new FileInputStream(new File( + configDir + "/feature-active-standby-management.properties"))); + + emfd = Persistence.createEntityManagerFactory("junitDroolsPU", activeStandbyProperties); + + // Create an entity manager to use the DB + emd = emfd.createEntityManager(); + + DroolsPdpsElectionHandler.setIsUnitTesting(true); + } + + @After + public void tearDown() throws Exception { + + } + + public void cleanXacmlDb(){ + et = emx.getTransaction(); + + et.begin(); + // Make sure we leave the DB clean + emx.createQuery("DELETE FROM StateManagementEntity").executeUpdate(); + emx.createQuery("DELETE FROM ResourceRegistrationEntity").executeUpdate(); + emx.createQuery("DELETE FROM ForwardProgressEntity").executeUpdate(); + emx.flush(); + et.commit(); + } + + public void cleanDroolsDb(){ + et = emd.getTransaction(); + + et.begin(); + // Make sure we leave the DB clean + emd.createQuery("DELETE FROM DroolsPdpEntity").executeUpdate(); + emd.flush(); + et.commit(); + } + + + // Tests hot standby when there is only 
one PDP. + + //@Ignore + @Test + public void testAllSeemsWell() throws Exception { + + logger.debug("\n\ntestAllSeemsWell: Entering\n\n"); + cleanXacmlDb(); + cleanDroolsDb(); + + logger.debug("testAllSeemsWell: Reading stateManagementProperties"); + Properties stateManagementProperties = new Properties(); + stateManagementProperties.load(new FileInputStream(new File( + configDir + "/feature-state-management.properties"))); + + logger.debug("testAllSeemsWell: Creating emfXacml"); + EntityManagerFactory emfXacml = Persistence.createEntityManagerFactory( + "junitXacmlPU", stateManagementProperties); + + logger.debug("testAllSeemsWell: Reading activeStandbyProperties"); + Properties activeStandbyProperties = new Properties(); + activeStandbyProperties.load(new FileInputStream(new File( + configDir + "/feature-active-standby-management.properties"))); + String thisPdpId = activeStandbyProperties + .getProperty(ActiveStandbyProperties.NODE_NAME); + + logger.debug("testAllSeemsWell: Creating emfDrools"); + EntityManagerFactory emfDrools = Persistence.createEntityManagerFactory( + "junitDroolsPU", activeStandbyProperties); + + DroolsPdpsConnector conn = new JpaDroolsPdpsConnector(emfDrools); + + logger.debug("testAllSeemsWell: Cleaning up tables"); + conn.deleteAllPdps(); + + /* + * Insert this PDP as not designated. Initial standby state will be + * either null or cold standby. Demoting should transit state to + * hot standby. 
+ */ + + logger.debug("testAllSeemsWell: Inserting PDP={} as not designated", thisPdpId); + Date yesterday = DateUtils.addDays(new Date(), -1); + DroolsPdpImpl pdp = new DroolsPdpImpl(thisPdpId, false, 4, yesterday); + conn.insertPdp(pdp); + DroolsPdpEntity droolsPdpEntity = conn.getPdp(thisPdpId); + logger.debug("testAllSeemsWell: After insertion, PDP={} has DESIGNATED={}", + thisPdpId, droolsPdpEntity.isDesignated()); + assertTrue(droolsPdpEntity.isDesignated() == false); + + logger.debug("testAllSeemsWell: Instantiating stateManagement object"); + StateManagement sm = new StateManagement(emfXacml, "dummy"); + sm.deleteAllStateManagementEntities(); + + + // Now we want to create a StateManagementFeature and initialize it. It will be + // discovered by the ActiveStandbyFeature when the election handler initializes. + + StateManagementFeatureAPI smf = null; + for (StateManagementFeatureAPI feature : StateManagementFeatureAPI.impl.getList()) + { + ((PolicySessionFeatureAPI) feature).globalInit(null, configDir); + smf = feature; + logger.debug("testAllSeemsWell stateManagementFeature.getResourceName(): {}", smf.getResourceName()); + break; + } + if(smf == null){ + logger.error("testAllSeemsWell failed to initialize. " + + "Unable to get instance of StateManagementFeatureAPI " + + "with resourceID: {}", thisPdpId); + logger.debug("testAllSeemsWell failed to initialize. " + + "Unable to get instance of StateManagementFeatureAPI " + + "with resourceID: {}", thisPdpId); + } + + // Create an ActiveStandbyFeature and initialize it. It will discover the StateManagementFeature + // that has been created. 
+ ActiveStandbyFeatureAPI activeStandbyFeature = null; + for (ActiveStandbyFeatureAPI feature : ActiveStandbyFeatureAPI.impl.getList()) + { + ((PolicySessionFeatureAPI) feature).globalInit(null, configDir); + activeStandbyFeature = feature; + logger.debug("testAllSeemsWell activeStandbyFeature.getResourceName(): {}", activeStandbyFeature.getResourceName()); + break; + } + if(activeStandbyFeature == null){ + logger.error("testAllSeemsWell failed to initialize. " + + "Unable to get instance of ActiveStandbyFeatureAPI " + + "with resourceID: {}", thisPdpId); + logger.debug("testAllSeemsWell failed to initialize. " + + "Unable to get instance of ActiveStandbyFeatureAPI " + + "with resourceID: {}", thisPdpId); + } + + + logger.debug("testAllSeemsWell: Demoting PDP={}", thisPdpId); + // demoting should cause state to transit to hotstandby + smf.demote(); + + + logger.debug("testAllSeemsWell: Sleeping {} ms, to allow JpaDroolsPdpsConnector " + + "time to check droolspdpentity table", sleepTime); + Thread.sleep(sleepTime); + + + // Verify that this formerly un-designated PDP in HOT_STANDBY is now designated and providing service. 
+ + droolsPdpEntity = conn.getPdp(thisPdpId); + logger.debug("testAllSeemsWell: After sm.demote() invoked, DESIGNATED= {} " + + "for PDP= {}", droolsPdpEntity.isDesignated(), thisPdpId); + assertTrue(droolsPdpEntity.isDesignated() == true); + String standbyStatus = smf.getStandbyStatus(thisPdpId); + logger.debug("testAllSeemsWell: After demotion, PDP= {} " + + "has standbyStatus= {}", thisPdpId, standbyStatus); + assertTrue(standbyStatus != null && standbyStatus.equals(StateManagement.PROVIDING_SERVICE)); + + //Now we want to stall the election handler and see if AllSeemsWell will make the + //standbystatus = coldstandby + + DroolsPdpsElectionHandler.setIsStalled(true); + + logger.debug("testAllSeemsWell: Sleeping {} ms, to allow checkWaitTimer to recognize " + + "the election handler has stalled and for the testTransaction to fail to " + + "increment forward progress and for the lack of forward progress to be recognized.", + stalledElectionHandlerSleepTime); + + + //It takes 10x the update interval (1 sec) before the watcher will declare the election handler dead + //and that just stops the forward progress counter. So, the fp monitor must then run to determine + //if the fpc has stalled. That will take about another 5 sec. 
+ Thread.sleep(stalledElectionHandlerSleepTime); + + logger.debug("testAllSeemsWell: After isStalled=true, PDP= {} " + + "has standbyStatus= {}", thisPdpId, smf.getStandbyStatus(thisPdpId)); + + assertTrue(smf.getStandbyStatus().equals(StateManagement.COLD_STANDBY)); + + //Now lets resume the election handler + DroolsPdpsElectionHandler.setIsStalled(false); + + Thread.sleep(resumedElectionHandlerSleepTime); + + logger.debug("testAllSeemsWell: After isStalled=false, PDP= {} " + + "has standbyStatus= {}", thisPdpId, smf.getStandbyStatus(thisPdpId)); + + assertTrue(smf.getStandbyStatus().equals(StateManagement.PROVIDING_SERVICE)); + + //resumedElectionHandlerSleepTime = 5000; + logger.debug("\n\ntestAllSeemsWell: Exiting\n\n"); + + } +} diff --git a/feature-active-standby-management/src/test/java/org/onap/policy/drools/controller/test/StandbyStateManagementTest.java b/feature-active-standby-management/src/test/java/org/onap/policy/drools/controller/test/StandbyStateManagementTest.java index 4f7469cc..876cd99a 100644 --- a/feature-active-standby-management/src/test/java/org/onap/policy/drools/controller/test/StandbyStateManagementTest.java +++ b/feature-active-standby-management/src/test/java/org/onap/policy/drools/controller/test/StandbyStateManagementTest.java @@ -38,6 +38,7 @@ import org.junit.After; import org.junit.AfterClass; import org.junit.Before; import org.junit.BeforeClass; +import org.junit.Ignore; import org.junit.Test; import org.onap.policy.common.im.AdministrativeStateException; import org.onap.policy.common.im.IntegrityMonitor; @@ -71,23 +72,23 @@ import org.slf4j.LoggerFactory; public class StandbyStateManagementTest { private static final Logger logger = LoggerFactory.getLogger(StandbyStateManagementTest.class); /* - * Currently, the DroolsPdpsElectionHandler.DesignationWaiter is invoked every ten seconds, starting - * at ten seconds after the minute boundary (e.g. 13:05:10). 
So, an 80 second sleep should be - * sufficient to ensure that we wait for the DesignationWaiter to do its job, before - * checking the results. + * Currently, the DroolsPdpsElectionHandler.DesignationWaiter is invoked every 1 second, starting + * at the start of the next multiple of pdpUpdateInterval, but with a minimum of 5 sec cushion + * to ensure that we wait for the DesignationWaiter to do its job, before + * checking the results. Add a few seconds for safety */ - long sleepTime = 80000; + long sleepTime = 10000; /* - * DroolsPdpsElectionHandler runs every ten seconds, so a 15 second sleep should be + * DroolsPdpsElectionHandler runs every 1 second, so a 6 second sleep should be * plenty to ensure it has time to re-promote this PDP. */ - long electionWaitSleepTime = 15000; + long electionWaitSleepTime = 6000; /* - * Sleep 5 seconds after each test to allow interrupt (shutdown) recovery. + * Sleep 5 seconds (interruptRecoveryTime) after each test to allow interrupt (shutdown) recovery. */ long interruptRecoveryTime = 5000; diff --git a/feature-active-standby-management/src/test/resources/asw/feature-active-standby-management.properties b/feature-active-standby-management/src/test/resources/asw/feature-active-standby-management.properties new file mode 100644 index 00000000..f0711e6c --- /dev/null +++ b/feature-active-standby-management/src/test/resources/asw/feature-active-standby-management.properties @@ -0,0 +1,39 @@ +### +# ============LICENSE_START======================================================= +# feature-active-standby-management +# ================================================================================ +# Copyright (C) 2017 AT&T Intellectual Property. All rights reserved. +# ================================================================================ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============LICENSE_END========================================================= +### + +# DB properties +javax.persistence.jdbc.driver = org.h2.Driver +javax.persistence.jdbc.url = jdbc:h2:file:./sql/activestandbymanagement +javax.persistence.jdbc.user = sa +javax.persistence.jdbc.password = + +# Must be unique across the system +resource.name=pdp1 +# Name of the site in which this node is hosted +site_name=pdp_1 + +# Needed by DroolsPdpsElectionHandler +pdp.checkInterval=1500 +pdp.updateInterval=1000 + +# Need long timeout, because testTransaction is only run every 1 seconds. +pdp.timeout=3000 +#how long do we wait for the pdp table to populate on initial startup +pdp.initialWait=1000 \ No newline at end of file diff --git a/feature-active-standby-management/src/test/resources/asw/feature-state-management.properties b/feature-active-standby-management/src/test/resources/asw/feature-state-management.properties new file mode 100644 index 00000000..2629c63d --- /dev/null +++ b/feature-active-standby-management/src/test/resources/asw/feature-state-management.properties @@ -0,0 +1,81 @@ +### +# ============LICENSE_START======================================================= +# feature-active-standby-management +# ================================================================================ +# Copyright (C) 2017 AT&T Intellectual Property. All rights reserved. 
+# ================================================================================ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============LICENSE_END========================================================= +### + +# DB properties +javax.persistence.jdbc.driver = org.h2.Driver +javax.persistence.jdbc.url = jdbc:h2:file:./sql/statemanagement +javax.persistence.jdbc.user = sa +javax.persistence.jdbc.password = + +# DroolsPDPIntegrityMonitor Properties + +http.server.services=TEST +http.server.services.TEST.host=0.0.0.0 +http.server.services.TEST.port=9981 +#These properties will default to the following if no other values are provided: +# http.server.services.TEST.restClasses=org.onap.policy.drools.statemanagement.IntegrityMonitorRestManager +# http.server.services.TEST.managed=false +# http.server.services.TEST.swagger=true + +#IntegrityMonitor Properties + +# Must be unique across the system +resource.name=pdp1 +# Name of the site in which this node is hosted +site_name = pdp_1 +# How often in sec the forward progress monitor checks for forward progress +fp_monitor_interval = 2 +# Failed counter threshold before failover +failed_counter_threshold = 1 +# Interval between test transactions when no traffic seconds +test_trans_interval = 1 +# Interval between writes of the FPC to the DB seconds +write_fpc_interval = 1 +# Node type Note: Make sure you don't leave any trailing spaces, or you'll get an 'invalid node type' error! 
+node_type = pdp_drools +# Dependency groups are groups of resources upon which a node's operational state depends. +# Each group is a comma-separated list of resource names and groups are separated by a semicolon. For example: +# dependency_groups=site_1.astra_1,site_1.astra_2;site_1.brms_1,site_1.brms_2;site_1.logparser_1;site_1.pypdp_1 +dependency_groups= +# When set to true, dependent health checks are performed by using JMX to invoke test() on the dependent. +# The default, false, is to use state checks for health. +test_via_jmx=true +# This is the max number of seconds beyond which a non-incrementing FPC is considered a failure +max_fpc_update_interval=5 +# Run the state audit every 60 seconds (60000 ms). The state audit finds stale DB entries in the +# forwardprogressentity table and marks the node as disabled/failed in the statemanagemententity +# table. NOTE! It will only run on nodes that have a standbystatus = providingservice. +# A value of <= 0 will turn off the state audit. +state_audit_interval_ms= -1 +# The refresh state audit is run every (default) 10 minutes (600000 ms) to clean up any state corruption in the +# DB statemanagemententity table. It only refreshes the DB state entry for the local node. That is, it does not +# refresh the state of any other nodes. A value <= 0 will turn the audit off. Any other value will override +# the default of 600000 ms. 
+refresh_state_audit_interval_ms=-1 + + +# Repository audit properties +# Flag to control the execution of the subsystemTest for the Nexus Maven repository +repository.audit.is.active=false +repository.audit.ignore.errors=true + +# DB Audit Properties +# Flag to control the execution of the subsystemTest for the Database +db.audit.is.active=false diff --git a/feature-active-standby-management/src/test/resources/feature-active-standby-management.properties b/feature-active-standby-management/src/test/resources/feature-active-standby-management.properties index bbae5d98..827d2e17 100644 --- a/feature-active-standby-management/src/test/resources/feature-active-standby-management.properties +++ b/feature-active-standby-management/src/test/resources/feature-active-standby-management.properties @@ -32,8 +32,7 @@ site_name=pdp_1 # Needed by DroolsPdpsElectionHandler pdp.checkInterval=1500 pdp.updateInterval=1000 -#pdp.timeout=3000 # Need long timeout, because testTransaction is only run every 10 seconds. 
-pdp.timeout=15000 +pdp.timeout=3000 #how long do we wait for the pdp table to populate on initial startup -pdp.initialWait=20000 \ No newline at end of file +pdp.initialWait=1000 \ No newline at end of file diff --git a/feature-active-standby-management/src/test/resources/feature-state-management.properties b/feature-active-standby-management/src/test/resources/feature-state-management.properties index 7856d251..3dd88473 100644 --- a/feature-active-standby-management/src/test/resources/feature-state-management.properties +++ b/feature-active-standby-management/src/test/resources/feature-state-management.properties @@ -40,13 +40,13 @@ resource.name=pdp1 # Name of the site in which this node is hosted site_name = pdp_1 # Forward Progress Monitor update interval seconds -fp_monitor_interval = 30 +fp_monitor_interval = 2 # Failed counter threshold before failover -failed_counter_threshold = 3 +failed_counter_threshold = 1 # Interval between test transactions when no traffic seconds -test_trans_interval = 10 +test_trans_interval = 1 # Interval between writes of the FPC to the DB seconds -write_fpc_interval = 5 +write_fpc_interval = 1 # Node type Note: Make sure you don't leave any trailing spaces, or you'll get an 'invalid node type' error! node_type = pdp_drools # Dependency groups are groups of resources upon which a node operational state is dependent upon. @@ -57,17 +57,17 @@ dependency_groups= # The default false is to use state checks for health. test_via_jmx=true # This is the max number of seconds beyond which a non incrementing FPC is considered a failure -max_fpc_update_interval=120 +max_fpc_update_interval=5 # Run the state audit every 60 seconds (60000 ms). The state audit finds stale DB entries in the # forwardprogressentity table and marks the node as disabled/failed in the statemanagemententity # table. NOTE! It will only run on nodes that have a standbystatus = providingservice. # A value of <= 0 will turn off the state audit. 
-state_audit_interval_ms=60000 +state_audit_interval_ms=-1 # The refresh state audit is run every (default) 10 minutes (600000 ms) to clean up any state corruption in the # DB statemanagemententity table. It only refreshes the DB state entry for the local node. That is, it does not # refresh the state of any other nodes. A value <= 0 will turn the audit off. Any other value will override # the default of 600000 ms. -refresh_state_audit_interval_ms=600000 +refresh_state_audit_interval_ms=-1 # Repository audit properties -- cgit 1.2.3-korg