aboutsummaryrefslogtreecommitdiffstats
path: root/feature-active-standby-management/src/test/resources
diff options
context:
space:
mode:
author: Kevin McKiou <km097d@att.com> 2017-12-13 15:26:59 -0600
committer: Kevin McKiou <km097d@att.com> 2017-12-13 15:27:31 -0600
commit: bc8c8286645f74753d175eee7ca62d989555c96c (patch)
tree: a94d177a5e6ef1908cb56f1153864737b7b42c15 /feature-active-standby-management/src/test/resources
parent: 4884099eff44975eee57e6748823ff73f965e332 (diff)
Fix issues blocking election handler thread
This bug tracks the AT&T bug 355533. The symptom was that drools pdps that were backing each other up were becoming stuck in a standby state. The cause was that the election handler thread was being hung by a call to PolicyEngine.manager.deactivate which shuts down the topic endpoints when the drools pdp operational state transitions to disabled. Related problems were that the election handler heartbeat was NOT blocked when the main thread was blocked and the IntegrityMonitor forward progress counter was NOT blocked from incrementing when the election handler thread was blocked. This prevented the correct failover of the drools pdp to another healthy one. This change fixes the two causes of the thread blockage, moves the election handler heartbeat to the main thread and adds an interface (AllSeemsWell) which is called when the election handler has stalled/resumed. The AllSeemsWell interface will block forward progress counter increments when ALLNOTWELL and will resume forward progress counter increments when ALLSEEMSWELL. In addition, it reduces the run time of the StandbyStateManagementTest from approximately 8 minutes to approximately 2 minutes. Since this changes classes also changed by POLICY-444, this change must be merged before POLICY-444 can be merged. Issue-ID: POLICY-501 Change-Id: I7b8180d11077ccf59b21b6484cb58b5522a3df8f Signed-off-by: Kevin McKiou <km097d@att.com>
Diffstat (limited to 'feature-active-standby-management/src/test/resources')
-rw-r--r-- feature-active-standby-management/src/test/resources/asw/feature-active-standby-management.properties | 39
-rw-r--r-- feature-active-standby-management/src/test/resources/asw/feature-state-management.properties | 81
-rw-r--r-- feature-active-standby-management/src/test/resources/feature-active-standby-management.properties | 5
-rw-r--r-- feature-active-standby-management/src/test/resources/feature-state-management.properties | 14
4 files changed, 129 insertions, 10 deletions
diff --git a/feature-active-standby-management/src/test/resources/asw/feature-active-standby-management.properties b/feature-active-standby-management/src/test/resources/asw/feature-active-standby-management.properties
new file mode 100644
index 00000000..f0711e6c
--- /dev/null
+++ b/feature-active-standby-management/src/test/resources/asw/feature-active-standby-management.properties
@@ -0,0 +1,39 @@
+###
+# ============LICENSE_START=======================================================
+# feature-active-standby-management
+# ================================================================================
+# Copyright (C) 2017 AT&T Intellectual Property. All rights reserved.
+# ================================================================================
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============LICENSE_END=========================================================
+###
+
+# DB properties
+javax.persistence.jdbc.driver = org.h2.Driver
+javax.persistence.jdbc.url = jdbc:h2:file:./sql/activestandbymanagement
+javax.persistence.jdbc.user = sa
+javax.persistence.jdbc.password =
+
+# Must be unique across the system
+resource.name=pdp1
+# Name of the site in which this node is hosted
+site_name=pdp_1
+
+# Needed by DroolsPdpsElectionHandler
+pdp.checkInterval=1500
+pdp.updateInterval=1000
+
+# Need long timeout, because testTransaction is only run every 1 second.
+pdp.timeout=3000
+#how long do we wait for the pdp table to populate on initial startup
+pdp.initialWait=1000
\ No newline at end of file
diff --git a/feature-active-standby-management/src/test/resources/asw/feature-state-management.properties b/feature-active-standby-management/src/test/resources/asw/feature-state-management.properties
new file mode 100644
index 00000000..2629c63d
--- /dev/null
+++ b/feature-active-standby-management/src/test/resources/asw/feature-state-management.properties
@@ -0,0 +1,81 @@
+###
+# ============LICENSE_START=======================================================
+# feature-active-standby-management
+# ================================================================================
+# Copyright (C) 2017 AT&T Intellectual Property. All rights reserved.
+# ================================================================================
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============LICENSE_END=========================================================
+###
+
+# DB properties
+javax.persistence.jdbc.driver = org.h2.Driver
+javax.persistence.jdbc.url = jdbc:h2:file:./sql/statemanagement
+javax.persistence.jdbc.user = sa
+javax.persistence.jdbc.password =
+
+# DroolsPDPIntegrityMonitor Properties
+
+http.server.services=TEST
+http.server.services.TEST.host=0.0.0.0
+http.server.services.TEST.port=9981
+#These properties will default to the following if no other values are provided:
+# http.server.services.TEST.restClasses=org.onap.policy.drools.statemanagement.IntegrityMonitorRestManager
+# http.server.services.TEST.managed=false
+# http.server.services.TEST.swagger=true
+
+#IntegrityMonitor Properties
+
+# Must be unique across the system
+resource.name=pdp1
+# Name of the site in which this node is hosted
+site_name = pdp_1
+# How often in sec the forward progress monitor checks for forward progress
+fp_monitor_interval = 2
+# Failed counter threshold before failover
+failed_counter_threshold = 1
+# Interval, in seconds, between test transactions when there is no traffic
+test_trans_interval = 1
+# Interval, in seconds, between writes of the FPC to the DB
+write_fpc_interval = 1
+# Node type Note: Make sure you don't leave any trailing spaces, or you'll get an 'invalid node type' error!
+node_type = pdp_drools
+# Dependency groups are groups of resources upon which a node operational state is dependent upon.
+# Each group is a comma-separated list of resource names and groups are separated by a semicolon. For example:
+# dependency_groups=site_1.astra_1,site_1.astra_2;site_1.brms_1,site_1.brms_2;site_1.logparser_1;site_1.pypdp_1
+dependency_groups=
+# When set to true, dependent health checks are performed by using JMX to invoke test() on the dependent.
+# The default false is to use state checks for health.
+test_via_jmx=true
+# This is the max number of seconds beyond which a non incrementing FPC is considered a failure
+max_fpc_update_interval=5
+# State audit interval in ms (e.g. 60000 runs it every 60 seconds). The state audit finds stale DB entries in the
+# forwardprogressentity table and marks the node as disabled/failed in the statemanagemententity
+# table. NOTE! It will only run on nodes that have a standbystatus = providingservice.
+# A value of <= 0 will turn off the state audit.
+state_audit_interval_ms= -1
+# The refresh state audit is run every (default) 10 minutes (600000 ms) to clean up any state corruption in the
+# DB statemanagemententity table. It only refreshes the DB state entry for the local node. That is, it does not
+# refresh the state of any other nodes. A value <= 0 will turn the audit off. Any other value will override
+# the default of 600000 ms.
+refresh_state_audit_interval_ms=-1
+
+
+# Repository audit properties
+# Flag to control the execution of the subsystemTest for the Nexus Maven repository
+repository.audit.is.active=false
+repository.audit.ignore.errors=true
+
+# DB Audit Properties
+# Flag to control the execution of the subsystemTest for the Database
+db.audit.is.active=false
diff --git a/feature-active-standby-management/src/test/resources/feature-active-standby-management.properties b/feature-active-standby-management/src/test/resources/feature-active-standby-management.properties
index bbae5d98..827d2e17 100644
--- a/feature-active-standby-management/src/test/resources/feature-active-standby-management.properties
+++ b/feature-active-standby-management/src/test/resources/feature-active-standby-management.properties
@@ -32,8 +32,7 @@ site_name=pdp_1
# Needed by DroolsPdpsElectionHandler
pdp.checkInterval=1500
pdp.updateInterval=1000
-#pdp.timeout=3000
# Need long timeout, because testTransaction is only run every 10 seconds.
-pdp.timeout=15000
+pdp.timeout=3000
#how long do we wait for the pdp table to populate on initial startup
-pdp.initialWait=20000
\ No newline at end of file
+pdp.initialWait=1000
\ No newline at end of file
diff --git a/feature-active-standby-management/src/test/resources/feature-state-management.properties b/feature-active-standby-management/src/test/resources/feature-state-management.properties
index 7856d251..3dd88473 100644
--- a/feature-active-standby-management/src/test/resources/feature-state-management.properties
+++ b/feature-active-standby-management/src/test/resources/feature-state-management.properties
@@ -40,13 +40,13 @@ resource.name=pdp1
# Name of the site in which this node is hosted
site_name = pdp_1
# Forward Progress Monitor update interval seconds
-fp_monitor_interval = 30
+fp_monitor_interval = 2
# Failed counter threshold before failover
-failed_counter_threshold = 3
+failed_counter_threshold = 1
# Interval between test transactions when no traffic seconds
-test_trans_interval = 10
+test_trans_interval = 1
# Interval between writes of the FPC to the DB seconds
-write_fpc_interval = 5
+write_fpc_interval = 1
# Node type Note: Make sure you don't leave any trailing spaces, or you'll get an 'invalid node type' error!
node_type = pdp_drools
# Dependency groups are groups of resources upon which a node operational state is dependent upon.
@@ -57,17 +57,17 @@ dependency_groups=
# The default false is to use state checks for health.
test_via_jmx=true
# This is the max number of seconds beyond which a non incrementing FPC is considered a failure
-max_fpc_update_interval=120
+max_fpc_update_interval=5
# Run the state audit every 60 seconds (60000 ms). The state audit finds stale DB entries in the
# forwardprogressentity table and marks the node as disabled/failed in the statemanagemententity
# table. NOTE! It will only run on nodes that have a standbystatus = providingservice.
# A value of <= 0 will turn off the state audit.
-state_audit_interval_ms=60000
+state_audit_interval_ms=-1
# The refresh state audit is run every (default) 10 minutes (600000 ms) to clean up any state corruption in the
# DB statemanagemententity table. It only refreshes the DB state entry for the local node. That is, it does not
# refresh the state of any other nodes. A value <= 0 will turn the audit off. Any other value will override
# the default of 600000 ms.
-refresh_state_audit_interval_ms=600000
+refresh_state_audit_interval_ms=-1
# Repository audit properties