From 4c058689904178714211f38bc5e5e083eaffadbf Mon Sep 17 00:00:00 2001 From: Satoshi Fujii Date: Fri, 15 Oct 2021 05:57:24 +0000 Subject: Fix cbs polling process startup failure The cbs polling process sometimes failed to start with a `relation "hb_common" does not exist` error. The polling process tries to read the hb_common table on startup, but the process is created before the parent process creates the hb_common table, so the error may occur due to a race condition. The cbs_polling process must be started after the `hb_common` table is populated. Signed-off-by: Satoshi Fujii Issue-ID: DCAEGEN2-2944 Change-Id: Iee682b62a385dfaf2ec1355e781cda9c3def23c0 --- Changelog.md | 1 + miss_htbt_service/cbs_polling.py | 16 +++++++++------- miss_htbt_service/misshtbtd.py | 30 ++++++++++++++---------------- miss_htbt_service/mod/trapd_vnf_table.py | 8 ++------ tests/test_trapd_vnf_table.py | 5 ++--- 5 files changed, 28 insertions(+), 32 deletions(-) diff --git a/Changelog.md b/Changelog.md index ea8694a..a737ec8 100644 --- a/Changelog.md +++ b/Changelog.md @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/). - [DCAEGEN2-2939] Removed unused code (config\_notif.py) ### Fixed - [DCAEGEN2-2832] Pod become unready state +- [DCAEGEN2-2944] cbs polling process startup failure ## [2.3.1.] 
- 2021/06/19 diff --git a/miss_htbt_service/cbs_polling.py b/miss_htbt_service/cbs_polling.py index 8d5af09..3832b71 100644 --- a/miss_htbt_service/cbs_polling.py +++ b/miss_htbt_service/cbs_polling.py @@ -33,7 +33,7 @@ import get_logger _logger = get_logger.get_logger(__name__) -def pollCBS(current_pid): +def poll_cbs(current_pid: int) -> None: jsfile = db.fetch_json_file() ip_address, port_num, user_name, password, db_name, cbs_polling_required, cbs_polling_interval = db.read_hb_properties(jsfile) hbc_pid, hbc_state, hbc_srcName, hbc_time = db.read_hb_common(user_name, password, ip_address, port_num, db_name) @@ -50,18 +50,20 @@ def pollCBS(current_pid): hbc_pid, hbc_state, hbc_srcName, hbc_time = db.read_hb_common(user_name, password, ip_address, port_num, db_name) source_name = socket.gethostname() source_name = source_name + "-" + str(os.getenv('SERVICE_NAME', "")) - result = True - if int(current_pid) == int(hbc_pid) and source_name == hbc_srcName and hbc_state == "RUNNING": + if current_pid == int(hbc_pid) and source_name == hbc_srcName and hbc_state == "RUNNING": _logger.info("CBSP:ACTIVE Instance:Change the state to RECONFIGURATION") state = "RECONFIGURATION" update_flg = 1 db.create_update_hb_common(update_flg, hbc_pid, state, user_name, password, ip_address, port_num, db_name) else: _logger.info("CBSP:Inactive instance or hb_common state is not RUNNING") - return result -if __name__ == "__main__": - current_pid = sys.argv[1] +def cbs_polling_loop(current_pid: int): while True: - pollCBS(current_pid) + poll_cbs(current_pid) + + +if __name__ == "__main__": + parent_pid = int(sys.argv[1]) + cbs_polling_loop(parent_pid) diff --git a/miss_htbt_service/misshtbtd.py b/miss_htbt_service/misshtbtd.py index ff2f3fe..aeb5694 100644 --- a/miss_htbt_service/misshtbtd.py +++ b/miss_htbt_service/misshtbtd.py @@ -43,6 +43,7 @@ from pathlib import Path import check_health import htbtworker as heartbeat import get_logger +import cbs_polling from mod import trapd_settings 
as tds from mod.trapd_get_cbs_config import get_cbs_config @@ -50,7 +51,6 @@ hb_properties_file = path.abspath(path.join(__file__, "../config/hbproperties.ya ABSOLUTE_PATH1 = path.abspath(path.join(__file__, "../htbtworker.py")) ABSOLUTE_PATH2 = path.abspath(path.join(__file__, "../db_monitoring.py")) -ABSOLUTE_PATH4 = path.abspath(path.join(__file__, "../cbs_polling.py")) def create_database(update_db, jsfile, ip_address, port_num, user_name, password, db_name): @@ -190,13 +190,6 @@ def create_update_vnf_table_1(jsfile, update_db, connection_db): _logger.info("MSHBT:Updated vnf_table_1 as per the json configuration file") -def hb_cbs_polling_process(pid_current): - subprocess.call([ABSOLUTE_PATH4, str(pid_current)]) - sys.stdout.flush() - _logger.info("MSHBT:Creaated CBS polling process") - return - - def hb_worker_process(config_file_path): subprocess.call([ABSOLUTE_PATH1, config_file_path]) sys.stdout.flush() @@ -333,7 +326,9 @@ _logger = get_logger.get_logger(__name__) def main(): + pid_current = os.getpid() hc_proc = multiprocessing.Process(target=check_health.start_health_check_server) + cbs_polling_proc = multiprocessing.Process(target=cbs_polling.cbs_polling_loop, args=(pid_current,)) try: _logger.info("MSHBD:Execution Started") # Start health check server @@ -341,21 +336,20 @@ def main(): _logger.info("MSHBD: Started health check server. 
PID=%d", hc_proc.pid) job_list = [] - pid_current = os.getpid() jsfile = fetch_json_file() ip_address, port_num, user_name, password, db_name, cbs_polling_required, cbs_polling_interval = read_hb_properties(jsfile) msg = "MSHBT:HB Properties -", ip_address, port_num, user_name, password, db_name, cbs_polling_required, cbs_polling_interval _logger.info(msg) - if cbs_polling_required == 'True': - p3 = multiprocessing.Process(target=hb_cbs_polling_process, args=(pid_current,)) - p3.start() update_db = 0 create_update_db(update_db, jsfile, ip_address, port_num, user_name, password, db_name) state = "RECONFIGURATION" update_flg = 0 create_update_hb_common(update_flg, pid_current, state, user_name, password, ip_address, port_num, db_name) - msg = "MSHBD:Current process id is", pid_current - _logger.info(msg) + if cbs_polling_required == 'True': + # note: cbs_polling process must be started after `hb_common` table created + cbs_polling_proc.start() + _logger.info("MSHBD: Started CBS polling process. 
PID=%d", cbs_polling_proc.pid) + _logger.info("MSHBD:Current process id is %d", pid_current) _logger.info("MSHBD:Now be in a continuous loop") i = 0 while True: @@ -428,9 +422,9 @@ def main(): i = i + 1 if i > 5: _logger.info("Terminating main process for pytest") - p3.terminate() + cbs_polling_proc.terminate() time.sleep(1) - p3.join() + cbs_polling_proc.join() if len(job_list) > 0: job_list[0].terminate() time.sleep(1) @@ -454,6 +448,10 @@ def main(): if hc_proc.pid is not None: hc_proc.terminate() hc_proc.join() + # Stop CBS polling process + if cbs_polling_proc.pid is not None: + cbs_polling_proc.terminate() + cbs_polling_proc.join() if __name__ == '__main__': diff --git a/miss_htbt_service/mod/trapd_vnf_table.py b/miss_htbt_service/mod/trapd_vnf_table.py index ffef262..ff738f0 100644 --- a/miss_htbt_service/mod/trapd_vnf_table.py +++ b/miss_htbt_service/mod/trapd_vnf_table.py @@ -91,14 +91,10 @@ def verify_DB_creation_hb_common(user_name,password,ip_address,port_num,db_name) def verify_cbspolling(): os.environ['pytest']='test' os.environ['SERVICE_NAME']='mvp-dcaegen2-heartbeat-static' - try: - _cbspolling=cbs.pollCBS(10) - except Exception as e: - return None - + cbs.poll_cbs(10) os.unsetenv('pytest') os.unsetenv('SERVICE_NAME') - return _cbspolling + def verify_fetch_json_file(): os.environ['pytest']='test' diff --git a/tests/test_trapd_vnf_table.py b/tests/test_trapd_vnf_table.py index 4f6e5e4..2110324 100644 --- a/tests/test_trapd_vnf_table.py +++ b/tests/test_trapd_vnf_table.py @@ -57,9 +57,8 @@ class test_vnf_tables(unittest.TestCase): self.assertEqual(result, True) def test_cbspolling(self): - result= verify_cbspolling() - _logger.info(result) - self.assertEqual(result, True) + # Check if no exception thrown + verify_cbspolling() def test_fetch_json_file(self): result= verify_fetch_json_file() -- cgit 1.2.3-korg