From ecf4fe63616d2c65a846dc8339109d7773c60424 Mon Sep 17 00:00:00 2001 From: Christian Herdtweck Date: Fri, 24 Nov 2023 15:11:43 +0100 Subject: [PATCH] Improve reliability of wait_for_generate We encountered race conditions in QA where generate had finished, but while we checked for generate_offline a new generate was scheduled. Check both at the same time now. --- src/arnied_wrapper.py | 36 +++++++++++++++++++++++++++++------- 1 files changed, 29 insertions(+), 7 deletions(-) diff --git a/src/arnied_wrapper.py b/src/arnied_wrapper.py index e2176e5..53eb54e 100644 --- a/src/arnied_wrapper.py +++ b/src/arnied_wrapper.py @@ -347,11 +347,13 @@ def wait_for_run(program, timeout=300, retries=10, vm=None): + program.upper() check_scheduled = run_cmd(cmd=cmd, ignore_errors=True, vm=vm) if check_scheduled.returncode == 0: - break + break # is scheduled or already running time.sleep(1) - else: + else: # always returned 1, so neither scheduled nor running log.warning("The program %s was not scheduled and is not running", program) - return + return # no need to wait for it to finish since it's not running + + # Wait for a scheduled or running program to end: cmd = f"{BIN_ARNIED_HELPER} --wait-for-program-end " \ f"{program.upper()} --wait-for-program-timeout {timeout}" # add one second to make sure arnied_helper is finished when we expire @@ -377,11 +379,31 @@ def wait_for_arnied(timeout=60, vm=None): # Configuration functionality -def wait_for_generate(timeout=300, vm=None): +def wait_for_generate(timeout: int = 300, vm=None) -> bool: """ Wait for the 'generate' program to complete. - Arguments are similar to the ones from :py:func:`wait_for_run`. + At the end of this function call, there will be no `generate` or `generate_offline` + scheduled or running, except if any of those took longer than `timeout`. 
Will return `False` + in those cases, `True` otherwise + + :param timeout: max time to wait for this function to finish + :returns: True if no runs of generate are underway or scheduled, False if `timeout` was not + enough """ - wait_for_run('generate', timeout=timeout, retries=1, vm=vm) - wait_for_run('generate_offline', timeout=timeout, retries=1, vm=vm) + # To avoid races (which we did encounter), do not wait_for_run("generate") and then for + # "generate_offline", but do both "simultaneously" here. + # Since generate may well cause a generate-offline to be scheduled right afterwards, check + # in this order + cmd1 = f"{BIN_ARNIED_HELPER} --is-scheduled-or-running GENERATE" + cmd2 = f"{BIN_ARNIED_HELPER} --is-scheduled-or-running GENERATE_OFFLINE" + end_time = time.monotonic() + timeout - 0.5 + while run_cmd(cmd=cmd1, ignore_errors=True, vm=vm).returncode == 0 \ + or run_cmd(cmd=cmd2, ignore_errors=True, vm=vm).returncode == 0: + # one of them is scheduled or running, so check timeout and wait + if time.monotonic() > end_time: + log.warning("Timeout waiting for generate to start/finish") + return False + log.debug("Waiting for generate to start/finish...") + time.sleep(1) + return True \ No newline at end of file -- 1.7.1