From 3ea7a792b0c8d29cd4f43ec007e9919d1d698b1f Mon Sep 17 00:00:00 2001 From: Nicolas Boichat Date: Thu, 20 Jul 2023 12:31:43 +0000 Subject: [PATCH] fastmodel: Add option to retry licence server connection. We're seeing some occasional connection timeouts in CI, possibly when we aggressively hit the license server, so let's add a parameter to retry the connection a few times. Also, print the time required to connect to the server to help debug issues. Change-Id: I804af28f79f893fcdca615d7bf82dd9b8686a74c --- src/arch/arm/fastmodel/arm_fast_model.py | 79 +++++++++++++++--------- 1 file changed, 50 insertions(+), 29 deletions(-) diff --git a/src/arch/arm/fastmodel/arm_fast_model.py b/src/arch/arm/fastmodel/arm_fast_model.py index 5a38eb132b..45a97d7957 100644 --- a/src/arch/arm/fastmodel/arm_fast_model.py +++ b/src/arch/arm/fastmodel/arm_fast_model.py @@ -23,6 +23,7 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +import datetime import logging import os import socket @@ -44,11 +45,46 @@ def set_armlmd_license_file(force=False): os.environ[ARM_LICENSE_ENV] = license_file -def check_armlmd_license(timeout): +def check_armlmd_server(server, timeout): + """Check if the "server" passed as parameter is available. server + can also be a license file""" + if os.path.exists(server): + logging.debug(f"License file {server} exists.") + return True + + tuple = server.split("@") + if len(tuple) != 2: + # Probably not a server, and we know the file doesn't exist. + logging.debug(f'License file "{server}" does not exist.') + return False + + try: + start = datetime.datetime.now() + # Try to connect to license server. This doesn't attempt to + # communicate with it, just checking reachability. 
+ s = socket.create_connection( + (tuple[1], int(tuple[0])), timeout=timeout + ) + end = datetime.datetime.now() + s.close() + time = end - start + logging.info( + f"License server {server} is reachable ({time.total_seconds()} seconds)." + ) + return True + except Exception as e: + logging.debug( + f"Cannot connect to license server {server} ({type(e).__name__}: {e})." + ) + return False + + +def check_armlmd_license(timeout, tries): """Check if any of the provided license server can be reached, or if a license file is provided. This allows to fail early and fast, as fastmodel code makes multiple lengthy attempts to connect to - license server. "timeout" is in seconds. + license server. "timeout" is in seconds. Makes "tries" attempts to + connect. """ servers = os.environ[ARM_LICENSE_ENV].split(":") @@ -62,33 +98,17 @@ def check_armlmd_license(timeout): if extra not in servers: servers.append(extra) - for server in servers: - if os.path.exists(server): - logging.debug(f"License file {server} exists.") - break - - tuple = server.split("@") - if len(tuple) != 2: - # Probably not a server, and we know the file doesn't exist. - logging.debug(f'License file "{server}" does not exist.') - continue - - try: - # Try to connect to license server. This doesn't attempt to - # communicate with it, just checking reachability. - s = socket.create_connection( - (tuple[1], int(tuple[0])), timeout=timeout + for try_count in range(1, tries + 1): + for server in servers: + if check_armlmd_server(server, timeout): + return + if try_count == tries: + raise ConnectionError( + f"Cannot connect to any of the license servers ({', '.join(servers)})." 
+ # retry + logging.warning( + "Cannot connect to any of the license servers, retrying..." ) @@ -199,10 +219,11 @@ def setup_simulation( exit_on_dmi_warning=False, license_precheck=False, license_precheck_timeout=1, + license_precheck_tries=3, ): set_armlmd_license_file() if license_precheck: - check_armlmd_license(license_precheck_timeout) + check_armlmd_license(license_precheck_timeout, license_precheck_tries) scx_initialize(sim_name) scx_set_min_sync_latency(min_sync_latency) if exit_on_dmi_warning: