fastmodel: Add option to retry license server connection.

We're seeing some occasional connection timeouts in CI, possibly
when we aggressively hit the license server, so let's add a
parameter to retry the connection a few times.

Also, print the time required to connect to the server to help
debug issues.

Change-Id: I804af28f79f893fcdca615d7bf82dd9b8686a74c
This commit is contained in:
Nicolas Boichat
2023-07-20 12:31:43 +00:00
parent 41dcd3c5d5
commit 3ea7a792b0

View File

@@ -23,6 +23,7 @@
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import datetime
import logging
import os
import socket
@@ -44,11 +45,46 @@ def set_armlmd_license_file(force=False):
os.environ[ARM_LICENSE_ENV] = license_file
def check_armlmd_license(timeout):
def check_armlmd_server(server, timeout):
    """Check whether a single license source is available.

    "server" is either a path to a license file or a license server
    given as "port@host". For a server, only reachability is checked:
    a TCP connection is opened and closed again, without talking to
    the server.

    "timeout" is the connection timeout in seconds.

    Returns True if the license file exists or the connection
    succeeded, False otherwise. Connection errors are logged at debug
    level and never propagated.
    """
    if os.path.exists(server):
        logging.debug(f"License file {server} exists.")
        return True

    parts = server.split("@")
    if len(parts) != 2:
        # Probably not a server, and we know the file doesn't exist.
        logging.debug(f'License file "{server}" does not exist.')
        return False
    port, host = parts

    try:
        start = datetime.datetime.now()
        # Try to connect to license server. This doesn't attempt to
        # communicate with it, just checking reachability. The context
        # manager guarantees the socket is closed.
        with socket.create_connection((host, int(port)), timeout=timeout):
            elapsed = datetime.datetime.now() - start
        logging.info(
            f"License server {server} is reachable ({elapsed.total_seconds()} seconds)."
        )
        return True
    except Exception as e:
        # Deliberately broad: this is a best-effort precheck, so any
        # failure (bad port number, DNS error, timeout, refusal) just
        # means "not available".
        logging.debug(
            f"Cannot connect to license server {server} ({type(e).__name__}: {e})."
        )
        return False
def check_armlmd_license(timeout, tries):
"""Check if any of the provided license server can be reached, or
if a license file is provided. This allows to fail early and fast,
as fastmodel code makes multiple lengthy attempts to connect to
license server. "timeout" is in seconds.
license server. "timeout" is in seconds. Makes "tries" attempts to
connect.
"""
servers = os.environ[ARM_LICENSE_ENV].split(":")
@@ -62,33 +98,17 @@ def check_armlmd_license(timeout):
if extra not in servers:
servers.append(extra)
for server in servers:
if os.path.exists(server):
logging.debug(f"License file {server} exists.")
break
tuple = server.split("@")
if len(tuple) != 2:
# Probably not a server, and we know the file doesn't exist.
logging.debug(f'License file "{server}" does not exist.')
continue
try:
# Try to connect to license server. This doesn't attempt to
# communicate with it, just checking reachability.
s = socket.create_connection(
(tuple[1], int(tuple[0])), timeout=timeout
for try_count in range(1, tries + 1):
for server in servers:
if check_armlmd_server(server, timeout):
return
if try_count == tries:
raise ConnectionError(
f"Cannot connect to any of the license servers ({', '.join(servers)})."
)
s.close()
logging.debug(f"License server {server} is reachable.")
break
except Exception as e:
logging.debug(
f"Cannot connect to license server {server} ({type(e).__name__}: {e})."
)
else:
raise ConnectionError(
f"Cannot connect to any of the license servers ({', '.join(servers)})."
# retry
logging.warning(
"Cannot connect to any of the license servers, retrying..."
)
@@ -199,10 +219,11 @@ def setup_simulation(
exit_on_dmi_warning=False,
license_precheck=False,
license_precheck_timeout=1,
license_precheck_tries=3,
):
set_armlmd_license_file()
if license_precheck:
check_armlmd_license(license_precheck_timeout)
check_armlmd_license(license_precheck_timeout, license_precheck_tries)
scx_initialize(sim_name)
scx_set_min_sync_latency(min_sync_latency)
if exit_on_dmi_warning: