Skip to content

Commit 4b57881

Browse files
enrico-usai authored and demartinofra committed
Add retry to the retrieve_dcv_session_url function
The DCV session might not have started yet when its URL is requested, so the retrieval has to be retried.
Signed-off-by: Enrico Usai <[email protected]>
1 parent 2cdc55b commit 4b57881

File tree

2 files changed

+59
-20
lines changed

2 files changed

+59
-20
lines changed

Diff for: cli/pcluster/dcv/connect.py

+37-20
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,18 @@
2222
get_master_ip_and_username,
2323
get_stack,
2424
get_stack_name,
25+
retry,
2526
)
2627

2728
LOGGER = logging.getLogger(__name__)
2829

2930

31+
class DCVConnectionError(Exception):
    """Error raised when the connection to the DCV session fails."""
35+
36+
3037
def _check_command_output(cmd):
    """
    Execute a command line through the shell and return what it printed.

    The standard error stream is redirected into the standard output, the output is read as text
    and leading/trailing whitespace is removed before returning it.

    :param cmd: the command line to execute.
    :returns: the stripped output (stdout and stderr combined) of the command.
    :raises sub.CalledProcessError: if the command exits with a non-zero status.
    """
    # NOTE(review): the command runs with shell=True, so cmd must never contain untrusted input.
    raw_output = sub.check_output(cmd, shell=True, universal_newlines=True, stderr=sub.STDOUT)
    return raw_output.strip()
3239

@@ -52,13 +59,35 @@ def dcv_connect(args):
5259
DCV_SHARED_DIR=shared_dir,
5360
)
5461

55-
# Connect by ssh to the master instance and prepare DCV session
5662
try:
57-
LOGGER.debug("SSH command: {0}".format(cmd))
58-
output = _check_command_output(cmd)
63+
url = retry(_retrieve_dcv_session_url, func_args=[cmd, args.cluster_name, master_ip], attempts=4)
64+
url_message = "Please use the following one-time URL in your browser within 30 seconds:\n{0}".format(url)
65+
except DCVConnectionError as e:
66+
error(
67+
"Something went wrong during DCV connection.\n{0}"
68+
"Please check the logs in the /var/log/parallelcluster/ folder "
69+
"of the master instance and submit an issue {1}\n".format(e, PCLUSTER_ISSUES_LINK)
70+
)
71+
72+
if args.show_url:
73+
LOGGER.info(url_message)
74+
return
75+
76+
try:
77+
if not webbrowser.open_new(url):
78+
raise webbrowser.Error("Unable to open the Web browser.")
79+
except webbrowser.Error as e:
80+
LOGGER.info("{0}\n{1}".format(e, url_message))
81+
82+
83+
def _retrieve_dcv_session_url(ssh_cmd, cluster_name, master_ip):
84+
"""Connect by ssh to the master instance, prepare DCV session and return the DCV session URL."""
85+
try:
86+
LOGGER.debug("SSH command: {0}".format(ssh_cmd))
87+
output = _check_command_output(ssh_cmd)
5988
# At first ssh connection, the ssh command alerts it is adding the host to the known hosts list
6089
if re.search("Permanently added .* to the list of known hosts.", output):
61-
output = _check_command_output(cmd)
90+
output = _check_command_output(ssh_cmd)
6291

6392
dcv_parameters = re.search(
6493
r"PclusterDcvServerPort=([\d]+) PclusterDcvSessionId=([\w]+) PclusterDcvSessionToken=([\w-]+)", output
@@ -71,30 +100,18 @@ def dcv_connect(args):
71100
error(
72101
"Something went wrong during DCV connection. Please manually execute the command:\n{0}\n"
73102
"If the problem persists, please check the logs in the /var/log/parallelcluster/ folder "
74-
"of the master instance and submit an issue {1}.".format(cmd, PCLUSTER_ISSUES_LINK)
103+
"of the master instance and submit an issue {1}".format(ssh_cmd, PCLUSTER_ISSUES_LINK)
75104
)
76105

77106
except sub.CalledProcessError as e:
78107
if "{0}: No such file or directory".format(DCV_CONNECT_SCRIPT) in e.output:
79108
error(
80109
"The cluster {0} has been created with an old version of ParallelCluster "
81-
"without the DCV support.".format(args.cluster_name)
110+
"without the DCV support.".format(cluster_name)
82111
)
83112
else:
84-
error("Something went wrong during DCV connection.\n{0}".format(e.output))
113+
raise DCVConnectionError(e.output)
85114

86-
# DCV URL
87-
url = "https://{IP}:{PORT}?authToken={TOKEN}#{SESSION_ID}".format(
115+
return "https://{IP}:{PORT}?authToken={TOKEN}#{SESSION_ID}".format(
88116
IP=master_ip, PORT=dcv_server_port, TOKEN=dcv_session_token, SESSION_ID=dcv_session_id
89117
)
90-
url_message = "Please use the following one-time URL in your browser within 30 seconds:\n{0}".format(url)
91-
92-
if args.show_url:
93-
LOGGER.info(url_message)
94-
return
95-
96-
try:
97-
if not webbrowser.open_new(url):
98-
raise webbrowser.Error("Unable to open the Web browser.")
99-
except webbrowser.Error as e:
100-
LOGGER.info("{0}\n{1}".format(e, url_message))

Diff for: cli/pcluster/utils.py

+22
Original file line numberDiff line numberDiff line change
@@ -463,3 +463,25 @@ def get_master_ip_and_username(cluster_name):
463463

464464
def get_cli_log_file():
465465
return os.path.expanduser(os.path.join("~", ".parallelcluster", "pcluster-cli.log"))
466+
467+
468+
def retry(func, func_args, attempts=1, wait=0):
    """
    Call function and re-execute it if it raises an Exception.

    The function is always executed at least once: an ``attempts`` value lower than 1 is treated as 1,
    so the call can neither be silently skipped nor retried forever.

    :param func: the function to execute.
    :param func_args: the positional arguments of the function.
    :param attempts: the maximum number of attempts. Default: 1.
    :param wait: delay in seconds between attempts. Default: 0.
    :returns: the result of the function.
    :raises Exception: the exception raised by the last attempt, when every attempt fails.
    """
    # Clamp to one attempt: 0 used to return None without calling func,
    # and a negative value never reached the exit condition of the loop.
    remaining_attempts = max(attempts, 1)
    while True:
        try:
            return func(*func_args)
        except Exception as e:
            remaining_attempts -= 1
            if not remaining_attempts:
                # Bare raise re-raises the active exception with its original traceback.
                raise

            LOGGER.debug("{0}, retrying in {1} seconds..".format(e, wait))
            time.sleep(wait)

0 commit comments

Comments
 (0)