Skip to content

Commit 60f623f

Browse files
Merge pull request #241 from RedisLabs/heinrich-redislabs-fix-log-collector-RED-79996
RED-79996 - Copy recent log collector changes from operator repo
2 parents ff6181d + 2633a64 commit 60f623f

File tree

1 file changed

+70
-22
lines changed

1 file changed

+70
-22
lines changed

log_collector/log_collector.py

Lines changed: 70 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828

2929
KUBCTL_DESCRIBE_RETRIES = 3
3030
KUBCTL_GET_YAML_RETRIES = 3
31+
DEBUG_INFO_PACKAGE_RETRIES = 3
3132

3233
TIMEOUT = 180
3334

@@ -293,19 +294,20 @@ def collect_pod_rs_logs(namespace, output_dir, k8s_cli):
293294
logger.info("Collected rs config from pod marked as not ready, pod name: %s", rs_pod_name)
294295

295296

296-
def debuginfo_attempt_on_pod(namespace, output_dir, pod_name, attempt, k8s_cli):
297+
def create_debug_info_package_on_pod(namespace, pod_name, attempt, k8s_cli):
297298
"""
298-
Execute the rladmin command to get debug info on a specific pod
299-
Returns: true on success, false on failure
299+
Execute the rladmin command to get debug info on a specific pod.
300+
Returns: a tuple of the form (file_path, file_name) in case of success
301+
and None otherwise.
300302
"""
301303
prog = "/opt/redislabs/bin/rladmin"
302304
cmd = "{} -n {} exec {} -c {} {} cluster debug_info path /tmp" \
303305
.format(k8s_cli, namespace, pod_name, RLEC_CONTAINER_NAME, prog)
304306
return_code, out = run_shell_command(cmd)
305-
if "Downloading complete" not in out:
307+
if return_code != 0 or "Downloading complete" not in out:
306308
logger.warning("Failed running rladmin command in pod: %s (attempt %d)",
307309
out.rstrip(), attempt)
308-
return False
310+
return None
309311

310312
# get the debug file name
311313
match = re.search(r'File (/tmp/(.*\.gz))', out)
@@ -314,13 +316,21 @@ def debuginfo_attempt_on_pod(namespace, output_dir, pod_name, attempt, k8s_cli):
314316
debug_file_name = match.group(2)
315317
logger.info("Namespace '%s': debug info created on pod %s in path %s",
316318
namespace, pod_name, debug_file_path)
317-
else:
318-
logger.warning(
319-
"Failed to extract debug info name from output (attempt %d for pod %s) - (%s)",
320-
attempt, pod_name, out)
321-
return False
319+
return (debug_file_path, debug_file_name)
320+
321+
logger.warning(
322+
"Failed to extract debug info name from output (attempt %d for pod %s) - (%s)",
323+
attempt, pod_name, out)
324+
return None
325+
322326

323-
# copy package from RS pod
327+
def download_debug_info_package_from_pod( # pylint: disable=R0913
328+
namespace, output_dir, pod_name, attempt, k8s_cli, debug_file_path, debug_file_name
329+
):
330+
"""
331+
This function attempts to download a debug info package from a given pod.
332+
It should only be called once the package is created.
333+
"""
324334
cmd = "cd \"{}\" && {} -n {} cp {}:{} ./{}".format(output_dir,
325335
k8s_cli,
326336
namespace,
@@ -338,6 +348,52 @@ def debuginfo_attempt_on_pod(namespace, output_dir, pod_name, attempt, k8s_cli):
338348
return True
339349

340350

351+
def create_and_download_debug_info_package_from_pod(
352+
namespace, pod_name, output_dir, k8s_cli
353+
):
354+
"""
355+
This function attempts to create a debug info package on a pod and if debug
356+
info package creation was successful, attempts downloading it.
357+
"""
358+
debug_info_path_and_name = None
359+
for attempt in range(DEBUG_INFO_PACKAGE_RETRIES):
360+
debug_info_path_and_name = create_debug_info_package_on_pod(namespace, pod_name, attempt + 1, k8s_cli)
361+
if debug_info_path_and_name is not None:
362+
# We managed to create the debug info package.
363+
break
364+
time.sleep(1)
365+
366+
# If we fail creating a debug info package, there is nothing to download, so we move on to the next pod.
367+
if debug_info_path_and_name is None:
368+
logger.info("Namespace: %s: Failed creating debug info package on pod: %s", namespace, pod_name)
369+
return False
370+
371+
(debug_info_path, debug_info_file_name) = debug_info_path_and_name
372+
for attempt in range(DEBUG_INFO_PACKAGE_RETRIES):
373+
if download_debug_info_package_from_pod(
374+
namespace, output_dir, pod_name, attempt + 1, k8s_cli, debug_info_path, debug_info_file_name
375+
):
376+
logger.info(
377+
"Namespace '%s': Collected Redis Enterprise cluster debug package from pod: %s",
378+
namespace,
379+
pod_name
380+
)
381+
return True
382+
time.sleep(1)
383+
384+
# In case of a failure to fully download the archive from the pod, make sure that any partially downloaded
385+
# archive is deleted.
386+
file_to_delete = "{}/{}".format(output_dir, debug_info_file_name)
387+
logger.info(
388+
"Namespace: %s: Deleting possible partially downloaded debug package: %s",
389+
namespace,
390+
file_to_delete
391+
)
392+
cmd = "rm {}".format(file_to_delete)
393+
run_shell_command(cmd)
394+
return False
395+
396+
341397
def get_redis_enterprise_debug_info(namespace, output_dir, k8s_cli):
342398
"""
343399
Connects to an RS cluster node,
@@ -359,16 +415,8 @@ def get_redis_enterprise_debug_info(namespace, output_dir, k8s_cli):
359415

360416
logger.info("Trying to extract debug info from RS pods: {%s}", pod_names)
361417
for pod_name in pod_names:
362-
for attempt in range(3):
363-
if attempt > 0:
364-
time.sleep(1)
365-
if debuginfo_attempt_on_pod(namespace,
366-
output_dir,
367-
pod_name,
368-
attempt + 1,
369-
k8s_cli):
370-
logger.info("Namespace '%s': Collected Redis Enterprise cluster debug package", namespace)
371-
return
418+
if create_and_download_debug_info_package_from_pod(namespace, pod_name, output_dir, k8s_cli):
419+
break
372420

373421

374422
def collect_resources_list(namespace, output_dir, k8s_cli):
@@ -582,7 +630,7 @@ def get_pod_names(namespace, k8s_cli, selector=""):
582630
pods = get_pods(namespace, k8s_cli, selector)
583631
if not pods:
584632
logger.info("Namespace '%s': Cannot find pods", namespace)
585-
return None
633+
return []
586634
return [pod['metadata']['name'] for pod in pods]
587635

588636

0 commit comments

Comments
 (0)