28
28
29
29
KUBCTL_DESCRIBE_RETRIES = 3
30
30
KUBCTL_GET_YAML_RETRIES = 3
31
+ DEBUG_INFO_PACKAGE_RETRIES = 3
31
32
32
33
TIMEOUT = 180
33
34
@@ -293,19 +294,20 @@ def collect_pod_rs_logs(namespace, output_dir, k8s_cli):
293
294
logger .info ("Collected rs config from pod marked as not ready, pod name: %s" , rs_pod_name )
294
295
295
296
296
- def debuginfo_attempt_on_pod (namespace , output_dir , pod_name , attempt , k8s_cli ):
297
+ def create_debug_info_package_on_pod (namespace , pod_name , attempt , k8s_cli ):
297
298
"""
298
- Execute the rladmin command to get debug info on a specific pod
299
- Returns: true on success, false on failure
299
+ Execute the rladmin command to get debug info on a specific pod.
300
+ Returns: a tuple of the form (file_path, file_name) in case of success
301
+ and None otherwise.
300
302
"""
301
303
prog = "/opt/redislabs/bin/rladmin"
302
304
cmd = "{} -n {} exec {} -c {} {} cluster debug_info path /tmp" \
303
305
.format (k8s_cli , namespace , pod_name , RLEC_CONTAINER_NAME , prog )
304
306
return_code , out = run_shell_command (cmd )
305
- if "Downloading complete" not in out :
307
+ if return_code != 0 or "Downloading complete" not in out :
306
308
logger .warning ("Failed running rladmin command in pod: %s (attempt %d)" ,
307
309
out .rstrip (), attempt )
308
- return False
310
+ return None
309
311
310
312
# get the debug file name
311
313
match = re .search (r'File (/tmp/(.*\.gz))' , out )
@@ -314,13 +316,21 @@ def debuginfo_attempt_on_pod(namespace, output_dir, pod_name, attempt, k8s_cli):
314
316
debug_file_name = match .group (2 )
315
317
logger .info ("Namespace '%s': debug info created on pod %s in path %s" ,
316
318
namespace , pod_name , debug_file_path )
317
- else :
318
- logger .warning (
319
- "Failed to extract debug info name from output (attempt %d for pod %s) - (%s)" ,
320
- attempt , pod_name , out )
321
- return False
319
+ return (debug_file_path , debug_file_name )
320
+
321
+ logger .warning (
322
+ "Failed to extract debug info name from output (attempt %d for pod %s) - (%s)" ,
323
+ attempt , pod_name , out )
324
+ return None
325
+
322
326
323
- # copy package from RS pod
327
+ def download_debug_info_package_from_pod ( # pylint: disable=R0913
328
+ namespace , output_dir , pod_name , attempt , k8s_cli , debug_file_path , debug_file_name
329
+ ):
330
+ """
331
+ This function attempts to download the debug info package from a given pod.
332
+ It should only be called once the package is created.
333
+ """
324
334
cmd = "cd \" {}\" && {} -n {} cp {}:{} ./{}" .format (output_dir ,
325
335
k8s_cli ,
326
336
namespace ,
@@ -338,6 +348,52 @@ def debuginfo_attempt_on_pod(namespace, output_dir, pod_name, attempt, k8s_cli):
338
348
return True
339
349
340
350
351
def create_and_download_debug_info_package_from_pod(
        namespace, pod_name, output_dir, k8s_cli
):
    """
    Create a debug info package on a pod and, if creation succeeded,
    download it into output_dir.

    Each of the two steps is attempted up to DEBUG_INFO_PACKAGE_RETRIES times,
    pausing one second between consecutive attempts (never after the last one).
    If every download attempt fails, a possibly partially downloaded archive
    is removed from output_dir.

    Returns: True if the package was created and downloaded, False otherwise.
    """
    # Imported locally so this function does not depend on the module-level
    # import list, which may not include os.
    import os

    # Step 1: create the package on the pod.
    debug_info_path_and_name = None
    for attempt in range(DEBUG_INFO_PACKAGE_RETRIES):
        if attempt > 0:
            # Only wait between attempts; sleeping after the final failure
            # would just delay moving on to the next pod.
            time.sleep(1)
        debug_info_path_and_name = create_debug_info_package_on_pod(
            namespace, pod_name, attempt + 1, k8s_cli
        )
        if debug_info_path_and_name is not None:
            # We managed to create the debug info package.
            break

    # If we fail creating a debug info package, there is nothing to download,
    # so the caller can move on to the next pod.
    if debug_info_path_and_name is None:
        logger.warning("Namespace: %s: Failed creating debug info package on pod: %s", namespace, pod_name)
        return False

    # Step 2: download the package that was just created.
    (debug_info_path, debug_info_file_name) = debug_info_path_and_name
    for attempt in range(DEBUG_INFO_PACKAGE_RETRIES):
        if attempt > 0:
            time.sleep(1)
        if download_debug_info_package_from_pod(
                namespace, output_dir, pod_name, attempt + 1, k8s_cli, debug_info_path, debug_info_file_name
        ):
            logger.info(
                "Namespace '%s': Collected Redis Enterprise cluster debug package from pod: %s",
                namespace,
                pod_name
            )
            return True

    # In case of a failure to fully download the archive from the pod, make
    # sure that a partially downloaded archive is deleted.
    file_to_delete = os.path.join(output_dir, debug_info_file_name)
    logger.info(
        "Namespace: %s: Deleting possible partially downloaded debug package: %s",
        namespace,
        file_to_delete
    )
    # Delete through the os module rather than a shell "rm": an unquoted path
    # breaks on spaces or shell metacharacters, and a missing file is not an
    # error here (the download may never have started writing).
    if os.path.exists(file_to_delete):
        try:
            os.remove(file_to_delete)
        except OSError as err:
            # Best-effort cleanup: report the problem but do not abort collection.
            logger.warning(
                "Namespace: %s: Failed deleting partially downloaded debug package %s: %s",
                namespace,
                file_to_delete,
                err
            )
    return False
395
+
396
+
341
397
def get_redis_enterprise_debug_info (namespace , output_dir , k8s_cli ):
342
398
"""
343
399
Connects to an RS cluster node,
@@ -359,16 +415,8 @@ def get_redis_enterprise_debug_info(namespace, output_dir, k8s_cli):
359
415
360
416
logger .info ("Trying to extract debug info from RS pods: {%s}" , pod_names )
361
417
for pod_name in pod_names :
362
- for attempt in range (3 ):
363
- if attempt > 0 :
364
- time .sleep (1 )
365
- if debuginfo_attempt_on_pod (namespace ,
366
- output_dir ,
367
- pod_name ,
368
- attempt + 1 ,
369
- k8s_cli ):
370
- logger .info ("Namespace '%s': Collected Redis Enterprise cluster debug package" , namespace )
371
- return
418
+ if create_and_download_debug_info_package_from_pod (namespace , pod_name , output_dir , k8s_cli ):
419
+ break
372
420
373
421
374
422
def collect_resources_list (namespace , output_dir , k8s_cli ):
@@ -582,7 +630,7 @@ def get_pod_names(namespace, k8s_cli, selector=""):
582
630
pods = get_pods (namespace , k8s_cli , selector )
583
631
if not pods :
584
632
logger .info ("Namespace '%s': Cannot find pods" , namespace )
585
- return None
633
+ return []
586
634
return [pod ['metadata' ]['name' ] for pod in pods ]
587
635
588
636
0 commit comments