81
81
from json import loads
82
82
from scipy .optimize import minimize
83
83
84
- # memory constant functions defined for @resource_allocation_plot
85
- mem_model1 = (lambda x , k , a , b : k * np .log (x ) + x * a + b )
86
- mem_model2 = (lambda x , k , a , b : k * np .log (x ) + b * np .log (x )** 2 + a )
87
- mem_model3 = (lambda x , k , a , b : k * np .log (x ) + b * np .log (x )** 2 +
88
- a * np .log (x )** 3 )
89
- mem_model4 = (lambda x , k , a , b : k * np .log (x ) + b * np .log (x )** 2 +
90
- a * np .log (x )** 2.5 )
91
- MODELS_MEM = [mem_model1 , mem_model2 , mem_model3 , mem_model4 ]
92
-
93
- # time constant functions defined for @resource_allocation_plot
94
- time_model1 = (lambda x , k , a , b : a + b + np .log (x ) * k )
95
- time_model2 = (lambda x , k , a , b : a + b * x + np .log (x ) * k )
96
- time_model3 = (lambda x , k , a , b : a + b * np .log (x )** 2 + np .log (x ) * k )
97
- time_model4 = (lambda x , k , a , b : a * np .log (x )** 3 + b * np .log (x )** 2 +
98
- np .log (x ) * k )
99
-
100
- MODELS_TIME = [time_model1 , time_model2 , time_model3 , time_model4 ]
101
-
102
84
103
85
def get_model_name(model):
    """Return the human-readable equation for a named allocation model.

    Parameters
    ----------
    model : str
        Model identifier, e.g. ``'mem_model1'`` or ``'time_model3'``.
        Leading/trailing whitespace is ignored so that both ``'mem_model1'``
        and ``' mem_model1'`` resolve to the same equation.

    Returns
    -------
    str
        The equation text for the model, or ``"Unknown model"`` when the
        identifier is not recognised (this includes non-string input).
    """
    equations = {
        'mem_model1': "k * log(x) + x * a + b",
        'mem_model2': "k * log(x) + b * log(x)^2 + a",
        'mem_model3': "k * log(x) + b * log(x)^2 + a * log(x)^3",
        'mem_model4': "k * log(x) + b * log(x)^2 + a * log(x)^2.5",
        'time_model1': "a + b + log(x) * k",
        'time_model2': "a + b * x + log(x) * k",
        'time_model3': "a + b * log(x)^2 + log(x) * k",
        'time_model4': "a * log(x)^3 + b * log(x)^2 + log(x) * k",
    }
    # Anything that is not a string (e.g. a function object from the older
    # list-of-lambdas API) can never match a model name.
    if not isinstance(model, str):
        return "Unknown model"
    return equations.get(model.strip(), "Unknown model")
@@ -2387,19 +2369,63 @@ def resource_allocation_plot(df, col_name):
2387
2369
fig , axs = plt .subplots (ncols = 2 , figsize = (10 , 4 ), sharey = False )
2388
2370
2389
2371
ax = axs [0 ]
2372
+ mem_models , time_models = _retrieve_equations ()
2373
+
2390
2374
# models for memory
2391
2375
_resource_allocation_plot_helper (
2392
- df , ax , "MaxRSSRaw" , MODELS_MEM , col_name )
2393
-
2376
+ df , ax , "MaxRSSRaw" , mem_models , col_name )
2394
2377
ax = axs [1 ]
2395
2378
# models for time
2396
2379
_resource_allocation_plot_helper (
2397
- df , ax , "ElapsedRaw" , MODELS_TIME , col_name )
2380
+ df , ax , "ElapsedRaw" , time_models , col_name )
2398
2381
2399
2382
return fig , axs
2400
2383
2401
2384
2385
def _retrieve_equations():
    """Helper function for resource_allocation_plot.

    Retrieves equations from the db and builds one dictionary of memory
    models and one of time models, each mapping the model name to a callable
    ``model(x, k, a, b)``.

    Returns
    -------
    tuple
        dict
            memory models - potential memory models for resource allocations
        dict
            time models - potential time models for resource allocations
    """
    def _as_model(equation):
        # A dedicated factory gives each callable its own `equation`
        # binding; a lambda created directly in the loop below would see
        # only the last row (late-binding closure).
        def model(x, k, a, b):
            # NOTE(security): eval executes whatever text is stored in
            # qiita.allocation_equations; that table must only be writable
            # by trusted administrators.
            return eval(equation)
        return model

    memory_models = {}
    time_models = {}
    with qdb.sql_connection.TRN:
        sql = ''' SELECT * FROM qiita.allocation_equations; '''
        qdb.sql_connection.TRN.add(sql)
        res = qdb.sql_connection.TRN.execute_fetchindex()
    for row in res:
        # Column 1 is used as the model name and column 2 as the equation
        # text, matching how the rows are indexed by position here.
        name, equation = row[1], row[2]
        target = memory_models if 'mem' in name else time_models
        # Both dictionaries are keyed by the model name so the key can be
        # passed on to get_model_name().
        target[name] = _as_model(equation)
    return (memory_models, time_models)
2410
+
2411
+
2402
2412
def retrieve_resource_data (cname , sname , version , columns ):
2413
+ '''
2414
+ Retrieves resource data from db and constructs a DataFrame with relevant
2415
+ fields.
2416
+
2417
+ Parameters
2418
+ ----------
2419
+ cname - command name for which we retrieve the resources
2420
+ sname - software name for which we retrieve the resources
2421
+ version - version of software for which we retrieve the resources
2422
+ columns - column names for the DataFrame returned by this function
2423
+
2424
+ Returns
2425
+ -------
2426
+ pd.DataFrame
2427
+ DataFrame with resources.
2428
+ '''
2403
2429
with qdb .sql_connection .TRN :
2404
2430
sql = """
2405
2431
SELECT
@@ -2457,8 +2483,8 @@ def _resource_allocation_plot_helper(
2457
2483
Specifies x axis for the graph
2458
2484
curr: str, required
2459
2485
Either MaxRSSRaw or ElapsedRaw (y axis)
2460
- models: list , required
2461
- List of functions that will be used for visualization
2486
+ models: dictionary , required
2487
+ Dictionary of functions that will be used for visualization
2462
2488
2463
2489
"""
2464
2490
@@ -2494,7 +2520,7 @@ def _resource_allocation_plot_helper(
2494
2520
ax .set_xlabel (col_name )
2495
2521
2496
2522
# 50 - number of maximum iterations, 3 - number of failures we tolerate
2497
- best_model , options = _resource_allocation_calculate (
2523
+ best_model_name , best_model , options = _resource_allocation_calculate (
2498
2524
df , x_data , y_data , models , curr , col_name , 50 , 3 )
2499
2525
k , a , b = options .x
2500
2526
x_plot = np .array (sorted (df [col_name ].unique ()))
@@ -2522,22 +2548,24 @@ def _resource_allocation_plot_helper(
2522
2548
label = "failures" )
2523
2549
success_df ['node_name' ] = success_df ['node_name' ].fillna ('unknown' )
2524
2550
slurm_hosts = set (success_df ['node_name' ].tolist ())
2525
- cmap = colormaps .get_cmap ('Accent' ).resampled (len (slurm_hosts ))
2526
- colors = [cmap (
2527
- i / (len (slurm_hosts ) - 1 )) for i in range (len (slurm_hosts ))]
2551
+ cmap = colormaps .get_cmap ('Accent' )
2552
+ if len (slurm_hosts ) > len (cmap .colors ):
2553
+ raise ValueError (f"""'Accent' colormap only has { len (cmap .colors )}
2554
+ colors, but { len (slurm_hosts )} hosts are provided.""" )
2555
+ colors = cmap .colors [:len (slurm_hosts )]
2528
2556
2529
2557
for i , host in enumerate (slurm_hosts ):
2530
2558
host_df = success_df [success_df ['node_name' ] == host ]
2531
2559
ax .scatter (host_df [col_name ], host_df [curr ], color = colors [i ], s = 3 ,
2532
2560
label = host )
2533
2561
ax .set_title (
2534
2562
f'k||a||b: { k } ||{ a } ||{ b } \n '
2535
- f'model: { get_model_name (best_model )} \n '
2563
+ f'model: { get_model_name (best_model_name )} \n '
2536
2564
f'real: { mini } || { maxi } \n '
2537
2565
f'calculated: { cmin } || { cmax } \n '
2538
2566
f'failures: { failures } ' )
2539
2567
ax .legend (loc = 'upper left' )
2540
- return best_model , options
2568
+ return best_model_name , best_model , options
2541
2569
2542
2570
2543
2571
def _resource_allocation_calculate (
@@ -2555,27 +2583,30 @@ def _resource_allocation_calculate(
2555
2583
current type (e.g. MaxRSSRaw)
2556
2584
col_name: str, required
2557
2585
Specifies x axis for the graph
2558
- models: list , required
2559
- List of functions that will be used for visualization
2586
+ models: dictionary , required
2587
+ Dictionary of functions that will be used for visualization
2560
2588
depth: int, required
2561
2589
Maximum number of iterations in binary search
2562
2590
tolerance: int, required,
2563
2591
Tolerance to number of failures possible to be considered as a model
2564
2592
2565
2593
Returns
2566
2594
----------
2595
+ best_model_name: string
2596
+ the name of the best model from the table
2567
2597
best_model: function
2568
- best fitting function for the current list models
2598
+ best fitting function for the current dictionary models
2569
2599
best_result: object
2570
2600
object containing constants for the best model (e.g. k, a, b in kx+b*a)
2571
2601
"""
2572
2602
2573
2603
init = [1 , 1 , 1 ]
2604
+ best_model_name = None
2574
2605
best_model = None
2575
2606
best_result = None
2576
2607
best_failures = np .inf
2577
2608
best_max = np .inf
2578
- for model in models :
2609
+ for model_name , model in models . items () :
2579
2610
# start values for binary search, where sl is left, sr is right
2580
2611
# penalty weight must be positive & non-zero, hence, sl >= 1.
2581
2612
# the upper bound for error can be an arbitrary large number
@@ -2646,9 +2677,10 @@ def _resource_allocation_calculate(
2646
2677
if min_max <= best_max :
2647
2678
best_failures = prev_failures
2648
2679
best_max = min_max
2680
+ best_model_name = model_name
2649
2681
best_model = model
2650
2682
best_result = res
2651
- return best_model , best_result
2683
+ return best_model_name , best_model , best_result
2652
2684
2653
2685
2654
2686
def _resource_allocation_custom_loss (params , x , y , model , p ):
0 commit comments