Skip to content

Commit 0e9517e

Browse files
committed
Cost Controls scripts
1 parent 0948c6c commit 0e9517e

File tree

2 files changed

+258
-0
lines changed

2 files changed

+258
-0
lines changed
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
# SPDX-License-Identifier: MIT-0
3+
#
4+
# Permission is hereby granted, free of charge, to any person obtaining a copy of this
5+
# software and associated documentation files (the "Software"), to deal in the Software
6+
# without restriction, including without limitation the rights to use, copy, modify,
7+
# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
8+
# permit persons to whom the Software is furnished to do so.
9+
#
10+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
11+
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
12+
# PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
13+
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
14+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
15+
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
16+
17+
import boto3, json, sys, os
18+
boto3.compat.filter_python_deprecation_warnings()
19+
from decimal import Decimal
20+
pricing_client = boto3.client('pricing', region_name='us-east-1')
21+
22+
23+
def find_by_key(data, target):
    """Lazily yield the values stored under ``target`` anywhere in a nested dict.

    The walk is depth-first and follows each dict's own iteration order.
    Only dict values are descended into; lists and other containers are
    treated as leaves. A value that is itself a dict is always descended
    into and never yielded, even when its key equals ``target``.

    Args:
        data: dict to search (may contain nested dicts).
        target: key to look for.

    Yields:
        Every non-dict value whose key equals ``target``.
    """
    # Stack of partially consumed ``items()`` iterators, innermost dict last.
    pending = [iter(data.items())]
    while pending:
        for name, entry in pending[-1]:
            if isinstance(entry, dict):
                # Descend now; the parent iterator resumes after the child
                # is exhausted, which preserves depth-first order.
                pending.append(iter(entry.items()))
                break
            if name == target:
                yield entry
        else:
            # Innermost dict fully consumed.
            pending.pop()
29+
30+
31+
def get_compute_type(config_path='/etc/parallelcluster/slurm_plugin/fleet-config.json'):
    """Return the instance type of the first compute resource in the fleet config.

    The JSON file is searched (via ``find_by_key``) for ``Instances`` entries;
    the ``InstanceType`` of the first element of the first non-empty entry is
    returned. Any further entries are ignored, so a cluster mixing several
    instance types is priced using only the first one found.

    Args:
        config_path: path of the fleet configuration JSON file. Defaults to
            the location that was previously hard-coded.

    Returns:
        The ``InstanceType`` value of the first listed instance.

    Raises:
        OSError: if the file cannot be opened.
        LookupError: if the file contains no non-empty ``Instances`` entry
            (previously this case silently returned ``None``).
    """
    with open(config_path, 'r') as f:
        data = json.load(f)

    for instances in find_by_key(data, 'Instances'):
        # Skip empty lists instead of failing with IndexError on them.
        if instances:
            return instances[0]['InstanceType']

    raise LookupError('No instance type found in {0}'.format(config_path))
39+
40+
41+
def get_instance_type_pricing(instance_type, region_code='eu-north-1'):
    """Return the On-Demand USD price per vCPU-minute of ``instance_type``.

    Queries the Price List API through the module-level ``pricing_client``
    for Linux, shared-tenancy offers of the instance type, keeps the offer
    whose usage type contains ``BoxUsage``, and divides its USD price by 60
    (the listed price is treated as hourly) and by the offer's vCPU count.

    Args:
        instance_type: EC2 instance type name taken from the fleet config.
        region_code: region whose prices are queried. Defaults to
            ``'eu-north-1'``, the value that was previously hard-coded.

    Returns:
        Decimal: USD price of one vCPU for one minute.

    Raises:
        LookupError: if no ``BoxUsage`` offer or no USD price is returned
            (previously this surfaced as a TypeError/StopIteration).
    """
    filters = [
        {'Field': field, 'Type': 'TERM_MATCH', 'Value': value}
        for field, value in (
            ('instanceType', instance_type),
            ('regionCode', region_code),
            ('operatingSystem', 'Linux'),
            ('tenancy', 'shared'),
            # Pin the plain On-Demand offer: without these two attributes the
            # result can also contain Linux offers bundled with licensed
            # software and capacity-reservation offers at different prices.
            ('preInstalledSw', 'NA'),
            ('capacitystatus', 'Used'),
        )
    ]
    response = pricing_client.get_products(ServiceCode='AmazonEC2', Filters=filters)

    product_pricing = None
    for item in response['PriceList']:
        # Each PriceList entry is a JSON document encoded as a string.
        json_item = json.loads(item)
        if 'BoxUsage' in json_item['product']['attributes']['usagetype']:
            product_pricing = json_item

    if product_pricing is None:
        raise LookupError(
            'No On-Demand BoxUsage offer found for {0} in {1}'.format(instance_type, region_code))

    price = next(find_by_key(product_pricing['terms']['OnDemand'], 'USD'), None)
    if price is None:
        raise LookupError(
            'No USD price found for {0} in {1}'.format(instance_type, region_code))

    vcpu_count = int(product_pricing['product']['attributes']['vcpu'])

    return Decimal(price) / 60 / vcpu_count
83+
84+
85+
def _exact_decimal(value):
    """Convert ``value`` to Decimal without importing binary float error."""
    # Decimal(0.9) is 0.90000000000000002220...; going through str() gives 0.9.
    if isinstance(value, float):
        return Decimal(str(value))
    return Decimal(value)


def convert_budget_to_minutes(budget, price_per_minute, budget_padding_factor='0.9'):
    """Convert a monetary budget into a whole number of billable minutes.

    Args:
        budget: total budget (string, int, float or Decimal), in the same
            currency as ``price_per_minute``.
        price_per_minute: Decimal price of one minute of the billed unit.
        budget_padding_factor: fraction of the budget that may actually be
            spent; for example 0.9 means 90% of the budget is used to set
            the GrpTRESMins limit. Defaults to 0.9.

    Returns:
        int: affordable minutes, truncated toward zero.

    Raises:
        decimal.InvalidOperation: if ``budget`` is not a valid number.
        ZeroDivisionError: if ``price_per_minute`` is zero.
    """
    # Multiply before dividing: the product is usually exact, so only the
    # final division can round and an exact result is never truncated down.
    usable_budget = _exact_decimal(budget) * _exact_decimal(budget_padding_factor)
    return int(usable_budget / price_per_minute)
92+
93+
94+
def apply_grpstresmins(minutes):
    """Set the cpu GrpTRESMins limit of the ``pcdefault`` account via sacctmgr.

    Args:
        minutes: value placed after ``GrpTRESMins=cpu=`` in the command.

    Raises:
        Exception: if the shell command reports a non-zero status.
    """
    command = 'sacctmgr modify account pcdefault set GrpTRESMins=cpu={0} -i'.format(minutes)
    status = os.system(command)
    if status != 0:
        raise Exception('Unable to apply GrpTRESMins via sacctmgr')
101+
102+
103+
if __name__ == '__main__':

    # The budget is the only command-line argument; fail with a usage
    # message rather than a bare IndexError when it is missing.
    if len(sys.argv) < 2:
        sys.exit('usage: {0} <budget>'.format(sys.argv[0]))
    budget = sys.argv[1]

    # get instance type
    instance_type = get_compute_type()

    # query pricelist API
    price_per_minute = get_instance_type_pricing(instance_type)

    # convert price to minutes
    total_mins = convert_budget_to_minutes(budget, price_per_minute)

    # apply grptresmins
    apply_grpstresmins(total_mins)

    print('successfully applied {0} minute limit to sacctmgr'.format(total_mins))
120+
121+
# slurm-accounting-db-test-v5-publicdb
122+
123+
Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
# SPDX-License-Identifier: MIT-0
3+
#
4+
# Permission is hereby granted, free of charge, to any person obtaining a copy of this
5+
# software and associated documentation files (the "Software"), to deal in the Software
6+
# without restriction, including without limitation the rights to use, copy, modify,
7+
# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
8+
# permit persons to whom the Software is furnished to do so.
9+
#
10+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
11+
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
12+
# PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
13+
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
14+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
15+
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
16+
17+
import subprocess, json, boto3
18+
boto3.compat.filter_python_deprecation_warnings()
19+
from decimal import Decimal
20+
21+
pricing_client = boto3.client('pricing', region_name='us-east-1')
22+
23+
24+
def find_by_key(data, target):
    """Lazily yield the values stored under ``target`` anywhere in a nested dict.

    The walk is depth-first and follows each dict's own iteration order.
    Only dict values are descended into; lists and other containers are
    treated as leaves. A value that is itself a dict is always descended
    into and never yielded, even when its key equals ``target``.

    Args:
        data: dict to search (may contain nested dicts).
        target: key to look for.

    Yields:
        Every non-dict value whose key equals ``target``.
    """
    # Stack of partially consumed ``items()`` iterators, innermost dict last.
    pending = [iter(data.items())]
    while pending:
        for name, entry in pending[-1]:
            if isinstance(entry, dict):
                # Descend now; the parent iterator resumes after the child
                # is exhausted, which preserves depth-first order.
                pending.append(iter(entry.items()))
                break
            if name == target:
                yield entry
        else:
            # Innermost dict fully consumed.
            pending.pop()
30+
31+
32+
def get_compute_type(config_path='/etc/parallelcluster/slurm_plugin/fleet-config.json'):
    """Return the instance type of the first compute resource in the fleet config.

    The JSON file is searched (via ``find_by_key``) for ``Instances`` entries;
    the ``InstanceType`` of the first element of the first non-empty entry is
    returned. Any further entries are ignored, so a cluster mixing several
    instance types is priced using only the first one found.

    Args:
        config_path: path of the fleet configuration JSON file. Defaults to
            the location that was previously hard-coded.

    Returns:
        The ``InstanceType`` value of the first listed instance.

    Raises:
        OSError: if the file cannot be opened.
        LookupError: if the file contains no non-empty ``Instances`` entry
            (previously this case silently returned ``None``).
    """
    with open(config_path, 'r') as f:
        data = json.load(f)

    for instances in find_by_key(data, 'Instances'):
        # Skip empty lists instead of failing with IndexError on them.
        if instances:
            return instances[0]['InstanceType']

    raise LookupError('No instance type found in {0}'.format(config_path))
39+
40+
41+
def calculate_node_mins(sacct_output):
    """Sum ``elapsed time x allocated node count`` over all jobs that have steps.

    Jobs whose ``steps`` list is empty are skipped. For every other job the
    ``time.elapsed`` value is multiplied by the ``count`` of its allocated
    TRES entry of type ``node`` (0 when there is none; the last one wins if
    there are several).

    NOTE(review): the result is used as node *minutes* by the caller, but
    ``time.elapsed`` is not converted here. Slurm's JSON output appears to
    report elapsed time in seconds - confirm, and divide by 60 if so.

    Args:
        sacct_output: parsed ``sacct --json`` document containing a
            ``jobs`` list.

    Returns:
        The accumulated total; 0 when no job qualifies.
    """
    def allocated_nodes(job):
        # Count of the job's 'node' TRES allocation, or 0 if it has none.
        nodes = 0
        for tres in job['tres']['allocated']:
            if tres['type'] == 'node':
                nodes = tres['count']
        return nodes

    return sum(
        job['time']['elapsed'] * allocated_nodes(job)
        for job in sacct_output['jobs']
        if len(job['steps']) != 0
    )
58+
59+
60+
def get_instance_type_pricing(instance_type, region_code='eu-north-1'):
    """Return the On-Demand USD price per minute of one ``instance_type`` node.

    Queries the Price List API through the module-level ``pricing_client``
    for Linux, shared-tenancy offers of the instance type, keeps the offer
    whose usage type contains ``BoxUsage``, and divides its USD price by 60
    (the listed price is treated as hourly).

    Args:
        instance_type: EC2 instance type name taken from the fleet config.
        region_code: region whose prices are queried. Defaults to
            ``'eu-north-1'``, the value that was previously hard-coded.

    Returns:
        Decimal: USD price of one instance for one minute.

    Raises:
        LookupError: if no ``BoxUsage`` offer or no USD price is returned
            (previously this surfaced as a TypeError/StopIteration).
    """
    filters = [
        {'Field': field, 'Type': 'TERM_MATCH', 'Value': value}
        for field, value in (
            ('instanceType', instance_type),
            ('regionCode', region_code),
            ('operatingSystem', 'Linux'),
            ('tenancy', 'shared'),
            # Pin the plain On-Demand offer: without these two attributes the
            # result can also contain Linux offers bundled with licensed
            # software and capacity-reservation offers at different prices.
            ('preInstalledSw', 'NA'),
            ('capacitystatus', 'Used'),
        )
    ]
    response = pricing_client.get_products(ServiceCode='AmazonEC2', Filters=filters)

    product_pricing = None
    for item in response['PriceList']:
        # Each PriceList entry is a JSON document encoded as a string.
        json_item = json.loads(item)
        if 'BoxUsage' in json_item['product']['attributes']['usagetype']:
            product_pricing = json_item

    if product_pricing is None:
        raise LookupError(
            'No On-Demand BoxUsage offer found for {0} in {1}'.format(instance_type, region_code))

    price = next(find_by_key(product_pricing['terms']['OnDemand'], 'USD'), None)
    if price is None:
        raise LookupError(
            'No USD price found for {0} in {1}'.format(instance_type, region_code))

    return Decimal(price) / 60
101+
102+
103+
if __name__ == '__main__':
    # sacct to get job statistics (one week of data); an argument list is
    # used because the command is fixed and needs no shell.
    output = subprocess.check_output(['sacct', '--starttime', 'now-7days', '--json'])

    json_output = json.loads(output)
    node_mins = calculate_node_mins(json_output)

    # get instance type
    instance_type = get_compute_type()

    # query pricelist API
    price_per_minute = get_instance_type_pricing(instance_type)

    compute_budget_total = Decimal(price_per_minute) * Decimal(node_mins)

    print('total cost= {0}'.format(str(compute_budget_total)))

    # publish the computed cost as a custom CloudWatch metric
    cw_client = boto3.client('cloudwatch', region_name='eu-north-1')

    cw_client.put_metric_data(
        Namespace='ParallelCluster',
        MetricData=[
            {
                'MetricName': 'cluster_cost',
                'Dimensions': [
                    {
                        'Name': 'ClusterName',
                        'Value': 'hpc',
                    },
                ],
                'Value': compute_budget_total,
            },
        ],
    )

0 commit comments

Comments
 (0)