-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcreate_google_config.py
388 lines (307 loc) · 16.6 KB
/
create_google_config.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
##!/usr/bin/env python
import sys, os
from apiclient.discovery import build
# Import GoogleCredentials
from oauth2client.client import GoogleCredentials
import google.auth
from oauth2client.file import Storage
from subprocess import check_output
from google.cloud import resource_manager, storage
# Import utilities
import json#TODO remove? re
import urllib
import datetime, time
import requests
# Import WDL and inputs files for testing
from hello.inputs import * as hello_inputs
from hello.wdl import * as hello_wdl
# IN README: python setup.py install pre-req pre-script
# Google setup
# Application-default credentials; every API client below is built with them.
credentials = GoogleCredentials.get_application_default()
# Build a cloud billing API service
billing = build('cloudbilling', 'v1', credentials=credentials)
# Build a cloud resource manager API service
crm = build('cloudresourcemanager', 'v1', credentials=credentials)
# Create storage client
# NOTE(review): this rebinds the name `storage`, hiding the `storage` module
# imported from google.cloud at the top of the file.
storage = build('storage', 'v1', credentials=credentials)
# Create Service management API service
smgt = build('servicemanagement', 'v1', credentials=credentials)
# Global variables
# Name of the bucket for workflow outputs; assigned by create_google_bucket().
bucket_name = ""
# The current user's home directory; the configuration file is written here.
home = os.path.expanduser("~")
# Google project used in the API calls and written into the configuration
# file; starts out empty.
project_name = ""
# The purpose of this script is to create a configuration file for Cromwell to run on Google Cloud with your local data.
def main():
    """Run the interactive setup from start to finish.

    Order matters: the existing-config check comes first so nothing is
    overwritten, then the SDK check, project selection (which goes on to
    create the bucket) and finally the configuration file.
    """
    # Refuse to continue when a configuration file is already present.
    google_config_check()

    banner = (
        "\nHello and welcome! This script is for users who want to run their WDL scripts on Google Cloud\n"
        "using the Cromwell engine. This script will walk you through the following setup steps:\n"
        "(1) Check that you have Google Cloud SDK installed,\n"
        "(2) Select an existing Google Project or create a new one,\n"
        "(3) Create a Google Bucket for the workflow outputs,\n"
        "(4) Create a Configuration file for running Cromwell,\n"
        "(5) Enable APIs for running workflows,\n"
        "(6) Test your configuration file by downloading Cromwell and running a \"Hello, World\" WDL.\n"
        "\nReady? Let's get started.\n"
    )
    print(banner)

    # The gcloud SDK is needed by the remaining steps.
    sdk_install_check()
    # New or existing project; this step continues on to bucket creation.
    which_google_project()
    # Write ~/.google_cromwell.config.
    create_config()
# Ensure that the user has not run this script before, to avoid overwriting an existing configuration file
def google_config_check():
    """Exit if ``.google_cromwell.config`` already exists in the home directory.

    The path is built from the module-level ``home``. When the file is
    present, instructions for clearing the setup are printed and the script
    exits; otherwise the function simply returns.
    """
    config_path = home + '/.google_cromwell.config'
    if not os.path.exists(config_path):
        return
    print(
        "\nYou already have a Cromwell configuration file. If you would like to clear this setup\n"
        "and create a new file, remove (or rename) the hidden file ~/.google_cromwell.config\n"
    )
    sys.exit("Exiting.")
def input_prompt(prompt_text):
    """Ask a yes/no question and return the answer as a boolean.

    Args:
        prompt_text: Text shown for the first prompt.

    Returns:
        True when the (lower-cased) reply starts with "y", False when it
        starts with "n".  Any other reply triggers a re-prompt.
    """
    reply = raw_input(prompt_text).lower()
    while True:
        if reply.startswith("y"):
            return True
        if reply.startswith("n"):
            return False
        reply = raw_input('\nPlease answer yes or no: ').lower()
# Ensure that gcloud SDK is installed, which is necessary to run the rest of the script.
def sdk_install_check():
    """Make sure the gcloud SDK is available, offering to install it if not.

    Runs ``gcloud version``; a zero exit status means the SDK is installed.
    Otherwise the user is asked whether to install it.  Declining exits the
    script, because the remaining steps cannot run without the SDK.
    """
    # os.system returns an exit status; compare it by value.  The previous
    # `is not 0` identity test only worked through CPython's small-int cache.
    if os.system('gcloud version') == 0:
        print("\nStep (1): You already have Google Cloud SDK installed. Step (1) is complete.")
        return

    # Ask if user wants to install the SDK
    install_sdk = input_prompt('\nStep (1): You do not have Google Cloud SDK installed, which you need to run this script.\nDo you want to install gcloud SDK? (yes or no)')
    if not install_sdk:
        # Without the SDK the script cannot continue.
        print("The Google Cloud SDK will not be installed. If you would like to install the SDK in the future, you can run this script again.")
        sys.exit("Exiting.")

    os.system('curl https://sdk.cloud.google.com | bash')
    # expanduser() only handles "~"; expandvars() actually resolves $SHELL.
    # When SHELL is unset the text "$SHELL" is left as-is, which is what the
    # previous code always passed on to the command line.
    shell = os.path.expandvars("$SHELL")
    # Need to create new shell to start using gcloud
    os.system('exec -l ' + shell)
    os.system('gcloud init')
# Which Google Project to use (new or existing)
def which_google_project():
    """Choose the Google project to use: an existing one or a newly created one.

    For an existing project the name is read from the user, stored in the
    module-level ``project_name`` and bucket creation is started.  Otherwise
    the user may let the script create a project (via
    ``find_billing_accounts``) or exit.

    Returns:
        The selected or generated project name.
    """
    # Without this declaration the assignments below would create a local
    # variable: the rest of the script would keep seeing an empty project
    # name, and the "new project" path would fail on the final return.
    global project_name

    existing_project = input_prompt('\n\nStep (2): Do you have an existing Google project where you want to run workflows? (yes or no) ')
    if existing_project:
        entered = raw_input('\nEnter your Google project name: ').strip()
        # An empty name cannot identify a project; keep asking.
        while not entered:
            entered = raw_input('\nEnter your Google project name: ').strip()
        project_name = entered
        create_google_bucket()
        return project_name

    print("\nIf you do not have a Google project you want to use, this script will generate a new one for you.")
    create_new_project = input_prompt('\nWould you like to continue? (yes or no) ')
    if not create_new_project:
        print("\nYou can set up a Google Project outside of this script and then re-run the script.\nThen at step (2), select Yes that you have an existing project and enter the project name to continue with setup.")
        sys.exit("Exiting.")

    # find_billing_accounts() goes on to create the project, link billing and
    # create the bucket; it returns the generated project name.
    project_name = find_billing_accounts()
    return project_name
# Search for user's billing accounts
def _build_project_id(user_name, moment):
    """Return ``cromwell-<user>-MM-SS`` restricted to a valid project ID.

    Only lowercase ASCII letters, digits and hyphens are kept, the text
    "google" is removed, and the user part is trimmed so the whole ID stays
    within 30 characters.
    """
    prefix = "cromwell-"
    suffix = moment.strftime("-%M-%S")
    allowed = "abcdefghijklmnopqrstuvwxyz0123456789"
    cleaned = "".join(ch if ch in allowed else "-" for ch in user_name.lower())
    cleaned = cleaned.replace("google", "")
    room = 30 - len(prefix) - len(suffix)
    cleaned = cleaned[:room].strip("-")
    if not cleaned:
        cleaned = "user"
    return prefix + cleaned + suffix


def find_billing_accounts():
    """Let the user pick a billing account, then create a project billed to it.

    Lists every billing account returned by the Cloud Billing API, prints
    them as a table and asks for one of the listed IDs.  A project name is
    then generated from the gcloud account name plus a minute/second stamp,
    stored in the module-level ``project_name`` and handed on to
    ``create_google_project``.  Exits when no billing account is available.

    Returns:
        The generated project name.
    """
    # create_google_project() and the later steps read the module-level
    # name, so it has to be assigned as a global here.
    global project_name

    # from https://github.com/lukwam/gcp-tools/blob/master/lib/google.py#L216
    billing_accounts = billing.billingAccounts()
    request = billing_accounts.list()
    billing_accounts_list = []
    # Page through the responses until list_next() reports no further page.
    while request is not None:
        response = request.execute()
        billing_accounts_list.extend(response.get('billingAccounts', []))
        request = billing_accounts.list_next(request, response)

    if not billing_accounts_list:
        # User does not have access to any billing accounts
        print("You do not have a Google billing account set up. In order to run\nWDLs in the Google cloud you need an account to bill to. See the README\nfor more details.\nTo learn about creating a billing account, see here: \nhttps://cloud.google.com/billing/docs/how-to/manage-billing-account#create_a_new_billing_account")
        sys.exit("Exiting.")

    print("\nYou have access to the following Google billing accounts: ")
    headers = "Billing Account ID\tBilling Account Name"
    print(headers)
    print('-' * len(headers.expandtabs()))
    known_ids = []
    for billing_acct in billing_accounts_list:
        acct_id = billing_acct["name"].replace("billingAccounts/", "")
        known_ids.append(acct_id)
        print("%s\t%s" % (acct_id, billing_acct["displayName"]))

    print("\nEnter the \"Billing Account ID\" of the billing account you want to use\nto create a new Google project. This will be the billing account that is charged\nfor storage and compute costs.")
    ex_billing_acct = "002481-B7351F-CD111E"
    billing_account_id = raw_input("(IDs are case-sensitive and will look similar to this: %s): " % ex_billing_acct).strip()
    # Accept only an ID that was actually listed; a length check alone would
    # let any 20-character text through.
    while billing_account_id not in known_ids:
        billing_account_id = raw_input("Please enter a valid billing account: ").strip()
    print("\nYou have selected this Billing Account: %s" % billing_account_id)

    # Project name with datetime stamp (minute and second) and user's email address
    # DO NOT PUT AN UNDERSCORE '_' IN THE NAME, it cannot be longer than 30 characters, nor can it have "google"
    user_name = check_output(['gcloud', 'config', 'get-value', 'core/account']).rstrip().partition("@")[0]
    project_name = _build_project_id(user_name, datetime.datetime.now())

    create_google_project(billing_account_id)
    return project_name
# Create a google project for the user
def create_google_project(billing_account_id):
    """Create the project named by the module-level ``project_name``.

    After issuing the create request this waits for the project to appear
    and then links it to the chosen billing account.

    Args:
        billing_account_id: ID of the billing account to link.

    Returns:
        The module-level project name.
    """
    # The same value is used as both the project ID and the display name.
    new_project = {
        'project_id': '%s' % project_name,
        'name': '%s' % project_name,
        'labels': None,
    }
    create_request = crm.projects().create(body=new_project)
    create_request.execute()

    # Check the project is ready before touching its billing settings.
    check_project_created()
    enable_billing_account(billing_account_id)
    return project_name
def check_project_created():
    """Wait until the project named by ``project_name`` is listed.

    Polls the Resource Manager project list every 10 seconds.  Every page of
    the listing is examined and the project ID must match exactly; looking
    only at the first page, or accepting a substring of a display name,
    could wait forever or report success for a different project.

    Returns:
        False once the project is found (kept as-is for existing callers).
    """
    projects_api = crm.projects()
    while True:
        request = projects_api.list()
        # Walk all result pages, the same way billing accounts are listed.
        while request is not None:
            response = request.execute()
            for project in response.get("projects", []):
                if project.get("projectId") == project_name:
                    print("Project created successfully. View your new project here: https://console.cloud.google.com/home/dashboard?project=%s" % project_name)
                    return False
            request = projects_api.list_next(request, response)
        print("Creating project...")
        time.sleep(10)
# Link the newly created Google project to the user's chosen billing account
def enable_billing_account(billing_account_id):
    """Link the project named by ``project_name`` to a billing account.

    Sends the billing-info update, waits until billing is reported as
    enabled and then moves on to bucket creation.

    Args:
        billing_account_id: Billing account ID without the
            ``billingAccounts/`` prefix (the prefix is added here).
    """
    billing_info = {
        "project_id": "%s" % project_name,
        "billing_account_name": "billingAccounts/%s" % billing_account_id,
        "billing_enabled": "True",
    }
    update_request = billing.projects().updateBillingInfo(
        name="projects/%s" % project_name,
        body=billing_info,
    )
    update_request.execute()

    # Check billing is enabled
    check_billing_enabled()
    # Then create bucket
    create_google_bucket()
def check_billing_enabled():
    """Wait until billing is reported as enabled for ``project_name``.

    Requests the project's billing info (only the ``billingEnabled`` field)
    every 10 seconds until it is true, then pauses once more before
    returning.

    Returns:
        False once billing is enabled.
    """
    query = {"name": "projects/%s" % project_name, "fields": "billingEnabled"}
    while True:
        info = billing.projects().getBillingInfo(**query).execute()
        # Equality (not truthiness) is kept deliberately, as in the
        # original comparison.
        if "billingEnabled" in info and True == info["billingEnabled"]:
            print("Billing is enabled for your project.")
            # A short pause before returning
            time.sleep(10)
            return False
        print("Linking project to your billing account...")
        time.sleep(10)
def create_google_bucket():
    """Create the ``<project_name>-executions`` bucket in the project.

    The bucket name is stored in the module-level ``bucket_name`` before the
    insert request is sent, and the function waits for the bucket to be
    readable.

    Returns:
        The bucket name.
    """
    global bucket_name

    print("Step (2) is complete.\n\n\nStep (3): Create a Google bucket, starting now...")
    bucket_name = "%s-executions" % project_name

    insert_request = storage.buckets().insert(
        project="%s" % project_name,
        body={"name": "%s" % bucket_name},
    )
    insert_request.execute()

    # Check the bucket was created
    check_bucket_created(bucket_name)
    return bucket_name
def check_bucket_created(bucket_name):
    """Wait until the bucket's metadata includes a creation time.

    Args:
        bucket_name: Name of the bucket to look up.

    Returns:
        False once ``timeCreated`` is present in the response.
    """
    query = {"bucket": "%s" % bucket_name, "fields": "timeCreated"}
    while True:
        # Fetch the bucket metadata, limited to the timeCreated field.
        metadata = storage.buckets().get(**query).execute()
        if "timeCreated" in metadata:
            print("Bucket created successfully. View your new bucket here: https://console.cloud.google.com/storage/browser/%s" % bucket_name)
            return False
        print("Creating bucket...")
        time.sleep(10)
def create_config():
    """Write the Cromwell configuration file and continue to the API/test step.

    The template below is filled in with the module-level ``project_name``
    (the ``project`` setting) and ``bucket_name`` (the ``gs://`` execution
    root), written to ``.google_cromwell.config`` in the home directory, and
    then ``start_cromwell_test()`` is called.
    """
    #TODO: will this work without project or bucket name?
    global project_name
    global bucket_name
    print "Step (3) is complete.\n\nStep (4): Create configuration file, starting now..."
    # The two %s placeholders are, in order, the project and the bucket.
    config_contents = "include required(classpath(\"application\"))\n\ngoogle {\n\n\tapplication-name = \"cromwell\"\n\n\tauths = [\n\t\t{\n\t\t\tname = \"application-default\"\n\t\t\tscheme = \"application_default\"\n\t\t}\n\t]\n}\n\nengine {\n\tfilesystems {\n\t\tgcs {\n\t\t\tauth = \"application-default\"\n\t\t}\n\t}\n}\n\nbackend {\n\tdefault = \"JES\"\n\tproviders {\n\t\tJES {\n\t\t\tactor-factory = \"cromwell.backend.impl.jes.JesBackendLifecycleActorFactory\"\n\t\t\tconfig {\n\t\t\t\t// Google project\n\t\t\t\tproject = \"%s\"\n\t\t\t\tcompute-service-account = \"default\"\n\n\t\t\t\t// Base bucket for workflow executions\n\t\t\t\troot = \"gs://%s\"\n\n\t\t\t\t// Polling for completion backs-off gradually for slower-running jobs.\n\t\t\t\t// This is the maximum polling interval (in seconds):\n\t\t\t\tmaximum-polling-interval = 600\n\n\t\t\t\t// Optional Dockerhub Credentials. Can be used to access private docker images.\n\t\t\t\tdockerhub {\n\t\t\t\t\t// account = \"\"\n\t\t\t\t\t// token = \"\"\n\t\t\t\t}\n\n\t\t\t\tgenomics {\n\t\t\t\t\t// A reference to an auth defined in the \`google\` stanza at the top. This auth is used to create\n\t\t\t\t\t// Pipelines and manipulate auth JSONs.\n\t\t\t\t\tauth = \"application-default\"\n\t\t\t\t\t// Endpoint for APIs, no reason to change this unless directed by Google.\n\t\t\t\t\tendpoint-url = \"https://genomics.googleapis.com/\"\n\t\t\t\t}\n\n\t\t\t\tfilesystems {\n\t\t\t\t\tgcs {\n\t\t\t\t\t\t// A reference to a potentially different auth for manipulating files via engine functions.\n\t\t\t\t\t\tauth = \"application-default\"\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t}\n}" % (project_name, bucket_name)
    # Create configuration file
    with open(home + "/.google_cromwell.config","w+") as f:
        f.write(config_contents)
    print "Your configuration file is ready! It is stored in ~/.google_cromwell.config."
    # Next step: offer to enable the required APIs and run the Hello test.
    start_cromwell_test()
def start_cromwell_test():
    """Offer to enable the required APIs and then to run the Hello WDL test.

    Declining either question prints a short hint and exits the script.
    Accepting both enables the four services one at a time and runs
    ``hello_test()``.
    """
    print("Step (4) is complete.\n\nStep (5): Enable APIs\nTo use your new configuration you will need to enable the following APIs in your Google project:\nGoogle Cloud Storage, Google Cloud Storage JSON, Google Compute Engine, Google Genomics.")

    if not input_prompt('\nWould you like to enable these APIs now? (yes or no) '):
        # Don't enable APIs, and exit
        print("Don't forget to enable the APIs through the Google Console prior to using the configuration.")
        sys.exit("Exiting.")

    required_services = (
        "compute.googleapis.com",
        "storage-api.googleapis.com",
        "genomics.googleapis.com",
        "storage-component.googleapis.com",
    )
    for required in required_services:
        enable_services(required)
    print("APIs are enabled. View the list of enabled APIs here: https://console.cloud.google.com/apis/dashboard?project=%s" % project_name)

    # Continue with testing configuration
    if not input_prompt('Step (5) is complete.\n\nStep (6): Test your configuration\nDo you want to run a Hello WDL test to check your configuration? (yes or no) '):
        print("You are now ready to use Cromwell to run pipelines on Google Cloud.\nNext you can run a simple WDL with the Five Minute Tutorial here: http://cromwell.readthedocs.io/en/develop/tutorials/FiveMinuteIntro/\n")
        sys.exit("Exiting.")

    hello_test()
def enable_services(service_name):
    """Enable one service for the project and wait until it is listed.

    Args:
        service_name: Service to enable, e.g. ``compute.googleapis.com``.
    """
    consumer = {"consumerId": "project:%s" % project_name}
    enable_request = smgt.services().enable(
        serviceName="%s" % service_name,
        body=consumer,
    )
    enable_request.execute()
    # Check that the service is enabled
    check_services_enabled(service_name)
def check_services_enabled(service_name):
    """Wait until ``service_name`` appears among the project's services.

    The service list for the project is requested every 20 seconds; a
    listed service whose name contains ``service_name`` ends the wait.

    Args:
        service_name: Service name to look for.

    Returns:
        False once the service is found.
    """
    query = {"consumerId": "project:%s" % project_name, "fields": "services/serviceName"}
    while True:
        listing = smgt.services().list(**query).execute()
        if "services" in listing:
            for entry in listing["services"]:
                if service_name in entry["serviceName"]:
                    return False
        print("Enabling APIs...")
        time.sleep(20)
def hello_test():
    """Run a "Hello, World" workflow to test the new configuration.

    Writes ``hello.wdl`` and ``hello.inputs`` to the current directory,
    downloads the Cromwell jar from the latest GitHub release and runs the
    workflow with ``~/.google_cromwell.config``.  Success is reported only
    when the Cromwell command exits with status 0; otherwise the script
    exits with a failure message.

    NOTE(review): this relies on module-level names ``hello_wdl`` and
    ``hello_inputs`` holding the file contents as text.  The import lines
    meant to provide them (``from ... import * as ...``) are not valid
    Python as written -- confirm how these names should be imported.
    """
    # The module-level names are used directly.  Re-assigning them here
    # (``hello_wdl = hello_wdl``) made them local to this function and
    # failed before anything was written.
    print("Creating WDL file...")
    with open("hello.wdl", "w+") as f:
        f.write(hello_wdl)
    print("Your WDL file is ready! It is stored as hello.wdl.")

    print("Creating inputs file...")
    with open("hello.inputs", "w+") as f:
        f.write(hello_inputs)
    print("Your inputs file is ready! It is stored as hello.inputs.")

    # Download latest Cromwell
    print("Downloading latest version of Cromwell execution engine...")
    response = requests.get('https://api.github.com/repos/broadinstitute/cromwell/releases/latest')
    # Fail here on an HTTP error rather than later on a missing key.
    response.raise_for_status()
    release = response.json()
    download_url = None
    for asset in release.get("assets", []):
        if "cromwell-" in asset["browser_download_url"]:
            download_url = asset["browser_download_url"]
    if download_url is None:
        print("Could not find a Cromwell jar in the latest release. Download it manually from https://github.com/broadinstitute/cromwell/releases and save it as cromwell.jar.")
        sys.exit("Exiting.")
    urllib.urlretrieve(download_url, "cromwell.jar")

    # Run test
    test_configuration = "java -Dconfig.file=" + home + "/.google_cromwell.config -jar cromwell.jar run hello.wdl -i hello.inputs"
    print("Cromwell is downloaded and ready for operation.\n\nStarting Hello World test...\n\nRunning $ %s\n" % test_configuration)
    exit_status = os.system(test_configuration)
    if exit_status != 0:
        # Do not claim success when the Cromwell command failed.
        print("Workflow failed (exit status %s).\nReview the Cromwell output above and the Cromwell documentation at http://cromwell.readthedocs.io/ before trying again.\n" % exit_status)
        sys.exit("Exiting.")

    # Success
    print("Workflow succeeded!\nOutputs for this workflow can be found in gs://%s\n\nYou have successfully set up your Google Project, Bucket, and configuration. \nCheck out the WDL website for more information on writing your own workflows: https://software.broadinstitute.org/wdl/documentation/quickstart.\n" % bucket_name)
# Run the interactive setup only when this file is executed as a script.
if __name__ == "__main__":
    main()