Skip to content

Commit fd7a98f

Browse files
authored
GIS KFP example: kfp-related syntax changes; use gcs client libs instead of gsutil (kubeflow#749)
* Some mods to accommodate (perhaps temporary) changes in how the KFP SDK works.
* Use GCS client libs rather than gsutil for a GCS copy; required due to changes in node service account permissions.
* More mods to address KFP syntax changes.
1 parent b218d2b commit fd7a98f

File tree

6 files changed

+38
-33
lines changed

6 files changed

+38
-33
lines changed

github_issue_summarization/pipelines/components/t2t/t2t-proc/datagen.py

+12-7
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,16 @@ def copy_local_directory_to_gcs(project, local_path, bucket_name, gcs_path):
3939
blob = bucket.blob(remote_path)
4040
blob.upload_from_filename(local_file)
4141

42+
def download_blob(bucket_name, source_blob_name, destination_file_name):
43+
"""Downloads a blob from the bucket."""
44+
storage_client = storage.Client()
45+
bucket = storage_client.bucket(bucket_name)
46+
blob = bucket.blob(source_blob_name)
47+
blob.download_to_filename(destination_file_name)
48+
print("Blob {} downloaded to {}.".format(
49+
source_blob_name, destination_file_name)
50+
)
51+
4252
def main():
4353
parser = argparse.ArgumentParser(description='ML Trainer')
4454
parser.add_argument(
@@ -57,13 +67,8 @@ def main():
5767
local_data_dir = '/ml/t2t_gh_data'
5868
local_source_data_file = '/ml/gh_data/github_issues.csv'
5969

60-
data_copy_command1 = ['gsutil', 'cp',
61-
'gs://aju-dev-demos-codelabs/kubecon/gh_data/github_issues.csv',
62-
local_source_data_file
63-
]
64-
print(data_copy_command1)
65-
result = subprocess.call(data_copy_command1)
66-
print(result)
70+
download_blob('aju-dev-demos-codelabs', 'kubecon/gh_data/github_issues.csv',
71+
local_source_data_file)
6772

6873
datagen_command = ['t2t-datagen', '--data_dir', local_data_dir, '--t2t_usr_dir',
6974
'/ml/ghsumm/trainer',

github_issue_summarization/pipelines/example_pipelines/gh_summ.py

+7-7
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
import kfp.dsl as dsl
1717
import kfp.gcp as gcp
1818
import kfp.components as comp
19-
from kfp.dsl.types import GCSPath, String
19+
# from kfp.dsl.types import GCSPath, String
2020

2121

2222
COPY_ACTION = 'copy_data'
@@ -43,12 +43,12 @@
4343
)
4444
def gh_summ( #pylint: disable=unused-argument
4545
train_steps: 'Integer' = 2019300,
46-
project: String = 'YOUR_PROJECT_HERE',
47-
github_token: String = 'YOUR_GITHUB_TOKEN_HERE',
48-
working_dir: GCSPath = 'gs://YOUR_GCS_DIR_HERE',
49-
checkpoint_dir: GCSPath = 'gs://aju-dev-demos-codelabs/kubecon/model_output_tbase.bak2019000/',
50-
deploy_webapp: String = 'true',
51-
data_dir: GCSPath = 'gs://aju-dev-demos-codelabs/kubecon/t2t_data_gh_all/'
46+
project: str = 'YOUR_PROJECT_HERE',
47+
github_token: str = 'YOUR_GITHUB_TOKEN_HERE',
48+
working_dir: 'GCSPath' = 'gs://YOUR_GCS_DIR_HERE',
49+
checkpoint_dir: 'GCSPath' = 'gs://aju-dev-demos-codelabs/kubecon/model_output_tbase.bak2019000/',
50+
deploy_webapp: str = 'true',
51+
data_dir: 'GCSPath' = 'gs://aju-dev-demos-codelabs/kubecon/t2t_data_gh_all/'
5252
):
5353

5454

Binary file not shown.

github_issue_summarization/pipelines/example_pipelines/gh_summ_serve.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -15,15 +15,15 @@
1515

1616
import kfp.dsl as dsl
1717
import kfp.gcp as gcp
18-
from kfp.dsl.types import String
18+
# from kfp.dsl.types import String
1919

2020

2121
@dsl.pipeline(
2222
name='Github issue summarization',
2323
description='Demonstrate Tensor2Tensor-based training and TF-Serving'
2424
)
2525
def gh_summ_serveonly(
26-
github_token: String = 'YOUR_GITHUB_TOKEN_HERE',
26+
github_token: str = 'YOUR_GITHUB_TOKEN_HERE',
2727
):
2828

2929

Binary file not shown.

github_issue_summarization/pipelines/example_pipelines/pipelines-notebook.ipynb

+17-17
Original file line numberDiff line numberDiff line change
@@ -173,12 +173,12 @@
173173
")\n",
174174
"def gh_summ( #pylint: disable=unused-argument\n",
175175
" train_steps: 'Integer' = 2019300,\n",
176-
" project: String = 'YOUR_PROJECT_HERE',\n",
177-
" github_token: String = 'YOUR_GITHUB_TOKEN_HERE',\n",
178-
" working_dir: GCSPath = 'YOUR_GCS_DIR_HERE',\n",
179-
" checkpoint_dir: GCSPath = 'gs://aju-dev-demos-codelabs/kubecon/model_output_tbase.bak2019000/',\n",
180-
" deploy_webapp: String = 'true',\n",
181-
" data_dir: GCSPath = 'gs://aju-dev-demos-codelabs/kubecon/t2t_data_gh_all/'\n",
176+
" project: str = 'YOUR_PROJECT_HERE',\n",
177+
" github_token: str = 'YOUR_GITHUB_TOKEN_HERE',\n",
178+
" working_dir: 'GCSPath' = 'YOUR_GCS_DIR_HERE',\n",
179+
" checkpoint_dir: 'GCSPath' = 'gs://aju-dev-demos-codelabs/kubecon/model_output_tbase.bak2019000/',\n",
180+
" deploy_webapp: str = 'true',\n",
181+
" data_dir: 'GCSPath' = 'gs://aju-dev-demos-codelabs/kubecon/t2t_data_gh_all/'\n",
182182
" ):\n",
183183
"\n",
184184
"\n",
@@ -266,8 +266,8 @@
266266
"source": [
267267
"# You'd uncomment this call to actually run the pipeline. \n",
268268
"# run = client.run_pipeline(exp.id, 'ghsumm', 'ghsumm.tar.gz',\n",
269-
"# params={'working-dir': WORKING_DIR,\n",
270-
"# 'github-token': GITHUB_TOKEN,\n",
269+
"# params={'working_dir': WORKING_DIR,\n",
270+
"# 'github_token': GITHUB_TOKEN,\n",
271271
"# 'project': PROJECT_NAME})"
272272
]
273273
},
@@ -329,12 +329,12 @@
329329
")\n",
330330
"def gh_summ2(\n",
331331
" train_steps: 'Integer' = 2019300,\n",
332-
" project: String = 'YOUR_PROJECT_HERE',\n",
333-
" github_token: String = 'YOUR_GITHUB_TOKEN_HERE',\n",
334-
" working_dir: GCSPath = 'YOUR_GCS_DIR_HERE',\n",
335-
" checkpoint_dir: GCSPath = 'gs://aju-dev-demos-codelabs/kubecon/model_output_tbase.bak2019000/',\n",
336-
" deploy_webapp: String = 'true',\n",
337-
" data_dir: GCSPath = 'gs://aju-dev-demos-codelabs/kubecon/t2t_data_gh_all/'\n",
332+
" project: str = 'YOUR_PROJECT_HERE',\n",
333+
" github_token: str = 'YOUR_GITHUB_TOKEN_HERE',\n",
334+
" working_dir: 'GCSPath' = 'YOUR_GCS_DIR_HERE',\n",
335+
" checkpoint_dir: 'GCSPath' = 'gs://aju-dev-demos-codelabs/kubecon/model_output_tbase.bak2019000/',\n",
336+
" deploy_webapp: str = 'true',\n",
337+
" data_dir: 'GCSPath' = 'gs://aju-dev-demos-codelabs/kubecon/t2t_data_gh_all/'\n",
338338
" ):\n",
339339
"\n",
340340
" # The new pre-processing op.\n",
@@ -418,9 +418,9 @@
418418
"outputs": [],
419419
"source": [
420420
"run = client.run_pipeline(exp.id, 'ghsumm2', 'ghsumm2.tar.gz',\n",
421-
" params={'working-dir': WORKING_DIR,\n",
422-
" 'github-token': GITHUB_TOKEN,\n",
423-
" 'deploy-webapp': DEPLOY_WEBAPP,\n",
421+
" params={'working_dir': WORKING_DIR,\n",
422+
" 'github_token': GITHUB_TOKEN,\n",
423+
" 'deploy_webapp': DEPLOY_WEBAPP,\n",
424424
" 'project': PROJECT_NAME})"
425425
]
426426
},

0 commit comments

Comments
 (0)