|
15 | 15 | # limitations under the License.
|
16 | 16 |
|
17 | 17 | # [START dataproc_quickstart]
|
| 18 | +""" |
| 19 | +This quickstart sample walks a user through creating a Cloud Dataproc |
| 20 | +cluster, submitting a PySpark job from Google Cloud Storage to the |
| 21 | +cluster, reading the output of the job and deleting the cluster, all |
| 22 | +using the Python client library. |
| 23 | +
|
| 24 | +Usage: |
| 25 | + python quickstart.py --project_id <PROJECT_ID> --region <REGION> \ |
| 26 | + --cluster_name <CLUSTER_NAME> --job_file_path <GCS_JOB_FILE_PATH> |
| 27 | +""" |
| 28 | + |
| 29 | +import argparse |
18 | 30 | import time
|
19 | 31 |
|
20 | 32 | from google.cloud import dataproc_v1 as dataproc
|
21 | 33 | from google.cloud import storage
|
22 | 34 |
|
23 | 35 |
|
24 | 36 | def quickstart(project_id, region, cluster_name, job_file_path):
|
25 |
| - """This quickstart sample walks a user through creating a Cloud Dataproc |
26 |
| - cluster, submitting a PySpark job from Google Cloud Storage to the |
27 |
| - cluster, reading the output of the job and deleting the cluster, all |
28 |
| - using the Python client library. |
29 |
| -
|
30 |
| - Args: |
31 |
| - project_id (string): Project to use for creating resources. |
32 |
| - region (string): Region where the resources should live. |
33 |
| - cluster_name (string): Name to use for creating a cluster. |
34 |
| - job_file_path (string): Job in GCS to execute against the cluster. |
35 |
| - """ |
36 |
| - |
37 | 37 | # Create the cluster client.
|
38 | 38 | cluster_client = dataproc.ClusterControllerClient(client_options={
|
39 | 39 | 'api_endpoint': '{}-dataproc.googleapis.com:443'.format(region)
|
@@ -125,4 +125,35 @@ def quickstart(project_id, region, cluster_name, job_file_path):
|
125 | 125 | operation.result()
|
126 | 126 |
|
127 | 127 | print('Cluster {} successfully deleted.'.format(cluster_name))
|
128 |
| - # [END dataproc_quickstart] |
| 128 | + |
| 129 | + |
if __name__ == "__main__":
    # Build the CLI from the module docstring so `--help` shows the
    # usage text verbatim (RawDescriptionHelpFormatter keeps line breaks).
    arg_parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    # Every option is a required string flag; declare them as a spec
    # table and register each one the same way.
    required_flags = (
        ('--project_id', 'Project to use for creating resources.'),
        ('--region', 'Region where the resources should live.'),
        ('--cluster_name', 'Name to use for creating a cluster.'),
        ('--job_file_path', 'Job in GCS to execute against the cluster.'),
    )
    for flag, help_text in required_flags:
        arg_parser.add_argument(flag, type=str, required=True, help=help_text)

    cli_args = arg_parser.parse_args()
    quickstart(cli_args.project_id, cli_args.region,
               cli_args.cluster_name, cli_args.job_file_path)
| 159 | +# [END dataproc_quickstart] |
0 commit comments