Skip to content

Commit 0d46cfa

Browse files
authored
Merge pull request #324 from lyudmil-pelov/master
adding ads example for jobs and updating the Java sample project
2 parents f097c77 + ea7849e commit 0d46cfa

30 files changed

+440
-108
lines changed

blogs_samples/llama2-model-deployment/README.md

-76
This file was deleted.

blogs_samples/llama2-model-deployment/config.yaml

-11
This file was deleted.

jobs/ads/README.md

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Running OCI Data Science Jobs with the Oracle ADS Library
2+
3+
Oracle Cloud Infrastructure (OCI) [Data Science Jobs (Jobs)](https://docs.oracle.com/en-us/iaas/data-science/using/jobs-about.htm) enables you to define and run repeatable machine learning tasks on a fully managed infrastructure. You can create a compute resource on demand and run applications that perform tasks such as data preparation, model training, hyperparameter tuning, and batch inference.
4+
5+
The Oracle ADS library streamlines the process of running OCI Data Science Jobs by abstracting away the complexities of infrastructure management and providing a high-level interface. Its ease of use, integration with OCI services, and focus on simplifying data science and AI workflows make it a powerful tool for running machine learning workloads in the Oracle Cloud Infrastructure Data Science Service.
6+
7+
For more information: [Oracle Accelerated Data Science (ADS)](https://accelerated-data-science.readthedocs.io/en/latest/user_guide/jobs/index.html)

jobs/ads/run.py

+64
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
import ads, os
2+
from ads.jobs import Job, DataScienceJob, PythonRuntime
3+
4+
# Set up your OCIDs!
5+
COMPARTMENT_OCID = "ocid1.compartment.oc1..aaaaaaaa"
6+
PROJECT_OCID = "ocid1.datascienceproject.oc1.iad.amaaaaaa"
7+
8+
LOG_GROUP_OCID = "ocid1.loggroup.oc1.iad.amaaaaaa"
9+
# Optional
10+
# LOG_OCID = "ocid1.log.oc1.iad.amaaaaaa"
11+
12+
BUCKET_NAME = ""
13+
BUCKET_NAMESPACE = ""
14+
15+
16+
# example of how to run jobs with Oracle ADS: https://accelerated-data-science.readthedocs.io/en/latest/user_guide/jobs/index.html
17+
def run_and_monitor(artifact_name):
    """Create an OCI Data Science Job for an artifact, start a run and watch it.

    Authentication is picked from the environment: resource principal is
    used when ``OCI_RESOURCE_PRINCIPAL_VERSION`` is set to a non-empty value,
    otherwise the ``DEFAULT`` profile of ``~/.oci/config`` is used.

    Parameters
    ----------
    artifact_name : str
        File name of the job artifact. It is referenced as
        ``oci://{BUCKET_NAME}@{BUCKET_NAMESPACE}/tmp/{artifact_name}``.
    """
    # The resource principal variable is only expected when running on OCI.
    principal_version = os.environ.get("OCI_RESOURCE_PRINCIPAL_VERSION", "UNDEFINED")
    if principal_version and principal_version != "UNDEFINED":
        # Running on OCI
        ads.set_auth("resource_principal")
    else:
        # Running locally
        ads.set_auth("api_key", oci_config_location="~/.oci/config", profile="DEFAULT")

    print(f"Creating MLJob for {artifact_name}...")

    # Where the job runs: logging, compartment/project and compute shape.
    # Optional settings can be chained here as well:
    #   .with_log_id(LOG_OCID)
    #   .with_subnet_id(SUBNET_OCID)
    infrastructure = (
        DataScienceJob()
        .with_log_group_id(LOG_GROUP_OCID)
        .with_compartment_id(COMPARTMENT_OCID)
        .with_project_id(PROJECT_OCID)
        .with_shape_name("VM.Standard2.1")
        .with_block_storage_size(50)
    )

    # What the job runs: a service conda environment plus the artifact source.
    runtime = (
        PythonRuntime()
        .with_service_conda("pytorch110_p38_cpu_v1")
        .with_source(f"oci://{BUCKET_NAME}@{BUCKET_NAMESPACE}/tmp/{artifact_name}")
    )

    job = (
        Job(name="DataStudio to OCI-DS")
        .with_infrastructure(infrastructure)
        .with_runtime(runtime)
    )
    job.create()

    print("Launching Job Run...")
    job.run().watch()
61+
62+
63+
# if you want to run it directly
64+
# run_and_monitor("hello_world_job.py")

jobs/ads/runit.ipynb

+67
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 1,
6+
"metadata": {},
7+
"outputs": [
8+
{
9+
"name": "stderr",
10+
"output_type": "stream",
11+
"text": [
12+
"/Users/lypelov/anaconda/envs/ads_280_clone/lib/python3.8/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
13+
" from .autonotebook import tqdm as notebook_tqdm\n"
14+
]
15+
},
16+
{
17+
"name": "stdout",
18+
"output_type": "stream",
19+
"text": [
20+
"Creating MLJob for hello_world_job.py...\n",
21+
"Launching Job Run...\n",
22+
"Job OCID: ocid1.datasciencejob.oc1.iad.amaaaaaanif7xwia2tkd5y2skd4b5lvczgmwrj5mro7le33f64zsu26krz2a\n",
23+
"Job Run OCID: ocid1.datasciencejobrun.oc1.iad.amaaaaaanif7xwianaiwe5x7waz2j4zeliobtngby2jxb6cn4jij4gym5b5q\n",
24+
"2023-04-05 15:10:16 - Job Run ACCEPTED\n",
25+
"2023-04-05 15:10:23 - Job Run ACCEPTED, Infrastructure provisioning.\n",
26+
"2023-04-05 15:11:12 - Job Run ACCEPTED, Infrastructure provisioned.\n",
27+
"2023-04-05 15:11:45 - Job Run ACCEPTED, Job run bootstrap starting.\n",
28+
"2023-04-05 15:13:28 - Job Run ACCEPTED, Job run bootstrap complete. Artifact execution starting.\n",
29+
"2023-04-05 15:13:32 - Job Run IN_PROGRESS, Job run artifact execution in progress.\n",
30+
"2023-04-05 15:13:31 - Hello world!\n",
31+
"2023-04-05 15:13:34 - Job Done.\n",
32+
"2023-04-05 15:13:34 - current path: /home/datascience/decompressed_artifact\n",
33+
"2023-04-05 15:13:59 - Job Run SUCCEEDED, Job run artifact execution succeeded. Infrastructure de-provisioning.\n"
34+
]
35+
}
36+
],
37+
"source": [
38+
"from run import run_and_monitor\n",
39+
"\n",
40+
"# notice that you have to specify the location of your file.\n",
41+
"run_and_monitor(\"hello_world_job.py\")"
42+
]
43+
}
44+
],
45+
"metadata": {
46+
"kernelspec": {
47+
"display_name": "ads_280_clone",
48+
"language": "python",
49+
"name": "python3"
50+
},
51+
"language_info": {
52+
"codemirror_mode": {
53+
"name": "ipython",
54+
"version": 3
55+
},
56+
"file_extension": ".py",
57+
"mimetype": "text/x-python",
58+
"name": "python",
59+
"nbconvert_exporter": "python",
60+
"pygments_lexer": "ipython3",
61+
"version": "3.8.16"
62+
},
63+
"orig_nbformat": 4
64+
},
65+
"nbformat": 4,
66+
"nbformat_minor": 2
67+
}

jobs/java/pom.xml

+7-6
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
<name>Oracle Cloud Infrastructure Data Science Service Jobs Samples</name>
1313
<description>Samples</description>
1414
<url>https://github.com/oracle/oci-data-science-ai-samples</url>
15+
1516
<developers>
1617
<developer>
1718
<id>oracle</id>
@@ -34,7 +35,7 @@
3435
<jackson.version>2.13.1</jackson.version>
3536
<jackson.databind.version>2.13.4.2</jackson.databind.version>
3637
<apache-httpcomponents.httpclient.version>4.5.13</apache-httpcomponents.httpclient.version>
37-
<oci-sdk.version>3.0.1</oci-sdk.version>
38+
<oci-sdk.version>3.17.0</oci-sdk.version>
3839
</properties>
3940

4041
<dependencies>
@@ -44,11 +45,6 @@
4445
<version>${slf4j.version}</version>
4546
<scope>runtime</scope>
4647
</dependency>
47-
<dependency>
48-
<groupId>com.oracle.oci.sdk</groupId>
49-
<artifactId>oci-java-sdk-common-httpclient-jersey</artifactId>
50-
<version>${oci-sdk.version}</version>
51-
</dependency>
5248
<dependency>
5349
<groupId>com.fasterxml.jackson.core</groupId>
5450
<artifactId>jackson-annotations</artifactId>
@@ -85,6 +81,11 @@
8581
<version>${apache-httpcomponents.httpclient.version}</version>
8682
</dependency>
8783
<!-- OCI Java SDK -->
84+
<dependency>
85+
<groupId>com.oracle.oci.sdk</groupId>
86+
<artifactId>oci-java-sdk-common-httpclient-jersey</artifactId>
87+
<version>${oci-sdk.version}</version>
88+
</dependency>
8889
<dependency>
8990
<groupId>com.oracle.oci.sdk</groupId>
9091
<artifactId>oci-java-sdk-core</artifactId>

jobs/java/src/main/java/MLJobs.java

+41
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,15 @@
66
import com.oracle.bmc.datascience.model.*;
77
import com.oracle.bmc.datascience.requests.*;
88
import com.oracle.bmc.datascience.responses.*;
9+
import com.oracle.bmc.limits.LimitsClient;
10+
import com.oracle.bmc.limits.model.LimitDefinitionSummary;
11+
import com.oracle.bmc.limits.model.ServiceSummary;
12+
import com.oracle.bmc.limits.requests.GetResourceAvailabilityRequest;
13+
import com.oracle.bmc.limits.requests.ListLimitDefinitionsRequest;
14+
import com.oracle.bmc.limits.requests.ListServicesRequest;
15+
import com.oracle.bmc.limits.responses.GetResourceAvailabilityResponse;
16+
import com.oracle.bmc.limits.responses.ListLimitDefinitionsResponse;
17+
import com.oracle.bmc.limits.responses.ListServicesResponse;
918

1019
import java.io.*;
1120
import java.nio.file.StandardCopyOption;
@@ -24,6 +33,7 @@ public class MLJobs {
2433
String LOG_GROUP_UUID = "";
2534

2635
DataScienceClient clientDataScience = null;
36+
LimitsClient limitsClient = null;
2737

2838
MLJobs(String configLocation, String configProfile, String compartmentOCID, String projectOCID, String subnetOCID, String logGroupOCID) throws IOException {
2939
CONFIG_LOCATION = configLocation;
@@ -43,6 +53,27 @@ public class MLJobs {
4353
clientDataScience.setRegion(Region.US_ASHBURN_1);
4454
*/
4555
clientDataScience = DataScienceClient.builder().region(Region.US_ASHBURN_1).build(provider);
56+
limitsClient = LimitsClient.builder().region(Region.US_ASHBURN_1).build(provider);
57+
}
58+
59+
public void getLimitsDefinitions() {
60+
ListLimitDefinitionsResponse listLimitDefinitionsResponse= limitsClient.listLimitDefinitions(ListLimitDefinitionsRequest.builder().compartmentId("ocid1.tenancy.oc1..aaaaaaaa25c5a2zpfki3wo4ofza5l72aehvwkjbuavpnzqtmr4nigdgzi57a").build());
61+
List<LimitDefinitionSummary> l = listLimitDefinitionsResponse.getItems();
62+
for (LimitDefinitionSummary summary : l) {
63+
System.out.println("Name: " + summary.getName() + " - Service Name: " + summary.getServiceName());
64+
}
65+
}
66+
public void getLimits() {
67+
// compartmentId is the tenancy id
68+
GetResourceAvailabilityResponse r = limitsClient.getResourceAvailability(GetResourceAvailabilityRequest
69+
.builder()
70+
.compartmentId("ocid1.tenancy.oc1..aaaaaaaa")
71+
.serviceName("data-science")
72+
.limitName("ds-gpu-a10-count")
73+
.build()
74+
);
75+
76+
System.out.println(r.getResourceAvailability().toString());
4677
}
4778

4879
public CreateProjectResponse createProject() {
@@ -95,6 +126,15 @@ public CreateJobResponse createJob(String jobName, String compartmentUuid,
95126
envVariables.put("CONDA_ENV_TYPE", "service");
96127
envVariables.put("CONDA_ENV_SLUG", "generalml_p38_cpu_v1");
97128

129+
// storage mounts - coming soon
130+
// List<JobStorageMountConfigurationDetails> jobStorageMountConfigurationDetails = new ArrayList<>();
131+
// jobStorageMountConfigurationDetails.add(
132+
// ObjectStorageMountConfigurationDetails
133+
// .builder()
134+
// .bucket("beta")
135+
// .namespace("bucket-namespace")
136+
// .destinationDirectoryName("beta").build());
137+
98138
CreateJobDetails jobRequestDetails = CreateJobDetails.builder()
99139
.displayName(jobName)
100140
.projectId(projectUuid)
@@ -104,6 +144,7 @@ public CreateJobResponse createJob(String jobName, String compartmentUuid,
104144
.builder()
105145
.environmentVariables(envVariables)
106146
.build())
147+
// .jobStorageMountConfigurationDetailsList(jobStorageMountConfigurationDetails)
107148
.jobInfrastructureConfigurationDetails(
108149
StandaloneJobInfrastructureConfigurationDetails
109150
.builder()

jobs/java/src/main/java/Test.java

+4
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,10 @@ public static void main(String[] args) throws IOException {
2727
System.out.println("* INIT");
2828
MLJobs client = new MLJobs(CONFIG_LOCATION,CONFIG_PROFILE,COMPARTMENT_OCID,PROJECT_OCID,SUBNET_OCID,LOG_GROUP_UUID);
2929

30+
// get limits example
31+
client.getLimits();
32+
client.getLimitsDefinitions();
33+
3034
// Create Job with Managed Egress
3135
System.out.println("* CREATE JOB - MANAGED EGRESS");
3236
CreateJobResponse jobManagedEgress = client.createJobWithManagedEgress("Java Job - Managed Egress", COMPARTMENT_OCID, PROJECT_OCID);

jobs/python/sdk/jobs.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -206,14 +206,18 @@ def list_job_shapes(self, compartment_id):
206206

207207
return self.dsc.list_job_shapes(compartment_id=compartment_id)
208208

209-
# List all avaialble fast launch shapes in given region
209+
# List all available fast launch shapes in given region
210210
def list_fast_job_shapes(self, compartment_id):
    """Return the fast launch job configurations for ``compartment_id``.

    The lookup is delegated to ``self.dsc.list_fast_launch_job_configs`` and
    its result is returned unchanged.
    """
    logging.info("*** List Fast Job Shapes ...")

    fast_launch_configs = self.dsc.list_fast_launch_job_configs(
        compartment_id=compartment_id
    )
    return fast_launch_configs
214214

215215
def run_job(
216-
self, compartment_id, project_id, job_id, job_run_name="Job Run",
216+
self,
217+
compartment_id,
218+
project_id,
219+
job_id,
220+
job_run_name="Job Run",
217221
):
218222
logging.info("*** Run Job ...")
219223

0 commit comments

Comments
 (0)