Skip to content

Commit e44dd8e

Browse files
authored
add functions for creating ray with oauth proxy in front of the dashboard (#298)
* add functions for creating ray with oauth proxy in front of the dashboard

  Signed-off-by: Kevin <[email protected]>

* add unit test for OAuth create

  Signed-off-by: Kevin <[email protected]>

* add tests for replace and generate sidecar

  Signed-off-by: Kevin <[email protected]>

---------

Signed-off-by: Kevin <[email protected]>
1 parent 06a3a59 commit e44dd8e

File tree

8 files changed

+667
-127
lines changed

8 files changed

+667
-127
lines changed

src/codeflare_sdk/cluster/auth.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525
import urllib3
2626
from ..utils.kube_api_helpers import _kube_api_error_handling
2727

28+
from typing import Optional
29+
2830
global api_client
2931
api_client = None
3032
global config_path
@@ -188,7 +190,7 @@ def config_check() -> str:
188190
return config_path
189191

190192

191-
def api_config_handler() -> str:
193+
def api_config_handler() -> Optional[client.ApiClient]:
192194
"""
193195
This function is used to load the api client if the user has logged in
194196
"""

src/codeflare_sdk/cluster/cluster.py

+82-14
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,19 @@
2121
from time import sleep
2222
from typing import List, Optional, Tuple, Dict
2323

24+
import openshift as oc
25+
from kubernetes import config
2426
from ray.job_submission import JobSubmissionClient
27+
import urllib3
2528

2629
from .auth import config_check, api_config_handler
2730
from ..utils import pretty_print
2831
from ..utils.generate_yaml import generate_appwrapper
2932
from ..utils.kube_api_helpers import _kube_api_error_handling
33+
from ..utils.openshift_oauth import (
34+
create_openshift_oauth_objects,
35+
delete_openshift_oauth_objects,
36+
)
3037
from .config import ClusterConfiguration
3138
from .model import (
3239
AppWrapper,
@@ -40,6 +47,8 @@
4047
import os
4148
import requests
4249

50+
from kubernetes import config
51+
4352

4453
class Cluster:
4554
"""
@@ -61,6 +70,39 @@ def __init__(self, config: ClusterConfiguration):
6170
self.config = config
6271
self.app_wrapper_yaml = self.create_app_wrapper()
6372
self.app_wrapper_name = self.app_wrapper_yaml.split(".")[0]
73+
self._client = None
74+
75+
@property
76+
def _client_headers(self):
77+
k8_client = api_config_handler() or client.ApiClient()
78+
return {
79+
"Authorization": k8_client.configuration.get_api_key_with_prefix(
80+
"authorization"
81+
)
82+
}
83+
84+
@property
85+
def _client_verify_tls(self):
86+
return not self.config.openshift_oauth
87+
88+
@property
89+
def client(self):
90+
if self._client:
91+
return self._client
92+
if self.config.openshift_oauth:
93+
print(
94+
api_config_handler().configuration.get_api_key_with_prefix(
95+
"authorization"
96+
)
97+
)
98+
self._client = JobSubmissionClient(
99+
self.cluster_dashboard_uri(),
100+
headers=self._client_headers,
101+
verify=self._client_verify_tls,
102+
)
103+
else:
104+
self._client = JobSubmissionClient(self.cluster_dashboard_uri())
105+
return self._client
64106

65107
def evaluate_dispatch_priority(self):
66108
priority_class = self.config.dispatch_priority
@@ -147,6 +189,7 @@ def create_app_wrapper(self):
147189
image_pull_secrets=image_pull_secrets,
148190
dispatch_priority=dispatch_priority,
149191
priority_val=priority_val,
192+
openshift_oauth=self.config.openshift_oauth,
150193
)
151194

152195
# creates a new cluster with the provided or default spec
@@ -156,6 +199,11 @@ def up(self):
156199
the MCAD queue.
157200
"""
158201
namespace = self.config.namespace
202+
if self.config.openshift_oauth:
203+
create_openshift_oauth_objects(
204+
cluster_name=self.config.name, namespace=namespace
205+
)
206+
159207
try:
160208
config_check()
161209
api_instance = client.CustomObjectsApi(api_config_handler())
@@ -190,6 +238,11 @@ def down(self):
190238
except Exception as e: # pragma: no cover
191239
return _kube_api_error_handling(e)
192240

241+
if self.config.openshift_oauth:
242+
delete_openshift_oauth_objects(
243+
cluster_name=self.config.name, namespace=namespace
244+
)
245+
193246
def status(
194247
self, print_to_console: bool = True
195248
) -> Tuple[CodeFlareClusterStatus, bool]:
@@ -258,7 +311,16 @@ def status(
258311
return status, ready
259312

260313
def is_dashboard_ready(self) -> bool:
261-
response = requests.get(self.cluster_dashboard_uri(), timeout=5)
314+
try:
315+
response = requests.get(
316+
self.cluster_dashboard_uri(),
317+
headers=self._client_headers,
318+
timeout=5,
319+
verify=self._client_verify_tls,
320+
)
321+
except requests.exceptions.SSLError:
322+
# requests raises an SSLError when the OAuth ingress has been created but the cluster is not up yet
323+
return False
262324
if response.status_code == 200:
263325
return True
264326
else:
@@ -330,7 +392,13 @@ def cluster_dashboard_uri(self) -> str:
330392
return _kube_api_error_handling(e)
331393

332394
for route in routes["items"]:
333-
if route["metadata"]["name"] == f"ray-dashboard-{self.config.name}":
395+
if route["metadata"][
396+
"name"
397+
] == f"ray-dashboard-{self.config.name}" or route["metadata"][
398+
"name"
399+
].startswith(
400+
f"{self.config.name}-ingress"
401+
):
334402
protocol = "https" if route["spec"].get("tls") else "http"
335403
return f"{protocol}://{route['spec']['host']}"
336404
return "Dashboard route not available yet, have you run cluster.up()?"
@@ -339,30 +407,24 @@ def list_jobs(self) -> List:
339407
"""
340408
This method accesses the head ray node in your cluster and lists the running jobs.
341409
"""
342-
dashboard_route = self.cluster_dashboard_uri()
343-
client = JobSubmissionClient(dashboard_route)
344-
return client.list_jobs()
410+
return self.client.list_jobs()
345411

346412
def job_status(self, job_id: str) -> str:
347413
"""
348414
This method accesses the head ray node in your cluster and returns the job status for the provided job id.
349415
"""
350-
dashboard_route = self.cluster_dashboard_uri()
351-
client = JobSubmissionClient(dashboard_route)
352-
return client.get_job_status(job_id)
416+
return self.client.get_job_status(job_id)
353417

354418
def job_logs(self, job_id: str) -> str:
355419
"""
356420
This method accesses the head ray node in your cluster and returns the logs for the provided job id.
357421
"""
358-
dashboard_route = self.cluster_dashboard_uri()
359-
client = JobSubmissionClient(dashboard_route)
360-
return client.get_job_logs(job_id)
422+
return self.client.get_job_logs(job_id)
361423

362424
def torchx_config(
363425
self, working_dir: str = None, requirements: str = None
364426
) -> Dict[str, str]:
365-
dashboard_address = f"{self.cluster_dashboard_uri().lstrip('http://')}"
427+
dashboard_address = urllib3.util.parse_url(self.cluster_dashboard_uri()).host
366428
to_return = {
367429
"cluster_name": self.config.name,
368430
"dashboard_address": dashboard_address,
@@ -591,7 +653,7 @@ def _get_app_wrappers(
591653

592654

593655
def _map_to_ray_cluster(rc) -> Optional[RayCluster]:
594-
if "status" in rc and "state" in rc["status"]:
656+
if "state" in rc["status"]:
595657
status = RayClusterStatus(rc["status"]["state"].lower())
596658
else:
597659
status = RayClusterStatus.UNKNOWN
@@ -606,7 +668,13 @@ def _map_to_ray_cluster(rc) -> Optional[RayCluster]:
606668
)
607669
ray_route = None
608670
for route in routes["items"]:
609-
if route["metadata"]["name"] == f"ray-dashboard-{rc['metadata']['name']}":
671+
if route["metadata"][
672+
"name"
673+
] == f"ray-dashboard-{rc['metadata']['name']}" or route["metadata"][
674+
"name"
675+
].startswith(
676+
f"{rc['metadata']['name']}-ingress"
677+
):
610678
protocol = "https" if route["spec"].get("tls") else "http"
611679
ray_route = f"{protocol}://{route['spec']['host']}"
612680

src/codeflare_sdk/cluster/config.py

+1
Original file line numberDiff line numberDiff line change
@@ -51,3 +51,4 @@ class ClusterConfiguration:
5151
local_interactive: bool = False
5252
image_pull_secrets: list = field(default_factory=list)
5353
dispatch_priority: str = None
54+
openshift_oauth: bool = False  # NOTE: to use this option, the user must have permission to create a RoleBinding for system:auth-delegator

0 commit comments

Comments
 (0)