diff --git a/ray-operator/config/samples/ray-cluster-label-selector.yaml b/ray-operator/config/samples/ray-cluster-label-selector.yaml index a0f0b2a363c..e899a3728c1 100644 --- a/ray-operator/config/samples/ray-cluster-label-selector.yaml +++ b/ray-operator/config/samples/ray-cluster-label-selector.yaml @@ -38,11 +38,11 @@ spec: name: client resources: limits: - cpu: 1 - memory: 5Gi + cpu: "1" + memory: "5Gi" requests: - cpu: 1 - memory: 2Gi + cpu: "1" + memory: "2Gi" volumeMounts: - mountPath: /home/ray/samples name: ray-example-configmap @@ -74,11 +74,11 @@ spec: image: rayproject/ray:nightly resources: limits: - cpu: 2 - memory: 4Gi + cpu: "2" + memory: "4Gi" requests: - cpu: 2 - memory: 4Gi + cpu: "2" + memory: "4Gi" nodeSelector: cloud.google.com/machine-family: "N4" - replicas: 0 @@ -98,11 +98,11 @@ spec: image: rayproject/ray:nightly-gpu resources: limits: - cpu: 1 - memory: 1Gi + cpu: "1" + memory: "1Gi" requests: - cpu: 1 - memory: 1Gi + cpu: "1" + memory: "1Gi" nodeSelector: cloud.google.com/gke-spot: "true" cloud.google.com/gke-accelerator: "nvidia-tesla-a100" @@ -121,11 +121,11 @@ spec: image: rayproject/ray:nightly resources: limits: - cpu: 1 - memory: 1Gi + cpu: "1" + memory: "1Gi" requests: - cpu: 1 - memory: 1Gi + cpu: "1" + memory: "1Gi" nodeSelector: cloud.google.com/gke-spot: "true" --- diff --git a/ray-operator/config/samples/ray-cluster-resource-isolation-with-overrides.gke.yaml b/ray-operator/config/samples/ray-cluster-resource-isolation-with-overrides.gke.yaml index 75022b69340..d8041404efc 100644 --- a/ray-operator/config/samples/ray-cluster-resource-isolation-with-overrides.gke.yaml +++ b/ray-operator/config/samples/ray-cluster-resource-isolation-with-overrides.gke.yaml @@ -19,11 +19,11 @@ spec: image: rayproject/ray:2.52.0 resources: limits: - cpu: 4 - memory: 8Gi + cpu: "4" + memory: "8Gi" requests: - cpu: 4 - memory: 8Gi + cpu: "4" + memory: "8Gi" ports: - containerPort: 6379 name: gcs-server @@ -52,10 +52,10 @@ spec: image: rayproject/ray:2.52.0 resources: limits: - cpu: 2 - memory: 8Gi + cpu: "2" + memory: "8Gi" requests: - cpu: 2 - memory: 8Gi + cpu: "2" + memory: "8Gi" nodeSelector: node.gke.io/enable-writable-cgroups: "true" diff --git a/ray-operator/config/samples/ray-cluster-resource-isolation.gke.yaml b/ray-operator/config/samples/ray-cluster-resource-isolation.gke.yaml index ce4000e94f9..f64938c3242 100644 --- a/ray-operator/config/samples/ray-cluster-resource-isolation.gke.yaml +++ b/ray-operator/config/samples/ray-cluster-resource-isolation.gke.yaml @@ -17,11 +17,11 @@ spec: image: rayproject/ray:2.52.0 resources: limits: - cpu: 2 - memory: 8Gi + cpu: "2" + memory: "8Gi" requests: - cpu: 2 - memory: 8Gi + cpu: "2" + memory: "8Gi" ports: - containerPort: 6379 name: gcs-server @@ -48,10 +48,10 @@ spec: image: rayproject/ray:2.52.0 resources: limits: - cpu: 2 - memory: 8Gi + cpu: "2" + memory: "8Gi" requests: - cpu: 2 - memory: 8Gi + cpu: "2" + memory: "8Gi" nodeSelector: node.gke.io/enable-writable-cgroups: "true" diff --git a/ray-operator/config/samples/ray-cluster.auth-manual.yaml b/ray-operator/config/samples/ray-cluster.auth-manual.yaml index 3762dbf006a..09720f70020 100644 --- a/ray-operator/config/samples/ray-cluster.auth-manual.yaml +++ b/ray-operator/config/samples/ray-cluster.auth-manual.yaml @@ -23,11 +23,11 @@ spec: name: ray-cluster-with-auth # change this to match your Secret name resources: limits: - cpu: 4 - memory: 8Gi + cpu: "4" + memory: "8Gi" requests: - cpu: 4 - memory: 8Gi + cpu: "4" + memory: "8Gi" ports: - containerPort: 6379 name: gcs-server @@ -56,8 +56,8 @@ spec: name: ray-cluster-with-auth # change this to match your Secret name resources: limits: - cpu: 4 - memory: 8Gi + cpu: "4" + memory: "8Gi" requests: - cpu: 4 - memory: 8Gi + cpu: "4" + memory: "8Gi" diff --git a/ray-operator/config/samples/ray-cluster.auth.yaml b/ray-operator/config/samples/ray-cluster.auth.yaml index 4c1712a5b13..7a986412256 100644 --- a/ray-operator/config/samples/ray-cluster.auth.yaml +++ b/ray-operator/config/samples/ray-cluster.auth.yaml @@ -16,11 +16,11 @@ spec: image: rayproject/ray:2.53.0 resources: limits: - cpu: 4 - memory: 8Gi + cpu: "4" + memory: "8Gi" requests: - cpu: 4 - memory: 8Gi + cpu: "4" + memory: "8Gi" ports: - containerPort: 6379 name: gcs-server @@ -41,8 +41,8 @@ spec: image: rayproject/ray:2.53.0 resources: limits: - cpu: 4 - memory: 8Gi + cpu: "4" + memory: "8Gi" requests: - cpu: 4 - memory: 8Gi + cpu: "4" + memory: "8Gi" diff --git a/ray-operator/config/samples/ray-cluster.autoscaler-v2.yaml b/ray-operator/config/samples/ray-cluster.autoscaler-v2.yaml index 7ee99e19d1c..c6f7a848f62 100644 --- a/ray-operator/config/samples/ray-cluster.autoscaler-v2.yaml +++ b/ray-operator/config/samples/ray-cluster.autoscaler-v2.yaml @@ -51,11 +51,11 @@ spec: name: client resources: limits: - cpu: 1 - memory: 5Gi + cpu: "1" + memory: "5Gi" requests: - cpu: 1 - memory: 2Gi + cpu: "1" + memory: "2Gi" volumeMounts: - mountPath: /home/ray/samples name: ray-example-configmap @@ -84,11 +84,11 @@ spec: image: rayproject/ray:2.52.0 resources: limits: - cpu: 1 - memory: 1Gi + cpu: "1" + memory: "1Gi" requests: - cpu: 1 - memory: 1Gi + cpu: "1" + memory: "1Gi" --- apiVersion: v1 kind: ConfigMap diff --git a/ray-operator/config/samples/ray-cluster.autoscaler.yaml b/ray-operator/config/samples/ray-cluster.autoscaler.yaml index c48d5a018b2..8afe868312e 100644 --- a/ray-operator/config/samples/ray-cluster.autoscaler.yaml +++ b/ray-operator/config/samples/ray-cluster.autoscaler.yaml @@ -72,11 +72,11 @@ spec: name: client resources: limits: - cpu: 1 - memory: 5Gi + cpu: "1" + memory: "5Gi" requests: - cpu: 1 - memory: 2Gi + cpu: "1" + memory: "2Gi" volumeMounts: - mountPath: /home/ray/samples name: ray-example-configmap @@ -114,11 +114,11 @@ spec: image: rayproject/ray:2.52.0 resources: limits: - cpu: 1 - memory: 1Gi + cpu: "1" + memory: "1Gi" requests: - cpu: 1 - memory: 1Gi + cpu: "1" + memory: "1Gi" --- apiVersion: v1 kind: ConfigMap diff --git a/ray-operator/config/samples/ray-cluster.complete.yaml b/ray-operator/config/samples/ray-cluster.complete.yaml index d32d84e9071..d66f7dcbd3b 100644 --- a/ray-operator/config/samples/ray-cluster.complete.yaml +++ b/ray-operator/config/samples/ray-cluster.complete.yaml @@ -46,15 +46,15 @@ spec: # entire Kubernetes node on which it is scheduled. resources: limits: - cpu: 1 - memory: 5Gi + cpu: "1" + memory: "5Gi" requests: # For production use-cases, we recommend specifying integer CPU requests and limits. # We also recommend setting requests equal to limits for both CPU and memory. # For this example, we use a 500m CPU request to accomodate resource-constrained local # Kubernetes testing environments such as KinD and minikube. - cpu: 500m - memory: 2Gi + cpu: "500m" + memory: "2Gi" volumes: - name: ray-logs emptyDir: {} @@ -98,8 +98,8 @@ spec: # entire Kubernetes node on which it is scheduled. resources: limits: - cpu: 1 - memory: 1Gi + cpu: "1" + memory: "1Gi" # For production use-cases, we recommend specifying integer CPU requests and limits. # We also recommend setting requests equal to limits for both CPU and memory. # For this example, we use a 500m CPU request to accomodate resource-constrained local @@ -109,9 +109,9 @@ spec: # We also recommend setting requests equal to limits for both CPU and memory. # For this example, we use a 500m CPU request to accomodate resource-constrained local # Kubernetes testing environments such as KinD and minikube. - cpu: 500m + cpu: "500m" # For production use-cases, we recommend allocating at least 8Gb memory for each Ray container. - memory: 1Gi + memory: "1Gi" # use volumes # Refer to https://kubernetes.io/docs/concepts/storage/volumes/ volumes: diff --git a/ray-operator/config/samples/ray-cluster.custom-head-service.yaml b/ray-operator/config/samples/ray-cluster.custom-head-service.yaml index 895cad459a0..c240c674299 100644 --- a/ray-operator/config/samples/ray-cluster.custom-head-service.yaml +++ b/ray-operator/config/samples/ray-cluster.custom-head-service.yaml @@ -35,11 +35,11 @@ spec: image: rayproject/ray:2.52.0 resources: limits: - cpu: 1 - memory: 5Gi + cpu: "1" + memory: "5Gi" requests: - cpu: 500m - memory: 2Gi + cpu: "500m" + memory: "2Gi" ports: - containerPort: 6379 name: gcs-server diff --git a/ray-operator/config/samples/ray-cluster.deprecate-gcs-ft.yaml b/ray-operator/config/samples/ray-cluster.deprecate-gcs-ft.yaml index 430f16f8a6a..6e09c7d7a0d 100644 --- a/ray-operator/config/samples/ray-cluster.deprecate-gcs-ft.yaml +++ b/ray-operator/config/samples/ray-cluster.deprecate-gcs-ft.yaml @@ -29,11 +29,11 @@ spec: image: rayproject/ray:2.52.0 resources: limits: - cpu: 1 - memory: 5Gi + cpu: "1" + memory: "5Gi" requests: - cpu: 1 - memory: 2Gi + cpu: "1" + memory: "2Gi" env: # Ray will read the RAY_REDIS_ADDRESS environment variable to establish # a connection with the Redis server. In this instance, we use the "redis" @@ -92,11 +92,11 @@ spec: name: ray-logs resources: limits: - cpu: 1 - memory: 1Gi + cpu: "1" + memory: "1Gi" requests: - cpu: 1 - memory: 1Gi + cpu: "1" + memory: "1Gi" volumes: - name: ray-logs emptyDir: {} diff --git a/ray-operator/config/samples/ray-cluster.embed-grafana.yaml b/ray-operator/config/samples/ray-cluster.embed-grafana.yaml index e76e6d09e06..1ec883384d0 100644 --- a/ray-operator/config/samples/ray-cluster.embed-grafana.yaml +++ b/ray-operator/config/samples/ray-cluster.embed-grafana.yaml @@ -30,11 +30,11 @@ spec: name: ray-logs resources: limits: - cpu: 1 - memory: 5Gi + cpu: "1" + memory: "5Gi" requests: - cpu: 1 - memory: 2Gi + cpu: "1" + memory: "2Gi" env: - name: RAY_GRAFANA_IFRAME_HOST value: http://127.0.0.1:3000 @@ -63,11 +63,11 @@ spec: name: ray-logs resources: limits: - cpu: 1 - memory: 1Gi + cpu: "1" + memory: "1Gi" requests: - cpu: 1 - memory: 1Gi + cpu: "1" + memory: "1Gi" # use volumes # Refer to https://kubernetes.io/docs/concepts/storage/volumes/ volumes: diff --git a/ray-operator/config/samples/ray-cluster.external-redis-uri.yaml b/ray-operator/config/samples/ray-cluster.external-redis-uri.yaml index 527d511c719..de8da2a92b3 100644 --- a/ray-operator/config/samples/ray-cluster.external-redis-uri.yaml +++ b/ray-operator/config/samples/ray-cluster.external-redis-uri.yaml @@ -31,11 +31,11 @@ spec: image: rayproject/ray:2.52.0 resources: limits: - cpu: 1 - memory: 5Gi + cpu: "1" + memory: "5Gi" requests: - cpu: 1 - memory: 2Gi + cpu: "1" + memory: "2Gi" ports: - containerPort: 6379 name: redis @@ -81,11 +81,11 @@ spec: name: ray-logs resources: limits: - cpu: 1 - memory: 1Gi + cpu: "1" + memory: "1Gi" requests: - cpu: 1 - memory: 1Gi + cpu: "1" + memory: "1Gi" volumes: - name: ray-logs emptyDir: {} diff --git a/ray-operator/config/samples/ray-cluster.external-redis.yaml b/ray-operator/config/samples/ray-cluster.external-redis.yaml index 890ff089361..fe029815653 100644 --- a/ray-operator/config/samples/ray-cluster.external-redis.yaml +++ b/ray-operator/config/samples/ray-cluster.external-redis.yaml @@ -31,11 +31,11 @@ spec: image: rayproject/ray:2.52.0 resources: limits: - cpu: 1 - memory: 5Gi + cpu: "1" + memory: "5Gi" requests: - cpu: 1 - memory: 2Gi + cpu: "1" + memory: "2Gi" ports: - containerPort: 6379 name: redis @@ -81,11 +81,11 @@ spec: name: ray-logs resources: limits: - cpu: 1 - memory: 1Gi + cpu: "1" + memory: "1Gi" requests: - cpu: 1 - memory: 1Gi + cpu: "1" + memory: "1Gi" volumes: - name: ray-logs emptyDir: {} diff --git a/ray-operator/config/samples/ray-cluster.fluentbit.yaml b/ray-operator/config/samples/ray-cluster.fluentbit.yaml index 160709bdc9f..bb238987306 100644 --- a/ray-operator/config/samples/ray-cluster.fluentbit.yaml +++ b/ray-operator/config/samples/ray-cluster.fluentbit.yaml @@ -16,11 +16,11 @@ spec: # Use larger Ray containers in production! resources: limits: - cpu: 1 - memory: 5Gi + cpu: "1" + memory: "5Gi" requests: - cpu: 1 - memory: 2Gi + cpu: "1" + memory: "2Gi" # Share logs with Fluent Bit volumeMounts: - mountPath: /tmp/ray @@ -37,11 +37,11 @@ spec: # These resource requests for Fluent Bit should be sufficient in production. resources: requests: - cpu: 100m - memory: 128Mi + cpu: "100m" + memory: "128Mi" limits: - cpu: 100m - memory: 128Mi + cpu: "100m" + memory: "128Mi" volumeMounts: - mountPath: /tmp/ray name: ray-logs diff --git a/ray-operator/config/samples/ray-cluster.gke-bucket.yaml b/ray-operator/config/samples/ray-cluster.gke-bucket.yaml index 2930ef0ce22..393573a1ce4 100644 --- a/ray-operator/config/samples/ray-cluster.gke-bucket.yaml +++ b/ray-operator/config/samples/ray-cluster.gke-bucket.yaml @@ -16,11 +16,11 @@ spec: image: rayproject/ray:2.52.0 resources: limits: - cpu: 1 - memory: 5Gi + cpu: "1" + memory: "5Gi" requests: - cpu: 1 - memory: 2Gi + cpu: "1" + memory: "2Gi" ports: - containerPort: 6379 name: gcs-server diff --git a/ray-operator/config/samples/ray-cluster.head-command.yaml b/ray-operator/config/samples/ray-cluster.head-command.yaml index c097cc2f9a5..0ca712e54ce 100644 --- a/ray-operator/config/samples/ray-cluster.head-command.yaml +++ b/ray-operator/config/samples/ray-cluster.head-command.yaml @@ -20,11 +20,11 @@ spec: image: rayproject/ray:2.52.0 resources: limits: - cpu: 1 - memory: 5Gi + cpu: "1" + memory: "5Gi" requests: - cpu: 1 - memory: 2Gi + cpu: "1" + memory: "2Gi" ports: - containerPort: 6379 name: gcs-server diff --git a/ray-operator/config/samples/ray-cluster.kai-gpu-sharing.yaml b/ray-operator/config/samples/ray-cluster.kai-gpu-sharing.yaml index dc10e9496ae..864b6ac976e 100644 --- a/ray-operator/config/samples/ray-cluster.kai-gpu-sharing.yaml +++ b/ray-operator/config/samples/ray-cluster.kai-gpu-sharing.yaml @@ -23,17 +23,17 @@ spec: # overQuotaWeight: How surplus resources are shared among queues # Note: Using -1 (unlimited) for demo purposes cpu: - quota: -1 - limit: -1 - overQuotaWeight: 1 + quota: "-1" + limit: "-1" + overQuotaWeight: "1" gpu: - quota: -1 - limit: -1 - overQuotaWeight: 1 + quota: "-1" + limit: "-1" + overQuotaWeight: "1" memory: - quota: -1 - limit: -1 - overQuotaWeight: 1 + quota: "-1" + limit: "-1" + overQuotaWeight: "1" --- # Child queue: Represents a team within the department-1 apiVersion: scheduling.run.ai/v2 @@ -77,11 +77,11 @@ spec: image: rayproject/ray:2.52.0 resources: limits: - cpu: 1 - memory: 5Gi + cpu: "1" + memory: "5Gi" requests: - cpu: 1 - memory: 2Gi + cpu: "1" + memory: "2Gi" # ---- Two workers share one GPU (0.5 each) ---- workerGroupSpecs: - groupName: shared-gpu @@ -97,8 +97,8 @@ spec: image: rayproject/ray:2.52.0 resources: limits: - cpu: 1 - memory: 1Gi + cpu: "1" + memory: "1Gi" requests: - cpu: 1 - memory: 1Gi + cpu: "1" + memory: "1Gi" diff --git a/ray-operator/config/samples/ray-cluster.kai-scheduler.yaml b/ray-operator/config/samples/ray-cluster.kai-scheduler.yaml index a5ffdfdb031..2b62226bfae 100644 --- a/ray-operator/config/samples/ray-cluster.kai-scheduler.yaml +++ b/ray-operator/config/samples/ray-cluster.kai-scheduler.yaml @@ -19,17 +19,17 @@ spec: # overQuotaWeight: How surplus resources are shared among queues # Note: Using -1 (unlimited) for demo purposes cpu: - quota: -1 - limit: -1 - overQuotaWeight: 1 + quota: "-1" + limit: "-1" + overQuotaWeight: "1" gpu: - quota: -1 - limit: -1 - overQuotaWeight: 1 + quota: "-1" + limit: "-1" + overQuotaWeight: "1" memory: - quota: -1 - limit: -1 - overQuotaWeight: 1 + quota: "-1" + limit: "-1" + overQuotaWeight: "1" --- # Child queue: Represents a team within the department-1 apiVersion: scheduling.run.ai/v2 @@ -45,17 +45,17 @@ spec: # overQuotaWeight: How surplus resources are shared among queues # Note: Using -1 (unlimited) for demo purposes cpu: - quota: -1 - limit: -1 - overQuotaWeight: 1 + quota: "-1" + limit: "-1" + overQuotaWeight: "1" gpu: - quota: -1 - limit: -1 - overQuotaWeight: 1 + quota: "-1" + limit: "-1" + overQuotaWeight: "1" memory: - quota: -1 - limit: -1 - overQuotaWeight: 1 + quota: "-1" + limit: "-1" + overQuotaWeight: "1" --- # RayCluster with KAI Scheduler apiVersion: ray.io/v1 @@ -73,11 +73,11 @@ spec: image: rayproject/ray:2.52.0 resources: limits: - cpu: 1 - memory: 5Gi + cpu: "1" + memory: "5Gi" requests: - cpu: 1 - memory: 2Gi + cpu: "1" + memory: "2Gi" workerGroupSpecs: - groupName: worker replicas: 2 @@ -89,8 +89,8 @@ spec: image: rayproject/ray:2.52.0 resources: limits: - cpu: 1 - memory: 1Gi + cpu: "1" + memory: "1Gi" requests: - cpu: 1 - memory: 1Gi + cpu: "1" + memory: "1Gi" diff --git a/ray-operator/config/samples/ray-cluster.overwrite-command.yaml b/ray-operator/config/samples/ray-cluster.overwrite-command.yaml index cca59e6ef2d..ebc6b74419e 100644 --- a/ray-operator/config/samples/ray-cluster.overwrite-command.yaml +++ b/ray-operator/config/samples/ray-cluster.overwrite-command.yaml @@ -19,11 +19,11 @@ spec: image: rayproject/ray:2.52.0 resources: limits: - cpu: 1 - memory: 5Gi + cpu: "1" + memory: "5Gi" requests: - cpu: 1 - memory: 2Gi + cpu: "1" + memory: "2Gi" ports: - containerPort: 6379 name: gcs-server @@ -53,11 +53,11 @@ spec: image: rayproject/ray:2.52.0 resources: limits: - cpu: 1 - memory: 1Gi + cpu: "1" + memory: "1Gi" requests: - cpu: 1 - memory: 1Gi + cpu: "1" + memory: "1Gi" # See the comments in headGroupSpec. command: ["/bin/bash", "-c", "--"] args: ["ulimit -n 65536; echo worker; $KUBERAY_GEN_RAY_START_CMD"] diff --git a/ray-operator/config/samples/ray-cluster.persistent-redis.yaml b/ray-operator/config/samples/ray-cluster.persistent-redis.yaml index 2812f06c837..57d13d76fc7 100644 --- a/ray-operator/config/samples/ray-cluster.persistent-redis.yaml +++ b/ray-operator/config/samples/ray-cluster.persistent-redis.yaml @@ -31,11 +31,11 @@ spec: image: rayproject/ray:2.52.0 resources: limits: - cpu: 1 - memory: 5Gi + cpu: "1" + memory: "5Gi" requests: - cpu: 1 - memory: 2Gi + cpu: "1" + memory: "2Gi" ports: - containerPort: 6379 name: redis @@ -70,11 +70,11 @@ spec: name: ray-logs resources: limits: - cpu: 1 - memory: 1Gi + cpu: "1" + memory: "1Gi" requests: - cpu: 1 - memory: 1Gi + cpu: "1" + memory: "1Gi" volumes: - name: ray-logs emptyDir: {} @@ -134,7 +134,7 @@ spec: - ReadWriteOnce resources: requests: - storage: 8Gi + storage: "8Gi" --- apiVersion: apps/v1 kind: StatefulSet diff --git a/ray-operator/config/samples/ray-cluster.py-spy.yaml b/ray-operator/config/samples/ray-cluster.py-spy.yaml index e1b56031b1a..685bdbbfb48 100644 --- a/ray-operator/config/samples/ray-cluster.py-spy.yaml +++ b/ray-operator/config/samples/ray-cluster.py-spy.yaml @@ -15,11 +15,11 @@ spec: image: rayproject/ray:2.52.0 resources: limits: - cpu: 1 - memory: 5Gi + cpu: "1" + memory: "5Gi" requests: - cpu: 1 - memory: 2Gi + cpu: "1" + memory: "2Gi" # `py-spy` is a sampling profiler that requires `SYS_PTRACE` to read process memory effectively. # Once enabled, you can profile Ray worker processes through Ray Dashboard. # For more details, refer to: diff --git a/ray-operator/config/samples/ray-cluster.sample.yaml b/ray-operator/config/samples/ray-cluster.sample.yaml index 562ada295e0..0dd84ce7b75 100644 --- a/ray-operator/config/samples/ray-cluster.sample.yaml +++ b/ray-operator/config/samples/ray-cluster.sample.yaml @@ -33,11 +33,11 @@ spec: image: rayproject/ray:2.52.0 resources: limits: - cpu: 1 - memory: 5Gi + cpu: "1" + memory: "5Gi" requests: - cpu: 1 - memory: 2Gi + cpu: "1" + memory: "2Gi" ports: - containerPort: 6379 name: gcs-server @@ -61,8 +61,8 @@ spec: image: rayproject/ray:2.52.0 resources: limits: - cpu: 1 - memory: 1Gi + cpu: "1" + memory: "1Gi" requests: - cpu: 1 - memory: 1Gi + cpu: "1" + memory: "1Gi" diff --git a/ray-operator/config/samples/ray-cluster.scheduler-plugins.yaml b/ray-operator/config/samples/ray-cluster.scheduler-plugins.yaml index e7a90158fb3..a74c3cf3a5e 100644 --- a/ray-operator/config/samples/ray-cluster.scheduler-plugins.yaml +++ b/ray-operator/config/samples/ray-cluster.scheduler-plugins.yaml @@ -15,11 +15,11 @@ spec: image: rayproject/ray:2.52.0 resources: limits: - cpu: 1 - memory: 5Gi + cpu: "1" + memory: "5Gi" requests: - cpu: 1 - memory: 2Gi + cpu: "1" + memory: "2Gi" workerGroupSpecs: - groupName: worker rayStartParams: {} @@ -33,8 +33,8 @@ spec: image: rayproject/ray:2.52.0 resources: limits: - cpu: 1 - memory: 1Gi + cpu: "1" + memory: "1Gi" requests: - cpu: 1 - memory: 1Gi + cpu: "1" + memory: "1Gi" diff --git a/ray-operator/config/samples/ray-cluster.separate-ingress.yaml b/ray-operator/config/samples/ray-cluster.separate-ingress.yaml index 85f74ce17f4..89e49951b88 100644 --- a/ray-operator/config/samples/ray-cluster.separate-ingress.yaml +++ b/ray-operator/config/samples/ray-cluster.separate-ingress.yaml @@ -22,11 +22,11 @@ spec: image: rayproject/ray:2.52.0 resources: limits: - cpu: 1 - memory: 5Gi + cpu: "1" + memory: "5Gi" requests: - cpu: 1 - memory: 2Gi + cpu: "1" + memory: "2Gi" ports: - containerPort: 6379 name: gcs-server diff --git a/ray-operator/config/samples/ray-cluster.tls.yaml b/ray-operator/config/samples/ray-cluster.tls.yaml index f1772be2516..2367ab5b69f 100644 --- a/ray-operator/config/samples/ray-cluster.tls.yaml +++ b/ray-operator/config/samples/ray-cluster.tls.yaml @@ -86,11 +86,11 @@ spec: name: ray-tls resources: limits: - cpu: 1 - memory: 5Gi + cpu: "1" + memory: "5Gi" requests: - cpu: 1 - memory: 2Gi + cpu: "1" + memory: "2Gi" env: # Environment variables for Ray TLS authentication. # See https://docs.ray.io/en/latest/ray-core/configure.html#tls-authentication for more details. @@ -165,11 +165,11 @@ spec: name: ray-tls resources: limits: - cpu: 1 - memory: 1Gi + cpu: "1" + memory: "1Gi" requests: - cpu: 1 - memory: 1Gi + cpu: "1" + memory: "1Gi" env: # Environment variables for Ray TLS authentication. # See https://docs.ray.io/en/latest/ray-core/configure.html#tls-authentication for more details. diff --git a/ray-operator/config/samples/ray-cluster.tpu-v4-multihost.yaml b/ray-operator/config/samples/ray-cluster.tpu-v4-multihost.yaml index de612dcc75f..0eead88dbae 100644 --- a/ray-operator/config/samples/ray-cluster.tpu-v4-multihost.yaml +++ b/ray-operator/config/samples/ray-cluster.tpu-v4-multihost.yaml @@ -18,12 +18,12 @@ spec: resources: limits: cpu: "8" - ephemeral-storage: 20Gi - memory: 40G + ephemeral-storage: "20Gi" + memory: "40G" requests: cpu: "8" - ephemeral-storage: 10Gi - memory: 40G + ephemeral-storage: "10Gi" + memory: "40G" env: - name: RAY_memory_monitor_refresh_ms value: "0" @@ -62,14 +62,14 @@ spec: resources: limits: cpu: "1" - ephemeral-storage: 20Gi + ephemeral-storage: "20Gi" google.com/tpu: "4" - memory: 40G + memory: "40G" requests: cpu: "1" - ephemeral-storage: 10Gi + ephemeral-storage: "10Gi" google.com/tpu: "4" - memory: 40G + memory: "40G" nodeSelector: cloud.google.com/gke-tpu-accelerator: tpu-v4-podslice cloud.google.com/gke-tpu-topology: 2x2x2 diff --git a/ray-operator/config/samples/ray-cluster.tpu-v4-singlehost.yaml b/ray-operator/config/samples/ray-cluster.tpu-v4-singlehost.yaml index 0936e98435d..00cc3a53c78 100644 --- a/ray-operator/config/samples/ray-cluster.tpu-v4-singlehost.yaml +++ b/ray-operator/config/samples/ray-cluster.tpu-v4-singlehost.yaml @@ -18,12 +18,12 @@ spec: resources: limits: cpu: "8" - ephemeral-storage: 20Gi - memory: 40G + ephemeral-storage: "20Gi" + memory: "40G" requests: cpu: "8" - ephemeral-storage: 10Gi - memory: 40G + ephemeral-storage: "10Gi" + memory: "40G" env: - name: RAY_memory_monitor_refresh_ms value: "0" @@ -58,14 +58,14 @@ spec: resources: limits: cpu: "1" - ephemeral-storage: 20Gi + ephemeral-storage: "20Gi" google.com/tpu: "4" - memory: 40G + memory: "40G" requests: cpu: "1" - ephemeral-storage: 10Gi + ephemeral-storage: "10Gi" google.com/tpu: "4" - memory: 40G + memory: "40G" nodeSelector: cloud.google.com/gke-tpu-accelerator: tpu-v4-podslice cloud.google.com/gke-tpu-topology: 2x2x1 diff --git a/ray-operator/config/samples/ray-cluster.tpu-v6e-16-multihost.yaml b/ray-operator/config/samples/ray-cluster.tpu-v6e-16-multihost.yaml index bb47aba9cce..f924c0ccf97 100644 --- a/ray-operator/config/samples/ray-cluster.tpu-v6e-16-multihost.yaml +++ b/ray-operator/config/samples/ray-cluster.tpu-v6e-16-multihost.yaml @@ -15,10 +15,10 @@ spec: resources: limits: cpu: "8" - memory: 40G + memory: "40G" requests: cpu: "8" - memory: 40G + memory: "40G" ports: - containerPort: 6379 name: gcs @@ -47,11 +47,11 @@ spec: limits: cpu: "24" google.com/tpu: "4" - memory: 200G + memory: "200G" requests: cpu: "24" google.com/tpu: "4" - memory: 200G + memory: "200G" env: - name: NODE_IP valueFrom: diff --git a/ray-operator/config/samples/ray-cluster.tpu-v6e-256-multihost.yaml b/ray-operator/config/samples/ray-cluster.tpu-v6e-256-multihost.yaml index 60321bcee27..eb31a402856 100644 --- a/ray-operator/config/samples/ray-cluster.tpu-v6e-256-multihost.yaml +++ b/ray-operator/config/samples/ray-cluster.tpu-v6e-256-multihost.yaml @@ -15,10 +15,10 @@ spec: resources: limits: cpu: "8" - memory: 40G + memory: "40G" requests: cpu: "8" - memory: 40G + memory: "40G" ports: - containerPort: 6379 name: gcs @@ -47,11 +47,11 @@ spec: limits: cpu: "24" google.com/tpu: "4" - memory: 200G + memory: "200G" requests: cpu: "24" google.com/tpu: "4" - memory: 200G + memory: "200G" env: - name: NODE_IP valueFrom: diff --git a/ray-operator/config/samples/ray-cluster.tpu-v6e-singlehost.yaml b/ray-operator/config/samples/ray-cluster.tpu-v6e-singlehost.yaml index d8eceb99841..a70baad808f 100644 --- a/ray-operator/config/samples/ray-cluster.tpu-v6e-singlehost.yaml +++ b/ray-operator/config/samples/ray-cluster.tpu-v6e-singlehost.yaml @@ -15,10 +15,10 @@ spec: resources: limits: cpu: "8" - memory: 40G + memory: "40G" requests: cpu: "8" - memory: 40G + memory: "40G" ports: - containerPort: 6379 name: gcs @@ -49,11 +49,11 @@ spec: limits: cpu: "24" google.com/tpu: "4" - memory: 200G + memory: "200G" requests: cpu: "24" google.com/tpu: "4" - memory: 200G + memory: "200G" nodeSelector: cloud.google.com/gke-tpu-accelerator: tpu-v6e-slice cloud.google.com/gke-tpu-topology: 2x2 diff --git a/ray-operator/config/samples/ray-cluster.uv.yaml b/ray-operator/config/samples/ray-cluster.uv.yaml index 46a27c7b3ae..8114b596718 100644 --- a/ray-operator/config/samples/ray-cluster.uv.yaml +++ b/ray-operator/config/samples/ray-cluster.uv.yaml @@ -16,11 +16,11 @@ spec: value: ray._private.runtime_env.uv_runtime_env_hook.hook resources: limits: - cpu: 1 - memory: 5Gi + cpu: "1" + memory: "5Gi" requests: - cpu: 1 - memory: 2Gi + cpu: "1" + memory: "2Gi" ports: - containerPort: 6379 name: gcs-server diff --git a/ray-operator/config/samples/ray-cluster.verl.yaml b/ray-operator/config/samples/ray-cluster.verl.yaml index 35df8c8a51c..caff1e48887 100644 --- a/ray-operator/config/samples/ray-cluster.verl.yaml +++ b/ray-operator/config/samples/ray-cluster.verl.yaml @@ -13,12 +13,12 @@ spec: image: hiyouga/verl:ngc-th2.6.0-cu126-vllm0.8.4-flashinfer0.2.2-cxx11abi0 resources: limits: - cpu: 48 - memory: 192G + cpu: "48" + memory: "192G" nvidia.com/gpu: "4" requests: - cpu: 36 - memory: 144G + cpu: "36" + memory: "144G" nvidia.com/gpu: "4" ports: - containerPort: 6379 diff --git a/ray-operator/config/samples/ray-cluster.volcano-scheduler-queue.yaml b/ray-operator/config/samples/ray-cluster.volcano-scheduler-queue.yaml index 79cf96f0fdd..3c440779b5e 100644 --- a/ray-operator/config/samples/ray-cluster.volcano-scheduler-queue.yaml +++ b/ray-operator/config/samples/ray-cluster.volcano-scheduler-queue.yaml @@ -5,8 +5,8 @@ metadata: spec: weight: 1 capability: - cpu: 4 - memory: 6Gi + cpu: "4" + memory: "6Gi" --- apiVersion: ray.io/v1 kind: RayCluster @@ -25,11 +25,11 @@ spec: image: rayproject/ray:2.52.0 resources: limits: - cpu: 1 - memory: 5Gi + cpu: "1" + memory: "5Gi" requests: - cpu: 1 - memory: 2Gi + cpu: "1" + memory: "2Gi" workerGroupSpecs: - groupName: worker replicas: 2 @@ -43,8 +43,8 @@ spec: image: rayproject/ray:2.52.0 resources: limits: - cpu: 1 - memory: 1Gi + cpu: "1" + memory: "1Gi" requests: - cpu: 1 - memory: 1Gi + cpu: "1" + memory: "1Gi" diff --git a/ray-operator/config/samples/ray-cluster.volcano-scheduler.yaml b/ray-operator/config/samples/ray-cluster.volcano-scheduler.yaml index 0010b5d41be..f9176f3ba8e 100644 --- a/ray-operator/config/samples/ray-cluster.volcano-scheduler.yaml +++ b/ray-operator/config/samples/ray-cluster.volcano-scheduler.yaml @@ -13,9 +13,9 @@ spec: image: rayproject/ray:2.52.0 resources: limits: - cpu: 1 - memory: 5Gi + cpu: "1" + memory: "5Gi" requests: - cpu: 1 - memory: 2Gi + cpu: "1" + memory: "2Gi" workerGroupSpecs: [] diff --git a/ray-operator/config/samples/ray-cluster.yunikorn-scheduler.yaml b/ray-operator/config/samples/ray-cluster.yunikorn-scheduler.yaml index cc87f3bba6e..b51d25b6656 100644 --- a/ray-operator/config/samples/ray-cluster.yunikorn-scheduler.yaml +++ b/ray-operator/config/samples/ray-cluster.yunikorn-scheduler.yaml @@ -17,11 +17,11 @@ spec: image: rayproject/ray:2.52.0 resources: limits: - cpu: 1 - memory: 5Gi + cpu: "1" + memory: "5Gi" requests: - cpu: 1 - memory: 2Gi + cpu: "1" + memory: "2Gi" workerGroupSpecs: - groupName: worker replicas: 2 @@ -35,8 +35,8 @@ spec: image: rayproject/ray:2.52.0 resources: limits: - cpu: 1 - memory: 1Gi + cpu: "1" + memory: "1Gi" requests: - cpu: 1 - memory: 1Gi + cpu: "1" + memory: "1Gi" diff --git a/ray-operator/config/samples/ray-data-image-resize/ray-data-image-resize-gcsfusecsi-job.yaml b/ray-operator/config/samples/ray-data-image-resize/ray-data-image-resize-gcsfusecsi-job.yaml index aec5691969c..5c750c246a1 100644 --- a/ray-operator/config/samples/ray-data-image-resize/ray-data-image-resize-gcsfusecsi-job.yaml +++ b/ray-operator/config/samples/ray-data-image-resize/ray-data-image-resize-gcsfusecsi-job.yaml @@ -21,10 +21,10 @@ spec: template: metadata: annotations: - gke-gcsfuse/cpu-limit: '0' - gke-gcsfuse/ephemeral-storage-limit: '0' - gke-gcsfuse/memory-limit: '0' - gke-gcsfuse/volumes: 'true' + gke-gcsfuse/cpu-limit: "0" + gke-gcsfuse/ephemeral-storage-limit: "0" + gke-gcsfuse/memory-limit: "0" + gke-gcsfuse/volumes: "true" spec: containers: - image: rayproject/ray:2.52.0 @@ -38,11 +38,11 @@ spec: name: client resources: limits: - cpu: '1' - memory: '5Gi' + cpu: "1" + memory: "5Gi" requests: - cpu: '1' - memory: '4Gi' + cpu: "1" + memory: "4Gi" volumeMounts: - mountPath: /tmp/ray name: ray-logs @@ -84,8 +84,8 @@ spec: name: ray-worker resources: requests: - cpu: '1' - memory: 4Gi + cpu: "1" + memory: "4Gi" volumeMounts: - mountPath: /tmp/ray name: ray-logs diff --git a/ray-operator/config/samples/ray-job.custom-head-svc.yaml b/ray-operator/config/samples/ray-job.custom-head-svc.yaml index c4a2b67e4ef..86e5f8af246 100644 --- a/ray-operator/config/samples/ray-job.custom-head-svc.yaml +++ b/ray-operator/config/samples/ray-job.custom-head-svc.yaml @@ -45,11 +45,11 @@ spec: name: client resources: limits: - cpu: 1 - memory: 5Gi + cpu: "1" + memory: "5Gi" requests: - cpu: 1 - memory: 2Gi + cpu: "1" + memory: "2Gi" volumeMounts: - mountPath: /home/ray/samples name: code-sample @@ -79,11 +79,11 @@ spec: image: rayproject/ray:2.52.0 resources: limits: - cpu: 1 - memory: 1Gi + cpu: "1" + memory: "1Gi" requests: - cpu: 1 - memory: 1Gi + cpu: "1" + memory: "1Gi" ######################Ray code sample################################# # this sample is from https://docs.ray.io/en/latest/cluster/job-submission.html#quick-start-example # it is mounted into the container and executed to show the Ray job at work diff --git a/ray-operator/config/samples/ray-job.deletion-rules.yaml b/ray-operator/config/samples/ray-job.deletion-rules.yaml index d61e05239de..7f3512b7950 100644 --- a/ray-operator/config/samples/ray-job.deletion-rules.yaml +++ b/ray-operator/config/samples/ray-job.deletion-rules.yaml @@ -70,11 +70,11 @@ spec: name: client resources: limits: - cpu: 1 - memory: 5Gi + cpu: "1" + memory: "5Gi" requests: - cpu: 1 - memory: 2Gi + cpu: "1" + memory: "2Gi" workerGroupSpecs: - replicas: 1 minReplicas: 1 @@ -88,8 +88,8 @@ spec: image: rayproject/ray:2.52.0 resources: limits: - cpu: 1 - memory: 1Gi + cpu: "1" + memory: "1Gi" requests: - cpu: 1 - memory: 1Gi + cpu: "1" + memory: "1Gi" diff --git a/ray-operator/config/samples/ray-job.interactive-mode.yaml b/ray-operator/config/samples/ray-job.interactive-mode.yaml index 927968089b7..6427ff39a2d 100644 --- a/ray-operator/config/samples/ray-job.interactive-mode.yaml +++ b/ray-operator/config/samples/ray-job.interactive-mode.yaml @@ -28,11 +28,11 @@ spec: name: client resources: limits: - cpu: 1 - memory: 5Gi + cpu: "1" + memory: "5Gi" requests: - cpu: 1 - memory: 2Gi + cpu: "1" + memory: "2Gi" rayVersion: 2.52.0 workerGroupSpecs: - groupName: default-group @@ -45,8 +45,8 @@ spec: name: ray-worker resources: limits: - cpu: 1 - memory: 1Gi + cpu: "1" + memory: "1Gi" requests: - cpu: 1 - memory: 1Gi + cpu: "1" + memory: "1Gi" diff --git a/ray-operator/config/samples/ray-job.kueue-toy-sample.yaml b/ray-operator/config/samples/ray-job.kueue-toy-sample.yaml index ca0d862df5f..d808fb2db9c 100644 --- a/ray-operator/config/samples/ray-job.kueue-toy-sample.yaml +++ b/ray-operator/config/samples/ray-job.kueue-toy-sample.yaml @@ -50,11 +50,11 @@ spec: name: client resources: limits: - cpu: 1 - memory: 5Gi + cpu: "1" + memory: "5Gi" requests: - cpu: 1 - memory: 2Gi + cpu: "1" + memory: "2Gi" workerGroupSpecs: - replicas: 1 minReplicas: 1 @@ -68,8 +68,8 @@ spec: image: rayproject/ray:2.52.0 resources: limits: - cpu: 1 - memory: 1Gi + cpu: "1" + memory: "1Gi" requests: - cpu: 1 - memory: 1Gi + cpu: "1" + memory: "1Gi" diff --git a/ray-operator/config/samples/ray-job.light-weight-submitter.yaml b/ray-operator/config/samples/ray-job.light-weight-submitter.yaml index 8a1a15f406d..071e43fab9d 100644 --- a/ray-operator/config/samples/ray-job.light-weight-submitter.yaml +++ b/ray-operator/config/samples/ray-job.light-weight-submitter.yaml @@ -27,11 +27,11 @@ spec: name: client resources: limits: - cpu: 1 - memory: 5Gi + cpu: "1" + memory: "5Gi" requests: - cpu: 1 - memory: 2Gi + cpu: "1" + memory: "2Gi" volumeMounts: - mountPath: /home/ray/samples name: code-sample @@ -64,11 +64,11 @@ spec: image: rayproject/ray:2.52.0 resources: limits: - cpu: 1 - memory: 1Gi + cpu: "1" + memory: "1Gi" requests: - cpu: 1 - memory: 1Gi + cpu: "1" + memory: "1Gi" submitterPodTemplate: spec: restartPolicy: Never diff --git a/ray-operator/config/samples/ray-job.modin.yaml b/ray-operator/config/samples/ray-job.modin.yaml index 7fd2d9d34b7..fe4dc7dca74 100644 --- a/ray-operator/config/samples/ray-job.modin.yaml +++ b/ray-operator/config/samples/ray-job.modin.yaml @@ -25,11 +25,11 @@ spec: name: client resources: limits: - cpu: 1 - memory: 5Gi + cpu: "1" + memory: "5Gi" requests: - cpu: 1 - memory: 2Gi + cpu: "1" + memory: "2Gi" volumeMounts: - mountPath: /home/ray/samples name: code-sample diff --git a/ray-operator/config/samples/ray-job.resources.yaml b/ray-operator/config/samples/ray-job.resources.yaml index 1be4aca72cf..e6c9c9822bb 100644 --- a/ray-operator/config/samples/ray-job.resources.yaml +++ b/ray-operator/config/samples/ray-job.resources.yaml @@ -44,11 +44,11 @@ spec: name: client resources: limits: - cpu: 1 - memory: 5Gi + cpu: "1" + memory: "5Gi" requests: - cpu: 1 - memory: 2Gi + cpu: "1" + memory: "2Gi" volumeMounts: - mountPath: /home/ray/samples name: code-sample @@ -81,12 +81,11 @@ spec: image: rayproject/ray:2.52.0 resources: limits: - cpu: 1 - memory: 1Gi + cpu: "1" + memory: "1Gi" requests: - cpu: 1 - memory: 1Gi - + cpu: "1" + memory: "1Gi" ######################Ray code sample################################# # this sample is from https://docs.ray.io/en/latest/cluster/job-submission.html#quick-start-example diff --git a/ray-operator/config/samples/ray-job.sample.yaml b/ray-operator/config/samples/ray-job.sample.yaml index 89380db25be..ed8564ce984 100644 --- a/ray-operator/config/samples/ray-job.sample.yaml +++ b/ray-operator/config/samples/ray-job.sample.yaml @@ -57,11 +57,11 @@ spec: name: client resources: limits: - cpu: 1 - memory: 5Gi + cpu: "1" + memory: "5Gi" requests: - cpu: 1 - memory: 2Gi + cpu: "1" + memory: "2Gi" volumeMounts: - mountPath: /home/ray/samples name: code-sample diff --git a/ray-operator/config/samples/ray-job.shutdown.yaml b/ray-operator/config/samples/ray-job.shutdown.yaml index 7ed50b2d551..66b71a799f4 100644 --- a/ray-operator/config/samples/ray-job.shutdown.yaml +++ b/ray-operator/config/samples/ray-job.shutdown.yaml @@ -50,11 +50,11 @@ spec: name: client resources: limits: - cpu: 1 - memory: 5Gi + cpu: "1" + memory: "5Gi" requests: - cpu: 1 - memory: 2Gi + cpu: "1" + memory: "2Gi" volumeMounts: - mountPath: /home/ray/samples name: code-sample @@ -87,12 +87,11 @@ spec: image: rayproject/ray:2.52.0 resources: limits: - cpu: 1 - memory: 1Gi + cpu: "1" + memory: "1Gi" requests: - cpu: 1 - memory: 1Gi - + cpu: "1" + memory: "1Gi" ######################Ray code sample################################# # this sample is from https://docs.ray.io/en/latest/cluster/job-submission.html#quick-start-example # it is mounted into the container and executed to show the Ray job at work diff --git a/ray-operator/config/samples/ray-job.sidecar-mode.yaml b/ray-operator/config/samples/ray-job.sidecar-mode.yaml index ebec276f2e3..6240c4f666b 100644 --- a/ray-operator/config/samples/ray-job.sidecar-mode.yaml +++ b/ray-operator/config/samples/ray-job.sidecar-mode.yaml @@ -33,11 +33,11 @@ spec: name: client resources: limits: - cpu: 1 - memory: 5Gi + cpu: "1" + memory: "5Gi" requests: - cpu: 1 - memory: 2Gi + cpu: "1" + memory: "2Gi" volumeMounts: - mountPath: /home/ray/samples name: code-sample @@ -61,11 +61,11 @@ spec: image: rayproject/ray:2.52.0 resources: limits: - cpu: 1 - memory: 1Gi + cpu: "1" + memory: "1Gi" requests: - cpu: 1 - memory: 1Gi + cpu: "1" + memory: "1Gi" --- apiVersion: v1 diff --git a/ray-operator/config/samples/ray-job.tpu-v6e-16-multihost.yaml b/ray-operator/config/samples/ray-job.tpu-v6e-16-multihost.yaml index ce52b39a02a..8465754e6a7 100644 --- a/ray-operator/config/samples/ray-job.tpu-v6e-16-multihost.yaml +++ b/ray-operator/config/samples/ray-job.tpu-v6e-16-multihost.yaml @@ -30,10 +30,10 @@ spec: resources: limits: cpu: "8" - memory: 40G + memory: "40G" requests: cpu: "8" - memory: 40G + memory: "40G" workerGroupSpecs: - replicas: 1 minReplicas: 1 @@ -52,11 +52,11 @@ spec: limits: cpu: "24" google.com/tpu: "4" - memory: 200G + memory: "200G" requests: cpu: "24" google.com/tpu: "4" - memory: 200G + memory: "200G" env: - name: NODE_IP valueFrom: diff --git a/ray-operator/config/samples/ray-job.tpu-v6e-256-multihost.yaml b/ray-operator/config/samples/ray-job.tpu-v6e-256-multihost.yaml index 09e191cc7c4..10a9d3a978b 100644 --- a/ray-operator/config/samples/ray-job.tpu-v6e-256-multihost.yaml +++ b/ray-operator/config/samples/ray-job.tpu-v6e-256-multihost.yaml @@ -30,10 +30,10 @@ spec: resources: limits: cpu: "8" - memory: 40G + memory: "40G" requests: cpu: "8" - memory: 40G + memory: "40G" workerGroupSpecs: - replicas: 1 minReplicas: 1 @@ -51,11 +51,11 @@ spec: limits: cpu: "24" google.com/tpu: "4" - memory: 200G + memory: "200G" requests: cpu: "24" google.com/tpu: "4" - memory: 200G + memory: "200G" env: - name: JAX_PLATFORMS value: tpu,cpu diff --git a/ray-operator/config/samples/ray-job.tpu-v6e-singlehost.yaml b/ray-operator/config/samples/ray-job.tpu-v6e-singlehost.yaml index 13bfb47083a..36c0a9e4e03 100644 --- a/ray-operator/config/samples/ray-job.tpu-v6e-singlehost.yaml +++ b/ray-operator/config/samples/ray-job.tpu-v6e-singlehost.yaml @@ -28,10 +28,10 @@ spec: resources: limits: cpu: "8" - memory: 40G + memory: "40G" requests: cpu: "8" - memory: 40G + memory: "40G" workerGroupSpecs: - replicas: 1 minReplicas: 1 @@ -50,11 +50,11 @@ spec: limits: cpu: "24" google.com/tpu: "4" - memory: 200G + memory: "200G" requests: cpu: "24" google.com/tpu: "4" - memory: 200G + memory: "200G" nodeSelector: cloud.google.com/gke-tpu-accelerator: tpu-v6e-slice cloud.google.com/gke-tpu-topology: 2x2 diff --git a/ray-operator/config/samples/ray-job.volcano-scheduler-queue.yaml b/ray-operator/config/samples/ray-job.volcano-scheduler-queue.yaml index 7b567f7e474..8ae18cf832d 100644 --- a/ray-operator/config/samples/ray-job.volcano-scheduler-queue.yaml +++ b/ray-operator/config/samples/ray-job.volcano-scheduler-queue.yaml @@ -5,8 +5,8 @@ metadata: spec: weight: 1 capability: - cpu: 4 - memory: 6Gi + cpu: "4" + memory: "6Gi" --- apiVersion: ray.io/v1 kind: RayJob @@ -40,11 +40,11 @@ spec: name: client resources: limits: - cpu: 1 - memory: 5Gi + cpu: "1" + memory: "5Gi" requests: - cpu: 1 - memory: 2Gi + cpu: "1" + memory: "2Gi" volumeMounts: - mountPath: /home/ray/samples name: code-sample @@ -68,11 +68,11 @@ spec: image: rayproject/ray:2.52.0 resources: limits: - cpu: 1 - memory: 1Gi + cpu: "1" + memory: "1Gi" requests: - cpu: 1 - memory: 1Gi + cpu: "1" + memory: "1Gi" --- apiVersion: v1 kind: ConfigMap diff --git a/ray-operator/config/samples/ray-job.yunikorn-scheduler.yaml b/ray-operator/config/samples/ray-job.yunikorn-scheduler.yaml index 1ae8f690fb1..165dd840a40 100644 --- a/ray-operator/config/samples/ray-job.yunikorn-scheduler.yaml +++ b/ray-operator/config/samples/ray-job.yunikorn-scheduler.yaml @@ -32,11 +32,11 @@ spec: name: client resources: limits: - cpu: 1 - memory: 5Gi + cpu: "1" + memory: "5Gi" requests: - cpu: 1 - memory: 2Gi + cpu: "1" + memory: "2Gi" volumeMounts: - mountPath: /home/ray/samples name: code-sample @@ -60,12 +60,11 @@ spec: image: rayproject/ray:2.52.0 resources: limits: - cpu: 1 - memory: 1Gi + cpu: "1" + memory: "1Gi" requests: - cpu: 1 - memory: 1Gi - + cpu: "1" + memory: "1Gi" --- apiVersion: v1 kind: ConfigMap diff --git a/ray-operator/config/samples/ray-service.custom-serve-service.yaml b/ray-operator/config/samples/ray-service.custom-serve-service.yaml index 23428f2016e..fed824ba41d 100644 --- a/ray-operator/config/samples/ray-service.custom-serve-service.yaml +++ b/ray-operator/config/samples/ray-service.custom-serve-service.yaml @@ -65,11 +65,11 @@ spec: image: rayproject/ray:2.52.0 resources: limits: - cpu: 1 - memory: 5Gi + cpu: "1" + memory: "5Gi" requests: - cpu: 1 - memory: 2Gi + cpu: "1" + memory: "2Gi" workerGroupSpecs: # the pod replicas in this group typed worker - replicas: 1 @@ -89,8 +89,8 @@ spec: image: rayproject/ray:2.52.0 resources: limits: - cpu: 1 - memory: 2Gi + cpu: "1" + memory: "2Gi" requests: - cpu: 500m - memory: 2Gi + cpu: "500m" + memory: "2Gi" diff --git a/ray-operator/config/samples/ray-service.different-port.yaml b/ray-operator/config/samples/ray-service.different-port.yaml index 604bf7a521c..87dc42f0b27 100644 --- a/ray-operator/config/samples/ray-service.different-port.yaml +++ b/ray-operator/config/samples/ray-service.different-port.yaml @@ -55,11 +55,11 @@ spec: image: rayproject/ray:2.52.0 resources: limits: - cpu: 1 - memory: 5Gi + cpu: "1" + memory: "5Gi" requests: - cpu: 1 - memory: 2Gi + cpu: "1" + memory: "2Gi" ports: - containerPort: 6379 name: gcs-server @@ -85,11 +85,11 @@ spec: image: rayproject/ray:2.52.0 resources: limits: - cpu: 1 - memory: 2Gi + cpu: "1" + memory: "2Gi" requests: - cpu: 500m - memory: 2Gi + cpu: "500m" + memory: "2Gi" ports: - containerPort: 9000 name: serve diff --git a/ray-operator/config/samples/ray-service.high-availability.yaml b/ray-operator/config/samples/ray-service.high-availability.yaml index 8fb7914e559..5e0fde5bda1 100644 --- a/ray-operator/config/samples/ray-service.high-availability.yaml +++ b/ray-operator/config/samples/ray-service.high-availability.yaml @@ -64,11 +64,11 @@ spec: image: rayproject/ray:2.52.0 resources: limits: - cpu: 1 - memory: 5Gi + cpu: "1" + memory: "5Gi" requests: - cpu: 1 - memory: 2Gi + cpu: "1" + memory: "2Gi" workerGroupSpecs: # the pod replicas in this group typed worker - replicas: 2 @@ -88,11 +88,11 @@ spec: image: rayproject/ray:2.52.0 resources: limits: - cpu: 1 - memory: 2Gi + cpu: "1" + memory: "2Gi" requests: - cpu: 1 - memory: 2Gi + cpu: "1" + memory: "2Gi" --- kind: ConfigMap apiVersion: v1 diff --git a/ray-operator/config/samples/ray-service.incremental-upgrade.yaml b/ray-operator/config/samples/ray-service.incremental-upgrade.yaml index aee243d8335..ea50db61946 100644 --- a/ray-operator/config/samples/ray-service.incremental-upgrade.yaml +++ b/ray-operator/config/samples/ray-service.incremental-upgrade.yaml @@ -59,11 +59,11 @@ spec: image: rayproject/ray:2.51.0 resources: limits: - cpu: 1 - memory: 5Gi + cpu: "1" + memory: "5Gi" requests: - cpu: 1 - memory: 2Gi + cpu: "1" + memory: "2Gi" ports: - containerPort: 6379 name: gcs-server @@ -89,8 +89,8 @@ spec: command: ["/bin/sh", "-c", "ray stop"] resources: requests: - cpu: 500m - memory: 1Gi + cpu: "500m" + memory: "1Gi" limits: - cpu: 1 - memory: 4Gi + cpu: "1" + memory: "4Gi" diff --git a/ray-operator/config/samples/ray-service.llm-serve.yaml b/ray-operator/config/samples/ray-service.llm-serve.yaml index 70fb4d9d013..0d873875858 100644 --- a/ray-operator/config/samples/ray-service.llm-serve.yaml +++ b/ray-operator/config/samples/ray-service.llm-serve.yaml @@ -53,11 +53,11 @@ spec: protocol: TCP resources: limits: - cpu: 2 - memory: 5Gi + cpu: "2" + memory: "5Gi" requests: - cpu: 2 - memory: 4Gi + cpu: "2" + memory: "4Gi" workerGroupSpecs: - replicas: 1 minReplicas: 1 @@ -79,12 +79,12 @@ spec: key: hf_token resources: limits: - cpu: 32 - memory: 32Gi + cpu: "32" + memory: "32Gi" nvidia.com/gpu: "4" requests: - cpu: 32 - memory: 32Gi + cpu: "32" + memory: "32Gi" nvidia.com/gpu: "4" --- diff --git a/ray-operator/config/samples/ray-service.mobilenet.yaml b/ray-operator/config/samples/ray-service.mobilenet.yaml index 1c1212d2447..35450839456 100644 --- a/ray-operator/config/samples/ray-service.mobilenet.yaml +++ b/ray-operator/config/samples/ray-service.mobilenet.yaml @@ -29,11 +29,11 @@ spec: image: rayproject/ray-ml:2.46.0.0e19ea-py39-cpu resources: limits: - cpu: 1 - memory: 5Gi + cpu: "1" + memory: "5Gi" requests: - cpu: 1 - memory: 4Gi + cpu: "1" + memory: "4Gi" workerGroupSpecs: # the pod replicas in this group typed worker - replicas: 1 @@ -50,8 +50,8 @@ spec: image: rayproject/ray-ml:2.46.0.0e19ea-py39-cpu resources: limits: - cpu: 1 - memory: 4Gi + cpu: "1" + memory: "4Gi" requests: - cpu: 1 - memory: 4Gi + cpu: "1" + memory: "4Gi" diff --git a/ray-operator/config/samples/ray-service.no-ray-serve-replica.yaml b/ray-operator/config/samples/ray-service.no-ray-serve-replica.yaml index 5d71310481a..5200f3e03d0 100644 --- a/ray-operator/config/samples/ray-service.no-ray-serve-replica.yaml +++ b/ray-operator/config/samples/ray-service.no-ray-serve-replica.yaml @@ -28,11 +28,11 @@ spec: image: rayproject/ray:2.52.0 resources: limits: - cpu: 1 - memory: 5Gi + cpu: "1" + memory: "5Gi" requests: - cpu: 1 - memory: 2Gi + cpu: "1" + memory: "2Gi" workerGroupSpecs: - replicas: 2 minReplicas: 1 @@ -46,8 +46,8 @@ spec: image: rayproject/ray:2.52.0 resources: limits: - cpu: 1 - memory: 2Gi + cpu: "1" + memory: "2Gi" requests: - cpu: 500m - memory: 2Gi + cpu: "500m" + memory: "2Gi" diff --git a/ray-operator/config/samples/ray-service.sample.yaml b/ray-operator/config/samples/ray-service.sample.yaml index 8e9a6266ec1..b45b26696ab 100644 --- a/ray-operator/config/samples/ray-service.sample.yaml +++ b/ray-operator/config/samples/ray-service.sample.yaml @@ -76,11 +76,11 @@ spec: image: rayproject/ray:2.52.0 resources: limits: - cpu: 2 - memory: 5Gi + cpu: "2" + memory: "5Gi" requests: - cpu: 2 - memory: 2Gi + cpu: "2" + memory: "2Gi" workerGroupSpecs: # the pod replicas in this group typed worker - replicas: 1 @@ -100,8 +100,8 @@ spec: image: rayproject/ray:2.52.0 resources: limits: - cpu: 1 - memory: 2Gi + cpu: "1" + memory: "2Gi" requests: - cpu: 500m - memory: 2Gi + cpu: "500m" + memory: "2Gi" diff --git a/ray-operator/config/samples/ray-service.stable-diffusion.yaml b/ray-operator/config/samples/ray-service.stable-diffusion.yaml index 0eadec85e17..ccd9804c47a 100644 --- a/ray-operator/config/samples/ray-service.stable-diffusion.yaml +++ b/ray-operator/config/samples/ray-service.stable-diffusion.yaml @@ -27,11 +27,11 @@ spec: name: ray-logs resources: limits: - cpu: 2 - memory: 8Gi + cpu: "2" + memory: "8Gi" requests: - cpu: 2 - memory: 8Gi + cpu: "2" + memory: "8Gi" volumes: - name: ray-logs emptyDir: {} @@ -50,13 +50,13 @@ spec: image: rayproject/ray-ml:2.46.0.0e19ea-py39-gpu resources: limits: - cpu: 4 - memory: 16Gi - nvidia.com/gpu: 1 + cpu: "4" + memory: "16Gi" + nvidia.com/gpu: "1" requests: - cpu: 3 - memory: 12Gi - nvidia.com/gpu: 1 + cpu: "3" + memory: "12Gi" + nvidia.com/gpu: "1" # Please add the following taints to the GPU node. tolerations: - key: "ray.io/node-type" diff --git a/ray-operator/config/samples/ray-service.text-ml.yaml b/ray-operator/config/samples/ray-service.text-ml.yaml index 6b8b7e6fa18..ba58150b8af 100644 --- a/ray-operator/config/samples/ray-service.text-ml.yaml +++ b/ray-operator/config/samples/ray-service.text-ml.yaml @@ -43,11 +43,11 @@ spec: image: rayproject/ray:2.52.0 resources: limits: - cpu: 2 - memory: 5Gi + cpu: "2" + memory: "5Gi" requests: - cpu: 2 - memory: 2Gi + cpu: "2" + memory: "2Gi" workerGroupSpecs: # the pod replicas in this group typed worker - replicas: 1 @@ -64,8 +64,8 @@ spec: image: rayproject/ray:2.52.0 resources: limits: - cpu: 1 - memory: 2Gi + cpu: "1" + memory: "2Gi" requests: - cpu: 500m - memory: 2Gi + cpu: "500m" + memory: "2Gi" diff --git a/ray-operator/config/samples/ray-service.text-summarizer.yaml b/ray-operator/config/samples/ray-service.text-summarizer.yaml index 1be3df6a01f..22490b257f7 100644 --- a/ray-operator/config/samples/ray-service.text-summarizer.yaml +++ b/ray-operator/config/samples/ray-service.text-summarizer.yaml @@ -26,11 +26,11 @@ spec: name: ray-logs resources: limits: - cpu: 2 - memory: 8Gi + cpu: "2" + memory: "8Gi" requests: - cpu: 2 - memory: 8Gi + cpu: "2" + memory: "8Gi" volumes: - name: ray-logs emptyDir: {} @@ -49,13 +49,13 @@ spec: image: rayproject/ray-ml:2.46.0.0e19ea-py39-gpu resources: limits: - cpu: 4 - memory: 16Gi - nvidia.com/gpu: 1 + cpu: "4" + memory: "16Gi" + nvidia.com/gpu: "1" requests: - cpu: 3 - memory: 12Gi - nvidia.com/gpu: 1 + cpu: "3" + memory: "12Gi" + nvidia.com/gpu: "1" # Please add the following taints to the GPU node. tolerations: - key: "ray.io/node-type" diff --git a/ray-operator/config/samples/ray-service.tpu-single-host.yaml b/ray-operator/config/samples/ray-service.tpu-single-host.yaml index 4b5099bb573..f416d25f5e8 100644 --- a/ray-operator/config/samples/ray-service.tpu-single-host.yaml +++ b/ray-operator/config/samples/ray-service.tpu-single-host.yaml @@ -31,11 +31,11 @@ spec: image: rayproject/ray:2.52.0 resources: limits: - cpu: 2 - memory: 8Gi + cpu: "2" + memory: "8Gi" requests: - cpu: 2 - memory: 8Gi + cpu: "2" + memory: "8Gi" workerGroupSpecs: - replicas: 1 minReplicas: 1 @@ -52,14 +52,14 @@ spec: limits: # ct4p-hightpu-4t (v4) TPUs have 240 vCPUs, adjust this value based on your resource needs cpu: "100" - ephemeral-storage: 20Gi + ephemeral-storage: "20Gi" google.com/tpu: "4" - memory: 200Gi + memory: "200Gi" requests: cpu: "100" - ephemeral-storage: 20Gi + ephemeral-storage: "20Gi" google.com/tpu: "4" - memory: 200Gi + memory: "200Gi" nodeSelector: # https://cloud.google.com/kubernetes-engine/docs/concepts/tpus cloud.google.com/gke-tpu-accelerator: tpu-v4-podslice diff --git a/ray-operator/config/samples/vllm/ray-service.vllm-tpu-v6e-singlehost.yaml b/ray-operator/config/samples/vllm/ray-service.vllm-tpu-v6e-singlehost.yaml index fd9654d3892..9059f6e53a0 100644 --- a/ray-operator/config/samples/vllm/ray-service.vllm-tpu-v6e-singlehost.yaml +++ b/ray-operator/config/samples/vllm/ray-service.vllm-tpu-v6e-singlehost.yaml @@ -47,10 +47,10 @@ spec: resources: limits: cpu: "2" - memory: 8G + memory: "8G" requests: cpu: "2" - memory: 8G + memory: "8G" volumeMounts: - name: gcs-fuse-csi-ephemeral mountPath: /data @@ -96,13 +96,13 @@ spec: limits: cpu: "100" google.com/tpu: "8" - ephemeral-storage: 40G - memory: 200G + ephemeral-storage: "40G" + memory: "200G" requests: cpu: "100" google.com/tpu: "8" - ephemeral-storage: 40G - memory: 200G + ephemeral-storage: "40G" + memory: "200G" env: - name: JAX_PLATFORMS value: "tpu"