Skip to content

Commit e3fafb9

Browse files
committed
Implement NetworkPolicy support
Signed-off-by: Pat O'Connor <[email protected]>
1 parent 240e3f4 commit e3fafb9

File tree

5 files changed

+119
-0
lines changed

5 files changed

+119
-0
lines changed

ray-operator/config/manager/manager.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,4 +80,9 @@ spec:
8080
# environment variable is not set, requeue after the default value (300).
8181
# - name: RAYCLUSTER_DEFAULT_REQUEUE_SECONDS_ENV
8282
# value: "300"
83+
# Required for NetworkPolicy feature when operator is NOT deployed in 'ray-system' namespace
84+
# - name: POD_NAMESPACE
85+
# valueFrom:
86+
# fieldRef:
87+
# fieldPath: metadata.namespace
8388
terminationGracePeriodSeconds: 10

ray-operator/config/rbac/role.yaml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,19 @@ rules:
109109
- get
110110
- list
111111
- watch
112+
- apiGroups:
113+
- networking.k8s.io
114+
resources:
115+
- ingresses
116+
- networkpolicies
117+
verbs:
118+
- create
119+
- delete
120+
- get
121+
- list
122+
- patch
123+
- update
124+
- watch
112125
- apiGroups:
113126
- ray.io
114127
resources:

ray-operator/controllers/ray/raycluster_controller.go

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ type RayClusterReconcilerOptions struct {
100100
// +kubebuilder:rbac:groups=coordination.k8s.io,resources=leases,verbs=get;list;create;update
101101
// +kubebuilder:rbac:groups=networking.k8s.io,resources=ingressclasses,verbs=get;list;watch
102102
// +kubebuilder:rbac:groups=networking.k8s.io,resources=ingresses,verbs=get;list;watch;create;update;delete;patch
103+
// +kubebuilder:rbac:groups=networking.k8s.io,resources=networkpolicies,verbs=get;list;watch;create;update;delete;patch
103104
// +kubebuilder:rbac:groups=route.openshift.io,resources=routes,verbs=get;list;watch;create;update;patch;delete
104105
// +kubebuilder:rbac:groups=extensions,resources=ingresses,verbs=get;list;watch;create;update;delete;patch
105106
// +kubebuilder:rbac:groups=core,resources=serviceaccounts,verbs=get;list;watch;create;delete
@@ -298,6 +299,7 @@ func (r *RayClusterReconciler) rayClusterReconcile(ctx context.Context, instance
298299
r.reconcileHeadlessService,
299300
r.reconcileServeService,
300301
r.reconcilePods,
302+
r.reconcileNetworkPolicies,
301303
}
302304

303305
for _, fn := range reconcileFuncs {
@@ -782,6 +784,97 @@ func (r *RayClusterReconciler) reconcilePods(ctx context.Context, instance *rayv
782784
return nil
783785
}
784786

787+
// reconcileNetworkPolicies creates a default deny NetworkPolicy for the RayCluster.
788+
func (r *RayClusterReconciler) reconcileNetworkPolicies(ctx context.Context, instance *rayv1.RayCluster) error {
789+
logger := ctrl.LoggerFrom(ctx)
790+
791+
// Feature gate check
792+
if !features.Enabled(features.RayClusterNetworkPolicy) {
793+
logger.V(1).Info("NetworkPolicy feature disabled, skipping reconciliation")
794+
return nil
795+
}
796+
797+
logger.Info("Reconciling Network Policies")
798+
799+
networkPolicy := createDefaultDenyPolicy(instance)
800+
801+
if err := ctrl.SetControllerReference(instance, networkPolicy, r.Scheme); err != nil {
802+
return err
803+
}
804+
805+
if err := r.Create(ctx, networkPolicy); err != nil {
806+
if errors.IsAlreadyExists(err) {
807+
logger.Info("NetworkPolicy already exists, no need to create")
808+
return nil
809+
}
810+
r.Recorder.Eventf(instance, corev1.EventTypeWarning, string(utils.FailedToCreateNetworkPolicy), "Failed to apply Head NetworkPolicy %s/%s, %v", networkPolicy.Namespace, networkPolicy.Name, err)
811+
return err
812+
}
813+
814+
logger.Info("Successfully created NetworkPolicy", "name", networkPolicy.Name)
815+
r.Recorder.Eventf(instance, corev1.EventTypeNormal, string(utils.CreatedNetworkPolicy),
816+
"Created NetworkPolicy %s/%s", networkPolicy.Namespace, networkPolicy.Name)
817+
818+
return nil
819+
}
820+
821+
// createDefaultDenyPolicy creates a default deny NetworkPolicy for the RayCluster.
822+
func createDefaultDenyPolicy(instance *rayv1.RayCluster) *networkingv1.NetworkPolicy {
823+
operatorNamespace := os.Getenv("POD_NAMESPACE")
824+
if operatorNamespace == "" {
825+
operatorNamespace = "ray-system" // fallback
826+
}
827+
828+
labels := map[string]string{
829+
utils.RayClusterLabelKey: instance.Name,
830+
utils.KubernetesApplicationNameLabelKey: utils.ApplicationName,
831+
utils.KubernetesCreatedByLabelKey: utils.ComponentName,
832+
}
833+
834+
return &networkingv1.NetworkPolicy{
835+
ObjectMeta: metav1.ObjectMeta{
836+
Name: fmt.Sprintf("%s-default-deny", instance.Name),
837+
Namespace: instance.Namespace,
838+
Labels: labels,
839+
},
840+
Spec: networkingv1.NetworkPolicySpec{
841+
PodSelector: metav1.LabelSelector{
842+
MatchLabels: map[string]string{
843+
utils.RayClusterLabelKey: instance.Name,
844+
},
845+
},
846+
PolicyTypes: []networkingv1.PolicyType{networkingv1.PolicyTypeIngress},
847+
Ingress: []networkingv1.NetworkPolicyIngressRule{
848+
// Allow traffic from within the same cluster
849+
{
850+
From: []networkingv1.NetworkPolicyPeer{
851+
{
852+
PodSelector: &metav1.LabelSelector{
853+
MatchLabels: map[string]string{
854+
utils.RayClusterLabelKey: instance.Name,
855+
},
856+
},
857+
},
858+
// Allow KubeRay operator communication
859+
{
860+
PodSelector: &metav1.LabelSelector{
861+
MatchLabels: map[string]string{
862+
utils.KubernetesApplicationNameLabelKey: utils.ApplicationName,
863+
},
864+
},
865+
NamespaceSelector: &metav1.LabelSelector{
866+
MatchLabels: map[string]string{
867+
"kubernetes.io/metadata.name": operatorNamespace,
868+
},
869+
},
870+
},
871+
},
872+
},
873+
},
874+
},
875+
}
876+
}
877+
785878
// shouldDeletePod returns whether the Pod should be deleted and the reason
786879
//
787880
// @param pod: The Pod to be checked.

ray-operator/controllers/ray/utils/constant.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -315,6 +315,10 @@ const (
315315
FailedToUpdateHeadPodServeLabel K8sEventType = "FailedToUpdateHeadPodServeLabel"
316316
FailedToUpdateServeApplications K8sEventType = "FailedToUpdateServeApplications"
317317

318+
// NetworkPolicy event list
319+
CreatedNetworkPolicy K8sEventType = "CreatedNetworkPolicy"
320+
FailedToCreateNetworkPolicy K8sEventType = "FailedToCreateNetworkPolicy"
321+
318322
// Generic Pod event list
319323
DeletedPod K8sEventType = "DeletedPod"
320324
FailedToDeletePod K8sEventType = "FailedToDeletePod"

ray-operator/pkg/features/features.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,9 @@ const (
2424
//
2525
// Enables new deletion policy API in RayJob
2626
RayJobDeletionPolicy featuregate.Feature = "RayJobDeletionPolicy"
27+
28+
// Might be overkill to have a feature gate for this but for the sake of argument...
29+
RayClusterNetworkPolicy featuregate.Feature = "RayClusterNetworkPolicy"
2730
)
2831

2932
func init() {
@@ -33,6 +36,7 @@ func init() {
3336
var defaultFeatureGates = map[featuregate.Feature]featuregate.FeatureSpec{
3437
RayClusterStatusConditions: {Default: true, PreRelease: featuregate.Beta},
3538
RayJobDeletionPolicy: {Default: false, PreRelease: featuregate.Alpha},
39+
RayClusterNetworkPolicy: {Default: false, PreRelease: featuregate.Alpha},
3640
}
3741

3842
// SetFeatureGateDuringTest is a helper method to override feature gates in tests.

0 commit comments

Comments
 (0)