From df8f231907357e73bf9cd212344d2b9df09071f0 Mon Sep 17 00:00:00 2001 From: Surya Seetharaman Date: Sun, 19 Oct 2025 19:59:52 +0200 Subject: [PATCH] Change the capacity struct from int to ptrOfInt Today, in CNCC we store the capacity values as integers: type capacity struct { IPv4 int `json:"ipv4,omitempty"` IPv6 int `json:"ipv6,omitempty"` IP int `json:"ip,omitempty"` } When capacity is full, CNCC sets the value to 0. Also, depending on the platform it also ignores setting fields it doesn't care about (example AWS doesn't use IP, gcp and azure don't use IPv4 and IPv6). However given we have omitempty set, this was omitting the zero value in the annotation. When OVN-Kubernetes reads this annotation it was then setting the capacity to unlimited: nodeEgressIPConfig := []nodeEgressIPConfiguration{ { Capacity: Capacity{ IP: UnlimitedNodeCapacity, IPv4: UnlimitedNodeCapacity, --> we set this to maxint32 IPv6: UnlimitedNodeCapacity, }, }, } which is causing all EgressIPs to be assigned to this node leading to: status: conditions: - lastTransitionTime: "2025-10-06T19:24:24Z" message: "Error processing cloud assignment request, err: PrivateIpAddressLimitExceeded: Number of private addresses will exceed limit.\n\tstatus code: 400, request id: 457f4332-e9c4-44c9-bfcf-deeb5e7e43ce" In this fix, what we really want is to remove omitempty so that the zero capacity gets reflected correctly, however doing so also means fields that are unset will also be zero which can lead to confusion. Basically we are not able to distinguish between unset field and 0 value fields. Hence we are changing the capacity struct to be pointer type values so that null/nil means unset and 0 means full capacity. We still keep the omitempty since we don't need to do anything with unset fields - there is no behaviour change there and OVN-Kubernetes will continue to treat that as unlimited capacity. Upgrades: CNCC upon reboot seems to call: func (n *NodeController) SyncHandler(key string) error { .... // Filter out cloudPrivateIPConfigs assigned to node (key) and write the entry // into same slice starting from index 0, finally chop off unwanted entries // when passing it into GetNodeEgressIPConfiguration. index := 0 for _, cloudPrivateIPConfig := range cloudPrivateIPConfigs { if isAssignedCloudPrivateIPConfigOnNode(cloudPrivateIPConfig, key) { cloudPrivateIPConfigs[index] = cloudPrivateIPConfig index++ } } nodeEgressIPConfigs, err := n.cloudProviderClient.GetNodeEgressIPConfiguration(node, cloudPrivateIPConfigs[:index]) if err != nil { return fmt.Errorf("error retrieving the private IP configuration for node: %s, err: %v", node.Name, err) } return n.SetNodeEgressIPConfigAnnotation(node, nodeEgressIPConfigs) } // SetCloudPrivateIPConfigAnnotationOnNode annotates the corev1.Node with the cloud subnet information and capacity func (n *NodeController) SetNodeEgressIPConfigAnnotation(node *corev1.Node, nodeEgressIPConfigs []*cloudprovider.NodeEgressIPConfiguration) error { annotation, err := n.generateAnnotation(nodeEgressIPConfigs) if err != nil { return err } klog.Infof("Setting annotation: '%s: %s' on node: %s", nodeEgressIPConfigAnnotationKey, annotation, node.Name) return retry.RetryOnConflict(retry.DefaultRetry, func() error { ctx, cancel := context.WithTimeout(n.ctx, controller.ClientTimeout) defer cancel() // See: updateCloudPrivateIPConfigStatus nodeLatest, err := n.kubeClient.CoreV1().Nodes().Get(ctx, node.Name, metav1.GetOptions{}) if err != nil { return err } existingAnnotations := nodeLatest.Annotations existingAnnotations[nodeEgressIPConfigAnnotationKey] = annotation nodeLatest.SetAnnotations(existingAnnotations) _, err = n.kubeClient.CoreV1().Nodes().Update(ctx, nodeLatest, metav1.UpdateOptions{}) return err }) } and we seem to be overwriting the annotation - so we should be good on upgrades in changing from older annotations to new annotations - where 0 valued fields will appear for full capacity nodes. Once that happens, OVN-Kubernetes should overrite the UnlimitedValue to value 0 tat indicates 0 capacity and we should enter: if eNode.egressIPConfig.Capacity.IP < util.UnlimitedNodeCapacity { if eNode.egressIPConfig.Capacity.IP-len(eNode.allocations) <= 0 { klog.V(5).Infof("Additional allocation on Node: %s exhausts it's IP capacity, trying another node", eNode.name) continue } } if eNode.egressIPConfig.Capacity.IPv4 < util.UnlimitedNodeCapacity && utilnet.IsIPv4(eIP) { if eNode.egressIPConfig.Capacity.IPv4-getIPFamilyAllocationCount(eNode.allocations, false) <= 0 { klog.V(5).Infof("Additional allocation on Node: %s exhausts it's IPv4 capacity, trying another node", eNode.name) continue } } if eNode.egressIPConfig.Capacity.IPv6 < util.UnlimitedNodeCapacity && utilnet.IsIPv6(eIP) { if eNode.egressIPConfig.Capacity.IPv6-getIPFamilyAllocationCount(eNode.allocations, true) <= 0 { klog.V(5).Infof("Additional allocation on Node: %s exhausts it's IPv6 capacity, trying another node", eNode.name) continue } } these desired conditions correctly. Signed-off-by: Surya Seetharaman --- pkg/cloudprovider/aws.go | 6 ++++-- pkg/cloudprovider/azure.go | 8 +++++--- pkg/cloudprovider/cloudprovider.go | 13 +++++++------ pkg/cloudprovider/gcp.go | 4 +++- pkg/cloudprovider/openstack.go | 4 +++- pkg/cloudprovider/openstack_test.go | 17 +++++++++-------- 6 files changed, 31 insertions(+), 21 deletions(-) diff --git a/pkg/cloudprovider/aws.go b/pkg/cloudprovider/aws.go index e54e57c09..f2024cf2e 100644 --- a/pkg/cloudprovider/aws.go +++ b/pkg/cloudprovider/aws.go @@ -22,6 +22,7 @@ import ( "k8s.io/apimachinery/pkg/util/wait" "k8s.io/klog/v2" utilnet "k8s.io/utils/net" + "k8s.io/utils/ptr" ) const ( @@ -218,8 +219,9 @@ func (a *AWS) GetNodeEgressIPConfiguration(node *corev1.Node, cloudPrivateIPConf return nil, err } config.Capacity = capacity{ - IPv4: capV4, - IPv6: capV6, + IPv4: ptr.To(capV4), + IPv6: ptr.To(capV6), + // IP field not used by AWS (uses per-IP-family capacity) } return []*NodeEgressIPConfiguration{config}, nil } diff --git a/pkg/cloudprovider/azure.go b/pkg/cloudprovider/azure.go index 9184eeb74..72af19953 100644 --- a/pkg/cloudprovider/azure.go +++ b/pkg/cloudprovider/azure.go @@ -3,14 +3,15 @@ package cloudprovider import ( "context" "fmt" - "github.com/Azure/azure-sdk-for-go/sdk/azcore/runtime" - "k8s.io/utils/ptr" "net" "os" "strings" "sync" "time" + "github.com/Azure/azure-sdk-for-go/sdk/azcore/runtime" + "k8s.io/utils/ptr" + "github.com/Azure/azure-sdk-for-go/sdk/azcore" "github.com/Azure/azure-sdk-for-go/sdk/azcore/arm" "github.com/Azure/azure-sdk-for-go/sdk/azcore/cloud" @@ -351,7 +352,8 @@ func (a *Azure) GetNodeEgressIPConfiguration(node *corev1.Node, cloudPrivateIPCo config.IFAddr.IPv6 = v6Subnet.String() } config.Capacity = capacity{ - IP: a.getCapacity(networkInterface, len(cloudPrivateIPConfigs)), + // IPv4 and IPv6 fields not used by Azure (uses IP-family-agnostic capacity) + IP: ptr.To(a.getCapacity(networkInterface, len(cloudPrivateIPConfigs))), } return []*NodeEgressIPConfiguration{config}, nil } diff --git a/pkg/cloudprovider/cloudprovider.go b/pkg/cloudprovider/cloudprovider.go index a26005b99..934678782 100644 --- a/pkg/cloudprovider/cloudprovider.go +++ b/pkg/cloudprovider/cloudprovider.go @@ -4,14 +4,15 @@ import ( "context" "errors" "fmt" - apifeatures "github.com/openshift/api/features" - configv1 "github.com/openshift/api/config/v1" - "github.com/openshift/library-go/pkg/operator/configobserver/featuregates" "net" "os" "path/filepath" "sync" + configv1 "github.com/openshift/api/config/v1" + apifeatures "github.com/openshift/api/features" + "github.com/openshift/library-go/pkg/operator/configobserver/featuregates" + v1 "github.com/openshift/api/cloudnetwork/v1" corev1 "k8s.io/api/core/v1" ) @@ -102,9 +103,9 @@ type ifAddr struct { } type capacity struct { - IPv4 int `json:"ipv4,omitempty"` - IPv6 int `json:"ipv6,omitempty"` - IP int `json:"ip,omitempty"` + IPv4 *int `json:"ipv4,omitempty"` + IPv6 *int `json:"ipv6,omitempty"` + IP *int `json:"ip,omitempty"` } // NodeEgressIPConfiguration stores details - specific to each cloud - which are diff --git a/pkg/cloudprovider/gcp.go b/pkg/cloudprovider/gcp.go index 380b97b0a..2592be6bd 100644 --- a/pkg/cloudprovider/gcp.go +++ b/pkg/cloudprovider/gcp.go @@ -12,6 +12,7 @@ import ( "google.golang.org/api/option" corev1 "k8s.io/api/core/v1" utilnet "k8s.io/utils/net" + "k8s.io/utils/ptr" ) const ( @@ -182,7 +183,8 @@ func (g *GCP) GetNodeEgressIPConfiguration(node *corev1.Node, cloudPrivateIPConf config.IFAddr.IPv6 = v6Subnet.String() } config.Capacity = capacity{ - IP: g.getCapacity(networkInterface, len(cloudPrivateIPConfigs)), + // IPv4 and IPv6 fields not used by GCP (uses IP-family-agnostic capacity) + IP: ptr.To(g.getCapacity(networkInterface, len(cloudPrivateIPConfigs))), } return []*NodeEgressIPConfiguration{config}, nil //nolint:staticcheck } diff --git a/pkg/cloudprovider/openstack.go b/pkg/cloudprovider/openstack.go index 46350bef5..32cb4a3fd 100644 --- a/pkg/cloudprovider/openstack.go +++ b/pkg/cloudprovider/openstack.go @@ -31,6 +31,7 @@ import ( "k8s.io/client-go/util/retry" "k8s.io/klog/v2" utilnet "k8s.io/utils/net" + "k8s.io/utils/ptr" ) const ( @@ -608,7 +609,8 @@ func (o *OpenStack) getNeutronPortNodeEgressIPConfiguration(p neutronports.Port, IPv6: ipv6, }, Capacity: capacity{ - IP: c, + // IPv4 and IPv6 fields not used by OpenStack (uses IP-family-agnostic capacity) + IP: ptr.To(c), }, }, nil } diff --git a/pkg/cloudprovider/openstack_test.go b/pkg/cloudprovider/openstack_test.go index c6370eb91..af67525bd 100644 --- a/pkg/cloudprovider/openstack_test.go +++ b/pkg/cloudprovider/openstack_test.go @@ -22,6 +22,7 @@ import ( corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/utils/ptr" ) const ( @@ -677,7 +678,7 @@ func TestOpenStackPlugin(t *testing.T) { IPv6: "2000::/64", }, Capacity: capacity{ - IP: openstackMaxCapacity, + IP: ptr.To(openstackMaxCapacity), }, }, } @@ -804,7 +805,7 @@ func TestGetNodeEgressIPConfiguration(t *testing.T) { IPv6: "2000::/64", }, Capacity: capacity{ - IP: openstackMaxCapacity, + IP: ptr.To(openstackMaxCapacity), }, }, }, @@ -840,7 +841,7 @@ func TestGetNodeEgressIPConfiguration(t *testing.T) { IPv6: "2001::/64", }, Capacity: capacity{ - IP: openstackMaxCapacity, + IP: ptr.To(openstackMaxCapacity), }, }, }, @@ -884,7 +885,7 @@ func TestGetNodeEgressIPConfiguration(t *testing.T) { IPv6: "2000::/64", }, Capacity: capacity{ - IP: openstackMaxCapacity, + IP: ptr.To(openstackMaxCapacity), }, }, }, @@ -931,7 +932,7 @@ func TestGetNodeEgressIPConfiguration(t *testing.T) { IPv6: "2000::/64", }, Capacity: capacity{ - IP: openstackMaxCapacity, + IP: ptr.To(openstackMaxCapacity), }, }, }, @@ -943,7 +944,7 @@ func TestGetNodeEgressIPConfiguration(t *testing.T) { IPv6: "2001::/64", }, Capacity: capacity{ - IP: openstackMaxCapacity, + IP: ptr.To(openstackMaxCapacity), }, }, }, @@ -1028,7 +1029,7 @@ func TestGetNeutronPortNodeEgressIPConfiguration(t *testing.T) { IPv6: "2000::/64", }, Capacity: capacity{ - IP: openstackMaxCapacity - 2, // 2 allowed_address_pairs configured on the port. + IP: ptr.To(openstackMaxCapacity - 2), // 2 allowed_address_pairs configured on the port. }, }, }, @@ -1041,7 +1042,7 @@ func TestGetNeutronPortNodeEgressIPConfiguration(t *testing.T) { IPv6: "2000::/64", }, Capacity: capacity{ - IP: openstackMaxCapacity + 3 - 2, // excluding 2 allowed_address_pairs configured on the port. + IP: ptr.To(openstackMaxCapacity + 3 - 2), // excluding 2 allowed_address_pairs configured on the port. }, }, // Configure cloudPrivateIPConfigs with 3 ips are within neutron subnet, 1 ip outside neutron subnet.