5 changes: 5 additions & 0 deletions pkg/cloudprovider/aws.go
@@ -226,6 +226,11 @@ func (a *AWS) GetNodeEgressIPConfiguration(node *corev1.Node, cloudPrivateIPConf
return []*NodeEgressIPConfiguration{config}, nil
}

func (a *AWS) SyncLBBackend(_ net.IP, _ *corev1.Node) error {
// We don't add egress IPs to the AWS public LB backend; nothing to do
return nil
}

// Unfortunately the AWS API (WaitUntilInstanceRunning) only handles equality
// assertion: so on delete we can't specify and assert that the IP which is
// being removed is completely removed, we are forced to do the inverse, i.e:
169 changes: 64 additions & 105 deletions pkg/cloudprovider/azure.go
@@ -9,13 +9,11 @@ import (
"sync"
"time"

"github.com/Azure/azure-sdk-for-go/sdk/azcore/runtime"
"k8s.io/utils/ptr"

"github.com/Azure/azure-sdk-for-go/sdk/azcore"
"github.com/Azure/azure-sdk-for-go/sdk/azcore/arm"
"github.com/Azure/azure-sdk-for-go/sdk/azcore/cloud"
"github.com/Azure/azure-sdk-for-go/sdk/azcore/policy"
"github.com/Azure/azure-sdk-for-go/sdk/azcore/runtime"
"github.com/Azure/azure-sdk-for-go/sdk/azidentity"
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute"
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v6"
@@ -26,6 +24,7 @@ import (
corev1 "k8s.io/api/core/v1"
"k8s.io/klog/v2"
utilnet "k8s.io/utils/net"
"k8s.io/utils/ptr"
)

const (
@@ -48,10 +47,10 @@ type Azure struct {
vmClient *armcompute.VirtualMachinesClient
virtualNetworkClient *armnetwork.VirtualNetworksClient
networkClient *armnetwork.InterfacesClient
backendAddressPoolClient *armnetwork.LoadBalancerBackendAddressPoolsClient
nodeMapLock sync.Mutex
nodeLockMap map[string]*sync.Mutex
azureWorkloadIdentityEnabled bool
lbBackendPoolSynced bool
}

type azureCredentialsConfig struct {
@@ -162,11 +161,6 @@ func (a *Azure) initCredentials() error {
return fmt.Errorf("failed to initialize new VirtualNetworksClient: %w", err)
}

a.backendAddressPoolClient, err = armnetwork.NewLoadBalancerBackendAddressPoolsClient(cfg.subscriptionID, cred, options)
if err != nil {
return fmt.Errorf("failed to initialize new LoadBalancerBackendAddressPoolsClient: %w", err)
}

return nil
}

@@ -198,65 +192,14 @@ func (a *Azure) AssignPrivateIP(ip net.IP, node *corev1.Node) error {
name := fmt.Sprintf("%s_%s", node.Name, ipc)
untrue := false

// In some Azure setups (Azure private, public ARO, private ARO) outbound connectivity is achieved through
// outbound rules tied to the backend address pool of the primary IP of the VM NIC. An Azure constraint
// forbids the creation of a secondary IP tied to such address pool and would result in
// OutboundRuleCannotBeUsedWithBackendAddressPoolThatIsReferencedBySecondaryIpConfigs.
// Work around it by not specifying the backend address pool when an outbound rule is set, even though
// that means preventing outbound connectivity to the egress IP, which will be able to reach the
// infrastructure subnet nonetheless. In public Azure clusters, outbound connectivity is achieved through
// UserDefinedRouting, which doesn't impose such constraints on secondary IPs.
loadBalancerBackendAddressPoolsArgument := networkInterface.Properties.IPConfigurations[0].Properties.LoadBalancerBackendAddressPools
var attachedOutboundRule *armnetwork.SubResource
OuterLoop:
for _, ipconfig := range networkInterface.Properties.IPConfigurations {
if ipconfig.Properties.LoadBalancerBackendAddressPools != nil {
for _, pool := range ipconfig.Properties.LoadBalancerBackendAddressPools {
if pool.ID == nil {
continue
}
// for some reason, the struct for the pool above is not entirely filled out:
// BackendAddressPoolPropertiesFormat:(*network.BackendAddressPoolPropertiesFormat)(nil)
// Do a separate get for this pool in order to check whether there are any outbound rules
// attached to it
realPool, err := a.getBackendAddressPool(ptr.Deref(pool.ID, ""))
if err != nil {
return fmt.Errorf("error looking up backend address pool %s with ID %s: %v", ptr.Deref(pool.Name, ""), ptr.Deref(pool.ID, ""), err)
}
if len(realPool.Properties.LoadBalancerBackendAddresses) > 0 {
if realPool.Properties.OutboundRule != nil {
loadBalancerBackendAddressPoolsArgument = nil
attachedOutboundRule = realPool.Properties.OutboundRule
break OuterLoop
}
if len(realPool.Properties.OutboundRules) > 0 {
loadBalancerBackendAddressPoolsArgument = nil
attachedOutboundRule = (realPool.Properties.OutboundRules)[0]
break OuterLoop
}
}
}
}
}
if loadBalancerBackendAddressPoolsArgument == nil {
outboundRuleStr := ""
if attachedOutboundRule != nil && attachedOutboundRule.ID != nil {
// https://issues.redhat.com/browse/OCPBUGS-33617 showed that there can be a rule without an ID...
outboundRuleStr = fmt.Sprintf(": %s", ptr.Deref(attachedOutboundRule.ID, ""))
}
klog.Warningf("Egress IP %s will have no outbound connectivity except for the infrastructure subnet: "+
"omitting backend address pool when adding secondary IP: it has an outbound rule already%s",
ipc, outboundRuleStr)
}
newIPConfiguration := &armnetwork.InterfaceIPConfiguration{
Name: &name,
Properties: &armnetwork.InterfaceIPConfigurationPropertiesFormat{
PrivateIPAddress: &ipc,
PrivateIPAllocationMethod: ptr.To(armnetwork.IPAllocationMethodStatic),
Subnet: networkInterface.Properties.IPConfigurations[0].Properties.Subnet,
Primary: &untrue,
LoadBalancerBackendAddressPools: loadBalancerBackendAddressPoolsArgument,
ApplicationSecurityGroups: applicationSecurityGroups,
PrivateIPAddress: &ipc,
PrivateIPAllocationMethod: ptr.To(armnetwork.IPAllocationMethodStatic),
Subnet: networkInterface.Properties.IPConfigurations[0].Properties.Subnet,
Primary: &untrue,
ApplicationSecurityGroups: applicationSecurityGroups,
},
}
for _, ipCfg := range ipConfigurations {
@@ -272,6 +215,8 @@
ipConfigurations = append(ipConfigurations, newIPConfiguration)
networkInterface.Properties.IPConfigurations = ipConfigurations
// Send the request
klog.Warningf("Egress IP %s will have no outbound connectivity except for the infrastructure subnet: "+
"omitting backend address pool when adding secondary IP", ipc)
poller, err := a.createOrUpdate(networkInterface)
if err != nil {
return err
@@ -357,6 +302,60 @@ func (a *Azure) GetNodeEgressIPConfiguration(node *corev1.Node, cloudPrivateIPCo
return []*NodeEgressIPConfiguration{config}, nil
}

// The consensus is to not add egress IPs to the public load balancer
// backend pool, regardless of the presence of an outbound rule.
// During upgrade, this function removes any egress IP that was previously
// added to the public load balancer backend pool.
func (a *Azure) SyncLBBackend(ip net.IP, node *corev1.Node) error {
Member: Have you tested this SyncLBBackend function with many EIPs already in place? Because AFAIK the Azure APIs are quite slow, and I'm not sure how it behaves when syncing already-existing IPs.
Have you explored syncing the IPs that belong to a node with a single API call? Something similar to processing existing items (like this) before watching CloudPrivateIPConfig objects.

Contributor Author: I have tested with around 10 egress IPs and have not seen much delay. We can ask QE to test with more egress IPs; there is a Slack thread where the ARO team did some testing with this PR:
https://redhat-internal.slack.com/archives/C09G14XDR9B/p1759942816358369?thread_ts=1759848001.402299&cid=C09G14XDR9B
Egress IPs are queued separately and may be difficult to obtain all at once. This is also a one-time operation, expected to run mostly during the upgrade; I do not anticipate it taking much time or extending beyond the upgrade completion time.
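For reference, here is a rough sketch (not part of this PR) of the node-level variant the reviewer is asking about, reusing helpers that already exist in this file (getNodeLock, getInstance, getNetworkInterfaces, createOrUpdate, waitForCompletion). It detaches every non-primary IP configuration from the load balancer backend pools in a single NIC update per node; keying off the Primary flag rather than the exact egress IP address is an assumption made only for this sketch.

// Hypothetical sketch: clean up all secondary IP configurations on a node with
// one NIC read and one createOrUpdate call, instead of one call per egress IP.
func (a *Azure) syncNodeLBBackend(node *corev1.Node) error {
	nodeLock := a.getNodeLock(node.Name)
	nodeLock.Lock()
	defer nodeLock.Unlock()

	instance, err := a.getInstance(node)
	if err != nil {
		return err
	}
	networkInterfaces, err := a.getNetworkInterfaces(instance)
	if err != nil {
		return err
	}
	if networkInterfaces[0].Properties == nil {
		return fmt.Errorf("nil network interface properties")
	}
	networkInterface := networkInterfaces[0]

	modified := false
	for _, ipCfg := range networkInterface.Properties.IPConfigurations {
		// Leave the primary IP configuration (and its backend pool membership)
		// alone; only secondary configurations, which carry egress IPs, are detached.
		if ptr.Deref(ipCfg.Properties.Primary, false) {
			continue
		}
		if ipCfg.Properties.LoadBalancerBackendAddressPools != nil {
			ipCfg.Properties.LoadBalancerBackendAddressPools = nil
			modified = true
		}
	}
	if !modified {
		return nil
	}
	poller, err := a.createOrUpdate(networkInterface)
	if err != nil {
		return err
	}
	return a.waitForCompletion(poller)
}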

if a.lbBackendPoolSynced {
// Nothing to do: the LB backend has already been synced
return nil
}
ipc := ip.String()
klog.Infof("Acquiring node lock for modifying load balancer backend pool, node: %s, ip: %s", node.Name, ipc)
nodeLock := a.getNodeLock(node.Name)
nodeLock.Lock()
defer nodeLock.Unlock()
instance, err := a.getInstance(node)
if err != nil {
return err
}
networkInterfaces, err := a.getNetworkInterfaces(instance)
if err != nil {
return err
}
if networkInterfaces[0].Properties == nil {
return fmt.Errorf("nil network interface properties")
}
// Perform the operation against the first interface listed, which will be
// the primary interface (if it's defined as such) or the first one returned
// following the order Azure specifies.
networkInterface := networkInterfaces[0]
var loadBalancerBackendPoolModified bool
// omit Egress IP from LB backend pool
ipConfigurations := networkInterface.Properties.IPConfigurations
for _, ipCfg := range ipConfigurations {
if ptr.Deref(ipCfg.Properties.PrivateIPAddress, "") == ipc &&
ipCfg.Properties.LoadBalancerBackendAddressPools != nil {
ipCfg.Properties.LoadBalancerBackendAddressPools = nil
loadBalancerBackendPoolModified = true
}
}
if loadBalancerBackendPoolModified {
networkInterface.Properties.IPConfigurations = ipConfigurations
poller, err := a.createOrUpdate(networkInterface)
if err != nil {
return err
}
if err = a.waitForCompletion(poller); err != nil {
return err
}
a.lbBackendPoolSynced = true
return nil
}
Comment on lines +310 to +355
⚠️ Potential issue | 🔴 Critical

Do not short-circuit backend cleanup after the first IP.

lbBackendPoolSynced is a single flag on the provider. After the very first IP update sets it to true, every later call returns immediately, so we never remove backend pool references for the rest of the cluster’s Egress IPs. That is a functional regression for any cluster with more than one IP (or any IP reconciled after the first). We need to track sync state per IP (or simply drop the early return and rely on idempotent updates) so that each IP can be cleaned up.

A minimal fix is to replace the boolean with per-IP bookkeeping:

 type Azure struct {
 	CloudProvider
 	platformStatus               *configv1.AzurePlatformStatus
 	resourceGroup                string
 	env                          azureapi.Environment
 	vmClient                     *armcompute.VirtualMachinesClient
 	virtualNetworkClient         *armnetwork.VirtualNetworksClient
 	networkClient                *armnetwork.InterfacesClient
 	nodeMapLock                  sync.Mutex
 	nodeLockMap                  map[string]*sync.Mutex
 	azureWorkloadIdentityEnabled bool
-	lbBackendPoolSynced          bool
+	lbBackendPoolSynced          map[string]bool
 }

Initialize the map where we build the Azure struct, and update the method to key off node.Name/ip.String():

-	if a.lbBackendPoolSynced {
-		return nil
-	}
+	if a.lbBackendPoolSynced == nil {
+		a.lbBackendPoolSynced = make(map[string]bool)
+	}
+	cacheKey := fmt.Sprintf("%s|%s", node.Name, ipc)
+	if a.lbBackendPoolSynced[cacheKey] {
+		return nil
+	}

-		a.lbBackendPoolSynced = true
+		a.lbBackendPoolSynced[cacheKey] = true

Make sure to import "fmt" if it’s not already present. That preserves the “run once” optimization per IP while still cleaning every IP.

Committable suggestion skipped: line range outside the PR's diff.

🤖 Prompt for AI Agents
In pkg/cloudprovider/azure.go around lines 310-355 the code short-circuits
cleanup using a single boolean a.lbBackendPoolSynced causing only the first IP
to ever be cleaned; replace this single flag with per-IP bookkeeping (e.g.
map[string]bool keyed by node.Name+"/"+ip.String()) initialized where the Azure
struct is built, change the early-return check to look up the node/ip key, and
after a successful update set the map entry to true for that key; ensure the map
is properly created on struct initialization and import fmt if not already
present.
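Put concretely, the per-node/IP bookkeeping suggested above boils down to the pattern below. This is a standalone illustration with hypothetical names (syncTracker, needsSync, markSynced), not code taken from the PR or its suggestion:

package main

import (
	"fmt"
	"net"
)

// syncTracker illustrates per-IP bookkeeping: a map keyed by node name and IP,
// instead of a single boolean, so every egress IP is cleaned up exactly once.
type syncTracker struct {
	synced map[string]bool
}

func (t *syncTracker) key(nodeName string, ip net.IP) string {
	return fmt.Sprintf("%s|%s", nodeName, ip.String())
}

func (t *syncTracker) needsSync(nodeName string, ip net.IP) bool {
	if t.synced == nil {
		t.synced = make(map[string]bool)
	}
	return !t.synced[t.key(nodeName, ip)]
}

func (t *syncTracker) markSynced(nodeName string, ip net.IP) {
	if t.synced == nil {
		t.synced = make(map[string]bool)
	}
	t.synced[t.key(nodeName, ip)] = true
}

func main() {
	var t syncTracker
	ip1 := net.ParseIP("10.0.128.10")
	ip2 := net.ParseIP("10.0.128.11")
	fmt.Println(t.needsSync("node-a", ip1)) // true: first time this node/IP pair is seen
	t.markSynced("node-a", ip1)
	fmt.Println(t.needsSync("node-a", ip1)) // false: already cleaned up, skip the NIC update
	fmt.Println(t.needsSync("node-a", ip2)) // true: a second IP on the same node still gets cleaned
}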

return nil
}

func (a *Azure) createOrUpdate(networkInterface armnetwork.Interface) (*runtime.Poller[armnetwork.InterfacesClientCreateOrUpdateResponse], error) {
ctx, cancel := context.WithTimeout(a.ctx, defaultAzureOperationTimeout)
defer cancel()
@@ -491,46 +490,6 @@ func (a *Azure) getNetworkInterfaces(instance *armcompute.VirtualMachine) ([]arm
return networkInterfaces, nil
}

func splitObjectID(azureResourceID string) (resourceGroupName, loadBalancerName, backendAddressPoolName string) {
// example of an azureResourceID:
// "/subscriptions/53b8f551-f0fc-4bea-8cba-6d1fefd54c8a/resourceGroups/huirwang-debug1-2qh9t-rg/providers/Microsoft.Network/loadBalancers/huirwang-debug1-2qh9t/backendAddressPools/huirwang-debug1-2qh9t"

// Split the Azure resource ID into parts using "/"
parts := strings.Split(azureResourceID, "/")

// Iterate through the parts to find the relevant subIDs
for i, part := range parts {
switch part {
case "resourceGroups":
if i+1 < len(parts) {
resourceGroupName = parts[i+1]
}
case "loadBalancers":
if i+1 < len(parts) {
loadBalancerName = parts[i+1]
}
case "backendAddressPools":
if i+1 < len(parts) {
backendAddressPoolName = parts[i+1]
}
}
}
return
}

func (a *Azure) getBackendAddressPool(poolID string) (*armnetwork.BackendAddressPool, error) {
ctx, cancel := context.WithTimeout(a.ctx, defaultAzureOperationTimeout)
defer cancel()
resourceGroupName, loadBalancerName, backendAddressPoolName := splitObjectID(poolID)
response, err := a.backendAddressPoolClient.Get(ctx, resourceGroupName, loadBalancerName, backendAddressPoolName, nil)
if err != nil {
return nil, fmt.Errorf("failed to retrieve backend address pool for backendAddressPoolClient=%s, loadBalancerName=%s, backendAddressPoolName=%s: %w",
resourceGroupName, loadBalancerName, backendAddressPoolName, err)
}
return &response.BackendAddressPool, nil

}

func (a *Azure) getNetworkInterface(id string) (armnetwork.Interface, error) {
ctx, cancel := context.WithTimeout(a.ctx, defaultAzureOperationTimeout)
defer cancel()
7 changes: 5 additions & 2 deletions pkg/cloudprovider/cloudprovider.go
@@ -9,11 +9,10 @@ import (
"path/filepath"
"sync"

v1 "github.com/openshift/api/cloudnetwork/v1"
configv1 "github.com/openshift/api/config/v1"
apifeatures "github.com/openshift/api/features"
"github.com/openshift/library-go/pkg/operator/configobserver/featuregates"

v1 "github.com/openshift/api/cloudnetwork/v1"
corev1 "k8s.io/api/core/v1"
)

@@ -61,6 +60,10 @@ type CloudProviderIntf interface {
// instance and IP family (AWS), also: the interface is either keyed by name
// (GCP) or ID (Azure, AWS).
GetNodeEgressIPConfiguration(node *corev1.Node, cloudPrivateIPConfigs []*v1.CloudPrivateIPConfig) ([]*NodeEgressIPConfiguration, error)

// SyncLBBackend removes any egress IP that has already been added to the backend pool of
// a public load balancer. This is mostly Azure-specific and may be removed later.
SyncLBBackend(ip net.IP, node *corev1.Node) error
}

// CloudProviderWithMoveIntf is additional interface that can be added to cloud
4 changes: 4 additions & 0 deletions pkg/cloudprovider/cloudprovider_fake.go
@@ -69,3 +69,7 @@ func (f *FakeCloudProvider) GetNodeEgressIPConfiguration(node *corev1.Node, clou
}
return nil, nil
}

func (f *FakeCloudProvider) SyncLBBackend(_ net.IP, _ *corev1.Node) error {
return nil
}
5 changes: 5 additions & 0 deletions pkg/cloudprovider/gcp.go
@@ -191,6 +191,11 @@ func (g *GCP) GetNodeEgressIPConfiguration(node *corev1.Node, cloudPrivateIPConf
return nil, nil
}

func (g *GCP) SyncLBBackend(_ net.IP, _ *corev1.Node) error {
// We don't add egress IPs to the GCP public LB backend; nothing to do
return nil
}

// The GCP zone operations API call. All GCP infrastructure modifications are
// assigned a unique operation ID and are queued in a global/zone operations
// queue. In the case of assignments of private IP addresses to instances, the
Expand Down
5 changes: 5 additions & 0 deletions pkg/cloudprovider/openstack.go
@@ -546,6 +546,11 @@ func (o *OpenStack) GetNodeEgressIPConfiguration(node *corev1.Node, cloudPrivate
return nil, fmt.Errorf("no suitable interface configurations found")
}

func (o *OpenStack) SyncLBBackend(_ net.IP, _ *corev1.Node) error {
// We don't add egress IPs to the OpenStack public LB backend; nothing to do
return nil
}

// getNeutronPortNodeEgressIPConfiguration renders the NeutronPortNodeEgressIPConfiguration for a given port.
// The interface is keyed by a neutron UUID.
// If multiple IPv4 or multiple IPv6 subnets are attached to the same port, throw an error.
Expand Down
@@ -180,8 +180,14 @@ func (c *CloudPrivateIPConfigController) SyncHandler(key string) error {

nodeNameToAdd, nodeNameToDel := c.computeOp(cloudPrivateIPConfig)
switch {
// On NOOP, make sure the LB backend is synced for this egress IP, then dequeue
case nodeNameToAdd == "" && nodeNameToDel == "":
node, err := c.nodesLister.Get(cloudPrivateIPConfig.Spec.Node)
if err != nil {
return err
}
if err := c.cloudProviderClient.SyncLBBackend(ip, node); err != nil {
return fmt.Errorf("error while syncing egress IP %q added to LB backend pool", key)
}
return nil
case nodeNameToAdd != "" && nodeNameToDel != "":
klog.Infof("CloudPrivateIPConfig: %q will be moved from node %q to node %q", key, nodeNameToDel, nodeNameToAdd)