Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion dist/images/ovnkube.sh
Original file line number Diff line number Diff line change
Expand Up @@ -858,7 +858,7 @@ function memory_trim_on_compaction_supported {
}

function get_node_zone() {
zone=$(kubectl --subresource=status --server=${K8S_APISERVER} --token=${k8s_token} --certificate-authority=${K8S_CACERT} \
zone=$(kubectl --server=${K8S_APISERVER} --token=${k8s_token} --certificate-authority=${K8S_CACERT} \
get node ${K8S_NODE} -o=jsonpath={'.metadata.labels.k8s\.ovn\.org/zone-name'})
if [ -z "$zone" ]; then
if [[ ${ovn_enable_interconnect} == "true" ]]; then
Expand Down
10 changes: 5 additions & 5 deletions dist/templates/rbac-ovnkube-node.yaml.j2
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,9 @@ subjects:
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: ovnkube-node-status-reader
name: ovnkube-node-reader
roleRef:
name: ovnkube-node-status-reader
name: ovnkube-node-reader
kind: ClusterRole
apiGroup: rbac.authorization.k8s.io
subjects:
Expand Down Expand Up @@ -114,12 +114,12 @@ subjects:
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: ovnkube-node-status-reader
name: ovnkube-node-reader
rules:
- apiGroups: [""]
resources:
- nodes/status
verbs: [ "get" ]
- nodes
verbs: [ "get", "list", "watch" ]

---
apiVersion: rbac.authorization.k8s.io/v1
Expand Down
9 changes: 9 additions & 0 deletions go-controller/pkg/allocator/id/allocator.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ type Allocator interface {
ReserveID(name string, id int) error
ReleaseID(name string)
ForName(name string) NamedAllocator
GetID(name string) int
}

// NamedAllocator of IDs for a specific resource
Expand Down Expand Up @@ -107,6 +108,14 @@ func (idAllocator *idAllocator) ForName(name string) NamedAllocator {
}
}

// GetID returns the ID stored for name in the allocator's name-to-ID map.
// It returns invalidID when the map has no entry for name. The lookup does
// not allocate or reserve anything.
func (idAllocator *idAllocator) GetID(name string) int {
	if id, found := idAllocator.nameIdMap.Load(name); found {
		return id
	}
	return invalidID
}

type namedAllocator struct {
name string
allocator *idAllocator
Expand Down
4 changes: 4 additions & 0 deletions go-controller/pkg/clustermanager/pod/allocator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,10 @@ func (a *idAllocatorStub) AllocateID(string) (int, error) {
panic("not implemented") // TODO: Implement
}

// GetID is an unimplemented stub method: calling it always panics with
// "not implemented".
func (a *idAllocatorStub) GetID(_ string) int {
	panic("not implemented") // TODO: Implement
}

// ReserveID is an unimplemented stub method: calling it always panics with
// "not implemented".
func (a *idAllocatorStub) ReserveID(_ string, _ int) error {
	panic("not implemented") // TODO: Implement
}
Expand Down
2 changes: 1 addition & 1 deletion go-controller/pkg/controllermanager/controller_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -441,7 +441,7 @@ func (cm *ControllerManager) Start(ctx context.Context) error {

err = cm.createACLLoggingMeter()
if err != nil {
return nil
return fmt.Errorf("failed to create acl logging meter: %w", err)
}

if config.Metrics.EnableConfigDuration {
Expand Down
126 changes: 85 additions & 41 deletions go-controller/pkg/networkmanager/nad_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package networkmanager
import (
"fmt"
"reflect"
"sort"
"strconv"
"sync"
"time"
Expand Down Expand Up @@ -76,6 +77,7 @@ type nadController struct {
// primaryNADs holds a mapping of namespace to NAD of primary UDNs
primaryNADs map[string]string

// networkIDAllocator is used by cluster-manager to allocate new IDs; in zone/node mode it is only used as a cache
networkIDAllocator id.Allocator
tunnelKeysAllocator *id.TunnelKeysAllocator
nadClient nadclientset.Interface
Expand Down Expand Up @@ -108,14 +110,13 @@ func newController(
c.nadClient = ovnClient.NetworkAttchDefClient
}

// this is cluster network manager, so we allocate network IDs and tunnel keys
c.networkIDAllocator = id.NewIDAllocator("NetworkIDs", MaxNetworks)
// Reserve the ID of the default network
err := c.networkIDAllocator.ReserveID(types.DefaultNetworkName, types.DefaultNetworkID)
if err != nil {
return nil, fmt.Errorf("failed to allocate default network ID: %w", err)
}
if zone == "" && node == "" {
c.networkIDAllocator = id.NewIDAllocator("NetworkIDs", MaxNetworks)
// Reserve the ID of the default network
err := c.networkIDAllocator.ReserveID(types.DefaultNetworkName, types.DefaultNetworkID)
if err != nil {
return nil, fmt.Errorf("failed to allocate default network ID: %w", err)
}
// tunnelKeysAllocator must be passed for cluster manager
c.tunnelKeysAllocator = tunnelKeysAllocator
}
Expand Down Expand Up @@ -220,7 +221,7 @@ func (c *nadController) syncAll() (err error) {
nadsWithoutID := []*nettypes.NetworkAttachmentDefinition{}
for _, nad := range existingNADs {
// skip NADs that are not annotated with an ID
if c.networkIDAllocator != nil && nad.Annotations[types.OvnNetworkIDAnnotation] == "" {
if nad.Annotations[types.OvnNetworkIDAnnotation] == "" {
nadsWithoutID = append(nadsWithoutID, nad)
continue
}
Expand All @@ -234,28 +235,35 @@ func (c *nadController) syncAll() (err error) {
return nil
}

// preallocate all node IDs to avoid new NADs taking them post start up
// If we are missing IDs, get them from the nodes which is where we
// originally had them
klog.V(5).Infof("%s: %d NADs are missing the network ID annotation, fetching from nodes", c.name, len(nadsWithoutID))
nodes, err := c.nodeLister.List(labels.Everything())
if err != nil {
return fmt.Errorf("error listing nodes: %w", err)
}
for _, n := range nodes {
networkIdsMap, err := util.GetNodeNetworkIDsAnnotationNetworkIDs(n)
if err == nil {
for networkName, id := range networkIdsMap {
// Reserve the id for the network name. We can safely
// ignore any errors if there are duplicate ids or if
// two networks have the same id. We will assign network
// IDs anyway on sync.
_ = c.networkIDAllocator.ReserveID(networkName, id)
klog.Infof("%s: %d NADs are missing the network ID annotation, fetching from nodes", c.name, len(nadsWithoutID))
for _, nad := range nadsWithoutID {
nadNetwork, err := util.ParseNADInfo(nad)
if err != nil {
// in case the type for the NAD is not ovn-k we should not record the error event
if err.Error() != util.ErrorAttachDefNotOvnManaged.Error() {
klog.Errorf("%s: failed parsing NAD %s/%s: %v", c.name, nad.Namespace, nad.Name, err)
}
continue
}
netID, err := c.getNetworkIDFromNode(nadNetwork)
if err != nil {
return fmt.Errorf("%s: failed to fetch network ID from nodes for nad %s/%s: %v",
c.name, nad.Namespace, nad.Name, err)
}
if netID != types.InvalidID {
// Reserve the id for the network name. We can safely
// ignore any errors if there are duplicate ids or if
// two networks have the same id. We will assign network
// IDs anyway on sync.
_ = c.networkIDAllocator.ReserveID(nadNetwork.GetNetworkName(), netID)
}
}

// finally process the pending NADs
for _, nad := range existingNADs {
for _, nad := range nadsWithoutID {
err := syncNAD(nad)
if err != nil {
return err
Expand Down Expand Up @@ -608,12 +616,19 @@ func (c *nadController) handleNetworkAnnotations(old util.NetInfo, new util.Muta
}

id := types.InvalidID
// check what ID is currently annotated
if nad != nil && nad.Annotations[types.OvnNetworkIDAnnotation] != "" {
annotated := nad.Annotations[types.OvnNetworkIDAnnotation]
id, err = strconv.Atoi(annotated)
if err != nil {
return fmt.Errorf("failed to parse annotated network ID: %w", err)

// check if in cache first
if new != nil {
id = c.networkIDAllocator.GetID(new.GetNetworkName())
}
if nad != nil && id == types.InvalidID {
// check what ID is currently annotated
if nad.Annotations[types.OvnNetworkIDAnnotation] != "" {
annotated := nad.Annotations[types.OvnNetworkIDAnnotation]
id, err = strconv.Atoi(annotated)
if err != nil {
return fmt.Errorf("failed to parse annotated network ID: %w", err)
}
}
}

Expand All @@ -626,23 +641,15 @@ func (c *nadController) handleNetworkAnnotations(old util.NetInfo, new util.Muta
}
}

// this is not the cluster manager nad controller and we are not allocating
// so just return what we got from the annotation
if c.networkIDAllocator == nil {
if new != nil {
new.SetNetworkID(id)
new.SetTunnelKeys(tunnelKeys)
}
return nil
}

// release old ID if the network is being deleted
if old != nil && !old.IsDefault() && len(old.GetNADs()) == 0 {
c.networkIDAllocator.ReleaseID(old.GetNetworkName())
c.tunnelKeysAllocator.ReleaseKeys(old.GetNetworkName())
if c.isClusterManagerMode() {
c.tunnelKeysAllocator.ReleaseKeys(old.GetNetworkName())
}
}

// nothing to allocate
// nothing to allocate, delete case
if new == nil {
return nil
}
Expand All @@ -656,6 +663,15 @@ func (c *nadController) handleNetworkAnnotations(old util.NetInfo, new util.Muta
id = types.InvalidID
}
}

// this is not the cluster manager nad controller, and we are not allocating
// so just return what ids we already found
if !c.isClusterManagerMode() {
new.SetNetworkID(id)
new.SetTunnelKeys(tunnelKeys)
return nil
}

// tunnel key annotation doesn't need the same check ^ because it is initialized outside the
// nad controller and has already assured that all annotated tunnel keys are reserved.

Expand Down Expand Up @@ -749,6 +765,30 @@ func (c *nadController) handleNetworkAnnotations(old util.NetInfo, new util.Muta
return nil
}

// getNetworkIDFromNode looks up a legacy network ID for nadNetwork in the
// network-IDs annotation of the nodes returned by the node lister.
//
// Nodes are visited oldest first by creation timestamp, with ties broken by
// node name. This keeps the visiting order, and so the ID that is picked,
// independent of the order the lister returned and of sort.Slice not being
// stable. The first node whose annotation maps the network name to an ID
// other than types.InvalidID wins. Nodes whose annotation cannot be parsed
// are skipped.
//
// It returns types.InvalidID with a nil error when no node carries an ID for
// the network. An error is returned only when listing the nodes fails.
func (c *nadController) getNetworkIDFromNode(nadNetwork util.NetInfo) (int, error) {
	nodes, err := c.nodeLister.List(labels.Everything())
	if err != nil {
		return types.InvalidID, fmt.Errorf("failed to list nodes: %w", err)
	}

	// Sort so that every caller walks the nodes in the same order. Creation
	// timestamps can be equal, so fall back to the node name to get a total
	// order instead of relying on the incoming slice order.
	sort.Slice(nodes, func(i, j int) bool {
		ti, tj := &nodes[i].CreationTimestamp, &nodes[j].CreationTimestamp
		if ti.Equal(tj) {
			return nodes[i].Name < nodes[j].Name
		}
		return ti.Before(tj)
	})

	netName := nadNetwork.GetNetworkName()
	for _, node := range nodes {
		idMap, err := util.GetNodeNetworkIDsAnnotationNetworkIDs(node)
		if err != nil {
			// Annotation could not be parsed on this node; try the next one.
			continue
		}
		if v, ok := idMap[netName]; ok && v != types.InvalidID {
			return v, nil
		}
	}
	return types.InvalidID, nil
}

func (c *nadController) GetActiveNetwork(network string) util.NetInfo {
c.RLock()
defer c.RUnlock()
Expand All @@ -759,6 +799,10 @@ func (c *nadController) GetActiveNetwork(network string) util.NetInfo {
return state.controller
}

// isClusterManagerMode reports whether this controller holds a tunnel keys
// allocator, i.e. whether c.tunnelKeysAllocator is non-nil.
func (c *nadController) isClusterManagerMode() bool {
	hasTunnelKeysAllocator := c.tunnelKeysAllocator != nil
	return hasTunnelKeysAllocator
}

func getNumberOfTunnelKeys(netInfo util.NetInfo) int {
if netInfo.IsDefault() {
// default network does not need tunnel keys allocation because it always uses network ID 0.
Expand Down
Loading