Skip to content

Commit

Permalink
fix: eni may detached in race condition
Browse files Browse the repository at this point in the history
Signed-off-by: l1b0k <[email protected]>
  • Loading branch information
l1b0k committed Jan 17, 2025
1 parent 40ebe7b commit f3d1696
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 3 deletions.
28 changes: 25 additions & 3 deletions pkg/eni/local.go
Original file line number Diff line number Diff line change
Expand Up @@ -824,8 +824,7 @@ func (l *Local) Dispose(n int) int {

// 1. check if can dispose the eni
if n >= max(len(l.ipv4), len(l.ipv6)) {
eniType := strings.ToLower(l.eniType)
if eniType != "trunk" && eniType != "erdma" && !l.eni.Trunk && len(l.ipv4.InUse()) == 0 && len(l.ipv6.InUse()) == 0 {
if l.canDispose() {
log.Info("dispose eni")
l.status = statusDeleting
return max(len(l.ipv4), len(l.ipv6))
Expand Down Expand Up @@ -902,7 +901,11 @@ func (l *Local) factoryDisposeWorker(ctx context.Context) {

if l.status == statusDeleting {
// remove the eni

if !l.canDispose() {
l.cond.Wait()
// this can't happen
continue
}
l.cond.L.Unlock()

err := l.rateLimitEni.Wait(ctx)
Expand Down Expand Up @@ -1082,6 +1085,25 @@ func (l *Local) commit(ctx context.Context, respCh chan *AllocResp, ipv4, ipv6 *
}
}

// canDispose will check current eni status , and make sure no pending jobs ,and no pod is using this eni
func (l *Local) canDispose() bool {
if l.eni == nil {
return true
}
eniType := strings.ToLower(l.eniType)
switch eniType {
case "trunk", "erdma":
return false
}
if l.eni.Trunk {
return false
}
return len(l.ipv4.InUse()) == 0 &&
len(l.ipv6.InUse()) == 0 &&
l.allocatingV4.Len() == 0 &&
l.allocatingV6.Len() == 0
}

// syncIPLocked will mark ip as invalid , if not found in remote
func syncIPLocked(lo Set, remote []netip.Addr) {
s := sets.New[netip.Addr](remote...)
Expand Down
32 changes: 32 additions & 0 deletions pkg/eni/local_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,25 @@ func TestLocal_AllocWorker_ParentCancelContext(t *testing.T) {
assert.False(t, ok)
}

func TestLocal_AllocWorker_UpdateCache(t *testing.T) {
local := NewLocalTest(&daemon.ENI{ID: "eni-1"}, nil, &types.PoolConfig{
EnableIPv4: true,
}, "")
cni := &daemon.CNI{PodID: "pod-1"}

ctx, cancel := context.WithCancel(context.Background())
defer cancel()
respCh := make(chan *AllocResp)

req := NewLocalIPRequest()
req.NoCache = true
go local.allocWorker(ctx, cni, req, respCh)
req.cancel()
result, ok := <-respCh
assert.True(t, ok)
assert.NotNil(t, result)
}

func TestLocal_Dispose(t *testing.T) {
local := NewLocalTest(&daemon.ENI{ID: "eni-1"}, nil, &types.PoolConfig{}, "")
local.status = statusInUse
Expand Down Expand Up @@ -229,6 +248,19 @@ func TestLocal_Allocate_NoCache(t *testing.T) {
assert.Equal(t, 1, len(resp))
}

func TestLocal_DisposeFailWhenAllocatingIsNotEmpty(t *testing.T) {
local := NewLocalTest(&daemon.ENI{ID: "eni-1"}, nil, &types.PoolConfig{}, "")
local.status = statusInUse
local.ipv4.Add(NewValidIP(netip.MustParseAddr("192.0.2.1"), true))
local.ipv6.Add(NewValidIP(netip.MustParseAddr("fd00:46dd:e::1"), false))

local.allocatingV4 = append(local.allocatingV4, NewLocalIPRequest())
n := local.Dispose(1)

assert.Equal(t, 1, n)
assert.Equal(t, statusInUse, local.status)
}

func TestLocal_Allocate_NoCache_AllocSuccess(t *testing.T) {
local := NewLocalTest(&daemon.ENI{ID: "eni-1"}, nil, &types.PoolConfig{
MaxIPPerENI: 10, EnableIPv4: true, EnableIPv6: true}, "")
Expand Down

0 comments on commit f3d1696

Please sign in to comment.