-
Notifications
You must be signed in to change notification settings - Fork 140
CBG-5020: fix panic in eviction based on out of sync stats #7895
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -561,7 +561,10 @@ func (rc *LRURevisionCache) removeFromCacheByCV(ctx context.Context, docID strin | |
| return | ||
| } | ||
| // grab the revid key from the value to enable us to remove the reference from the rev lookup map too | ||
| elem := element.Value.(*revCacheValue) | ||
| elem, ok := element.Value.(*revCacheValue) | ||
| if !ok { | ||
| return | ||
| } | ||
|
|
||
| legacyKey := IDAndRev{DocID: docID, RevID: elem.revID, CollectionID: collectionID} | ||
| rc.lruList.Remove(element) | ||
|
|
@@ -582,7 +585,10 @@ func (rc *LRURevisionCache) removeFromCacheByRev(ctx context.Context, docID, rev | |
| return | ||
| } | ||
| // grab the cv key from the value to enable us to remove the reference from the rev lookup map too | ||
| elem := element.Value.(*revCacheValue) | ||
| elem, ok := element.Value.(*revCacheValue) | ||
| if !ok { | ||
| return | ||
| } | ||
|
|
||
| hlvKey := IDandCV{DocID: docID, Source: elem.cv.SourceID, Version: elem.cv.Value, CollectionID: collectionID} | ||
| rc.lruList.Remove(element) | ||
|
|
@@ -848,6 +854,7 @@ func (value *revCacheValue) store(docRev DocumentRevision) { | |
|
|
||
| func (value *revCacheValue) updateDelta(toDelta RevisionDelta) (diffInBytes int64) { | ||
| value.lock.Lock() | ||
| defer value.lock.Unlock() | ||
| var previousDeltaBytes int64 | ||
| if value.delta != nil { | ||
| // delta exists, need to pull this to update overall memory size correctly | ||
|
|
@@ -858,7 +865,6 @@ func (value *revCacheValue) updateDelta(toDelta RevisionDelta) (diffInBytes int6 | |
| if diffInBytes != 0 { | ||
| value.itemBytes.Add(diffInBytes) | ||
| } | ||
| value.lock.Unlock() | ||
| return diffInBytes | ||
| } | ||
|
|
||
|
|
@@ -912,17 +918,32 @@ func (rc *LRURevisionCache) revCacheMemoryBasedEviction(ctx context.Context) { | |
|
|
||
| // performEviction will evict the oldest items in the cache till we are below the memory threshold | ||
| func (rc *LRURevisionCache) performEviction(ctx context.Context) { | ||
| numItemsRemoved := rc.evictBasedOffMemoryUsage(ctx) | ||
| rc.cacheNumItems.Add(-numItemsRemoved) | ||
| } | ||
|
|
||
| func (rc *LRURevisionCache) evictBasedOffMemoryUsage(ctx context.Context) int64 { | ||
| var numItemsRemoved, numBytesRemoved int64 | ||
| rc.lock.Lock() // hold rev cache lock to remove items from cache until we're below memory threshold for the shard | ||
| rc.lock.Lock() | ||
| defer rc.lock.Unlock() | ||
| // check if we are over memory capacity after holding rev cache mutex (protect against another goroutine evicting whilst waiting for mutex above) | ||
| if currMemoryUsage := rc.currMemoryUsage.Value(); currMemoryUsage > rc.memoryCapacity { | ||
| // find amount of bytes needed to evict till below threshold | ||
| bytesNeededToRemove := currMemoryUsage - rc.memoryCapacity | ||
| for bytesNeededToRemove > numBytesRemoved { | ||
| value := rc._findEvictionValue() | ||
| if value == nil { | ||
| // no more values ready for eviction | ||
| break | ||
| if rc.lruList.Len() > 0 { | ||
| // no more values ready for eviction | ||
| break | ||
| } else { | ||
| // list is empty, nothing more to evict but stats are wrong so zero stats and return | ||
| base.DebugfCtx(ctx, base.KeyCache, "Revision cache memory stats inconsistent for this shard, resetting to zero") | ||
| correctionVal := rc.currMemoryUsage.Value() | ||
| rc.currMemoryUsage.Add(-correctionVal) | ||
| rc.cacheMemoryBytesStat.Add(-correctionVal) | ||
|
Comment on lines
942
to
945
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It would be much clearer and potentially safer to use Set(0) for this than doing two separate Value and Add calls, IMO.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I have changed this and added some comments on this area; we can't quite set |
||
| break | ||
| } | ||
| } | ||
| revKey := IDAndRev{DocID: value.id, RevID: value.revID, CollectionID: value.collectionID} | ||
| hlvKey := IDandCV{DocID: value.id, Source: value.cv.SourceID, Version: value.cv.Value, CollectionID: value.collectionID} | ||
|
|
@@ -953,8 +974,7 @@ func (rc *LRURevisionCache) performEviction(ctx context.Context) { | |
| } | ||
| } | ||
| rc._decrRevCacheMemoryUsage(ctx, -numBytesRemoved) // need update rev cache memory stats before release lock to stop other goroutines evicting based on outdated stats | ||
| rc.lock.Unlock() // release lock after removing items from cache | ||
| rc.cacheNumItems.Add(-numItemsRemoved) | ||
| return numItemsRemoved | ||
| } | ||
|
|
||
| // _decrRevCacheMemoryUsage atomically decreases overall memory usage for cache and the actual rev cache objects usage. | ||
|
|
@@ -991,7 +1011,13 @@ func (rc *LRURevisionCache) incrRevCacheMemoryUsage(ctx context.Context, bytesCo | |
|
|
||
| func (rc *LRURevisionCache) _findEvictionValue() *revCacheValue { | ||
| evictionCandidate := rc.lruList.Back() | ||
| revItem := evictionCandidate.Value.(*revCacheValue) | ||
| if evictionCandidate == nil { | ||
| return nil | ||
| } | ||
| revItem, ok := evictionCandidate.Value.(*revCacheValue) | ||
| if !ok { | ||
| return nil | ||
| } | ||
|
|
||
| if revItem.canEvict.Load() { | ||
| rc.lruList.Remove(evictionCandidate) | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.