@@ -9,27 +9,29 @@ import {
 } from '@powersync/lib-services-framework';
 import {
   BroadcastIterable,
-  CHECKPOINT_INVALIDATE_ALL,
   CheckpointChanges,
   GetCheckpointChangesOptions,
   InternalOpId,
   internalToExternalOpId,
   ProtocolOpId,
   ReplicationCheckpoint,
-  SourceTable,
   storage,
   utils,
-  WatchWriteCheckpointOptions
+  WatchWriteCheckpointOptions,
+  CHECKPOINT_INVALIDATE_ALL,
+  deserializeParameterLookup
 } from '@powersync/service-core';
-import { SqliteJsonRow, SqliteJsonValue, SqlSyncRules } from '@powersync/service-sync-rules';
+import { SqliteJsonRow, ParameterLookup, SqlSyncRules } from '@powersync/service-sync-rules';
 import * as bson from 'bson';
 import { wrapWithAbort } from 'ix/asynciterable/operators/withabort.js';
+import { LRUCache } from 'lru-cache';
 import * as timers from 'timers/promises';
 import { MongoBucketStorage } from '../MongoBucketStorage.js';
 import { PowerSyncMongo } from './db.js';
 import {
   BucketDataDocument,
   BucketDataKey,
+  BucketStateDocument,
   SourceKey,
   SourceTableDocument,
   SyncRuleCheckpointState,
@@ -39,6 +41,7 @@ import { MongoBucketBatch } from './MongoBucketBatch.js';
 import { MongoCompactor } from './MongoCompactor.js';
 import { MongoWriteCheckpointAPI } from './MongoWriteCheckpointAPI.js';
 import { idPrefixFilter, mapOpEntry, readSingleBatch } from './util.js';
+import { JSONBig } from '@powersync/service-jsonbig';
 
 export class MongoSyncBucketStorage
   extends BaseObserver<storage.SyncRulesBucketStorageListener>
@@ -154,7 +157,7 @@ export class MongoSyncBucketStorage
 
       await callback(batch);
       await batch.flush();
-      if (batch.last_flushed_op) {
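+      // last_flushed_op is a bigint op id; an explicit null check avoids treating 0n as "nothing flushed".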
+      if (batch.last_flushed_op != null) {
         return { flushed_op: batch.last_flushed_op };
       } else {
         return null;
@@ -252,7 +255,7 @@ export class MongoSyncBucketStorage
     return result!;
   }
 
-  async getParameterSets(checkpoint: utils.InternalOpId, lookups: SqliteJsonValue[][]): Promise<SqliteJsonRow[]> {
+  async getParameterSets(checkpoint: utils.InternalOpId, lookups: ParameterLookup[]): Promise<SqliteJsonRow[]> {
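+    // Assumption: storage.serializeLookup accepts the structured ParameterLookup and produces the same
+    // encoded value that is stored in Mongo, so the existing filter keeps working after the type change.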
     const lookupFilter = lookups.map((lookup) => {
       return storage.serializeLookup(lookup);
     });
@@ -585,6 +588,13 @@ export class MongoSyncBucketStorage
       { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS }
     );
 
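+    // Also remove the per-bucket state tracked for this sync rules instance. idPrefixFilter matches every
+    // BucketStateDocument _id starting with { g: group_id }, leaving the bucket field 'b' unbounded.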
+    await this.db.bucket_state.deleteMany(
+      {
+        _id: idPrefixFilter<BucketStateDocument['_id']>({ g: this.group_id }, ['b'])
+      },
+      { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS }
+    );
+
     await this.db.source_tables.deleteMany(
       {
         group_id: this.group_id
@@ -795,12 +805,7 @@ export class MongoSyncBucketStorage
 
     const updates: CheckpointChanges =
       lastCheckpoint == null
-        ? {
-            invalidateDataBuckets: true,
-            invalidateParameterBuckets: true,
-            updatedDataBuckets: [],
-            updatedParameterBucketDefinitions: []
-          }
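+        // No previous checkpoint for this connection: there is no delta to compute, so invalidate everything.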
+        ? CHECKPOINT_INVALIDATE_ALL
         : await this.getCheckpointChanges({
             lastCheckpoint: lastCheckpoint,
             nextCheckpoint: checkpoint
@@ -869,7 +874,105 @@ export class MongoSyncBucketStorage
     return pipeline;
   }
 
+  private async getDataBucketChanges(
+    options: GetCheckpointChangesOptions
+  ): Promise<Pick<CheckpointChanges, 'updatedDataBuckets' | 'invalidateDataBuckets'>> {
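+    // Query limit + 1 documents: seeing more than `limit` changed buckets is the signal to give up on
+    // an incremental diff and invalidate all data buckets instead.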
+    const limit = 1000;
+    const bucketStateUpdates = await this.db.bucket_state
+      .find(
+        {
+          // We have an index on (_id.g, last_op).
+          '_id.g': this.group_id,
+          last_op: { $gt: BigInt(options.lastCheckpoint) }
+        },
+        {
+          projection: {
+            '_id.b': 1
+          },
+          limit: limit + 1,
+          batchSize: limit + 1,
+          singleBatch: true
+        }
+      )
+      .toArray();
+
+    const buckets = bucketStateUpdates.map((doc) => doc._id.b);
+    const invalidateDataBuckets = buckets.length > limit;
+
+    return {
+      invalidateDataBuckets: invalidateDataBuckets,
+      updatedDataBuckets: invalidateDataBuckets ? new Set<string>() : new Set(buckets)
+    };
+  }
+
+  private async getParameterBucketChanges(
+    options: GetCheckpointChangesOptions
+  ): Promise<Pick<CheckpointChanges, 'updatedParameterLookups' | 'invalidateParameterBuckets'>> {
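+    // Same limit + 1 overflow trick as getDataBucketChanges. bucket_parameters _ids are op ids, so the
+    // (lastCheckpoint, nextCheckpoint] range on _id selects parameter writes between the two checkpoints.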
+    const limit = 1000;
+    const parameterUpdates = await this.db.bucket_parameters
+      .find(
+        {
+          _id: { $gt: BigInt(options.lastCheckpoint), $lte: BigInt(options.nextCheckpoint) },
+          'key.g': this.group_id
+        },
+        {
+          projection: {
+            lookup: 1
+          },
+          limit: limit + 1,
+          batchSize: limit + 1,
+          singleBatch: true
+        }
+      )
+      .toArray();
+    const invalidateParameterUpdates = parameterUpdates.length > limit;
+
+    return {
+      invalidateParameterBuckets: invalidateParameterUpdates,
+      updatedParameterLookups: invalidateParameterUpdates
+        ? new Set<string>()
+        : new Set<string>(parameterUpdates.map((p) => JSONBig.stringify(deserializeParameterLookup(p.lookup))))
+    };
+  }
+
+  // If we processed all connections together for each checkpoint, we could do a single lookup for all connections.
+  // In practice, specific connections may fall behind. So instead, we just cache the results of each specific lookup.
+  // TODO (later):
+  // We can optimize this by implementing it like ChecksumCache: We can use partial cache results to do
+  // more efficient lookups in some cases.
+  private checkpointChangesCache = new LRUCache<string, CheckpointChanges, { options: GetCheckpointChangesOptions }>({
+    // Limit to 50 cache entries, or 10MB, whichever comes first.
+    // Some rough calculations:
+    // If we process 10 checkpoints per second, and a connection may be 2 seconds behind, we could have
+    // up to 20 relevant checkpoints. That gives us 20*20 = 400 potentially-relevant cache entries.
+    // That is a worst-case scenario, so we don't actually store that many. In real life, the cache keys
+    // would likely be clustered around a few values, rather than spread over all 400 potential values.
+    max: 50,
+    maxSize: 10 * 1024 * 1024,
+    sizeCalculation: (value: CheckpointChanges) => {
+      // Estimate of memory usage
+      const paramSize = [...value.updatedParameterLookups].reduce<number>((a, b) => a + b.length, 0);
+      const bucketSize = [...value.updatedDataBuckets].reduce<number>((a, b) => a + b.length, 0);
+      return 100 + paramSize + bucketSize;
+    },
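+    // lru-cache's fetchMethod also coalesces concurrent fetch() calls for the same key into a single
+    // lookup, so many connections asking about the same checkpoint range share one query.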
+    fetchMethod: async (_key, _staleValue, options) => {
+      return this.getCheckpointChangesInternal(options.context.options);
+    }
+  });
+
   async getCheckpointChanges(options: GetCheckpointChangesOptions): Promise<CheckpointChanges> {
-    return CHECKPOINT_INVALIDATE_ALL;
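+    // Key on the exact (lastCheckpoint, nextCheckpoint) pair, since connections may be at different checkpoints.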
+    const key = `${options.lastCheckpoint}_${options.nextCheckpoint}`;
+    const result = await this.checkpointChangesCache.fetch(key, { context: { options } });
+    return result!;
+  }
+
+  private async getCheckpointChangesInternal(options: GetCheckpointChangesOptions): Promise<CheckpointChanges> {
+    const dataUpdates = await this.getDataBucketChanges(options);
+    const parameterUpdates = await this.getParameterBucketChanges(options);
+
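+    // The two Pick<> results are disjoint and together cover every CheckpointChanges field.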
+    return {
+      ...dataUpdates,
+      ...parameterUpdates
+    };
   }
 }