-
Notifications
You must be signed in to change notification settings - Fork 1.9k
[ENH]: Execute task with no backfill or incremental #5867
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -1736,6 +1736,73 @@ func (tc *Catalog) FlushCollectionCompaction(ctx context.Context, flushCollectio | |||||
| return flushCollectionInfo, nil | ||||||
| } | ||||||
|
|
||||||
| // FlushCollectionCompactionsAndAttachedFunction atomically updates multiple collection compaction data | ||||||
| // and attached function completion offset in a single transaction. | ||||||
| func (tc *Catalog) FlushCollectionCompactionsAndAttachedFunction( | ||||||
| ctx context.Context, | ||||||
| collectionCompactions []*model.FlushCollectionCompaction, | ||||||
| attachedFunctionID uuid.UUID, | ||||||
| completionOffset int64, | ||||||
| ) (*model.ExtendedFlushCollectionInfo, error) { | ||||||
| if !tc.versionFileEnabled { | ||||||
| // Attached-function-based compactions are only supported with versioned collections | ||||||
| log.Error("FlushCollectionCompactionsAndAttachedFunction is only supported for versioned collections") | ||||||
| return nil, errors.New("attached-function-based compaction requires versioned collections") | ||||||
| } | ||||||
|
|
||||||
| if len(collectionCompactions) == 0 { | ||||||
| return nil, errors.New("at least one collection compaction is required") | ||||||
| } | ||||||
|
|
||||||
| flushInfos := make([]*model.FlushCollectionInfo, 0, len(collectionCompactions)) | ||||||
|
|
||||||
| err := tc.txImpl.Transaction(ctx, func(txCtx context.Context) error { | ||||||
| var err error | ||||||
| // Get the transaction from context to pass to FlushCollectionCompactionForVersionedCollection | ||||||
| tx := dbcore.GetDB(txCtx) | ||||||
|
|
||||||
| // Handle all collection compactions | ||||||
| for _, collectionCompaction := range collectionCompactions { | ||||||
| log.Info("FlushCollectionCompactionsAndAttachedFunction", zap.String("collection_id", collectionCompaction.ID.String())) | ||||||
| flushInfo, err := tc.FlushCollectionCompactionForVersionedCollection(txCtx, collectionCompaction, tx) | ||||||
| if err != nil { | ||||||
| return err | ||||||
| } | ||||||
| flushInfos = append(flushInfos, flushInfo) | ||||||
| } | ||||||
|
|
||||||
| err = tc.metaDomain.AttachedFunctionDb(txCtx).Update(&dbmodel.AttachedFunction{ | ||||||
| ID: attachedFunctionID, | ||||||
|
Comment on lines
+1769
to
+1775
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [BestPractice] Idempotency Issue: Direct UPDATE Without Existence Check The
err = tc.metaDomain.AttachedFunctionDb(txCtx).Update(&dbmodel.AttachedFunction{
ID: attachedFunctionID,
CompletionOffset: completionOffset,
})If this transaction is retried or replayed (e.g., due to network issues), it could:
Recommendation: // First fetch current state
currentFunction, err := tc.metaDomain.AttachedFunctionDb(txCtx).GetByID(attachedFunctionID)
if err != nil {
return err // Handle not found
}
// Verify offset is progressing forward
if completionOffset < currentFunction.CompletionOffset {
return errors.New("completion offset cannot move backward")
}
// Then update
err = tc.metaDomain.AttachedFunctionDb(txCtx).Update(&dbmodel.AttachedFunction{
ID: attachedFunctionID,
CompletionOffset: completionOffset,
})Context for Agents |
||||||
| CompletionOffset: completionOffset, | ||||||
| }) | ||||||
| if err != nil { | ||||||
| return err | ||||||
| } | ||||||
|
|
||||||
| return nil | ||||||
| }) | ||||||
|
|
||||||
| if err != nil { | ||||||
| return nil, err | ||||||
| } | ||||||
|
|
||||||
| // Populate attached function fields with authoritative values from database | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [Documentation] The comment mentions populating with "authoritative values from database", but the code uses the Consider rephrasing to clarify that it's populating the response with the successfully committed value.
Suggested change
⚡ Committable suggestion Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Context for Agents |
||||||
| for _, flushInfo := range flushInfos { | ||||||
| flushInfo.AttachedFunctionCompletionOffset = &completionOffset | ||||||
| } | ||||||
|
|
||||||
| // Log with first collection ID (typically the output collection) | ||||||
| log.Info("FlushCollectionCompactionsAndAttachedFunction", | ||||||
| zap.String("first_collection_id", collectionCompactions[0].ID.String()), | ||||||
| zap.Int("collection_count", len(collectionCompactions)), | ||||||
| zap.String("attached_function_id", attachedFunctionID.String()), | ||||||
| zap.Int64("completion_offset", completionOffset)) | ||||||
|
|
||||||
| return &model.ExtendedFlushCollectionInfo{ | ||||||
| Collections: flushInfos, | ||||||
| }, nil | ||||||
| } | ||||||
|
|
||||||
| func (tc *Catalog) validateVersionFile(versionFile *coordinatorpb.CollectionVersionFile, collectionID string, version int64) error { | ||||||
| if versionFile.GetCollectionInfoImmutable().GetCollectionId() != collectionID { | ||||||
| log.Error("collection id mismatch", zap.String("collection_id", collectionID), zap.String("version_file_collection_id", versionFile.GetCollectionInfoImmutable().GetCollectionId())) | ||||||
|
|
||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -76,7 +76,7 @@ func (s *Coordinator) validateAttachedFunctionMatchesRequest(ctx context.Context | |
| return nil | ||
| } | ||
|
|
||
| // AttachFunction creates a new attached function in the database | ||
| // AttachFunction creates an output collection and attached function in a single transaction | ||
| func (s *Coordinator) AttachFunction(ctx context.Context, req *coordinatorpb.AttachFunctionRequest) (*coordinatorpb.AttachFunctionResponse, error) { | ||
tanujnay112 marked this conversation as resolved.
Show resolved
Hide resolved
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we handle soft deletes for
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, compaction flushing uses the same logic as before which fails the flush if is_deleted is true. Similarly, a flush on a function that doesn't exist or is soft deleted will abort the transaction. Attaching a function does not interact with soft deleted functions and soft deleted functions are renamed with a "deleted" prefix just how collections do. |
||
| log := log.With(zap.String("method", "AttachFunction")) | ||
|
|
||
|
|
@@ -143,18 +143,6 @@ func (s *Coordinator) AttachFunction(ctx context.Context, req *coordinatorpb.Att | |
| return common.ErrCollectionNotFound | ||
| } | ||
|
|
||
| // Check if output collection already exists | ||
| outputCollectionName := req.OutputCollectionName | ||
| existingOutputCollections, err := s.catalog.metaDomain.CollectionDb(txCtx).GetCollections(nil, &outputCollectionName, req.TenantId, req.Database, nil, nil, false) | ||
| if err != nil { | ||
| log.Error("AttachFunction: failed to check output collection", zap.Error(err)) | ||
| return err | ||
| } | ||
| if len(existingOutputCollections) > 0 { | ||
| log.Error("AttachFunction: output collection already exists") | ||
| return common.ErrCollectionUniqueConstraintViolation | ||
| } | ||
|
|
||
| // Serialize params | ||
| var paramsJSON string | ||
| if req.Params != nil { | ||
|
|
@@ -168,6 +156,7 @@ func (s *Coordinator) AttachFunction(ctx context.Context, req *coordinatorpb.Att | |
| paramsJSON = "{}" | ||
| } | ||
|
|
||
| // Create attached function | ||
| now := time.Now() | ||
| attachedFunction := &dbmodel.AttachedFunction{ | ||
| ID: attachedFunctionID, | ||
|
|
@@ -176,6 +165,7 @@ func (s *Coordinator) AttachFunction(ctx context.Context, req *coordinatorpb.Att | |
| DatabaseID: databases[0].ID, | ||
| InputCollectionID: req.InputCollectionId, | ||
| OutputCollectionName: req.OutputCollectionName, | ||
| OutputCollectionID: nil, | ||
| FunctionID: function.ID, | ||
| FunctionParams: paramsJSON, | ||
| CompletionOffset: 0, | ||
|
|
@@ -196,6 +186,7 @@ func (s *Coordinator) AttachFunction(ctx context.Context, req *coordinatorpb.Att | |
|
|
||
| log.Debug("AttachFunction: attached function created with is_ready=false", | ||
| zap.String("attached_function_id", attachedFunctionID.String()), | ||
| zap.String("output_collection_name", req.OutputCollectionName), | ||
| zap.String("name", req.Name)) | ||
| return nil | ||
| }) | ||
|
|
@@ -205,7 +196,9 @@ func (s *Coordinator) AttachFunction(ctx context.Context, req *coordinatorpb.Att | |
| } | ||
|
|
||
| return &coordinatorpb.AttachFunctionResponse{ | ||
| Id: attachedFunctionID.String(), | ||
| AttachedFunction: &coordinatorpb.AttachedFunction{ | ||
| Id: attachedFunctionID.String(), | ||
| }, | ||
| }, nil | ||
| } | ||
|
|
||
|
Comment on lines
198
to
204
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [BestPractice] The Consider either adjusting the response message to only contain the ID or populating more fields in the returned Context for Agents
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah why is only id populated? |
||
|
|
@@ -229,6 +222,10 @@ func attachedFunctionToProto(attachedFunction *dbmodel.AttachedFunction, functio | |
| return nil, status.Errorf(codes.Internal, "attached function has invalid completion_offset: %d", attachedFunction.CompletionOffset) | ||
| } | ||
|
|
||
| if !attachedFunction.IsReady { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [BestPractice] The check This redundant check returns a generic Context for Agents |
||
| return nil, status.Errorf(codes.Internal, "serialized attached function is not ready") | ||
| } | ||
|
|
||
| attachedFunctionProto := &coordinatorpb.AttachedFunction{ | ||
| Id: attachedFunction.ID.String(), | ||
| Name: attachedFunction.Name, | ||
|
|
@@ -243,7 +240,6 @@ func attachedFunctionToProto(attachedFunction *dbmodel.AttachedFunction, functio | |
| DatabaseId: attachedFunction.DatabaseID, | ||
| CreatedAt: uint64(attachedFunction.CreatedAt.UnixMicro()), | ||
| UpdatedAt: uint64(attachedFunction.UpdatedAt.UnixMicro()), | ||
| IsReady: attachedFunction.IsReady, | ||
| } | ||
| if attachedFunction.OutputCollectionID != nil { | ||
| attachedFunctionProto.OutputCollectionId = attachedFunction.OutputCollectionID | ||
|
|
@@ -581,7 +577,7 @@ func (s *Coordinator) FinishCreateAttachedFunction(ctx context.Context, req *coo | |
|
|
||
| _, _, err = s.catalog.CreateCollectionAndSegments(txCtx, collection, segments, 0) | ||
| if err != nil { | ||
| log.Error("FinishCreateAttachedFunction: failed to create collection", zap.Error(err)) | ||
| log.Error("FinishCreateAttachedFunction: failed to create output collection", zap.Error(err)) | ||
| return err | ||
| } | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
note for myself: discuss the transaction setup for this