Skip to content

Commit 43bd677

Browse files
sonpiazclaude
andcommitted
fix: reduce Convex write conflicts ~125 → <10 per session
Serialize Linear sync (Promise.all → for...of), skip-if-unchanged guard on syncStatusFromLinear, merge double-patch in dispatches:fail, stagger cron intervals (5/6/7/8/9m) to reduce overlap. Co-Authored-By: Claude Opus 4.6 <[email protected]>
1 parent 40a20eb commit 43bd677

File tree

4 files changed

+66
-49
lines changed

4 files changed

+66
-49
lines changed

convex/crons.ts

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -36,21 +36,23 @@ crons.cron(
3636
{}
3737
);
3838

39-
// AGT-215: Alert System — Check for stuck agents every 5 minutes
39+
// AGT-215: Alert System — Check for stuck agents every 7 minutes
4040
// Triggers alerts when agents are stuck on a task for >30 minutes
41+
// Staggered from sync-linear (5m) to reduce write conflicts
4142
crons.interval(
4243
"check-stuck-agents",
43-
{ minutes: 5 },
44+
{ minutes: 7 },
4445
internal.alerts.checkStuckAgents,
4546
{}
4647
);
4748

4849
// AGT-216: Auto-Recovery — Self-Healing Agent Restart & Retry
4950
// Checks for crashed agents (heartbeat timeout) and auto-restarts with backoff
5051
// Circuit breaker stops after 3 consecutive failures
52+
// Staggered to 6 min to avoid overlap with sync-linear (5m) and stuck-agents (7m)
5153
crons.interval(
5254
"auto-recovery-check",
53-
{ minutes: 5 },
55+
{ minutes: 6 },
5456
internal.recovery.runRecoveryCheck,
5557
{}
5658
);
@@ -66,11 +68,12 @@ crons.interval(
6668
{}
6769
);
6870

69-
// AGT-247: Event Bus — Cleanup expired events every 5 minutes
71+
// AGT-247: Event Bus — Cleanup expired events every 9 minutes
7072
// Removes events older than 5 minutes that were never delivered
73+
// Staggered to reduce cron overlap
7174
crons.interval(
7275
"cleanup-expired-events",
73-
{ minutes: 5 },
76+
{ minutes: 9 },
7477
internal.agentEvents.cleanupExpiredEvents,
7578
{}
7679
);
@@ -93,11 +96,12 @@ crons.interval(
9396
{}
9497
);
9598

96-
// CORE-209 + AGT-337: The Loop — SLA Monitor every 5 minutes
99+
// CORE-209 + AGT-337: The Loop — SLA Monitor every 8 minutes
97100
// Detects AND auto-escalates: reply >15min → DM MAX, action >2h → critical MAX, report >24h → broken + CEO dispatch
101+
// Staggered from sync-linear (5m) to reduce write conflicts
98102
crons.interval(
99103
"loop-sla-monitor",
100-
{ minutes: 5 },
104+
{ minutes: 8 },
101105
internal.loopMonitor.checkSLABreaches,
102106
{}
103107
);

convex/dispatches.ts

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -116,19 +116,19 @@ export const fail = mutation({
116116
const currentRetry = dispatch.retryCount ?? 0;
117117
const maxRetries = dispatch.maxRetries ?? MAX_RETRIES;
118118

119-
await ctx.db.patch(dispatchId, {
120-
status: "failed",
121-
completedAt: Date.now(),
122-
error,
123-
retryCount: currentRetry,
124-
});
125-
126119
// AGT-308: Auto-retry with exponential backoff
127120
if (currentRetry < maxRetries) {
128121
const backoffMs = RETRY_BACKOFF_MS[Math.min(currentRetry, RETRY_BACKOFF_MS.length - 1)];
129122
const nextRetryAt = Date.now() + backoffMs;
130123

131-
await ctx.db.patch(dispatchId, { nextRetryAt });
124+
// Single patch with all fields to avoid write conflicts
125+
await ctx.db.patch(dispatchId, {
126+
status: "failed",
127+
completedAt: Date.now(),
128+
error,
129+
retryCount: currentRetry,
130+
nextRetryAt,
131+
});
132132

133133
await ctx.scheduler.runAfter(backoffMs, internal.dispatches.retryFailedDispatch, {
134134
originalDispatchId: dispatchId,
@@ -138,7 +138,13 @@ export const fail = mutation({
138138
`[AutoRetry] Dispatch ${dispatchId} failed (attempt ${currentRetry + 1}/${maxRetries}). Retrying in ${backoffMs / 60000}m.`
139139
);
140140
} else {
141-
// Max retries exhausted — escalate to MAX
141+
// Max retries exhausted — single patch, then escalate to MAX
142+
await ctx.db.patch(dispatchId, {
143+
status: "failed",
144+
completedAt: Date.now(),
145+
error,
146+
retryCount: currentRetry,
147+
});
142148
await ctx.scheduler.runAfter(0, internal.dispatches.escalateFailedDispatch, {
143149
dispatchId,
144150
});

convex/linearSync.ts

Lines changed: 34 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -206,40 +206,41 @@ export const syncAll = internalAction({
206206
return undefined;
207207
}
208208

209-
// Upsert each task
210-
const results = await Promise.all(
211-
linearIssues.map(async (issue) => {
212-
// Try to match assignee by name
213-
let assigneeId: Id<"agents"> | undefined = undefined;
214-
if (issue.assigneeName) {
215-
const matchedAgent = agents.find(
216-
(a: { name: string; _id: Id<"agents"> }) => a.name.toLowerCase() === issue.assigneeName?.toLowerCase()
217-
);
218-
assigneeId = matchedAgent?._id;
219-
}
209+
// Upsert each task sequentially to avoid write conflicts on tasks table
210+
// (Promise.all caused ~80 OCC conflicts per sync cycle)
211+
const results: Array<{ created: boolean }> = [];
212+
for (const issue of linearIssues) {
213+
// Try to match assignee by name
214+
let assigneeId: Id<"agents"> | undefined = undefined;
215+
if (issue.assigneeName) {
216+
const matchedAgent = agents.find(
217+
(a: { name: string; _id: Id<"agents"> }) => a.name.toLowerCase() === issue.assigneeName?.toLowerCase()
218+
);
219+
assigneeId = matchedAgent?._id;
220+
}
220221

221-
// AGT-142: Parse agent from description first, fallback to "max" (PM owns unassigned)
222-
const parsedAgent = parseAgentFromDescription(issue.description);
223-
const taskAgentName = parsedAgent ?? "max";
224-
225-
// AGT-175: Use taskAgentName for activity attribution (not hardcoded "max")
226-
return await ctx.runMutation(api.tasks.upsertByLinearId, {
227-
agentName: taskAgentName,
228-
taskAgentName,
229-
projectId: evoxProject._id,
230-
linearId: issue.linearId,
231-
linearIdentifier: issue.linearIdentifier,
232-
linearUrl: issue.linearUrl,
233-
title: issue.title,
234-
description: issue.description,
235-
status: issue.status,
236-
priority: issue.priority,
237-
assignee: assigneeId,
238-
createdAt: issue.createdAt,
239-
updatedAt: issue.updatedAt,
240-
});
241-
})
242-
);
222+
// AGT-142: Parse agent from description first, fallback to "max" (PM owns unassigned)
223+
const parsedAgent = parseAgentFromDescription(issue.description);
224+
const taskAgentName = parsedAgent ?? "max";
225+
226+
// AGT-175: Use taskAgentName for activity attribution (not hardcoded "max")
227+
const result = await ctx.runMutation(api.tasks.upsertByLinearId, {
228+
agentName: taskAgentName,
229+
taskAgentName,
230+
projectId: evoxProject._id,
231+
linearId: issue.linearId,
232+
linearIdentifier: issue.linearIdentifier,
233+
linearUrl: issue.linearUrl,
234+
title: issue.title,
235+
description: issue.description,
236+
status: issue.status,
237+
priority: issue.priority,
238+
assignee: assigneeId,
239+
createdAt: issue.createdAt,
240+
updatedAt: issue.updatedAt,
241+
});
242+
results.push(result);
243+
}
243244

244245
const created = results.filter((r: { created: boolean }) => r.created).length;
245246
const updated = results.filter((r: { created: boolean }) => !r.created).length;

convex/tasks.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -978,6 +978,12 @@ export const syncStatusFromLinear = mutation({
978978
};
979979

980980
const mappedStatus = statusMap[status] || "backlog";
981+
982+
// Skip write if status hasn't changed (avoids conflicts from webhook retries)
983+
if (task.status === mappedStatus) {
984+
return task._id;
985+
}
986+
981987
const now = Date.now();
982988

983989
await ctx.db.patch(task._id, {

0 commit comments

Comments
 (0)