Skip to content
Open
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,12 @@
- New Convex functions must be pushed before `convex run`: use `bunx convex dev --once` (dev) or `bunx convex deploy` (prod).
- For non-interactive prod deploys, use `bunx convex deploy -y` to skip confirmation.
- If `bunx convex run --env-file .env.local ...` returns `401 MissingAccessToken` despite `bunx convex login`, workaround: omit `--env-file` and use `--deployment-name <name>` / `--prod`.

## Convex Query & Bandwidth Rules
- **Always use `.withIndex()` instead of `.filter()` for fields that can be indexed.** `.filter()` causes full table scans — every doc is read and billed. Even a single `.filter()` on a 16K-row table reads ~16 MB per call.
- **Convex reads entire documents** — no field projections. If you only need a few fields from large docs (~6 KB+), denormalize a lightweight summary onto the parent doc or use a lookup table (see `embeddingSkillMap`, `skill.latestVersionSummary`, `skill.badges` for examples).
- **Denormalization pattern**: persist computed fields so they can be indexed. Every mutation that updates source fields must also update the denormalized field. Always write a cursor-based backfill for new fields (see `backfillIsSuspiciousInternal`, `backfillLatestVersionSummaryInternal`, `backfillDenormalizedBadgesInternal` for examples).
- **Cron jobs must never scan entire tables.** Use indexed queries with equality filters. Use cursor-based pagination for large datasets. Prefer incremental/delta tracking over full recounts.
- **32K document limit per query.** Split `.collect()` calls by a partition field (e.g., one day at a time instead of a 7-day range). See `buildTrendingLeaderboard` for an example.
- **Common mistakes**: `.filter().collect()` without an index; calling `ctx.db.get()` on large docs in a loop for list views; `while` loops that paginate through the entire table just to find a handful of filtered results.
- **Before writing or reviewing Convex queries, check deployment health.** Run `bunx convex insights` to check for OCC conflicts, `bytesReadLimit`, and `documentsReadLimit` errors. Run `bunx convex logs --failure` to see individual error messages and stack traces. This helps identify which functions are causing bandwidth issues so you can prioritize fixes.
6 changes: 3 additions & 3 deletions convex/comments.query.test.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/* @vitest-environment node */
import { describe, expect, it } from 'vitest'
import { listBySkill } from './comments'
import { listBySkillHandler } from './comments'

function makeCtx(args: {
comments: Array<Record<string, unknown>>
Expand Down Expand Up @@ -45,7 +45,7 @@ describe('comments.listBySkill', () => {
},
})

const result = await listBySkill._handler(ctx, {
const result = await listBySkillHandler(ctx, {
skillId: 'skills:1',
limit: 50,
} as never)
Expand Down Expand Up @@ -115,7 +115,7 @@ describe('comments.listBySkill', () => {
},
})

const result = await listBySkill._handler(ctx, {
const result = await listBySkillHandler(ctx, {
skillId: 'skills:1',
limit: 50,
} as never)
Expand Down
38 changes: 20 additions & 18 deletions convex/comments.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,26 +6,28 @@ import { type PublicUser, toPublicUser } from './lib/public'

/**
 * Public query: the most recent comments on a skill, each joined with its
 * author's public profile. Soft-deleted comments and comments whose author
 * cannot be resolved to a public user are omitted by the handler.
 */
export const listBySkill = query({
  args: { skillId: v.id('skills'), limit: v.optional(v.number()) },
  // The handler lives in a separately exported function so unit tests can
  // call it directly without going through the Convex `query` wrapper.
  handler: listBySkillHandler,
})

/**
 * Shared implementation behind `comments.listBySkill`, exported so tests can
 * invoke it without the Convex function wrapper.
 *
 * Reads up to `limit` (default 50) newest comments for the skill via the
 * `by_skill` index, drops soft-deleted comments, then resolves each author
 * to a public profile — rows without a resolvable author are dropped too.
 */
export async function listBySkillHandler(ctx: import('./_generated/server').QueryCtx, args: { skillId: import('./_generated/dataModel').Id<'skills'>; limit?: number }) {
  const pageSize = args.limit ?? 50
  const recent = await ctx.db
    .query('comments')
    .withIndex('by_skill', (q) => q.eq('skillId', args.skillId))
    .order('desc')
    .take(pageSize)

  // Soft-deleted comments never need an author lookup, so drop them first.
  const visible = recent.filter((comment) => !comment.softDeletedAt)

  // Resolve authors in parallel; a comment whose author has no public
  // profile yields null and is filtered out below.
  const joined = await Promise.all(
    visible.map(async (comment) => {
      const user = toPublicUser(await ctx.db.get(comment.userId))
      return user ? { comment, user } : null
    }),
  )
  return joined.filter(
    (row): row is { comment: Doc<'comments'>; user: PublicUser } => row !== null,
  )
}

export const add = mutation({
args: { skillId: v.id('skills'), body: v.string() },
handler: addHandler,
Expand Down
2 changes: 1 addition & 1 deletion convex/crons.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ crons.interval(

// Recompute global stats once per day. Lowered from hourly in this change to
// cut read bandwidth — the recount is the heaviest recurring job.
crons.interval(
  'global-stats-update',
  { hours: 24 },
  internal.statsMaintenance.updateGlobalStatsInternal,
  {},
)
Expand Down
28 changes: 19 additions & 9 deletions convex/lib/leaderboards.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,17 +27,27 @@ export async function buildTrendingLeaderboard(
) {
const now = params.now ?? Date.now()
const { startDay, endDay } = getTrendingRange(now)
const rows = await ctx.db
.query('skillDailyStats')
.withIndex('by_day', (q) => q.gte('day', startDay).lte('day', endDay))
.collect()

// Query one day at a time to stay well under the 32K document limit.
// Each daily query reads ~4,500 docs instead of 32K for the full 7-day range.
// Parallelized since there are no cross-day dependencies.
const dayKeys = Array.from({ length: endDay - startDay + 1 }, (_, i) => startDay + i)
const perDayRows = await Promise.all(
dayKeys.map((day) =>
ctx.db
.query('skillDailyStats')
.withIndex('by_day', (q) => q.eq('day', day))
.collect(),
),
)
const totals = new Map<Id<'skills'>, { installs: number; downloads: number }>()
for (const row of rows) {
const current = totals.get(row.skillId) ?? { installs: 0, downloads: 0 }
current.installs += row.installs
current.downloads += row.downloads
totals.set(row.skillId, current)
for (const rows of perDayRows) {
for (const row of rows) {
const current = totals.get(row.skillId) ?? { installs: 0, downloads: 0 }
current.installs += row.installs
current.downloads += row.downloads
totals.set(row.skillId, current)
}
}

const entries = Array.from(totals, ([skillId, totalsEntry]) => ({
Expand Down
10 changes: 10 additions & 0 deletions convex/lib/skillSafety.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,13 @@ export function isSkillSuspicious(
if (skill.moderationFlags?.includes('flagged.suspicious')) return true
return isScannerSuspiciousReason(skill.moderationReason)
}

/**
 * Denormalized `isSuspicious` value for a skill document.
 *
 * Call at every mutation site that writes `moderationFlags` or
 * `moderationReason`, and persist the result so indexed queries can filter
 * on suspicion without re-deriving it from the moderation fields.
 */
export function computeIsSuspicious(
  skill: Pick<Doc<'skills'>, 'moderationFlags' | 'moderationReason'>,
): boolean {
  // Single source of truth: delegate to the live predicate so the stored
  // flag can never diverge in logic from the on-the-fly check.
  return isSkillSuspicious(skill)
}
100 changes: 100 additions & 0 deletions convex/maintenance.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import {
type TrustTier,
} from './lib/skillQuality'
import { generateSkillSummary } from './lib/skillSummary'
import { computeIsSuspicious } from './lib/skillSafety'
import { hashSkillFiles } from './lib/skills'

const DEFAULT_BATCH_SIZE = 50
Expand Down Expand Up @@ -1529,6 +1530,105 @@ export const backfillDenormalizedBadgesInternal = internalMutation({
},
})

/**
 * Backfill `latestVersionSummary` on all skills. Cursor-based paginated mutation
 * that self-schedules until done. Reads each skill's latestVersionId, extracts
 * the summary fields, and patches the skill.
 *
 * Always reconciles against the current `latestVersionId` — if the summary is
 * stale (e.g. from a tag retarget), it will be rewritten. To force a full
 * re-backfill, simply re-run the function; every row is re-evaluated.
 */
export const backfillLatestVersionSummaryInternal = internalMutation({
  args: {
    cursor: v.optional(v.string()),
    batchSize: v.optional(v.number()),
  },
  handler: async (ctx, args) => {
    const batchSize = clampInt(args.batchSize ?? 50, 10, 200)
    const { page, continueCursor, isDone } = await ctx.db
      .query('skills')
      .paginate({ cursor: args.cursor ?? null, numItems: batchSize })

    let patched = 0
    for (const skill of page) {
      // Skills with no published version have nothing to summarize.
      if (!skill.latestVersionId) continue
      const version = await ctx.db.get(skill.latestVersionId)
      if (!version) continue

      const expected = {
        version: version.version,
        createdAt: version.createdAt,
        changelog: version.changelog,
        changelogSource: version.changelogSource,
        clawdis: version.parsed?.clawdis,
      }

      // Skip only when EVERY summary field is already in sync. The previous
      // check ignored `changelogSource` and `clawdis`, so a change to those
      // fields alone would never be reconciled, contradicting the contract
      // above. `clawdis` is schemaless (v.any()), so compare it by JSON
      // serialization.
      const existing = skill.latestVersionSummary
      if (
        existing &&
        existing.version === expected.version &&
        existing.createdAt === expected.createdAt &&
        existing.changelog === expected.changelog &&
        existing.changelogSource === expected.changelogSource &&
        JSON.stringify(existing.clawdis) === JSON.stringify(expected.clawdis)
      ) {
        continue
      }

      await ctx.db.patch(skill._id, { latestVersionSummary: expected })
      patched++
    }

    // Hand the next page to the scheduler so each mutation stays small.
    if (!isDone) {
      await ctx.scheduler.runAfter(
        0,
        internal.maintenance.backfillLatestVersionSummaryInternal,
        {
          cursor: continueCursor,
          batchSize: args.batchSize,
        },
      )
    }

    return { patched, isDone, scanned: page.length }
  },
})

/**
 * Backfill the denormalized `isSuspicious` flag across the `skills` table.
 * Cursor-paginated; re-schedules itself until every row has been visited.
 * Idempotent: rows already carrying the expected value are left untouched.
 */
export const backfillIsSuspiciousInternal = internalMutation({
  args: {
    cursor: v.optional(v.string()),
    batchSize: v.optional(v.number()),
  },
  handler: async (ctx, args) => {
    const pageSize = clampInt(args.batchSize ?? 100, 10, 200)
    const result = await ctx.db
      .query('skills')
      .paginate({ cursor: args.cursor ?? null, numItems: pageSize })
    const { page, continueCursor, isDone } = result

    let patched = 0
    for (const skill of page) {
      const want = computeIsSuspicious(skill)
      if (skill.isSuspicious === want) continue
      await ctx.db.patch(skill._id, { isSuspicious: want })
      patched++
    }

    // Hand the next page to the scheduler rather than looping here, keeping
    // each mutation's transaction small.
    if (!isDone) {
      await ctx.scheduler.runAfter(0, internal.maintenance.backfillIsSuspiciousInternal, {
        cursor: continueCursor,
        batchSize: args.batchSize,
      })
    }

    return { patched, isDone, scanned: page.length }
  },
})

function clampInt(value: number, min: number, max: number) {
const rounded = Math.trunc(value)
if (!Number.isFinite(rounded)) return min
Expand Down
27 changes: 27 additions & 0 deletions convex/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,15 @@ const skills = defineTable({
}),
),
latestVersionId: v.optional(v.id('skillVersions')),
latestVersionSummary: v.optional(
v.object({
version: v.string(),
createdAt: v.number(),
changelog: v.string(),
changelogSource: v.optional(v.union(v.literal('auto'), v.literal('user'))),
clawdis: v.optional(v.any()),
}),
),
tags: v.record(v.string(), v.id('skillVersions')),
softDeletedAt: v.optional(v.number()),
badges: v.optional(
Expand Down Expand Up @@ -101,6 +110,7 @@ const skills = defineTable({
evaluatedAt: v.number(),
}),
),
isSuspicious: v.optional(v.boolean()),
moderationFlags: v.optional(v.array(v.string())),
lastReviewedAt: v.optional(v.number()),
// VT scan tracking
Expand Down Expand Up @@ -146,6 +156,23 @@ const skills = defineTable({
])
.index('by_canonical', ['canonicalSkillId'])
.index('by_fork_of', ['forkOf.skillId'])
.index('by_moderation', ['moderationStatus', 'moderationReason'])
.index('by_nonsuspicious_updated', ['softDeletedAt', 'isSuspicious', 'updatedAt'])
.index('by_nonsuspicious_created', ['softDeletedAt', 'isSuspicious', 'createdAt'])
.index('by_nonsuspicious_name', ['softDeletedAt', 'isSuspicious', 'displayName'])
.index('by_nonsuspicious_downloads', [
'softDeletedAt',
'isSuspicious',
'statsDownloads',
'updatedAt',
])
.index('by_nonsuspicious_stars', ['softDeletedAt', 'isSuspicious', 'statsStars', 'updatedAt'])
.index('by_nonsuspicious_installs', [
'softDeletedAt',
'isSuspicious',
'statsInstallsAllTime',
'updatedAt',
])

const souls = defineTable({
slug: v.string(),
Expand Down
Loading