AGENTS.md (8 additions, 0 deletions)
@@ -47,3 +47,11 @@
- New Convex functions must be pushed before `convex run`: use `bunx convex dev --once` (dev) or `bunx convex deploy` (prod).
- For non-interactive prod deploys, use `bunx convex deploy -y` to skip confirmation.
- If `bunx convex run --env-file .env.local ...` returns `401 MissingAccessToken` despite `bunx convex login`, workaround: omit `--env-file` and use `--deployment-name <name>` / `--prod`.

## Convex Query & Bandwidth Rules
- **Always use `.withIndex()` instead of `.filter()` for fields that can be indexed.** `.filter()` causes full table scans — every doc is read and billed. Even a single `.filter()` on a 16K-row table reads ~16 MB per call.
- **Convex reads entire documents** — no field projections. If you only need a few fields from large docs (~6 KB+), denormalize a lightweight summary onto the parent doc or use a lookup table (see `embeddingSkillMap`, `skill.latestVersionSummary`, `skill.badges` for examples).
- **Denormalization pattern**: persist computed fields so they can be indexed. Every mutation that updates source fields must also update the denormalized field. Always write a cursor-based backfill for new fields (see `backfillIsSuspiciousInternal`, `backfillLatestVersionSummaryInternal`, `backfillDenormalizedBadgesInternal` for examples).
- **Cron jobs must never scan entire tables.** Use indexed queries with equality filters. Use cursor-based pagination for large datasets. Prefer incremental/delta tracking over full recounts.
- **32K document limit per query.** Split `.collect()` calls by a partition field (e.g., one day at a time instead of a 7-day range). See `buildTrendingLeaderboard` for an example.
- **Common mistakes**: `.filter().collect()` without an index; `ctx.db.get()` on large docs in a loop for list views; while loops that paginate the whole table to find filtered results.
convex/crons.ts (1 addition, 1 deletion)
@@ -45,7 +45,7 @@ crons.interval(

 crons.interval(
   'global-stats-update',
-  { minutes: 60 },
+  { hours: 24 },
   internal.statsMaintenance.updateGlobalStatsInternal,
   {},
 )
convex/lib/leaderboards.ts (13 additions, 9 deletions)
@@ -27,17 +27,21 @@ export async function buildTrendingLeaderboard(
 ) {
   const now = params.now ?? Date.now()
   const { startDay, endDay } = getTrendingRange(now)
-  const rows = await ctx.db
-    .query('skillDailyStats')
-    .withIndex('by_day', (q) => q.gte('day', startDay).lte('day', endDay))
-    .collect()
 
+  // Query one day at a time to stay well under the 32K document limit.
+  // Each daily query reads ~4,500 docs instead of 32K for the full 7-day range.
   const totals = new Map<Id<'skills'>, { installs: number; downloads: number }>()
-  for (const row of rows) {
-    const current = totals.get(row.skillId) ?? { installs: 0, downloads: 0 }
-    current.installs += row.installs
-    current.downloads += row.downloads
-    totals.set(row.skillId, current)
+  for (let day = startDay; day <= endDay; day++) {
+    const rows = await ctx.db
+      .query('skillDailyStats')
+      .withIndex('by_day', (q) => q.eq('day', day))
+      .collect()
+    for (const row of rows) {
+      const current = totals.get(row.skillId) ?? { installs: 0, downloads: 0 }
+      current.installs += row.installs
+      current.downloads += row.downloads
+      totals.set(row.skillId, current)
+    }
   }
Review comment (Contributor):
**Sequential daily queries could be parallelized**

The 7 daily queries are issued serially with `await` inside the `for` loop, adding 7 round-trips instead of 1. Since there are no cross-day dependencies, these can be parallelized with `Promise.all` for better latency in the cron job:

Suggested change:

```diff
-  for (let day = startDay; day <= endDay; day++) {
-    const rows = await ctx.db
-      .query('skillDailyStats')
-      .withIndex('by_day', (q) => q.eq('day', day))
-      .collect()
-    for (const row of rows) {
-      const current = totals.get(row.skillId) ?? { installs: 0, downloads: 0 }
-      current.installs += row.installs
-      current.downloads += row.downloads
-      totals.set(row.skillId, current)
-    }
-  }
+  const dayKeys = Array.from({ length: endDay - startDay + 1 }, (_, i) => startDay + i)
+  const perDayRows = await Promise.all(
+    dayKeys.map((day) =>
+      ctx.db
+        .query('skillDailyStats')
+        .withIndex('by_day', (q) => q.eq('day', day))
+        .collect(),
+    ),
+  )
+  for (const rows of perDayRows) {
+    for (const row of rows) {
+      const current = totals.get(row.skillId) ?? { installs: 0, downloads: 0 }
+      current.installs += row.installs
+      current.downloads += row.downloads
+      totals.set(row.skillId, current)
+    }
+  }
```

This is low-priority for a cron job, but it's an easy win.

Author reply:
Fixed in e352123 — parallelized with Promise.all.


const entries = Array.from(totals, ([skillId, totalsEntry]) => ({
convex/lib/skillSafety.ts (10 additions, 0 deletions)
@@ -11,3 +11,13 @@ export function isSkillSuspicious(
  if (skill.moderationFlags?.includes('flagged.suspicious')) return true
  return isScannerSuspiciousReason(skill.moderationReason)
}

/**
 * Compute the denormalized `isSuspicious` boolean for a skill.
 * Use at every mutation site that writes `moderationFlags` or `moderationReason`.
 */
export function computeIsSuspicious(
  skill: Pick<Doc<'skills'>, 'moderationFlags' | 'moderationReason'>,
): boolean {
  return isSkillSuspicious(skill)
}
convex/maintenance.ts (86 additions, 0 deletions)
@@ -12,6 +12,7 @@ import {
   type TrustTier,
 } from './lib/skillQuality'
 import { generateSkillSummary } from './lib/skillSummary'
+import { computeIsSuspicious } from './lib/skillSafety'
 import { hashSkillFiles } from './lib/skills'
 
 const DEFAULT_BATCH_SIZE = 50
@@ -1529,6 +1530,91 @@ export const backfillDenormalizedBadgesInternal = internalMutation({
  },
})

/**
 * Backfill `latestVersionSummary` on all skills. Cursor-based paginated mutation
 * that self-schedules until done. Reads each skill's latestVersionId, extracts
 * the summary fields, and patches the skill.
 */
export const backfillLatestVersionSummaryInternal = internalMutation({
  args: {
    cursor: v.optional(v.string()),
    batchSize: v.optional(v.number()),
  },
  handler: async (ctx, args) => {
    const batchSize = clampInt(args.batchSize ?? 50, 10, 200)
    const { page, continueCursor, isDone } = await ctx.db
      .query('skills')
      .paginate({ cursor: args.cursor ?? null, numItems: batchSize })

    let patched = 0
    for (const skill of page) {
      if (skill.latestVersionSummary) continue
Review comment (Contributor):
**Backfill skips skills with existing `latestVersionSummary`**

The guard `if (skill.latestVersionSummary) continue` means any skill that already has a summary (e.g. one published after this PR is deployed but before the backfill completes) will be silently skipped and never re-evaluated by the backfill. This is the intended behavior for an initial one-shot populate, but it means if a summary was somehow written with wrong data, re-running the backfill won't fix it. Worth noting in the JSDoc so future operators know to clear `latestVersionSummary` first if a re-backfill is ever needed.

Author reply:
Fixed in e352123 — backfill now reconciles against the current latestVersionId instead of skipping. Updated JSDoc accordingly.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Reconcile existing summaries during latest-version backfill

backfillLatestVersionSummaryInternal skips any row that already has latestVersionSummary, so it cannot repair skills where latestVersionId was later repointed (for example by tag updates) and the denormalized summary is now stale. In that scenario list responses can keep returning _id for one version and summary fields from another indefinitely because this backfill will never patch those rows; the migration should validate/rewrite summary data against the current latestVersionId instead of unconditionally continuing.


Author reply:
Fixed in e352123 — same fix as above, backfill always re-evaluates against current latestVersionId.

      if (!skill.latestVersionId) continue
      const version = await ctx.db.get(skill.latestVersionId)
      if (!version) continue

      await ctx.db.patch(skill._id, {
        latestVersionSummary: {
          version: version.version,
          createdAt: version.createdAt,
          changelog: version.changelog,
          changelogSource: version.changelogSource,
          clawdis: version.parsed?.clawdis,
        },
      })
      patched++
    }

    if (!isDone) {
      await ctx.scheduler.runAfter(
        0,
        internal.maintenance.backfillLatestVersionSummaryInternal,
        {
          cursor: continueCursor,
          batchSize: args.batchSize,
        },
      )
    }

    return { patched, isDone, scanned: page.length }
  },
})

/**
 * Backfill `isSuspicious` on all skills. Cursor-based paginated mutation
 * that self-schedules until done.
 */
export const backfillIsSuspiciousInternal = internalMutation({
  args: {
    cursor: v.optional(v.string()),
    batchSize: v.optional(v.number()),
  },
  handler: async (ctx, args) => {
    const batchSize = clampInt(args.batchSize ?? 100, 10, 200)
    const { page, continueCursor, isDone } = await ctx.db
      .query('skills')
      .paginate({ cursor: args.cursor ?? null, numItems: batchSize })

    let patched = 0
    for (const skill of page) {
      const expected = computeIsSuspicious(skill)
      if (skill.isSuspicious !== expected) {
        await ctx.db.patch(skill._id, { isSuspicious: expected })
        patched++
      }
    }

    if (!isDone) {
      await ctx.scheduler.runAfter(0, internal.maintenance.backfillIsSuspiciousInternal, {
        cursor: continueCursor,
        batchSize: args.batchSize,
      })
    }

    return { patched, isDone, scanned: page.length }
  },
})

function clampInt(value: number, min: number, max: number) {
  const rounded = Math.trunc(value)
  if (!Number.isFinite(rounded)) return min
convex/schema.ts (27 additions, 0 deletions)
@@ -46,6 +46,15 @@ const skills = defineTable({
     }),
   ),
   latestVersionId: v.optional(v.id('skillVersions')),
+  latestVersionSummary: v.optional(
+    v.object({
+      version: v.string(),
+      createdAt: v.number(),
+      changelog: v.string(),
+      changelogSource: v.optional(v.union(v.literal('auto'), v.literal('user'))),
+      clawdis: v.optional(v.any()),
+    }),
+  ),
   tags: v.record(v.string(), v.id('skillVersions')),
   softDeletedAt: v.optional(v.number()),
   badges: v.optional(
@@ -101,6 +110,7 @@
       evaluatedAt: v.number(),
     }),
   ),
+  isSuspicious: v.optional(v.boolean()),
   moderationFlags: v.optional(v.array(v.string())),
   lastReviewedAt: v.optional(v.number()),
   // VT scan tracking
@@ -146,6 +156,23 @@
   ])
   .index('by_canonical', ['canonicalSkillId'])
   .index('by_fork_of', ['forkOf.skillId'])
+  .index('by_moderation', ['moderationStatus', 'moderationReason'])
+  .index('by_nonsuspicious_updated', ['softDeletedAt', 'isSuspicious', 'updatedAt'])
+  .index('by_nonsuspicious_created', ['softDeletedAt', 'isSuspicious', 'createdAt'])
+  .index('by_nonsuspicious_name', ['softDeletedAt', 'isSuspicious', 'displayName'])
+  .index('by_nonsuspicious_downloads', [
+    'softDeletedAt',
+    'isSuspicious',
+    'statsDownloads',
+    'updatedAt',
+  ])
+  .index('by_nonsuspicious_stars', ['softDeletedAt', 'isSuspicious', 'statsStars', 'updatedAt'])
+  .index('by_nonsuspicious_installs', [
+    'softDeletedAt',
+    'isSuspicious',
+    'statsInstallsAllTime',
+    'updatedAt',
+  ])

 const souls = defineTable({
   slug: v.string(),