Skip to content
Open
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,12 @@
- New Convex functions must be pushed before `convex run`: use `bunx convex dev --once` (dev) or `bunx convex deploy` (prod).
- For non-interactive prod deploys, use `bunx convex deploy -y` to skip confirmation.
- If `bunx convex run --env-file .env.local ...` returns `401 MissingAccessToken` despite `bunx convex login`, workaround: omit `--env-file` and use `--deployment-name <name>` / `--prod`.

## Convex Query & Bandwidth Rules
- **Always use `.withIndex()` instead of `.filter()` for fields that can be indexed.** `.filter()` causes full table scans — every doc is read and billed. Even a single `.filter()` on a 16K-row table reads ~16 MB per call.
- **Convex reads entire documents** — no field projections. If you only need a few fields from large docs (~6 KB+), denormalize a lightweight summary onto the parent doc or use a lookup table (see `embeddingSkillMap`, `skill.latestVersionSummary`, `skill.badges` for examples).
- **Denormalization pattern**: persist computed fields so they can be indexed. Every mutation that updates source fields must also update the denormalized field. Always write a cursor-based backfill for new fields (see `backfillIsSuspiciousInternal`, `backfillLatestVersionSummaryInternal`, `backfillDenormalizedBadgesInternal` for examples).
- **Cron jobs must never scan entire tables.** Use indexed queries with equality filters. Use cursor-based pagination for large datasets. Prefer incremental/delta tracking over full recounts.
- **32K document limit per query.** Split `.collect()` calls by a partition field (e.g., one day at a time instead of a 7-day range). See `buildTrendingLeaderboard` for an example.
- **Common mistakes**: `.filter().collect()` without an index; calling `ctx.db.get()` on large docs in a loop for list views; `while` loops that paginate through the entire table just to find a handful of filtered results.
- **Before writing or reviewing Convex queries, check deployment health.** Run `bunx convex insights` to check for OCC conflicts, `bytesReadLimit`, and `documentsReadLimit` errors. Run `bunx convex logs --failure` to see individual error messages and stack traces. This helps identify which functions are causing bandwidth issues so you can prioritize fixes.
6 changes: 3 additions & 3 deletions convex/comments.query.test.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/* @vitest-environment node */
import { describe, expect, it } from 'vitest'
import { listBySkill } from './comments'
import { listBySkillHandler } from './comments'

function makeCtx(args: {
comments: Array<Record<string, unknown>>
Expand Down Expand Up @@ -45,7 +45,7 @@ describe('comments.listBySkill', () => {
},
})

const result = await listBySkill._handler(ctx, {
const result = await listBySkillHandler(ctx, {
skillId: 'skills:1',
limit: 50,
} as never)
Expand Down Expand Up @@ -115,7 +115,7 @@ describe('comments.listBySkill', () => {
},
})

const result = await listBySkill._handler(ctx, {
const result = await listBySkillHandler(ctx, {
skillId: 'skills:1',
limit: 50,
} as never)
Expand Down
38 changes: 20 additions & 18 deletions convex/comments.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,26 +6,28 @@ import { type PublicUser, toPublicUser } from './lib/public'

/**
 * Public query: the most recent comments on a skill, each joined with its
 * author's public profile. Soft-deleted comments and comments whose author
 * cannot be resolved to a public user are omitted by the handler.
 */
export const listBySkill = query({
  args: { skillId: v.id('skills'), limit: v.optional(v.number()) },
  // The handler lives in a separately exported function so unit tests can
  // call it directly without going through the Convex `query` wrapper.
  handler: listBySkillHandler,
})

/**
 * Shared implementation behind `comments.listBySkill`, exported so tests can
 * invoke it without the Convex function wrapper.
 *
 * Reads up to `limit` (default 50) newest comments for the skill via the
 * `by_skill` index, drops soft-deleted comments, then resolves each author
 * to a public profile — rows without a resolvable author are dropped too.
 */
export async function listBySkillHandler(ctx: import('./_generated/server').QueryCtx, args: { skillId: import('./_generated/dataModel').Id<'skills'>; limit?: number }) {
  const pageSize = args.limit ?? 50
  const recent = await ctx.db
    .query('comments')
    .withIndex('by_skill', (q) => q.eq('skillId', args.skillId))
    .order('desc')
    .take(pageSize)

  // Soft-deleted comments never need an author lookup, so drop them first.
  const visible = recent.filter((comment) => !comment.softDeletedAt)

  // Resolve authors in parallel; a comment whose author has no public
  // profile yields null and is filtered out below.
  const joined = await Promise.all(
    visible.map(async (comment) => {
      const user = toPublicUser(await ctx.db.get(comment.userId))
      return user ? { comment, user } : null
    }),
  )
  return joined.filter(
    (row): row is { comment: Doc<'comments'>; user: PublicUser } => row !== null,
  )
}

export const add = mutation({
args: { skillId: v.id('skills'), body: v.string() },
handler: addHandler,
Expand Down
2 changes: 1 addition & 1 deletion convex/crons.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ crons.interval(

// Recompute global stats once per day. Lowered from hourly in this change to
// cut read bandwidth — the recount is the heaviest recurring job.
crons.interval(
  'global-stats-update',
  { hours: 24 },
  internal.statsMaintenance.updateGlobalStatsInternal,
  {},
)
Expand Down
28 changes: 19 additions & 9 deletions convex/lib/leaderboards.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,17 +27,27 @@ export async function buildTrendingLeaderboard(
) {
const now = params.now ?? Date.now()
const { startDay, endDay } = getTrendingRange(now)
const rows = await ctx.db
.query('skillDailyStats')
.withIndex('by_day', (q) => q.gte('day', startDay).lte('day', endDay))
.collect()

// Query one day at a time to stay well under the 32K document limit.
// Each daily query reads ~4,500 docs instead of 32K for the full 7-day range.
// Parallelized since there are no cross-day dependencies.
const dayKeys = Array.from({ length: endDay - startDay + 1 }, (_, i) => startDay + i)
const perDayRows = await Promise.all(
dayKeys.map((day) =>
ctx.db
.query('skillDailyStats')
.withIndex('by_day', (q) => q.eq('day', day))
.collect(),
),
)
const totals = new Map<Id<'skills'>, { installs: number; downloads: number }>()
for (const row of rows) {
const current = totals.get(row.skillId) ?? { installs: 0, downloads: 0 }
current.installs += row.installs
current.downloads += row.downloads
totals.set(row.skillId, current)
for (const rows of perDayRows) {
for (const row of rows) {
const current = totals.get(row.skillId) ?? { installs: 0, downloads: 0 }
current.installs += row.installs
current.downloads += row.downloads
totals.set(row.skillId, current)
}
}

const entries = Array.from(totals, ([skillId, totalsEntry]) => ({
Expand Down
10 changes: 10 additions & 0 deletions convex/lib/skillSafety.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,13 @@ export function isSkillSuspicious(
if (skill.moderationFlags?.includes('flagged.suspicious')) return true
return isScannerSuspiciousReason(skill.moderationReason)
}

/**
 * Denormalized `isSuspicious` value for a skill document.
 *
 * Call at every mutation site that writes `moderationFlags` or
 * `moderationReason`, and persist the result so indexed queries can filter
 * on suspicion without re-deriving it from the moderation fields.
 */
export function computeIsSuspicious(
  skill: Pick<Doc<'skills'>, 'moderationFlags' | 'moderationReason'>,
): boolean {
  // Single source of truth: delegate to the live predicate so the stored
  // flag can never diverge in logic from the on-the-fly check.
  return isSkillSuspicious(skill)
}
100 changes: 100 additions & 0 deletions convex/maintenance.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import {
type TrustTier,
} from './lib/skillQuality'
import { generateSkillSummary } from './lib/skillSummary'
import { computeIsSuspicious } from './lib/skillSafety'
import { hashSkillFiles } from './lib/skills'

const DEFAULT_BATCH_SIZE = 50
Expand Down Expand Up @@ -1529,6 +1530,105 @@ export const backfillDenormalizedBadgesInternal = internalMutation({
},
})

/**
 * Backfill `latestVersionSummary` on all skills. Cursor-based paginated mutation
 * that self-schedules until done. Reads each skill's latestVersionId, extracts
 * the summary fields, and patches the skill.
 *
 * Always reconciles against the current `latestVersionId` — if the summary is
 * stale (e.g. from a tag retarget), it will be rewritten. To force a full
 * re-backfill, simply re-run the function; every row is re-evaluated.
 */
export const backfillLatestVersionSummaryInternal = internalMutation({
  args: {
    cursor: v.optional(v.string()),
    batchSize: v.optional(v.number()),
  },
  handler: async (ctx, args) => {
    const batchSize = clampInt(args.batchSize ?? 50, 10, 200)
    const { page, continueCursor, isDone } = await ctx.db
      .query('skills')
      .paginate({ cursor: args.cursor ?? null, numItems: batchSize })

    let patched = 0
    for (const skill of page) {
      // Skills with no published version have nothing to summarize.
      if (!skill.latestVersionId) continue
      const version = await ctx.db.get(skill.latestVersionId)
      if (!version) continue

      const expected = {
        version: version.version,
        createdAt: version.createdAt,
        changelog: version.changelog,
        changelogSource: version.changelogSource,
        clawdis: version.parsed?.clawdis,
      }

      // Skip only when EVERY summary field is already in sync. The previous
      // check ignored `changelogSource` and `clawdis`, so a change to those
      // fields alone would never be reconciled, contradicting the contract
      // above. `clawdis` is schemaless (v.any()), so compare it by JSON
      // serialization.
      const existing = skill.latestVersionSummary
      if (
        existing &&
        existing.version === expected.version &&
        existing.createdAt === expected.createdAt &&
        existing.changelog === expected.changelog &&
        existing.changelogSource === expected.changelogSource &&
        JSON.stringify(existing.clawdis) === JSON.stringify(expected.clawdis)
      ) {
        continue
      }

      await ctx.db.patch(skill._id, { latestVersionSummary: expected })
      patched++
    }

    // Hand the next page to the scheduler so each mutation stays small.
    if (!isDone) {
      await ctx.scheduler.runAfter(
        0,
        internal.maintenance.backfillLatestVersionSummaryInternal,
        {
          cursor: continueCursor,
          batchSize: args.batchSize,
        },
      )
    }

    return { patched, isDone, scanned: page.length }
  },
})

/**
 * Backfill the denormalized `isSuspicious` flag across the `skills` table.
 * Cursor-paginated; re-schedules itself until every row has been visited.
 * Idempotent: rows already carrying the expected value are left untouched.
 */
export const backfillIsSuspiciousInternal = internalMutation({
  args: {
    cursor: v.optional(v.string()),
    batchSize: v.optional(v.number()),
  },
  handler: async (ctx, args) => {
    const pageSize = clampInt(args.batchSize ?? 100, 10, 200)
    const result = await ctx.db
      .query('skills')
      .paginate({ cursor: args.cursor ?? null, numItems: pageSize })
    const { page, continueCursor, isDone } = result

    let patched = 0
    for (const skill of page) {
      const want = computeIsSuspicious(skill)
      if (skill.isSuspicious === want) continue
      await ctx.db.patch(skill._id, { isSuspicious: want })
      patched++
    }

    // Hand the next page to the scheduler rather than looping here, keeping
    // each mutation's transaction small.
    if (!isDone) {
      await ctx.scheduler.runAfter(0, internal.maintenance.backfillIsSuspiciousInternal, {
        cursor: continueCursor,
        batchSize: args.batchSize,
      })
    }

    return { patched, isDone, scanned: page.length }
  },
})

function clampInt(value: number, min: number, max: number) {
const rounded = Math.trunc(value)
if (!Number.isFinite(rounded)) return min
Expand Down
27 changes: 27 additions & 0 deletions convex/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,15 @@ const skills = defineTable({
}),
),
latestVersionId: v.optional(v.id('skillVersions')),
latestVersionSummary: v.optional(
v.object({
version: v.string(),
createdAt: v.number(),
changelog: v.string(),
changelogSource: v.optional(v.union(v.literal('auto'), v.literal('user'))),
clawdis: v.optional(v.any()),
}),
),
tags: v.record(v.string(), v.id('skillVersions')),
softDeletedAt: v.optional(v.number()),
badges: v.optional(
Expand Down Expand Up @@ -101,6 +110,7 @@ const skills = defineTable({
evaluatedAt: v.number(),
}),
),
isSuspicious: v.optional(v.boolean()),
moderationFlags: v.optional(v.array(v.string())),
lastReviewedAt: v.optional(v.number()),
// VT scan tracking
Expand Down Expand Up @@ -146,6 +156,23 @@ const skills = defineTable({
])
.index('by_canonical', ['canonicalSkillId'])
.index('by_fork_of', ['forkOf.skillId'])
.index('by_moderation', ['moderationStatus', 'moderationReason'])
.index('by_nonsuspicious_updated', ['softDeletedAt', 'isSuspicious', 'updatedAt'])
.index('by_nonsuspicious_created', ['softDeletedAt', 'isSuspicious', 'createdAt'])
.index('by_nonsuspicious_name', ['softDeletedAt', 'isSuspicious', 'displayName'])
.index('by_nonsuspicious_downloads', [
'softDeletedAt',
'isSuspicious',
'statsDownloads',
'updatedAt',
])
.index('by_nonsuspicious_stars', ['softDeletedAt', 'isSuspicious', 'statsStars', 'updatedAt'])
.index('by_nonsuspicious_installs', [
'softDeletedAt',
'isSuspicious',
'statsInstallsAllTime',
'updatedAt',
])

const souls = defineTable({
slug: v.string(),
Expand Down
Loading