MotiaDev · Bot-Rakshit · Dec 5, 2025 · Dec 5, 2025 · Dec 5, 2025 · Dec 5, 2025
diff --git a/.gitignore b/.gitignore
@@ -11,4 +11,5 @@ dist
 .env
 .env.production
 api/lib/*
-deploy.sh
+deploy.sh
+MOTIA_DOCS.md
diff --git a/api/motia-workbench.json b/api/motia-workbench.json
@@ -20,6 +20,22 @@
   {
     "id": "chess",
     "config": {
+      "steps/chess/12-play-vs-ai.step.ts": {
+        "x": 350,
+        "y": 1056
+      },
+      "steps/chess/11-get-game-history-detail.step.ts": {
+        "x": 0,
+        "y": 0
+      },
+      "steps/chess/10b-export-game-history.step.ts": {
+        "x": 6,
+        "y": 224
+      },
+      "steps/chess/10-get-game-history.step.ts": {
+        "x": 26,
+        "y": 428
+      },
       "steps/chess/09-purge-stuck-games.step.ts": {
         "x": -237,
         "y": 580
@@ -50,17 +66,17 @@
         "y": 112
       },
       "steps/chess/02-get-game.step.ts": {
-        "x": 1045,
-        "y": 114
+        "x": 1019,
+        "y": 92
       },
       "steps/chess/01-create-game.step.ts": {
         "x": 432,
         "y": 111,
         "targetHandlePosition": "left"
       },
       "steps/chess/00-available-models-api.step.ts": {
-        "x": -209,
-        "y": 86,
+        "x": -460,
+        "y": 111,
         "sourceHandlePosition": "right"
       },
       "steps/chess/access/request-access.step.ts": {
@@ -76,5 +92,62 @@
         "y": 680
       }
     }
+  },
+  {
+    "id": "benchmark",
+    "config": {
+      "steps/benchmark/11-stockfish-leaderboard.step.ts": {
+        "x": 0,
+        "y": 2444
+      },
+      "steps/benchmark/10-run-stockfish-benchmark.step.ts": {
+        "x": 0,
+        "y": 2220
+      },
+      "steps/benchmark/09-run-all-benchmarks.step.ts": {
+        "x": -282,
+        "y": 337
+      },
+      "steps/benchmark/08-get-puzzle-sets.step.ts": {
+        "x": 435,
+        "y": 30
+      },
+      "steps/benchmark/07-get-puzzle-leaderboard.step.ts": {
+        "x": 3,
+        "y": 204
+      },
+      "steps/benchmark/06b-run-all-puzzle-benchmarks.step.ts": {
+        "x": 0,
+        "y": 1120
+      },
+      "steps/benchmark/06-run-puzzle-benchmark.step.ts": {
+        "x": 465,
+        "y": 360
+      },
+      "steps/benchmark/05-fetch-puzzle-set.step.ts": {
+        "x": 0,
+        "y": 612
+      },
+      "steps/benchmark/04-get-benchmark-leaderboard.step.ts": {
+        "x": 0,
+        "y": 816
+      },
+      "steps/benchmark/03-get-benchmark-run-detail.step.ts": {
+        "x": 0,
+        "y": 1060
+      },
+      "steps/benchmark/02-get-benchmark-runs.step.ts": {
+        "x": 0,
+        "y": 1304
+      },
+      "steps/benchmark/01-run-legal-move-benchmark.step.ts": {
+        "x": 0,
+        "y": 1528
+      },
+      "steps/benchmark/00-generate-position-set.step.ts": {
+        "x": 0,
+        "y": 1752
+      }
+    }
   }
 ]
diff --git a/api/services/ai/claude.ts b/api/services/ai/claude.ts
@@ -2,19 +2,23 @@ import { streamObject } from 'ai'
 import { createAnthropic } from '@ai-sdk/anthropic'
 import { AiPlayerPromptSchema } from '@chessarena/types/ai-models'
 import { models } from './models'
+import { getMaxReasoningProviderOptions } from './provider-options'
 import { Handler } from './types'
 
 export const claude: Handler = async ({ prompt, logger, model, onThoughtUpdate }) => {
   const anthropic = createAnthropic({
     apiKey: process.env.ANTHROPIC_API_KEY,
   })
 
+  const modelId = model ?? models.claude
   const { partialObjectStream, object } = streamObject({
-    model: anthropic(model ?? models.claude),
+    model: anthropic(modelId),
     prompt,
     schema: AiPlayerPromptSchema,
+    mode: 'json',
     maxRetries: 0,
     abortSignal: AbortSignal.timeout(180000),
+    providerOptions: getMaxReasoningProviderOptions('claude', modelId),
   })
 
   for await (const partialObject of partialObjectStream) {

diff --git a/api/services/ai/gemini.ts b/api/services/ai/gemini.ts
@@ -2,19 +2,22 @@ import { streamObject } from 'ai'
 import { createGoogleGenerativeAI } from '@ai-sdk/google'
 import { AiPlayerPromptSchema } from '@chessarena/types/ai-models'
 import { models } from './models'
+import { getMaxReasoningProviderOptions } from './provider-options'
 import { Handler } from './types'
 
 export const gemini: Handler = async ({ prompt, logger, model, onThoughtUpdate }) => {
   const googleAI = createGoogleGenerativeAI({
     apiKey: process.env.GEMINI_API_KEY,
   })
 
+  const modelId = model ?? models.gemini
   const { partialObjectStream, object } = streamObject({
-    model: googleAI(model ?? models.gemini),
+    model: googleAI(modelId),
     prompt,
     schema: AiPlayerPromptSchema,
     maxRetries: 0,
     abortSignal: AbortSignal.timeout(180000),
+    providerOptions: getMaxReasoningProviderOptions('gemini', modelId),
   })
 
   for await (const partialObject of partialObjectStream) {

diff --git a/api/services/ai/grok.ts b/api/services/ai/grok.ts
@@ -2,19 +2,23 @@ import { streamObject } from 'ai'
 import { createXai } from '@ai-sdk/xai'
 import { AiPlayerPromptSchema } from '@chessarena/types/ai-models'
 import { models } from './models'
+import { getMaxReasoningProviderOptions } from './provider-options'
 import { Handler } from './types'
 
 export const grok: Handler = async ({ prompt, logger, model, onThoughtUpdate }) => {
   const xai = createXai({
     apiKey: process.env.XAI_API_KEY,
   })
 
+  const modelId = model ?? models.grok
   const { partialObjectStream, object } = streamObject({
-    model: xai(model ?? models.grok),
+    model: xai(modelId),
     prompt,
     schema: AiPlayerPromptSchema,
+    mode: 'json',
     maxRetries: 0,
     abortSignal: AbortSignal.timeout(180000),
+    providerOptions: getMaxReasoningProviderOptions('grok', modelId),
   })
 
   for await (const partialObject of partialObjectStream) {

diff --git a/api/services/ai/models.ts b/api/services/ai/models.ts
@@ -1,47 +1,84 @@
-import { AiModels, AiProviderDefaultModel } from '@chessarena/types/ai-models'
+import { AiModels, AiModelProvider, AiProviderDefaultModel } from '@chessarena/types/ai-models'
 
 // NOTE: these are the models used for AI vs AI games, it is also used for backwards compatibility for existing games that don't have a model assigned to a player
+// IMPORTANT: These must match model names in supportedModelsByProvider below!
 export const models: AiProviderDefaultModel = {
-  openai: 'gpt-5-2025-08-07',
+  openai: 'gpt-5.2',
   gemini: 'gemini-2.5-flash',
-  claude: 'claude-sonnet-4-5-20250929',
-  grok: 'grok-4-fast',
+  claude: 'claude-sonnet-4-5',
+  grok: 'grok-4-fast-non-reasoning',
 }
 
-// NOTE: these are all the models supported by provider that users can pick in order to play human vs AI games
+/**
+ * ============================================
+ * BENCHMARK MODELS - Add new models here!
+ * ============================================
+ *
+ * To add a new model for benchmarking:
+ * 1. Add it to the appropriate provider array below
+ * 2. Restart the dev server
+ * 3. Run the benchmark: POST /benchmark/legal-moves/run-all
+ *
+ * To run benchmark for a single model:
+ * POST /benchmark/legal-moves/run { "provider": "claude", "model": "claude-3-5-haiku-latest" }
+ *
+ * Provider documentation:
+ * - OpenAI: https://platform.openai.com/docs/models
+ * - Gemini: https://ai.google.dev/gemini-api/docs/models
+ * - Claude: https://docs.anthropic.com/en/docs/about-claude/models/overview
+ * - Grok: https://docs.x.ai/docs/models
+ */
 export const supportedModelsByProvider: AiModels = {
+  // From AI SDK docs: https://sdk.vercel.ai/providers/ai-sdk-providers/openai
   openai: [
-    // https://platform.openai.com/docs/models
-    'gpt-5-2025-08-07',
-    'gpt-5-mini-2025-08-07',
-    'gpt-5-nano-2025-08-07',
-    'gpt-4.1-nano-2025-04-14',
-    'gpt-4.1-mini-2025-04-14',
-    'gpt-4o-mini-2024-07-18',
-    'o4-mini-2025-04-16',
+    'gpt-5.2', // Latest
+    'gpt-5.1', // Previous flagship
+    'gpt-5', // GPT-5
+    'gpt-5-mini', // Fast
+    'gpt-4.1', // GPT-4.1
+    'gpt-4.1-mini', // Fast GPT-4.1
+    'gpt-4o', // GPT-4o
+    'gpt-4o-mini', // Fast GPT-4o
   ],
+  // From AI SDK docs: https://sdk.vercel.ai/providers/ai-sdk-providers/google-generative-ai
   gemini: [
-    // https://ai.google.dev/gemini-api/docs/models
-    'gemini-2.5-flash',
-    'gemini-2.5-flash-lite',
-    'gemini-2.0-flash-001',
-    'gemini-2.0-flash-lite-001',
+    'gemini-3-pro-preview', // Latest preview
+    'gemini-2.5-pro', // Latest pro
+    'gemini-2.5-flash', // Fast
+    'gemini-2.5-flash-lite', // Ultra fast
+    'gemini-2.0-flash', // Stable flash
   ],
+  // From AI SDK docs: https://sdk.vercel.ai/providers/ai-sdk-providers/anthropic
   claude: [
-    // https://docs.anthropic.com/en/docs/about-claude/models/overview
-    'claude-opus-4-1-20250805',
-    'claude-opus-4-20250514',
-    'claude-sonnet-4-5-20250929',
-    'claude-sonnet-4-20250514',
-    'claude-3-7-sonnet-20250219',
-    'claude-haiku-4-5-20251001',
-    'claude-3-5-haiku-20241022',
-  ],
-  grok: [
-    // https://docs.x.ai/docs/models
-    'grok-4-fast',
-    'grok-4-fast-non-reasoning',
-    'grok-3-mini',
-    'grok-3',
+    'claude-opus-4-5', // Latest opus (no dot!)
+    'claude-sonnet-4-5', // Latest sonnet (no dot!)
+    'claude-haiku-4-5', // Latest haiku (no dot!)
+    'claude-opus-4-0', // Opus 4.0
+    'claude-sonnet-4-0', // Sonnet 4.0
+    'claude-3-7-sonnet-latest', // Claude 3.7
+    'claude-3-5-haiku-latest', // Claude 3.5 Haiku
   ],
+  // From AI SDK docs: https://sdk.vercel.ai/providers/ai-sdk-providers/xai
+  grok: ['grok-4-fast-non-reasoning', 'grok-4-fast-reasoning', 'grok-3-fast'],
+}
+
+/**
+ * Flattens supportedModelsByProvider into a list of { provider, model } pairs,
+ * one entry per model id, in provider iteration order. Used by benchmarks.
+ */
+export const getAllModels = (): { provider: AiModelProvider; model: string }[] => {
+  const allModels: { provider: AiModelProvider; model: string }[] = []
+  for (const [provider, models] of Object.entries(supportedModelsByProvider)) { // NOTE: local `models` shadows the module-level `models` export within this loop
+    for (const model of models) {
+      allModels.push({ provider: provider as AiModelProvider, model }) // cast needed: Object.entries types its keys as plain string
+    }
+  }
+  return allModels
+}
+
+/**
+ * Returns the model ids listed for `provider` in supportedModelsByProvider.
+ */
+export const getModelsForProvider = (provider: AiModelProvider): string[] => {
+  return supportedModelsByProvider[provider] || [] // falls back to an empty array when the lookup is falsy
 }
diff --git a/api/services/ai/openai.ts b/api/services/ai/openai.ts
@@ -2,19 +2,22 @@ import { AiPlayerPromptSchema } from '@chessarena/types/ai-models'
 import { createOpenAI } from '@ai-sdk/openai'
 import { streamObject } from 'ai'
 import { models } from './models'
+import { getMaxReasoningProviderOptions } from './provider-options'
 import { Handler } from './types'
 
 export const openai: Handler = async ({ model, logger, prompt, onThoughtUpdate }) => {
   const openai = createOpenAI({
     apiKey: process.env.OPENAI_API_KEY,
   })
 
+  const modelId = model ?? models.openai
   const { partialObjectStream, object } = streamObject({
-    model: openai(model ?? models.openai),
+    model: openai(modelId),
     prompt,
     schema: AiPlayerPromptSchema,
     maxRetries: 0,
     abortSignal: AbortSignal.timeout(180000),
+    providerOptions: getMaxReasoningProviderOptions('openai', modelId),
   })
 
   for await (const partialObject of partialObjectStream) {