Skip to content

Commit 420cb81

Browse files
authored
feat(model): update default model to codex 5.3 (#110)
* feat(model): update default model to codex 5.3

  Signed-off-by: Tommy Nguyen <[email protected]>

* fix(codex): fix linting

  Signed-off-by: Tommy Nguyen <[email protected]>

---------

Signed-off-by: Tommy Nguyen <[email protected]>
1 parent 01070fd commit 420cb81

11 files changed

Lines changed: 146 additions & 26 deletions

docs/TODO.md

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
# Codex MCP Server - TODO
2+
3+
## Features from Codex CLI v0.98.0
4+
5+
These features were introduced/stabilized in Codex CLI v0.98.0 but are not yet implemented in this MCP server.
6+
7+
### High Priority
8+
9+
#### [ ] Steer Mode Support
10+
- **Status**: Stable & enabled by default in Codex CLI v0.98.0
11+
- **Description**: Allow redirecting agents during execution without stopping them
12+
- **CLI Flag**: `--steer` (now default)
13+
- **Implementation Notes**:
14+
- Add `steerMode` parameter to CodexToolSchema
15+
- Pass `--steer` flag to codex exec commands
16+
- Consider whether the MCP server needs to handle streaming input for steering
17+
- **Reference**: [v0.98.0 Release Notes](https://github.com/openai/codex/releases/tag/rust-v0.98.0)
18+
19+
### Medium Priority
20+
21+
#### [ ] Collaboration Mode
22+
- **Status**: Naming unified in v0.98.0
23+
- **Description**: Multi-agent parallel collaboration support
24+
- **Implementation Notes**:
25+
- Add `collaborationMode` parameter (enum: `none`, `collaborate`)
26+
- Update command flags accordingly
27+
- **Reference**: Collaboration mode naming synced across prompts, tools, and TUI
28+
29+
#### [ ] Enhanced Structured Content
30+
- **Status**: Text + image content items for dynamic tool outputs in v0.98.0
31+
- **Description**: Better support for dynamic tool outputs with mixed content
32+
- **Implementation Notes**:
33+
- Current `structuredContent` support is partial
34+
- May need enhancement to handle text + image content items
35+
- **Reference**: #10567
36+
37+
### Low Priority
38+
39+
#### [ ] Personality Mode
40+
- **Status**: Pragmatic restored as default in v0.98.0
41+
- **Description**: Control Codex's response personality
42+
- **Options**: `pragmatic` (default), `verbose`
43+
- **CLI Config**: `personality = "pragmatic"` or `personality = "verbose"`
44+
- **Implementation Notes**:
45+
- Add `personality` parameter to CodexToolSchema
46+
- Pass via `-c personality="..."`
47+
- **Reference**: #10705
48+
49+
---
50+
51+
## Implemented in v1.3.4+
52+
53+
### ✅ GPT-5.3-Codex Model
54+
- **Status**: Implemented
55+
- **Description**: New default model
56+
- **Changes**:
57+
- Updated `DEFAULT_CODEX_MODEL` constant to `'gpt-5.3-codex'`
58+
- Updated tool definitions to reflect new default
59+
- Single source of truth for model updates
60+
61+
### ✅ Reasoning Effort: 'none' and 'xhigh'
62+
- **Status**: Implemented (commit 448fa3c)
63+
- **Description**: Extended reasoning effort options
64+
- **Changes**:
65+
- Added `'none'` and `'xhigh'` to reasoningEffort enum
66+
- Full range: `none`, `minimal`, `low`, `medium`, `high`, `xhigh`
67+
68+
---
69+
70+
## Future Considerations
71+
72+
### Model Version Management
73+
- Consider adding a `getAvailableModels()` tool to query Codex CLI for available models
74+
- This would make the server more resilient to future model additions
75+
76+
### Configuration File Support
77+
- Codex CLI supports config files (`.codexrc.toml`)
78+
- Consider whether the MCP server should expose config file options
79+
80+
### Streaming Support
81+
- Codex CLI supports SSE streaming for responses
82+
- Consider adding streaming support for long-running tasks
83+
84+
---
85+
86+
## References
87+
88+
- [Codex CLI Releases](https://github.com/openai/codex/releases)
89+
- [Codex Changelog](https://developers.openai.com/codex/changelog/)
90+
- [v0.98.0 Release](https://github.com/openai/codex/releases/tag/rust-v0.98.0)

src/__tests__/context-building.test.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ describe('Context Building Analysis', () => {
6161

6262
// Check what prompt was sent to Codex - should be enhanced but not conversational
6363
const call = mockedExecuteCommand.mock.calls[0];
64-
const sentPrompt = call?.[1]?.[4]; // After exec, --model, gpt-5.2-codex, --skip-git-repo-check, prompt
64+
const sentPrompt = call?.[1]?.[4]; // After exec, --model, gpt-5.3-codex, --skip-git-repo-check, prompt
6565
expect(sentPrompt).toContain('Previous code context:');
6666
expect(sentPrompt).toContain('Task: Make it more efficient');
6767
expect(sentPrompt).not.toContain('Previous: What is recursion?'); // No conversational format

src/__tests__/default-model.test.ts

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -40,13 +40,13 @@ describe('Default Model Configuration', () => {
4040
delete process.env.CODEX_MCP_CALLBACK_URI;
4141
});
4242

43-
test('should use gpt-5.2-codex as default model when no model specified', async () => {
43+
test('should use gpt-5.3-codex as default model when no model specified', async () => {
4444
await handler.execute({ prompt: 'Test prompt' });
4545

4646
expect(mockedExecuteCommand).toHaveBeenCalledWith('codex', [
4747
'exec',
4848
'--model',
49-
'gpt-5.2-codex',
49+
'gpt-5.3-codex',
5050
'--skip-git-repo-check',
5151
'Test prompt',
5252
]);
@@ -55,8 +55,8 @@ describe('Default Model Configuration', () => {
5555
test('should include default model in response metadata', async () => {
5656
const result = await handler.execute({ prompt: 'Test prompt' });
5757

58-
expect(result.content[0]._meta?.model).toBe('gpt-5.2-codex');
59-
expect(result.structuredContent?.model).toBe('gpt-5.2-codex');
58+
expect(result.content[0]._meta?.model).toBe('gpt-5.3-codex');
59+
expect(result.structuredContent?.model).toBe('gpt-5.3-codex');
6060
expect(result._meta?.callbackUri).toBeUndefined();
6161
});
6262

@@ -86,7 +86,7 @@ describe('Default Model Configuration', () => {
8686
expect(mockedExecuteCommand).toHaveBeenCalledWith('codex', [
8787
'exec',
8888
'--model',
89-
'gpt-5.2-codex',
89+
'gpt-5.3-codex',
9090
'--skip-git-repo-check',
9191
'Test prompt',
9292
]);
@@ -106,7 +106,7 @@ describe('Default Model Configuration', () => {
106106
'exec',
107107
'--skip-git-repo-check',
108108
'-c',
109-
'model="gpt-5.2-codex"',
109+
'model="gpt-5.3-codex"',
110110
'resume',
111111
'existing-conv-id',
112112
'Resume with default model',
@@ -122,7 +122,7 @@ describe('Default Model Configuration', () => {
122122
expect(mockedExecuteCommand).toHaveBeenCalledWith('codex', [
123123
'exec',
124124
'--model',
125-
'gpt-5.2-codex',
125+
'gpt-5.3-codex',
126126
'-c',
127127
'model_reasoning_effort="high"',
128128
'--skip-git-repo-check',

src/__tests__/edge-cases.test.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ describe('Edge Cases and Integration Issues', () => {
133133

134134
// Should only use recent turns, not crash with too much context
135135
const call = mockedExecuteCommand.mock.calls[0];
136-
const prompt = call?.[1]?.[4]; // After exec, --model, gpt-5.2-codex, --skip-git-repo-check, prompt
136+
const prompt = call?.[1]?.[4]; // After exec, --model, gpt-5.3-codex, --skip-git-repo-check, prompt
137137
expect(typeof prompt).toBe('string');
138138
if (prompt) {
139139
expect(prompt.length).toBeLessThan(5000); // Reasonable limit

src/__tests__/error-scenarios.test.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ describe('Error Handling Scenarios', () => {
158158
expect(mockedExecuteCommand).toHaveBeenCalledWith('codex', [
159159
'exec',
160160
'--model',
161-
'gpt-5.2-codex',
161+
'gpt-5.3-codex',
162162
'--skip-git-repo-check',
163163
longPrompt,
164164
]);

src/__tests__/index.test.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ describe('Codex MCP Server', () => {
148148
const result = {
149149
content: [{ type: 'text', text: 'ok', _meta: { threadId: 'th_123' } }],
150150
structuredContent: { threadId: 'th_123' },
151-
_meta: { model: 'gpt-5.2-codex' },
151+
_meta: { model: 'gpt-5.3-codex' },
152152
};
153153

154154
const parsed = CallToolResultSchema.safeParse(result);

src/__tests__/model-selection.test.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ describe('Model Selection and Reasoning Effort', () => {
6363
expect(mockedExecuteCommand).toHaveBeenCalledWith('codex', [
6464
'exec',
6565
'--model',
66-
'gpt-5.2-codex',
66+
'gpt-5.3-codex',
6767
'-c',
6868
'model_reasoning_effort="high"',
6969
'--skip-git-repo-check',
@@ -133,7 +133,7 @@ describe('Model Selection and Reasoning Effort', () => {
133133
expect(mockedExecuteCommand).toHaveBeenCalledWith('codex', [
134134
'exec',
135135
'--model',
136-
'gpt-5.2-codex',
136+
'gpt-5.3-codex',
137137
'-c',
138138
'model_reasoning_effort="minimal"',
139139
'--skip-git-repo-check',
@@ -150,7 +150,7 @@ describe('Model Selection and Reasoning Effort', () => {
150150
expect(mockedExecuteCommand).toHaveBeenCalledWith('codex', [
151151
'exec',
152152
'--model',
153-
'gpt-5.2-codex',
153+
'gpt-5.3-codex',
154154
'-c',
155155
'model_reasoning_effort="none"',
156156
'--skip-git-repo-check',
@@ -167,7 +167,7 @@ describe('Model Selection and Reasoning Effort', () => {
167167
expect(mockedExecuteCommand).toHaveBeenCalledWith('codex', [
168168
'exec',
169169
'--model',
170-
'gpt-5.2-codex',
170+
'gpt-5.3-codex',
171171
'-c',
172172
'model_reasoning_effort="xhigh"',
173173
'--skip-git-repo-check',

src/__tests__/resume-functionality.test.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ describe('Codex Resume Functionality', () => {
5151
expect(mockedExecuteCommand).toHaveBeenCalledWith('codex', [
5252
'exec',
5353
'--model',
54-
'gpt-5.2-codex',
54+
'gpt-5.3-codex',
5555
'--skip-git-repo-check',
5656
'First message',
5757
]);
@@ -156,7 +156,7 @@ describe('Codex Resume Functionality', () => {
156156
'exec',
157157
'--skip-git-repo-check',
158158
'-c',
159-
'model="gpt-5.2-codex"',
159+
'model="gpt-5.3-codex"',
160160
'resume',
161161
'existing-codex-session-id',
162162
'Continue the task',
@@ -182,7 +182,7 @@ describe('Codex Resume Functionality', () => {
182182
expect(mockedExecuteCommand).toHaveBeenCalledWith('codex', [
183183
'exec',
184184
'--model',
185-
'gpt-5.2-codex',
185+
'gpt-5.3-codex',
186186
'--skip-git-repo-check',
187187
'Reset and start new',
188188
]);
@@ -213,7 +213,7 @@ describe('Codex Resume Functionality', () => {
213213

214214
// Should build enhanced prompt since no codex session ID
215215
const call = mockedExecuteCommand.mock.calls[0];
216-
const sentPrompt = call?.[1]?.[4]; // After exec, --model, gpt-5.2-codex, --skip-git-repo-check, prompt
216+
const sentPrompt = call?.[1]?.[4]; // After exec, --model, gpt-5.3-codex, --skip-git-repo-check, prompt
217217
expect(sentPrompt).toContain('Context:');
218218
expect(sentPrompt).toContain('Task: Follow up question');
219219
});

src/tools/definitions.ts

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { TOOLS, type ToolDefinition } from '../types.js';
1+
import { TOOLS, getModelDescription, type ToolDefinition } from '../types.js';
22

33
export const toolDefinitions: ToolDefinition[] = [
44
{
@@ -23,8 +23,7 @@ export const toolDefinitions: ToolDefinition[] = [
2323
},
2424
model: {
2525
type: 'string',
26-
description:
27-
'Specify which model to use (defaults to gpt-5.2-codex). Options: gpt-5.2-codex, gpt-5.1-codex, gpt-5.1-codex-max, gpt-5-codex, gpt-4o, gpt-4, o3, o4-mini',
26+
description: getModelDescription('codex'),
2827
},
2928
reasoningEffort: {
3029
type: 'string',
@@ -102,8 +101,7 @@ export const toolDefinitions: ToolDefinition[] = [
102101
},
103102
model: {
104103
type: 'string',
105-
description:
106-
'Specify which model to use for the review (defaults to gpt-5.2-codex)',
104+
description: getModelDescription('review'),
107105
},
108106
workingDirectory: {
109107
type: 'string',

src/tools/handlers.ts

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
import {
22
TOOLS,
3+
DEFAULT_CODEX_MODEL,
4+
CODEX_DEFAULT_MODEL_ENV_VAR,
35
type ToolResult,
46
type ToolHandlerContext,
57
type CodexToolArgs,
@@ -83,7 +85,9 @@ export class CodexToolHandler {
8385

8486
// Build command arguments with v0.75.0+ features
8587
const selectedModel =
86-
model || process.env.CODEX_DEFAULT_MODEL || 'gpt-5.2-codex'; // Default to gpt-5.2-codex
88+
model ||
89+
process.env[CODEX_DEFAULT_MODEL_ENV_VAR] ||
90+
DEFAULT_CODEX_MODEL;
8791

8892
const effectiveCallbackUri =
8993
callbackUri || process.env.CODEX_MCP_CALLBACK_URI;
@@ -391,7 +395,9 @@ export class ReviewToolHandler {
391395

392396
// Add model parameter via config
393397
const selectedModel =
394-
model || process.env.CODEX_DEFAULT_MODEL || 'gpt-5.2-codex';
398+
model ||
399+
process.env[CODEX_DEFAULT_MODEL_ENV_VAR] ||
400+
DEFAULT_CODEX_MODEL;
395401
cmdArgs.push('-c', `model="${selectedModel}"`);
396402

397403
cmdArgs.push('review');

0 commit comments

Comments
 (0)