diff --git a/docs/TODO.md b/docs/TODO.md new file mode 100644 index 0000000..023acd5 --- /dev/null +++ b/docs/TODO.md @@ -0,0 +1,90 @@ +# Codex MCP Server - TODO + +## Features from Codex CLI v0.98.0 + +These features were introduced/stabilized in Codex CLI v0.98.0 but are not yet implemented in this MCP server. + +### High Priority + +#### [ ] Steer Mode Support +- **Status**: Stable & enabled by default in Codex CLI v0.98.0 +- **Description**: Allow redirecting agents during execution without stopping them +- **CLI Flag**: `--steer` (now default) +- **Implementation Notes**: + - Add `steerMode` parameter to CodexToolSchema + - Pass `--steer` flag to codex exec commands + - Consider whether MCP needs to handle streaming input for steering +- **Reference**: [v0.98.0 Release Notes](https://github.com/openai/codex/releases/tag/rust-v0.98.0) + +### Medium Priority + +#### [ ] Collaboration Mode +- **Status**: Naming unified in v0.98.0 +- **Description**: Multi-agent parallel collaboration support +- **Implementation Notes**: + - Add `collaborationMode` parameter (enum: `none`, `collaborate`) + - Update command flags accordingly +- **Reference**: Collaboration mode naming synced across prompts, tools, and TUI + +#### [ ] Enhanced Structured Content +- **Status**: Text + image content items for dynamic tool outputs in v0.98.0 +- **Description**: Better support for dynamic tool outputs with mixed content +- **Implementation Notes**: + - Current `structuredContent` support is partial + - May need enhancement to handle text + image content items +- **Reference**: #10567 + +### Low Priority + +#### [ ] Personality Mode +- **Status**: Pragmatic restored as default in v0.98.0 +- **Description**: Control Codex's response personality +- **Options**: `pragmatic` (default), `verbose` +- **CLI Config**: `personality = "pragmatic"` or `personality = "verbose"` +- **Implementation Notes**: + - Add `personality` parameter to CodexToolSchema + - Pass via `-c personality="..."` +- 
**Reference**: #10705 + +--- + +## Implemented in v1.3.4+ + +### ✅ GPT-5.3-Codex Model +- **Status**: Implemented +- **Description**: New default model +- **Changes**: + - Updated `DEFAULT_CODEX_MODEL` constant to `'gpt-5.3-codex'` + - Updated tool definitions to reflect new default + - Single source of truth for model updates + +### ✅ Reasoning Effort: 'none' and 'xhigh' +- **Status**: Implemented (commit 448fa3c) +- **Description**: Extended reasoning effort options +- **Changes**: + - Added `'none'` and `'xhigh'` to reasoningEffort enum + - Full range: `none`, `minimal`, `low`, `medium`, `high`, `xhigh` + +--- + +## Future Considerations + +### Model Version Management +- Consider adding a `getAvailableModels()` tool to query Codex CLI for available models +- This would make the server more resilient to future model additions + +### Configuration File Support +- Codex CLI supports config files (`~/.codex/config.toml`) +- Consider whether MCP server should expose config file options + +### Streaming Support +- Codex CLI supports SSE streaming for responses +- Consider adding streaming support for long-running tasks + +--- + +## References + +- [Codex CLI Releases](https://github.com/openai/codex/releases) +- [Codex Changelog](https://developers.openai.com/codex/changelog/) +- [v0.98.0 Release](https://github.com/openai/codex/releases/tag/rust-v0.98.0) diff --git a/src/__tests__/context-building.test.ts b/src/__tests__/context-building.test.ts index 6a84177..74d81ef 100644 --- a/src/__tests__/context-building.test.ts +++ b/src/__tests__/context-building.test.ts @@ -61,7 +61,7 @@ describe('Context Building Analysis', () => { // Check what prompt was sent to Codex - should be enhanced but not conversational const call = mockedExecuteCommand.mock.calls[0]; - const sentPrompt = call?.[1]?.[4]; // After exec, --model, gpt-5.2-codex, --skip-git-repo-check, prompt + const sentPrompt = call?.[1]?.[4]; // After exec, --model, gpt-5.3-codex, --skip-git-repo-check, prompt 
expect(sentPrompt).toContain('Previous code context:'); expect(sentPrompt).toContain('Task: Make it more efficient'); expect(sentPrompt).not.toContain('Previous: What is recursion?'); // No conversational format diff --git a/src/__tests__/default-model.test.ts b/src/__tests__/default-model.test.ts index 9cec5e7..82107c8 100644 --- a/src/__tests__/default-model.test.ts +++ b/src/__tests__/default-model.test.ts @@ -40,13 +40,13 @@ describe('Default Model Configuration', () => { delete process.env.CODEX_MCP_CALLBACK_URI; }); - test('should use gpt-5.2-codex as default model when no model specified', async () => { + test('should use gpt-5.3-codex as default model when no model specified', async () => { await handler.execute({ prompt: 'Test prompt' }); expect(mockedExecuteCommand).toHaveBeenCalledWith('codex', [ 'exec', '--model', - 'gpt-5.2-codex', + 'gpt-5.3-codex', '--skip-git-repo-check', 'Test prompt', ]); @@ -55,8 +55,8 @@ describe('Default Model Configuration', () => { test('should include default model in response metadata', async () => { const result = await handler.execute({ prompt: 'Test prompt' }); - expect(result.content[0]._meta?.model).toBe('gpt-5.2-codex'); - expect(result.structuredContent?.model).toBe('gpt-5.2-codex'); + expect(result.content[0]._meta?.model).toBe('gpt-5.3-codex'); + expect(result.structuredContent?.model).toBe('gpt-5.3-codex'); expect(result._meta?.callbackUri).toBeUndefined(); }); @@ -86,7 +86,7 @@ describe('Default Model Configuration', () => { expect(mockedExecuteCommand).toHaveBeenCalledWith('codex', [ 'exec', '--model', - 'gpt-5.2-codex', + 'gpt-5.3-codex', '--skip-git-repo-check', 'Test prompt', ]); @@ -106,7 +106,7 @@ describe('Default Model Configuration', () => { 'exec', '--skip-git-repo-check', '-c', - 'model="gpt-5.2-codex"', + 'model="gpt-5.3-codex"', 'resume', 'existing-conv-id', 'Resume with default model', @@ -122,7 +122,7 @@ describe('Default Model Configuration', () => { 
expect(mockedExecuteCommand).toHaveBeenCalledWith('codex', [ 'exec', '--model', - 'gpt-5.2-codex', + 'gpt-5.3-codex', '-c', 'model_reasoning_effort="high"', '--skip-git-repo-check', diff --git a/src/__tests__/edge-cases.test.ts b/src/__tests__/edge-cases.test.ts index 082d124..2ec8f6e 100644 --- a/src/__tests__/edge-cases.test.ts +++ b/src/__tests__/edge-cases.test.ts @@ -133,7 +133,7 @@ describe('Edge Cases and Integration Issues', () => { // Should only use recent turns, not crash with too much context const call = mockedExecuteCommand.mock.calls[0]; - const prompt = call?.[1]?.[4]; // After exec, --model, gpt-5.2-codex, --skip-git-repo-check, prompt + const prompt = call?.[1]?.[4]; // After exec, --model, gpt-5.3-codex, --skip-git-repo-check, prompt expect(typeof prompt).toBe('string'); if (prompt) { expect(prompt.length).toBeLessThan(5000); // Reasonable limit diff --git a/src/__tests__/error-scenarios.test.ts b/src/__tests__/error-scenarios.test.ts index ad9626b..f8dd89b 100644 --- a/src/__tests__/error-scenarios.test.ts +++ b/src/__tests__/error-scenarios.test.ts @@ -158,7 +158,7 @@ describe('Error Handling Scenarios', () => { expect(mockedExecuteCommand).toHaveBeenCalledWith('codex', [ 'exec', '--model', - 'gpt-5.2-codex', + 'gpt-5.3-codex', '--skip-git-repo-check', longPrompt, ]); diff --git a/src/__tests__/index.test.ts b/src/__tests__/index.test.ts index 102a3bf..4d3e096 100644 --- a/src/__tests__/index.test.ts +++ b/src/__tests__/index.test.ts @@ -148,7 +148,7 @@ describe('Codex MCP Server', () => { const result = { content: [{ type: 'text', text: 'ok', _meta: { threadId: 'th_123' } }], structuredContent: { threadId: 'th_123' }, - _meta: { model: 'gpt-5.2-codex' }, + _meta: { model: 'gpt-5.3-codex' }, }; const parsed = CallToolResultSchema.safeParse(result); diff --git a/src/__tests__/model-selection.test.ts b/src/__tests__/model-selection.test.ts index 7e905d2..5d69662 100644 --- a/src/__tests__/model-selection.test.ts +++ 
b/src/__tests__/model-selection.test.ts @@ -63,7 +63,7 @@ describe('Model Selection and Reasoning Effort', () => { expect(mockedExecuteCommand).toHaveBeenCalledWith('codex', [ 'exec', '--model', - 'gpt-5.2-codex', + 'gpt-5.3-codex', '-c', 'model_reasoning_effort="high"', '--skip-git-repo-check', @@ -133,7 +133,7 @@ describe('Model Selection and Reasoning Effort', () => { expect(mockedExecuteCommand).toHaveBeenCalledWith('codex', [ 'exec', '--model', - 'gpt-5.2-codex', + 'gpt-5.3-codex', '-c', 'model_reasoning_effort="minimal"', '--skip-git-repo-check', @@ -150,7 +150,7 @@ describe('Model Selection and Reasoning Effort', () => { expect(mockedExecuteCommand).toHaveBeenCalledWith('codex', [ 'exec', '--model', - 'gpt-5.2-codex', + 'gpt-5.3-codex', '-c', 'model_reasoning_effort="none"', '--skip-git-repo-check', @@ -167,7 +167,7 @@ describe('Model Selection and Reasoning Effort', () => { expect(mockedExecuteCommand).toHaveBeenCalledWith('codex', [ 'exec', '--model', - 'gpt-5.2-codex', + 'gpt-5.3-codex', '-c', 'model_reasoning_effort="xhigh"', '--skip-git-repo-check', diff --git a/src/__tests__/resume-functionality.test.ts b/src/__tests__/resume-functionality.test.ts index fbb897c..2c26ce0 100644 --- a/src/__tests__/resume-functionality.test.ts +++ b/src/__tests__/resume-functionality.test.ts @@ -51,7 +51,7 @@ describe('Codex Resume Functionality', () => { expect(mockedExecuteCommand).toHaveBeenCalledWith('codex', [ 'exec', '--model', - 'gpt-5.2-codex', + 'gpt-5.3-codex', '--skip-git-repo-check', 'First message', ]); @@ -156,7 +156,7 @@ describe('Codex Resume Functionality', () => { 'exec', '--skip-git-repo-check', '-c', - 'model="gpt-5.2-codex"', + 'model="gpt-5.3-codex"', 'resume', 'existing-codex-session-id', 'Continue the task', @@ -182,7 +182,7 @@ describe('Codex Resume Functionality', () => { expect(mockedExecuteCommand).toHaveBeenCalledWith('codex', [ 'exec', '--model', - 'gpt-5.2-codex', + 'gpt-5.3-codex', '--skip-git-repo-check', 'Reset and start new', ]); @@ 
-213,7 +213,7 @@ describe('Codex Resume Functionality', () => { // Should build enhanced prompt since no codex session ID const call = mockedExecuteCommand.mock.calls[0]; - const sentPrompt = call?.[1]?.[4]; // After exec, --model, gpt-5.2-codex, --skip-git-repo-check, prompt + const sentPrompt = call?.[1]?.[4]; // After exec, --model, gpt-5.3-codex, --skip-git-repo-check, prompt expect(sentPrompt).toContain('Context:'); expect(sentPrompt).toContain('Task: Follow up question'); }); diff --git a/src/tools/definitions.ts b/src/tools/definitions.ts index cf5da97..947fb73 100644 --- a/src/tools/definitions.ts +++ b/src/tools/definitions.ts @@ -1,4 +1,4 @@ -import { TOOLS, type ToolDefinition } from '../types.js'; +import { TOOLS, getModelDescription, type ToolDefinition } from '../types.js'; export const toolDefinitions: ToolDefinition[] = [ { @@ -23,8 +23,7 @@ export const toolDefinitions: ToolDefinition[] = [ }, model: { type: 'string', - description: - 'Specify which model to use (defaults to gpt-5.2-codex). 
Options: gpt-5.2-codex, gpt-5.1-codex, gpt-5.1-codex-max, gpt-5-codex, gpt-4o, gpt-4, o3, o4-mini', + description: getModelDescription('codex'), }, reasoningEffort: { type: 'string', @@ -102,8 +101,7 @@ export const toolDefinitions: ToolDefinition[] = [ }, model: { type: 'string', - description: - 'Specify which model to use for the review (defaults to gpt-5.2-codex)', + description: getModelDescription('review'), }, workingDirectory: { type: 'string', diff --git a/src/tools/handlers.ts b/src/tools/handlers.ts index 4b94366..a4e7337 100644 --- a/src/tools/handlers.ts +++ b/src/tools/handlers.ts @@ -1,5 +1,7 @@ import { TOOLS, + DEFAULT_CODEX_MODEL, + CODEX_DEFAULT_MODEL_ENV_VAR, type ToolResult, type ToolHandlerContext, type CodexToolArgs, @@ -83,7 +85,9 @@ export class CodexToolHandler { // Build command arguments with v0.75.0+ features const selectedModel = - model || process.env.CODEX_DEFAULT_MODEL || 'gpt-5.2-codex'; // Default to gpt-5.2-codex + model || + process.env[CODEX_DEFAULT_MODEL_ENV_VAR] || + DEFAULT_CODEX_MODEL; const effectiveCallbackUri = callbackUri || process.env.CODEX_MCP_CALLBACK_URI; @@ -391,7 +395,9 @@ export class ReviewToolHandler { // Add model parameter via config const selectedModel = - model || process.env.CODEX_DEFAULT_MODEL || 'gpt-5.2-codex'; + model || + process.env[CODEX_DEFAULT_MODEL_ENV_VAR] || + DEFAULT_CODEX_MODEL; cmdArgs.push('-c', `model="${selectedModel}"`); cmdArgs.push('review'); diff --git a/src/types.ts b/src/types.ts index a00b01e..8a39924 100644 --- a/src/types.ts +++ b/src/types.ts @@ -11,6 +11,32 @@ export const TOOLS = { export type ToolName = typeof TOOLS[keyof typeof TOOLS]; +// Codex model constants +export const DEFAULT_CODEX_MODEL = 'gpt-5.3-codex' as const; +export const CODEX_DEFAULT_MODEL_ENV_VAR = 'CODEX_DEFAULT_MODEL' as const; + +// Available model options (for documentation/reference) +export const AVAILABLE_CODEX_MODELS = [ + 'gpt-5.3-codex', + 'gpt-5.2-codex', + 'gpt-5.1-codex', + 'gpt-5.1-codex-max', 
+ 'gpt-5-codex', + 'gpt-4o', + 'gpt-4', + 'o3', + 'o4-mini', +] as const; + +// Helper function to generate model description +export const getModelDescription = (toolType: 'codex' | 'review') => { + const modelList = AVAILABLE_CODEX_MODELS.join(', '); + if (toolType === 'codex') { + return `Specify which model to use (defaults to ${DEFAULT_CODEX_MODEL}). Options: ${modelList}`; + } + return `Specify which model to use for the review (defaults to ${DEFAULT_CODEX_MODEL})`; +}; + // Tool annotations for MCP 2025-11-25 spec export interface ToolAnnotations { title?: string;