Skip to content

Commit 420cb81

Browse files
authored
feat(model): update default model to codex 5.3 (#110)
* feat(model): update default model to codex 5.3

  Signed-off-by: Tommy Nguyen <[email protected]>

* fix(codex): fix linting

  Signed-off-by: Tommy Nguyen <[email protected]>

---------

Signed-off-by: Tommy Nguyen <[email protected]>
1 parent 01070fd commit 420cb81

11 files changed

Lines changed: 146 additions & 26 deletions

docs/TODO.md

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
# Codex MCP Server - TODO
2+
3+
## Features from Codex CLI v0.98.0
4+
5+
These features were introduced/stabilized in Codex CLI v0.98.0 but are not yet implemented in this MCP server.
6+
7+
### High Priority
8+
9+
#### [ ] Steer Mode Support
10+
- **Status**: Stable & enabled by default in Codex CLI v0.98.0
11+
- **Description**: Allow redirecting agents during execution without stopping them
12+
- **CLI Flag**: `--steer` (now default)
13+
- **Implementation Notes**:
14+
- Add `steerMode` parameter to CodexToolSchema
15+
- Pass `--steer` flag to codex exec commands
16+
- Consider whether the MCP server needs to handle streaming input for steering
17+
- **Reference**: [v0.98.0 Release Notes](https://github.com/openai/codex/releases/tag/rust-v0.98.0)
18+
19+
### Medium Priority
20+
21+
#### [ ] Collaboration Mode
22+
- **Status**: Naming unified in v0.98.0
23+
- **Description**: Multi-agent parallel collaboration support
24+
- **Implementation Notes**:
25+
- Add `collaborationMode` parameter (enum: `none`, `collaborate`)
26+
- Update command flags accordingly
27+
- **Reference**: Collaboration mode naming synced across prompts, tools, and TUI
28+
29+
#### [ ] Enhanced Structured Content
30+
- **Status**: Text + image content items for dynamic tool outputs in v0.98.0
31+
- **Description**: Better support for dynamic tool outputs with mixed content
32+
- **Implementation Notes**:
33+
- Current `structuredContent` support is partial
34+
- May need enhancement to handle text + image content items
35+
- **Reference**: #10567
36+
37+
### Low Priority
38+
39+
#### [ ] Personality Mode
40+
- **Status**: Pragmatic restored as default in v0.98.0
41+
- **Description**: Control Codex's response personality
42+
- **Options**: `pragmatic` (default), `verbose`
43+
- **CLI Config**: `personality = "pragmatic"` or `personality = "verbose"`
44+
- **Implementation Notes**:
45+
- Add `personality` parameter to CodexToolSchema
46+
- Pass via `-c personality="..."`
47+
- **Reference**: #10705
48+
49+
---
50+
51+
## Implemented in v1.3.4+
52+
53+
### ✅ GPT-5.3-Codex Model
54+
- **Status**: Implemented
55+
- **Description**: New default model
56+
- **Changes**:
57+
- Updated `DEFAULT_CODEX_MODEL` constant to `'gpt-5.3-codex'`
58+
- Updated tool definitions to reflect new default
59+
- Single source of truth for model updates
60+
61+
### ✅ Reasoning Effort: 'none' and 'xhigh'
62+
- **Status**: Implemented (commit 448fa3c)
63+
- **Description**: Extended reasoning effort options
64+
- **Changes**:
65+
- Added `'none'` and `'xhigh'` to reasoningEffort enum
66+
- Full range: `none`, `minimal`, `low`, `medium`, `high`, `xhigh`
67+
68+
---
69+
70+
## Future Considerations
71+
72+
### Model Version Management
73+
- Consider adding a `getAvailableModels()` tool to query Codex CLI for available models
74+
- This would make the server more resilient to future model additions
75+
76+
### Configuration File Support
77+
- Codex CLI supports config files (`.codexrc.toml`)
78+
- Consider whether the MCP server should expose config file options
79+
80+
### Streaming Support
81+
- Codex CLI supports SSE streaming for responses
82+
- Consider adding streaming support for long-running tasks
83+
84+
---
85+
86+
## References
87+
88+
- [Codex CLI Releases](https://github.com/openai/codex/releases)
89+
- [Codex Changelog](https://developers.openai.com/codex/changelog/)
90+
- [v0.98.0 Release](https://github.com/openai/codex/releases/tag/rust-v0.98.0)

src/__tests__/context-building.test.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ describe('Context Building Analysis', () => {
6161

6262
// Check what prompt was sent to Codex - should be enhanced but not conversational
6363
const call = mockedExecuteCommand.mock.calls[0];
64-
const sentPrompt = call?.[1]?.[4]; // After exec, --model, gpt-5.2-codex, --skip-git-repo-check, prompt
64+
const sentPrompt = call?.[1]?.[4]; // After exec, --model, gpt-5.3-codex, --skip-git-repo-check, prompt
6565
expect(sentPrompt).toContain('Previous code context:');
6666
expect(sentPrompt).toContain('Task: Make it more efficient');
6767
expect(sentPrompt).not.toContain('Previous: What is recursion?'); // No conversational format

src/__tests__/default-model.test.ts

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -40,13 +40,13 @@ describe('Default Model Configuration', () => {
4040
delete process.env.CODEX_MCP_CALLBACK_URI;
4141
});
4242

43-
test('should use gpt-5.2-codex as default model when no model specified', async () => {
43+
test('should use gpt-5.3-codex as default model when no model specified', async () => {
4444
await handler.execute({ prompt: 'Test prompt' });
4545

4646
expect(mockedExecuteCommand).toHaveBeenCalledWith('codex', [
4747
'exec',
4848
'--model',
49-
'gpt-5.2-codex',
49+
'gpt-5.3-codex',
5050
'--skip-git-repo-check',
5151
'Test prompt',
5252
]);
@@ -55,8 +55,8 @@ describe('Default Model Configuration', () => {
5555
test('should include default model in response metadata', async () => {
5656
const result = await handler.execute({ prompt: 'Test prompt' });
5757

58-
expect(result.content[0]._meta?.model).toBe('gpt-5.2-codex');
59-
expect(result.structuredContent?.model).toBe('gpt-5.2-codex');
58+
expect(result.content[0]._meta?.model).toBe('gpt-5.3-codex');
59+
expect(result.structuredContent?.model).toBe('gpt-5.3-codex');
6060
expect(result._meta?.callbackUri).toBeUndefined();
6161
});
6262

@@ -86,7 +86,7 @@ describe('Default Model Configuration', () => {
8686
expect(mockedExecuteCommand).toHaveBeenCalledWith('codex', [
8787
'exec',
8888
'--model',
89-
'gpt-5.2-codex',
89+
'gpt-5.3-codex',
9090
'--skip-git-repo-check',
9191
'Test prompt',
9292
]);
@@ -106,7 +106,7 @@ describe('Default Model Configuration', () => {
106106
'exec',
107107
'--skip-git-repo-check',
108108
'-c',
109-
'model="gpt-5.2-codex"',
109+
'model="gpt-5.3-codex"',
110110
'resume',
111111
'existing-conv-id',
112112
'Resume with default model',
@@ -122,7 +122,7 @@ describe('Default Model Configuration', () => {
122122
expect(mockedExecuteCommand).toHaveBeenCalledWith('codex', [
123123
'exec',
124124
'--model',
125-
'gpt-5.2-codex',
125+
'gpt-5.3-codex',
126126
'-c',
127127
'model_reasoning_effort="high"',
128128
'--skip-git-repo-check',

src/__tests__/edge-cases.test.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ describe('Edge Cases and Integration Issues', () => {
133133

134134
// Should only use recent turns, not crash with too much context
135135
const call = mockedExecuteCommand.mock.calls[0];
136-
const prompt = call?.[1]?.[4]; // After exec, --model, gpt-5.2-codex, --skip-git-repo-check, prompt
136+
const prompt = call?.[1]?.[4]; // After exec, --model, gpt-5.3-codex, --skip-git-repo-check, prompt
137137
expect(typeof prompt).toBe('string');
138138
if (prompt) {
139139
expect(prompt.length).toBeLessThan(5000); // Reasonable limit

src/__tests__/error-scenarios.test.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ describe('Error Handling Scenarios', () => {
158158
expect(mockedExecuteCommand).toHaveBeenCalledWith('codex', [
159159
'exec',
160160
'--model',
161-
'gpt-5.2-codex',
161+
'gpt-5.3-codex',
162162
'--skip-git-repo-check',
163163
longPrompt,
164164
]);

src/__tests__/index.test.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ describe('Codex MCP Server', () => {
148148
const result = {
149149
content: [{ type: 'text', text: 'ok', _meta: { threadId: 'th_123' } }],
150150
structuredContent: { threadId: 'th_123' },
151-
_meta: { model: 'gpt-5.2-codex' },
151+
_meta: { model: 'gpt-5.3-codex' },
152152
};
153153

154154
const parsed = CallToolResultSchema.safeParse(result);

src/__tests__/model-selection.test.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ describe('Model Selection and Reasoning Effort', () => {
6363
expect(mockedExecuteCommand).toHaveBeenCalledWith('codex', [
6464
'exec',
6565
'--model',
66-
'gpt-5.2-codex',
66+
'gpt-5.3-codex',
6767
'-c',
6868
'model_reasoning_effort="high"',
6969
'--skip-git-repo-check',
@@ -133,7 +133,7 @@ describe('Model Selection and Reasoning Effort', () => {
133133
expect(mockedExecuteCommand).toHaveBeenCalledWith('codex', [
134134
'exec',
135135
'--model',
136-
'gpt-5.2-codex',
136+
'gpt-5.3-codex',
137137
'-c',
138138
'model_reasoning_effort="minimal"',
139139
'--skip-git-repo-check',
@@ -150,7 +150,7 @@ describe('Model Selection and Reasoning Effort', () => {
150150
expect(mockedExecuteCommand).toHaveBeenCalledWith('codex', [
151151
'exec',
152152
'--model',
153-
'gpt-5.2-codex',
153+
'gpt-5.3-codex',
154154
'-c',
155155
'model_reasoning_effort="none"',
156156
'--skip-git-repo-check',
@@ -167,7 +167,7 @@ describe('Model Selection and Reasoning Effort', () => {
167167
expect(mockedExecuteCommand).toHaveBeenCalledWith('codex', [
168168
'exec',
169169
'--model',
170-
'gpt-5.2-codex',
170+
'gpt-5.3-codex',
171171
'-c',
172172
'model_reasoning_effort="xhigh"',
173173
'--skip-git-repo-check',

src/__tests__/resume-functionality.test.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ describe('Codex Resume Functionality', () => {
5151
expect(mockedExecuteCommand).toHaveBeenCalledWith('codex', [
5252
'exec',
5353
'--model',
54-
'gpt-5.2-codex',
54+
'gpt-5.3-codex',
5555
'--skip-git-repo-check',
5656
'First message',
5757
]);
@@ -156,7 +156,7 @@ describe('Codex Resume Functionality', () => {
156156
'exec',
157157
'--skip-git-repo-check',
158158
'-c',
159-
'model="gpt-5.2-codex"',
159+
'model="gpt-5.3-codex"',
160160
'resume',
161161
'existing-codex-session-id',
162162
'Continue the task',
@@ -182,7 +182,7 @@ describe('Codex Resume Functionality', () => {
182182
expect(mockedExecuteCommand).toHaveBeenCalledWith('codex', [
183183
'exec',
184184
'--model',
185-
'gpt-5.2-codex',
185+
'gpt-5.3-codex',
186186
'--skip-git-repo-check',
187187
'Reset and start new',
188188
]);
@@ -213,7 +213,7 @@ describe('Codex Resume Functionality', () => {
213213

214214
// Should build enhanced prompt since no codex session ID
215215
const call = mockedExecuteCommand.mock.calls[0];
216-
const sentPrompt = call?.[1]?.[4]; // After exec, --model, gpt-5.2-codex, --skip-git-repo-check, prompt
216+
const sentPrompt = call?.[1]?.[4]; // After exec, --model, gpt-5.3-codex, --skip-git-repo-check, prompt
217217
expect(sentPrompt).toContain('Context:');
218218
expect(sentPrompt).toContain('Task: Follow up question');
219219
});

src/tools/definitions.ts

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { TOOLS, type ToolDefinition } from '../types.js';
1+
import { TOOLS, getModelDescription, type ToolDefinition } from '../types.js';
22

33
export const toolDefinitions: ToolDefinition[] = [
44
{
@@ -23,8 +23,7 @@ export const toolDefinitions: ToolDefinition[] = [
2323
},
2424
model: {
2525
type: 'string',
26-
description:
27-
'Specify which model to use (defaults to gpt-5.2-codex). Options: gpt-5.2-codex, gpt-5.1-codex, gpt-5.1-codex-max, gpt-5-codex, gpt-4o, gpt-4, o3, o4-mini',
26+
description: getModelDescription('codex'),
2827
},
2928
reasoningEffort: {
3029
type: 'string',
@@ -102,8 +101,7 @@ export const toolDefinitions: ToolDefinition[] = [
102101
},
103102
model: {
104103
type: 'string',
105-
description:
106-
'Specify which model to use for the review (defaults to gpt-5.2-codex)',
104+
description: getModelDescription('review'),
107105
},
108106
workingDirectory: {
109107
type: 'string',

src/tools/handlers.ts

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
import {
22
TOOLS,
3+
DEFAULT_CODEX_MODEL,
4+
CODEX_DEFAULT_MODEL_ENV_VAR,
35
type ToolResult,
46
type ToolHandlerContext,
57
type CodexToolArgs,
@@ -83,7 +85,9 @@ export class CodexToolHandler {
8385

8486
// Build command arguments with v0.75.0+ features
8587
const selectedModel =
86-
model || process.env.CODEX_DEFAULT_MODEL || 'gpt-5.2-codex'; // Default to gpt-5.2-codex
88+
model ||
89+
process.env[CODEX_DEFAULT_MODEL_ENV_VAR] ||
90+
DEFAULT_CODEX_MODEL;
8791

8892
const effectiveCallbackUri =
8993
callbackUri || process.env.CODEX_MCP_CALLBACK_URI;
@@ -391,7 +395,9 @@ export class ReviewToolHandler {
391395

392396
// Add model parameter via config
393397
const selectedModel =
394-
model || process.env.CODEX_DEFAULT_MODEL || 'gpt-5.2-codex';
398+
model ||
399+
process.env[CODEX_DEFAULT_MODEL_ENV_VAR] ||
400+
DEFAULT_CODEX_MODEL;
395401
cmdArgs.push('-c', `model="${selectedModel}"`);
396402

397403
cmdArgs.push('review');

0 commit comments

Comments
 (0)