-
Notifications
You must be signed in to change notification settings - Fork 91
fix(openai-responses): emit reasoning blocks from responses stream #1156
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
9d194bb
b32af10
7ca2ba9
7fed2d2
37318d5
8c191ae
268aa73
75be1cb
508c975
cb389aa
e0b23fc
bd2fb76
ba82d58
8d3401a
c6112a7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -200,6 +200,38 @@ describe('runNonInteractive', () => { | |
| expect(mockShutdownTelemetry).toHaveBeenCalled(); | ||
| }); | ||
|
|
||
| it('should coalesce thought output before content', async () => { | ||
| mockConfig.getEphemeralSetting = vi | ||
| .fn<(key: string) => boolean | undefined>() | ||
| .mockReturnValue(true); | ||
|
|
||
| const events: ServerGeminiStreamEvent[] = [ | ||
| { | ||
| type: GeminiEventType.Thought, | ||
| value: { subject: 'First', description: '' }, | ||
| }, | ||
| { | ||
| type: GeminiEventType.Thought, | ||
| value: { subject: 'Second', description: '' }, | ||
| }, | ||
| { type: GeminiEventType.Content, value: 'Content' }, | ||
| ]; | ||
| mockGeminiClient.sendMessageStream.mockReturnValue( | ||
| createStreamFromEvents(events), | ||
| ); | ||
|
|
||
| await runNonInteractive({ | ||
| config: mockConfig, | ||
| settings: mockSettings, | ||
| input: 'Test input', | ||
| prompt_id: 'prompt-id-thought', | ||
| }); | ||
|
|
||
| const writes = processStdoutSpy.mock.calls.map(([value]) => value); | ||
| const output = writes.join(''); | ||
| expect(output).toContain('<think>First Second</think>'); | ||
| }); | ||
|
|
||
| it('should handle a single tool call and respond', async () => { | ||
| const toolCallEvent: ServerGeminiStreamEvent = { | ||
| type: GeminiEventType.ToolCallRequest, | ||
|
|
@@ -726,6 +758,108 @@ describe('runNonInteractive', () => { | |
| expect(processStdoutSpy).toHaveBeenCalledWith('file.txt'); | ||
| }); | ||
|
|
||
| // Skipped tests from issue922 branch - thought buffering tests for deduplication | ||
| it.skip('should accumulate multiple Thought events and flush once on content boundary', async () => { | ||
| const thoughtEvent1: ServerGeminiStreamEvent = { | ||
| type: GeminiEventType.Thought, | ||
| value: { | ||
| subject: 'First', | ||
| description: 'thought', | ||
| }, | ||
| }; | ||
| const thoughtEvent2: ServerGeminiStreamEvent = { | ||
| type: GeminiEventType.Thought, | ||
| value: { | ||
| subject: 'Second', | ||
| description: 'thought', | ||
| }, | ||
| }; | ||
| const contentEvent: ServerGeminiStreamEvent = { | ||
| type: GeminiEventType.Content, | ||
| value: 'Response text', | ||
| }; | ||
| const finishedEvent: ServerGeminiStreamEvent = { | ||
| type: GeminiEventType.Finished, | ||
| value: { reason: undefined, usageMetadata: { totalTokenCount: 10 } }, | ||
| }; | ||
|
|
||
| mockGeminiClient.sendMessageStream.mockReturnValueOnce( | ||
| createStreamFromEvents([ | ||
| thoughtEvent1, | ||
| thoughtEvent2, | ||
| contentEvent, | ||
| finishedEvent, | ||
| ]), | ||
| ); | ||
|
|
||
| await runNonInteractive({ | ||
| config: mockConfig, | ||
| settings: mockSettings, | ||
| input: 'test query', | ||
| prompt_id: 'test-prompt-id', | ||
| }); | ||
|
|
||
| const thinkingOutputs = processStdoutSpy.mock.calls.filter( | ||
| ([output]: [string]) => output.includes('<think>'), | ||
| ); | ||
|
|
||
| expect(thinkingOutputs).toHaveLength(1); | ||
| const thinkingText = thinkingOutputs[0][0]; | ||
| expect(thinkingText).toContain('First thought'); | ||
| expect(thinkingText).toContain('Second thought'); | ||
| }); | ||
|
|
||
| it.skip('should NOT emit pyramid-style repeated prefixes in non-interactive CLI', async () => { | ||
| const thoughtEvent1: ServerGeminiStreamEvent = { | ||
| type: GeminiEventType.Thought, | ||
| value: { | ||
| subject: 'Analyzing', | ||
| description: '', | ||
| }, | ||
| }; | ||
| const thoughtEvent2: ServerGeminiStreamEvent = { | ||
| type: GeminiEventType.Thought, | ||
| value: { | ||
| subject: 'request', | ||
| description: '', | ||
| }, | ||
| }; | ||
| const contentEvent: ServerGeminiStreamEvent = { | ||
| type: GeminiEventType.Content, | ||
| value: 'Response', | ||
| }; | ||
| const finishedEvent: ServerGeminiStreamEvent = { | ||
| type: GeminiEventType.Finished, | ||
| value: { reason: undefined, usageMetadata: { totalTokenCount: 10 } }, | ||
| }; | ||
|
|
||
| mockGeminiClient.sendMessageStream.mockReturnValueOnce( | ||
| createStreamFromEvents([ | ||
| thoughtEvent1, | ||
| thoughtEvent2, | ||
| contentEvent, | ||
| finishedEvent, | ||
| ]), | ||
| ); | ||
|
|
||
| await runNonInteractive({ | ||
| config: mockConfig, | ||
| settings: mockSettings, | ||
| input: 'test query', | ||
| prompt_id: 'test-prompt-id', | ||
| }); | ||
|
|
||
| const thinkingOutputs = processStdoutSpy.mock.calls.filter( | ||
| ([output]: [string]) => output.includes('<think>'), | ||
| ); | ||
|
|
||
| expect(thinkingOutputs).toHaveLength(1); | ||
| const thinkingText = thinkingOutputs[0][0]; | ||
| const thoughtCount = (thinkingText.match(/Analyzing/g) || []).length; | ||
| expect(thoughtCount).toBe(1); | ||
| }); | ||
|
Comment on lines
+812
to
+860
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same issue: missing ephemeral setting mock. This skipped test also needs the ephemeral setting mock when unskipped, consistent with the first skipped test and the active test at line 203. 🤖 Prompt for AI Agents |
||
|
|
||
| // Tests from main branch | ||
| it('should display a deprecation warning if hasDeprecatedPromptArg is true', async () => { | ||
| const events: ServerGeminiStreamEvent[] = [ | ||
| { type: GeminiEventType.Content, value: 'Final Answer' }, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,45 @@ | ||
| /** | ||
| * @license | ||
| * Copyright 2025 Vybestack LLC | ||
| * SPDX-License-Identifier: Apache-2.0 | ||
| * | ||
| * TDD tests for codex.config reasoning.summary default | ||
| * @issue #922 - GPT-5.2-Codex thinking blocks not visible | ||
| */ | ||
|
|
||
| import { describe, it, expect } from 'vitest'; | ||
| import * as fs from 'fs'; | ||
| import * as path from 'path'; | ||
| import { fileURLToPath } from 'url'; | ||
| import stripJsonComments from 'strip-json-comments'; | ||
|
|
||
| const __filename = fileURLToPath(import.meta.url); | ||
| const __dirname = path.dirname(__filename); | ||
|
|
||
| describe('codex.config reasoning.summary default @issue:922', () => { | ||
| it('should have a codex.config file', () => { | ||
| const codexConfigPath = path.join(__dirname, 'aliases', 'codex.config'); | ||
| expect(fs.existsSync(codexConfigPath)).toBe(true); | ||
| }); | ||
|
|
||
| it('should set reasoning.summary=auto in ephemerals', () => { | ||
| // Read the config file directly to avoid vitest module resolution issues | ||
| const codexConfigPath = path.join(__dirname, 'aliases', 'codex.config'); | ||
| const raw = fs.readFileSync(codexConfigPath, 'utf-8'); | ||
| const config = JSON.parse(stripJsonComments(raw)); | ||
|
|
||
| expect(config.ephemeralSettings).toBeDefined(); | ||
| expect(config.ephemeralSettings['reasoning.summary']).toBe('auto'); | ||
| }); | ||
|
|
||
| it('should set reasoning.effort in ephemerals (existing behavior)', () => { | ||
| // Read the config file directly to avoid vitest module resolution issues | ||
| const codexConfigPath = path.join(__dirname, 'aliases', 'codex.config'); | ||
| const raw = fs.readFileSync(codexConfigPath, 'utf-8'); | ||
| const config = JSON.parse(stripJsonComments(raw)); | ||
|
|
||
| expect(config.ephemeralSettings).toBeDefined(); | ||
| // Codex should have some default effort level | ||
| expect(config.ephemeralSettings['reasoning.effort']).toBeDefined(); | ||
| }); | ||
| }); |
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,34 @@ | ||||||||||||||||||||||||||||||||||||||||||||
| /** | ||||||||||||||||||||||||||||||||||||||||||||
| * @license | ||||||||||||||||||||||||||||||||||||||||||||
| * Copyright 2025 Vybestack LLC | ||||||||||||||||||||||||||||||||||||||||||||
| * SPDX-License-Identifier: Apache-2.0 | ||||||||||||||||||||||||||||||||||||||||||||
| * | ||||||||||||||||||||||||||||||||||||||||||||
| * TDD tests for reasoning.summary profile save/load | ||||||||||||||||||||||||||||||||||||||||||||
| * @issue #922 - GPT-5.2-Codex thinking blocks not visible | ||||||||||||||||||||||||||||||||||||||||||||
| */ | ||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||
| import { describe, it, expect } from 'vitest'; | ||||||||||||||||||||||||||||||||||||||||||||
| import { PROFILE_EPHEMERAL_KEYS } from './runtimeSettings.js'; | ||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||
| describe('reasoning.summary profile save/load @issue:922', () => { | ||||||||||||||||||||||||||||||||||||||||||||
| it('should include reasoning.summary in PROFILE_EPHEMERAL_KEYS', () => { | ||||||||||||||||||||||||||||||||||||||||||||
| expect(PROFILE_EPHEMERAL_KEYS).toContain('reasoning.summary'); | ||||||||||||||||||||||||||||||||||||||||||||
| }); | ||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||
| it('should include all reasoning.* keys in PROFILE_EPHEMERAL_KEYS', () => { | ||||||||||||||||||||||||||||||||||||||||||||
| // Verify all reasoning settings are saveable | ||||||||||||||||||||||||||||||||||||||||||||
| expect(PROFILE_EPHEMERAL_KEYS).toContain('reasoning.enabled'); | ||||||||||||||||||||||||||||||||||||||||||||
| expect(PROFILE_EPHEMERAL_KEYS).toContain('reasoning.includeInContext'); | ||||||||||||||||||||||||||||||||||||||||||||
| expect(PROFILE_EPHEMERAL_KEYS).toContain('reasoning.includeInResponse'); | ||||||||||||||||||||||||||||||||||||||||||||
| expect(PROFILE_EPHEMERAL_KEYS).toContain('reasoning.format'); | ||||||||||||||||||||||||||||||||||||||||||||
| expect(PROFILE_EPHEMERAL_KEYS).toContain('reasoning.stripFromContext'); | ||||||||||||||||||||||||||||||||||||||||||||
| expect(PROFILE_EPHEMERAL_KEYS).toContain('reasoning.effort'); | ||||||||||||||||||||||||||||||||||||||||||||
| expect(PROFILE_EPHEMERAL_KEYS).toContain('reasoning.maxTokens'); | ||||||||||||||||||||||||||||||||||||||||||||
| expect(PROFILE_EPHEMERAL_KEYS).toContain('reasoning.summary'); | ||||||||||||||||||||||||||||||||||||||||||||
|
Comment on lines
+18
to
+27
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add reasoning.verbosity to the “all reasoning. keys” assertion.* The test promises full reasoning coverage but omits ✅ Proposed fix expect(PROFILE_EPHEMERAL_KEYS).toContain('reasoning.maxTokens');
expect(PROFILE_EPHEMERAL_KEYS).toContain('reasoning.summary');
+ expect(PROFILE_EPHEMERAL_KEYS).toContain('reasoning.verbosity');📝 Committable suggestion
Suggested change
🤖 Prompt for AI Agents |
||||||||||||||||||||||||||||||||||||||||||||
| }); | ||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||
| it('should include text.verbosity in PROFILE_EPHEMERAL_KEYS', () => { | ||||||||||||||||||||||||||||||||||||||||||||
| // text.verbosity is for OpenAI Responses API response verbosity control | ||||||||||||||||||||||||||||||||||||||||||||
| expect(PROFILE_EPHEMERAL_KEYS).toContain('text.verbosity'); | ||||||||||||||||||||||||||||||||||||||||||||
| }); | ||||||||||||||||||||||||||||||||||||||||||||
| }); | ||||||||||||||||||||||||||||||||||||||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Skipped tests are missing ephemeral setting mock for thinking output.
When these tests are unskipped, they will likely fail because
getEphemeralSettingis not mocked to returntruefor the reasoning setting. The active test at line 204-206 demonstrates the required setup. Additionally, verify that the expected assertion format matches the implementation—the active test expects subjects concatenated ("First Second"), but this test expects"First thought"and"Second thought"which may not match howthoughtTextis constructed.🛠️ Suggested fix when unskipping
Also verify the expected output format matches the implementation (e.g.,
"First: thought"vs"First thought").📝 Committable suggestion
🤖 Prompt for AI Agents