Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
134 changes: 134 additions & 0 deletions packages/cli/src/nonInteractiveCli.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,38 @@ describe('runNonInteractive', () => {
expect(mockShutdownTelemetry).toHaveBeenCalled();
});

// Verifies that consecutive Thought events are merged into one <think>
// block that is flushed before the first content chunk is written.
it('should coalesce thought output before content', async () => {
  // Enable thinking output via the ephemeral reasoning setting.
  mockConfig.getEphemeralSetting = vi
    .fn<(key: string) => boolean | undefined>()
    .mockReturnValue(true);

  const events: ServerGeminiStreamEvent[] = [
    {
      type: GeminiEventType.Thought,
      value: { subject: 'First', description: '' },
    },
    {
      type: GeminiEventType.Thought,
      value: { subject: 'Second', description: '' },
    },
    { type: GeminiEventType.Content, value: 'Content' },
  ];
  mockGeminiClient.sendMessageStream.mockReturnValue(
    createStreamFromEvents(events),
  );

  await runNonInteractive({
    config: mockConfig,
    settings: mockSettings,
    input: 'Test input',
    prompt_id: 'prompt-id-thought',
  });

  // Both thought subjects must appear inside a single coalesced tag.
  const combined = processStdoutSpy.mock.calls
    .map(([chunk]) => chunk)
    .join('');
  expect(combined).toContain('<think>First Second</think>');
});

it('should handle a single tool call and respond', async () => {
const toolCallEvent: ServerGeminiStreamEvent = {
type: GeminiEventType.ToolCallRequest,
Expand Down Expand Up @@ -726,6 +758,108 @@ describe('runNonInteractive', () => {
expect(processStdoutSpy).toHaveBeenCalledWith('file.txt');
});

// Skipped tests from issue922 branch - thought buffering tests for deduplication
it.skip('should accumulate multiple Thought events and flush once on content boundary', async () => {
const thoughtEvent1: ServerGeminiStreamEvent = {
type: GeminiEventType.Thought,
value: {
subject: 'First',
description: 'thought',
},
};
const thoughtEvent2: ServerGeminiStreamEvent = {
type: GeminiEventType.Thought,
value: {
subject: 'Second',
description: 'thought',
},
};
const contentEvent: ServerGeminiStreamEvent = {
type: GeminiEventType.Content,
value: 'Response text',
};
const finishedEvent: ServerGeminiStreamEvent = {
type: GeminiEventType.Finished,
value: { reason: undefined, usageMetadata: { totalTokenCount: 10 } },
};

mockGeminiClient.sendMessageStream.mockReturnValueOnce(
createStreamFromEvents([
thoughtEvent1,
thoughtEvent2,
contentEvent,
finishedEvent,
]),
);

await runNonInteractive({
config: mockConfig,
settings: mockSettings,
input: 'test query',
prompt_id: 'test-prompt-id',
});

const thinkingOutputs = processStdoutSpy.mock.calls.filter(
([output]: [string]) => output.includes('<think>'),
);

expect(thinkingOutputs).toHaveLength(1);
const thinkingText = thinkingOutputs[0][0];
expect(thinkingText).toContain('First thought');
expect(thinkingText).toContain('Second thought');
});
Comment on lines +762 to +810
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Skipped tests are missing ephemeral setting mock for thinking output.

When these tests are unskipped, they will likely fail because getEphemeralSetting is not mocked to return true for the reasoning setting. The active test at line 204-206 demonstrates the required setup. Additionally, verify that the expected assertion format matches the implementation—the active test expects subjects concatenated ("First Second"), but this test expects "First thought" and "Second thought" which may not match how thoughtText is constructed.

🛠️ Suggested fix when unskipping
-  it.skip('should accumulate multiple Thought events and flush once on content boundary', async () => {
+  it('should accumulate multiple Thought events and flush once on content boundary', async () => {
+    mockConfig.getEphemeralSetting = vi
+      .fn<(key: string) => boolean | undefined>()
+      .mockReturnValue(true);
+
     const thoughtEvent1: ServerGeminiStreamEvent = {

Also verify the expected output format matches the implementation (e.g., "First: thought" vs "First thought").

📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
it.skip('should accumulate multiple Thought events and flush once on content boundary', async () => {
const thoughtEvent1: ServerGeminiStreamEvent = {
type: GeminiEventType.Thought,
value: {
subject: 'First',
description: 'thought',
},
};
const thoughtEvent2: ServerGeminiStreamEvent = {
type: GeminiEventType.Thought,
value: {
subject: 'Second',
description: 'thought',
},
};
const contentEvent: ServerGeminiStreamEvent = {
type: GeminiEventType.Content,
value: 'Response text',
};
const finishedEvent: ServerGeminiStreamEvent = {
type: GeminiEventType.Finished,
value: { reason: undefined, usageMetadata: { totalTokenCount: 10 } },
};
mockGeminiClient.sendMessageStream.mockReturnValueOnce(
createStreamFromEvents([
thoughtEvent1,
thoughtEvent2,
contentEvent,
finishedEvent,
]),
);
await runNonInteractive({
config: mockConfig,
settings: mockSettings,
input: 'test query',
prompt_id: 'test-prompt-id',
});
const thinkingOutputs = processStdoutSpy.mock.calls.filter(
([output]: [string]) => output.includes('<think>'),
);
expect(thinkingOutputs).toHaveLength(1);
const thinkingText = thinkingOutputs[0][0];
expect(thinkingText).toContain('First thought');
expect(thinkingText).toContain('Second thought');
});
it('should accumulate multiple Thought events and flush once on content boundary', async () => {
mockConfig.getEphemeralSetting = vi
.fn<(key: string) => boolean | undefined>()
.mockReturnValue(true);
const thoughtEvent1: ServerGeminiStreamEvent = {
type: GeminiEventType.Thought,
value: {
subject: 'First',
description: 'thought',
},
};
const thoughtEvent2: ServerGeminiStreamEvent = {
type: GeminiEventType.Thought,
value: {
subject: 'Second',
description: 'thought',
},
};
const contentEvent: ServerGeminiStreamEvent = {
type: GeminiEventType.Content,
value: 'Response text',
};
const finishedEvent: ServerGeminiStreamEvent = {
type: GeminiEventType.Finished,
value: { reason: undefined, usageMetadata: { totalTokenCount: 10 } },
};
mockGeminiClient.sendMessageStream.mockReturnValueOnce(
createStreamFromEvents([
thoughtEvent1,
thoughtEvent2,
contentEvent,
finishedEvent,
]),
);
await runNonInteractive({
config: mockConfig,
settings: mockSettings,
input: 'test query',
prompt_id: 'test-prompt-id',
});
const thinkingOutputs = processStdoutSpy.mock.calls.filter(
([output]: [string]) => output.includes('<think>'),
);
expect(thinkingOutputs).toHaveLength(1);
const thinkingText = thinkingOutputs[0][0];
expect(thinkingText).toContain('First thought');
expect(thinkingText).toContain('Second thought');
});
🤖 Prompt for AI Agents
In `@packages/cli/src/nonInteractiveCli.test.ts` around lines 762 - 810, The
skipped test "should accumulate multiple Thought events and flush once on
content boundary" is missing a mock for getEphemeralSetting to enable ephemeral
reasoning output and its assertions don't match how thought subjects are
concatenated; before calling runNonInteractive mock getEphemeralSetting to
return true for the reasoning setting (same setup used in the active test) so
thinking output is produced, and update the assertions against processStdoutSpy
to match the actual thoughtText construction used by the code (e.g., check for
the concatenated subject format produced by the Thought events from
GeminiEventType.Thought rather than "First thought"/"Second thought").


it.skip('should NOT emit pyramid-style repeated prefixes in non-interactive CLI', async () => {
const thoughtEvent1: ServerGeminiStreamEvent = {
type: GeminiEventType.Thought,
value: {
subject: 'Analyzing',
description: '',
},
};
const thoughtEvent2: ServerGeminiStreamEvent = {
type: GeminiEventType.Thought,
value: {
subject: 'request',
description: '',
},
};
const contentEvent: ServerGeminiStreamEvent = {
type: GeminiEventType.Content,
value: 'Response',
};
const finishedEvent: ServerGeminiStreamEvent = {
type: GeminiEventType.Finished,
value: { reason: undefined, usageMetadata: { totalTokenCount: 10 } },
};

mockGeminiClient.sendMessageStream.mockReturnValueOnce(
createStreamFromEvents([
thoughtEvent1,
thoughtEvent2,
contentEvent,
finishedEvent,
]),
);

await runNonInteractive({
config: mockConfig,
settings: mockSettings,
input: 'test query',
prompt_id: 'test-prompt-id',
});

const thinkingOutputs = processStdoutSpy.mock.calls.filter(
([output]: [string]) => output.includes('<think>'),
);

expect(thinkingOutputs).toHaveLength(1);
const thinkingText = thinkingOutputs[0][0];
const thoughtCount = (thinkingText.match(/Analyzing/g) || []).length;
expect(thoughtCount).toBe(1);
});
Comment on lines +812 to +860
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Same issue: missing ephemeral setting mock.

This skipped test also needs the ephemeral setting mock when unskipped, consistent with the first skipped test and the active test at line 203.

🤖 Prompt for AI Agents
In `@packages/cli/src/nonInteractiveCli.test.ts` around lines 812 - 860, The test
"should NOT emit pyramid-style repeated prefixes in non-interactive CLI" is
missing the ephemeral setting mock; before calling runNonInteractive, add the
same ephemeral stub used in the other tests by configuring mockSettings (e.g.,
set mockSettings.ephemeral or stub
mockSettings.get('ephemeral')/mockSettings.getSetting to return the same value
used in the active test) so runNonInteractive and its use of mockSettings behave
consistently with the other tests that already include the ephemeral mock.


// Tests from main branch
it('should display a deprecation warning if hasDeprecatedPromptArg is true', async () => {
const events: ServerGeminiStreamEvent[] = [
{ type: GeminiEventType.Content, value: 'Final Answer' },
Expand Down
47 changes: 31 additions & 16 deletions packages/cli/src/nonInteractiveCli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,28 @@ export async function runNonInteractive({
);
}
const functionCalls: ToolCallRequestInfo[] = [];
let thoughtBuffer = '';
// Only emit thinking in plain text mode (not JSON or STREAM_JSON)
// In STREAM_JSON mode, thinking would corrupt the JSON event stream
const includeThinking =
!jsonOutput &&
!streamJsonOutput &&
(typeof config.getEphemeralSetting === 'function'
? config.getEphemeralSetting('reasoning.includeInResponse') !== false
: true);

// Emits the accumulated thoughts as a single <think> block, then resets
// the buffer. No output when thinking is disabled or the buffer is blank.
const flushThoughtBuffer = () => {
  const pending = thoughtBuffer.trim();
  thoughtBuffer = '';
  if (!includeThinking || !pending) {
    return;
  }
  process.stdout.write(`<think>${pending}</think>\n`);
};

const responseStream = geminiClient.sendMessageStream(
currentMessages[0]?.parts || [],
Expand All @@ -291,17 +313,6 @@ export async function runNonInteractive({
}

if (event.type === GeminiEventType.Thought) {
// Output thinking/reasoning content with <think> tags
// Check if reasoning.includeInResponse is enabled
if (jsonOutput) {
continue;
}
const includeThinking =
typeof config.getEphemeralSetting === 'function'
? (config.getEphemeralSetting('reasoning.includeInResponse') ??
true)
: true;

if (includeThinking) {
const thoughtEvent = event as ServerGeminiThoughtEvent;
const thought = thoughtEvent.value;
Expand All @@ -312,6 +323,7 @@ export async function runNonInteractive({
: thought.subject || thought.description || '';

if (thoughtText.trim()) {
// Apply emoji filter if enabled
if (emojiFilter) {
const filterResult = emojiFilter.filterText(thoughtText);
if (filterResult.blocked) {
Expand All @@ -321,19 +333,20 @@ export async function runNonInteractive({
thoughtText = filterResult.filtered;
}
}
process.stdout.write(`<think>${thoughtText}</think>\n`);
// Buffer thoughts to prevent duplicate/pyramid output
thoughtBuffer = thoughtBuffer
? `${thoughtBuffer} ${thoughtText}`
: thoughtText;
}
}
} else if (event.type === GeminiEventType.Content) {
// Apply emoji filtering to content output
// Note: <think> tags are preserved in output to show thinking vs non-thinking content
flushThoughtBuffer();
let outputValue = event.value;

if (emojiFilter) {
const filterResult = emojiFilter.filterStreamChunk(outputValue);

if (filterResult.blocked) {
// In error mode: output error message and continue
if (!jsonOutput) {
process.stderr.write(
'[Error: Response blocked due to emoji detection]\n',
Expand All @@ -347,7 +360,6 @@ export async function runNonInteractive({
? (filterResult.filtered as string)
: '';

// Output system feedback if needed
if (filterResult.systemFeedback) {
if (!jsonOutput) {
process.stderr.write(
Expand All @@ -371,6 +383,7 @@ export async function runNonInteractive({
process.stdout.write(outputValue);
}
} else if (event.type === GeminiEventType.ToolCallRequest) {
flushThoughtBuffer();
const toolCallRequest = event.value;
if (streamFormatter) {
streamFormatter.emitEvent({
Expand Down Expand Up @@ -411,6 +424,8 @@ export async function runNonInteractive({
}
}

flushThoughtBuffer();

const remainingBuffered = emojiFilter?.flushBuffer?.();
if (remainingBuffered) {
if (jsonOutput) {
Expand Down
4 changes: 3 additions & 1 deletion packages/cli/src/providers/aliases/codex.config
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
"description": "OpenAI Codex (ChatGPT backend with OAuth)",
"ephemeralSettings": {
"context-limit": 262144,
"prompt-caching": "24h"
"prompt-caching": "24h",
"reasoning.effort": "medium",
"reasoning.summary": "auto"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/**
* @license
* Copyright 2025 Vybestack LLC
* SPDX-License-Identifier: Apache-2.0
*
* TDD tests for codex.config reasoning.summary default
* @issue #922 - GPT-5.2-Codex thinking blocks not visible
*/

import { describe, it, expect } from 'vitest';
import * as fs from 'fs';
import * as path from 'path';
import { fileURLToPath } from 'url';
import stripJsonComments from 'strip-json-comments';

const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

describe('codex.config reasoning.summary default @issue:922', () => {
  const codexConfigPath = path.join(__dirname, 'aliases', 'codex.config');

  // Reads and parses the JSONC alias file directly, avoiding vitest
  // module-resolution issues; shared by the tests below (was duplicated).
  const readCodexConfig = (): {
    ephemeralSettings?: Record<string, unknown>;
  } => {
    const raw = fs.readFileSync(codexConfigPath, 'utf-8');
    return JSON.parse(stripJsonComments(raw));
  };

  it('should have a codex.config file', () => {
    expect(fs.existsSync(codexConfigPath)).toBe(true);
  });

  it('should set reasoning.summary=auto in ephemerals', () => {
    const config = readCodexConfig();

    expect(config.ephemeralSettings).toBeDefined();
    expect(config.ephemeralSettings!['reasoning.summary']).toBe('auto');
  });

  it('should set reasoning.effort in ephemerals (existing behavior)', () => {
    const config = readCodexConfig();

    expect(config.ephemeralSettings).toBeDefined();
    // Codex should have some default effort level
    expect(config.ephemeralSettings!['reasoning.effort']).toBeDefined();
  });
});
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/**
* @license
* Copyright 2025 Vybestack LLC
* SPDX-License-Identifier: Apache-2.0
*
* TDD tests for reasoning.summary profile save/load
* @issue #922 - GPT-5.2-Codex thinking blocks not visible
*/

import { describe, it, expect } from 'vitest';
import { PROFILE_EPHEMERAL_KEYS } from './runtimeSettings.js';

describe('reasoning.summary profile save/load @issue:922', () => {
it('should include reasoning.summary in PROFILE_EPHEMERAL_KEYS', () => {
expect(PROFILE_EPHEMERAL_KEYS).toContain('reasoning.summary');
});

it('should include all reasoning.* keys in PROFILE_EPHEMERAL_KEYS', () => {
// Verify all reasoning settings are saveable
expect(PROFILE_EPHEMERAL_KEYS).toContain('reasoning.enabled');
expect(PROFILE_EPHEMERAL_KEYS).toContain('reasoning.includeInContext');
expect(PROFILE_EPHEMERAL_KEYS).toContain('reasoning.includeInResponse');
expect(PROFILE_EPHEMERAL_KEYS).toContain('reasoning.format');
expect(PROFILE_EPHEMERAL_KEYS).toContain('reasoning.stripFromContext');
expect(PROFILE_EPHEMERAL_KEYS).toContain('reasoning.effort');
expect(PROFILE_EPHEMERAL_KEYS).toContain('reasoning.maxTokens');
expect(PROFILE_EPHEMERAL_KEYS).toContain('reasoning.summary');
Comment on lines +18 to +27
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Add `reasoning.verbosity` to the "all reasoning.* keys" assertion.

The test promises full reasoning coverage but omits reasoning.verbosity, so a regression could slip through unnoticed.

✅ Proposed fix
   expect(PROFILE_EPHEMERAL_KEYS).toContain('reasoning.maxTokens');
   expect(PROFILE_EPHEMERAL_KEYS).toContain('reasoning.summary');
+  expect(PROFILE_EPHEMERAL_KEYS).toContain('reasoning.verbosity');
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
it('should include all reasoning.* keys in PROFILE_EPHEMERAL_KEYS', () => {
// Verify all reasoning settings are saveable
expect(PROFILE_EPHEMERAL_KEYS).toContain('reasoning.enabled');
expect(PROFILE_EPHEMERAL_KEYS).toContain('reasoning.includeInContext');
expect(PROFILE_EPHEMERAL_KEYS).toContain('reasoning.includeInResponse');
expect(PROFILE_EPHEMERAL_KEYS).toContain('reasoning.format');
expect(PROFILE_EPHEMERAL_KEYS).toContain('reasoning.stripFromContext');
expect(PROFILE_EPHEMERAL_KEYS).toContain('reasoning.effort');
expect(PROFILE_EPHEMERAL_KEYS).toContain('reasoning.maxTokens');
expect(PROFILE_EPHEMERAL_KEYS).toContain('reasoning.summary');
it('should include all reasoning.* keys in PROFILE_EPHEMERAL_KEYS', () => {
// Verify all reasoning settings are saveable
expect(PROFILE_EPHEMERAL_KEYS).toContain('reasoning.enabled');
expect(PROFILE_EPHEMERAL_KEYS).toContain('reasoning.includeInContext');
expect(PROFILE_EPHEMERAL_KEYS).toContain('reasoning.includeInResponse');
expect(PROFILE_EPHEMERAL_KEYS).toContain('reasoning.format');
expect(PROFILE_EPHEMERAL_KEYS).toContain('reasoning.stripFromContext');
expect(PROFILE_EPHEMERAL_KEYS).toContain('reasoning.effort');
expect(PROFILE_EPHEMERAL_KEYS).toContain('reasoning.maxTokens');
expect(PROFILE_EPHEMERAL_KEYS).toContain('reasoning.summary');
expect(PROFILE_EPHEMERAL_KEYS).toContain('reasoning.verbosity');
🤖 Prompt for AI Agents
In `@packages/cli/src/runtime/runtimeSettings.reasoningSummary.test.ts` around
lines 18 - 27, The test checking that PROFILE_EPHEMERAL_KEYS includes all
reasoning.* keys is missing the 'reasoning.verbosity' key; update the test in
runtimeSettings.reasoningSummary.test.ts to add an expectation that
PROFILE_EPHEMERAL_KEYS contains 'reasoning.verbosity' (i.e., add
expect(PROFILE_EPHEMERAL_KEYS).toContain('reasoning.verbosity'); alongside the
other reasoning.* assertions) so the PROFILE_EPHEMERAL_KEYS coverage remains
complete.

});

it('should include text.verbosity in PROFILE_EPHEMERAL_KEYS', () => {
// text.verbosity is for OpenAI Responses API response verbosity control
expect(PROFILE_EPHEMERAL_KEYS).toContain('text.verbosity');
});
});
1 change: 1 addition & 0 deletions packages/cli/src/runtime/runtimeSettings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -894,6 +894,7 @@ export function clearActiveModelParam(name: string): void {
settingsService.setProviderSetting(providerName, name, undefined);
}

// Use centralized settings registry for profile-persistable keys
export const PROFILE_EPHEMERAL_KEYS: readonly string[] =
getProfilePersistableKeys();

Expand Down
Loading
Loading