Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions src/app/api/query/tour/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
export const maxDuration = 300;
import { serializeNodesForQuery, buildTourPrompt } from '@/lib/agents/query';
import type { TourResponse, TourChapter, QuerySerializedNode } from '@/lib/agents/query';
import { extractJsonObject } from '@/lib/utils/json';
import { parseLlmJsonLoose } from '@/lib/llm/parse';

const EMPTY_TOUR: TourResponse = {
chapters: [
Expand Down Expand Up @@ -43,7 +43,7 @@
return chapters.length > 0 ? { chapters } : null;
}

export async function GET(_request: Request): Promise<Response> {

Check warning on line 46 in src/app/api/query/tour/route.ts

View workflow job for this annotation

GitHub Actions / verify

'_request' is defined but never used
const supabase = await createClient();
const { data: { user }, error: authError } = await supabase.auth.getUser();
if (authError || !user) {
Expand All @@ -63,7 +63,7 @@
return Response.json({ tour: profile.guided_tour, generatedAt: profile.guided_tour_generated_at });
}

export async function POST(_request: Request): Promise<Response> {

Check warning on line 66 in src/app/api/query/tour/route.ts

View workflow job for this annotation

GitHub Actions / verify

'_request' is defined but never used
const supabase = await createClient();
const { data: { user }, error: authError } = await supabase.auth.getUser();
if (authError || !user) {
Expand Down Expand Up @@ -103,8 +103,7 @@
}

try {
const extracted = extractJsonObject(llmText);
const parsed = JSON.parse(extracted) as unknown;
const parsed = parseLlmJsonLoose(llmText);
const tour = normalizeTour(parsed);
if (!tour) {
console.error('[tour] Invalid tour structure after normalize. Raw (200):', llmText.slice(0, 200));
Expand Down
6 changes: 3 additions & 3 deletions src/lib/agents/distillation.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { callLLM } from '@/lib/llm';
import { z } from 'zod';
import { getDistillableTypes } from '@/lib/config/captureTypes';
import { extractJsonObject } from '@/lib/utils/json';
import { parseLlmJson } from '@/lib/llm/parse';
import type { SupabaseClient } from '@supabase/supabase-js';

interface NodeSummary {
Expand Down Expand Up @@ -53,7 +53,7 @@ export async function runDistillation(

let groups: Array<{ node_ids: string[]; rationale: string }> = [];
try {
const parsed = clusterSchema.parse(JSON.parse(extractJsonObject(clusterResult.content)));
const parsed = parseLlmJson(clusterResult.content, clusterSchema);
groups = parsed.groups ?? [];
} catch {
return { created: 0, errors: ['Cluster LLM response was not valid JSON or schema'] };
Expand Down Expand Up @@ -85,7 +85,7 @@ export async function runDistillation(
maxTokens: 1024,
});

const synthesis = synthesisSchema.parse(JSON.parse(extractJsonObject(synthResult.content)));
const synthesis = parseLlmJson(synthResult.content, synthesisSchema);

const { error } = await supabase.from('distillation_candidates').insert({
node_ids: groupNodes.map(n => n.id),
Expand Down
31 changes: 16 additions & 15 deletions src/lib/agents/extraction.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import {
getLlmNodeTypeEnum,
getLlmNodeTypeDescriptions,
} from '@/lib/config/captureTypes';
import { extractJsonObject } from '@/lib/utils/json';
import { parseLlmJsonLoose } from '@/lib/llm/parse';

// Computed once at module load from taxonomy config — change captureTypes.ts to update these.
const LLM_NODE_TYPE_ENUM = getLlmNodeTypeEnum();
Expand Down Expand Up @@ -188,12 +188,9 @@ export function buildExtractionPrompt(
}

export function parseExtractionResponse(content: string): LlmExtraction {
const stripped = content.replace(/^```(?:json)?\n?/m, '').replace(/\n?```$/m, '').trim();
const cleaned = extractJsonObject(stripped);

let parsed: unknown;
try {
parsed = JSON.parse(cleaned);
parsed = parseLlmJsonLoose(content);
} catch {
// LLM returned natural language instead of JSON — likely a PDF it cannot read
throw new Error('PDF_UNREADABLE');
Expand Down Expand Up @@ -271,16 +268,18 @@ export function buildMeetingExtractionPrompt(
}

export function parseMeetingExtractionResponse(content: string): MeetingExtraction {
const stripped = content.replace(/^```(?:json)?\n?/m, '').replace(/\n?```$/m, '').trim();
const cleaned = extractJsonObject(stripped);
const parsed = JSON.parse(cleaned);
const parsed = parseLlmJsonLoose(content);
if (typeof parsed !== 'object' || parsed === null) {
throw new Error('Meeting extraction response must be a JSON object');
}
const obj = parsed as Record<string, unknown>;
const required = ['meeting_title', 'meeting_summary', 'extracted_nodes'];
for (const field of required) {
if (!(field in parsed)) {
if (!(field in obj)) {
throw new Error(`Missing required field: ${field}`);
}
}
if (!Array.isArray(parsed.extracted_nodes) || parsed.extracted_nodes.length === 0) {
if (!Array.isArray(obj.extracted_nodes) || obj.extracted_nodes.length === 0) {
throw new Error('extracted_nodes must be a non-empty array');
}
return parsed as MeetingExtraction;
Expand Down Expand Up @@ -349,14 +348,16 @@ export function buildDocumentExtractionPrompt(
}

export function parseDocumentExtractionResponse(content: string): DocumentExtraction {
const stripped = content.replace(/^```(?:json)?\n?/m, '').replace(/\n?```$/m, '').trim();
const cleaned = extractJsonObject(stripped);
const parsed = JSON.parse(cleaned);
const parsed = parseLlmJsonLoose(content);
if (typeof parsed !== 'object' || parsed === null) {
throw new Error('Document extraction response must be a JSON object');
}
const obj = parsed as Record<string, unknown>;
const required = ['document_title', 'document_summary', 'extracted_nodes'];
for (const field of required) {
if (!(field in parsed)) throw new Error(`Missing required field: ${field}`);
if (!(field in obj)) throw new Error(`Missing required field: ${field}`);
}
if (!Array.isArray(parsed.extracted_nodes) || parsed.extracted_nodes.length === 0) {
if (!Array.isArray(obj.extracted_nodes) || obj.extracted_nodes.length === 0) {
throw new Error('extracted_nodes must be a non-empty array');
}
return parsed as DocumentExtraction;
Expand Down
6 changes: 4 additions & 2 deletions src/lib/agents/process.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import type { Node } from '@/lib/types/nodes';
import { callLLM } from '@/lib/llm';
import { parseLlmJsonLoose } from '@/lib/llm/parse';
import type { GoalContext } from '@/lib/agents/extraction';

export interface SuggestedNodeImpact {
Expand Down Expand Up @@ -59,8 +60,9 @@ Return ONLY valid JSON array:
Only include commitments where the learning is clearly relevant. If a commitment is unaffected, exclude it.`;

function parseJsonResponse<T>(content: string): T {
const cleaned = content.replace(/^```(?:json)?\n?/m, '').replace(/\n?```$/m, '').trim();
return JSON.parse(cleaned) as T;
// Handles both object and array responses (step 3 returns an array), plus
// ```json fences and trailing prose.
return parseLlmJsonLoose(content) as T;
}

export async function suggestAffectedNodes(
Expand Down
10 changes: 3 additions & 7 deletions src/lib/agents/reflection.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import { parseLlmJsonLoose } from '@/lib/llm/parse';

// ─── Types ────────────────────────────────────────────────────────────────────

export interface ReflectionContext {
Expand Down Expand Up @@ -175,13 +177,7 @@ const REQUIRED_FIELDS = [
] as const;

export function parseReflectionResponse(content: string): ReflectionReport {
// Strip markdown code fences if present
const cleaned = content
.replace(/^```(?:json)?\n?/m, '')
.replace(/\n?```$/m, '')
.trim();

const parsed: unknown = JSON.parse(cleaned);
const parsed: unknown = parseLlmJsonLoose(content);

if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
throw new Error('Reflection response must be a JSON object');
Expand Down
5 changes: 3 additions & 2 deletions src/lib/agents/setup.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { callLLM } from '@/lib/llm';
import { parseLlmJsonLoose } from '@/lib/llm/parse';

const GOAL_SUGGEST_PROMPT = `You are helping a team articulate their strategic goals for a knowledge management system.
The user will describe what they're trying to do in plain language.
Expand Down Expand Up @@ -51,7 +52,7 @@ export async function suggestGoal(userInput: string): Promise<GoalSuggestion> {

let parsed: unknown;
try {
parsed = JSON.parse(response.content);
parsed = parseLlmJsonLoose(response.content);
} catch {
throw new Error('Failed to parse goal suggestion');
}
Expand Down Expand Up @@ -80,7 +81,7 @@ export async function processSeedChat(input: SeedChatInput): Promise<SeedChatRes

let parsed: unknown;
try {
parsed = JSON.parse(response.content);
parsed = parseLlmJsonLoose(response.content);
} catch {
throw new Error('Failed to parse seed chat response');
}
Expand Down
3 changes: 2 additions & 1 deletion src/lib/correction/agent.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import type { SupabaseClient } from '@supabase/supabase-js';
import { callLLM } from '@/lib/llm';
import { parseLlmJsonLoose } from '@/lib/llm/parse';

const VALID_NODE_TYPES = new Set(['hunch', 'learning', 'commitment', 'signal', 'option', 'test']);

Expand Down Expand Up @@ -60,7 +61,7 @@ export function buildCorrectionPrompt(

export function parseCorrectionActions(rawJson: string): CorrectionResult {
try {
const parsed = JSON.parse(rawJson) as unknown;
const parsed = parseLlmJsonLoose(rawJson) as unknown;
if (typeof parsed !== 'object' || parsed === null) return { reasoning: '', actions: [] };
const obj = parsed as Record<string, unknown>;
const reasoning = typeof obj['reasoning'] === 'string' ? obj['reasoning'] : '';
Expand Down
74 changes: 74 additions & 0 deletions src/lib/llm/__tests__/parse.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import { describe, it, expect } from 'vitest';
import { z } from 'zod';
import { extractJson, parseLlmJson, tryParseLlmJson, parseLlmJsonLoose } from '../parse';

// Exercises extractJson's tolerance for code fences, surrounding prose,
// top-level arrays, and bracket characters embedded in string values.
describe('extractJson', () => {
  it('returns a bare object unchanged', () => {
    expect(extractJson('{"a":1}')).toBe('{"a":1}');
  });

  it('returns trimmed text when there is no object or array', () => {
    // With no `{` or `[` to anchor on, the input is handed back trimmed so
    // that bare JSON primitives still parse downstream.
    expect(extractJson('  42  ')).toBe('42');
  });

  it('strips ```json code fences', () => {
    expect(JSON.parse(extractJson('```json\n{"a":1}\n```'))).toEqual({ a: 1 });
  });

  it('strips plain ``` fences and leading prose', () => {
    expect(JSON.parse(extractJson('Here you go:\n```\n{"a":1}\n```'))).toEqual({ a: 1 });
  });

  it('drops trailing commentary after the object', () => {
    expect(JSON.parse(extractJson('{"a":1}\n\nHope that helps!'))).toEqual({ a: 1 });
  });

  it('extracts a top-level array (process.ts step-3 shape)', () => {
    expect(JSON.parse(extractJson('```json\n[{"id":1},{"id":2}]\n```'))).toEqual([{ id: 1 }, { id: 2 }]);
  });

  it('picks whichever of { or [ comes first', () => {
    // Object opener first: the whole object wins, nested array included.
    expect(JSON.parse(extractJson('{"wrap":[1,2]}'))).toEqual({ wrap: [1, 2] });
    // Array opener first: the whole array wins, nested objects included.
    expect(JSON.parse(extractJson('[{"wrap":1},{"wrap":2}]'))).toEqual([{ wrap: 1 }, { wrap: 2 }]);
  });

  it('ignores braces inside strings', () => {
    expect(JSON.parse(extractJson('{"text":"a } b { c"}'))).toEqual({ text: 'a } b { c' });
  });
});

// parseLlmJson: extraction + JSON.parse + Zod validation in one call.
describe('parseLlmJson', () => {
  const nameCountSchema = z.object({ name: z.string(), n: z.number() });

  it('parses + validates a fenced response', () => {
    const fenced = '```json\n{"name":"x","n":2}\n```';
    const result = parseLlmJson(fenced, nameCountSchema);
    expect(result).toEqual({ name: 'x', n: 2 });
  });

  it('throws on schema mismatch', () => {
    // Valid JSON, but the required `n` field is absent.
    const attempt = () => parseLlmJson('{"name":"x"}', nameCountSchema);
    expect(attempt).toThrow();
  });

  it('throws on invalid JSON', () => {
    const attempt = () => parseLlmJson('not json', nameCountSchema);
    expect(attempt).toThrow();
  });
});

// tryParseLlmJson: same pipeline as parseLlmJson, but failures surface as null.
describe('tryParseLlmJson', () => {
  const okSchema = z.object({ ok: z.boolean() });

  it('returns the value on success', () => {
    const result = tryParseLlmJson('{"ok":true}', okSchema);
    expect(result).toEqual({ ok: true });
  });

  it('returns null on failure instead of throwing', () => {
    // First input is not JSON at all; second is JSON that fails the schema.
    const badInputs = ['garbage', '{"ok":"nope"}'];
    for (const bad of badInputs) {
      expect(tryParseLlmJson(bad, okSchema)).toBeNull();
    }
  });
});

// parseLlmJsonLoose: extraction + JSON.parse with no schema step.
describe('parseLlmJsonLoose', () => {
  it('parses fenced JSON without a schema', () => {
    const fenced = '```json\n{"a":1}\n```';
    const value = parseLlmJsonLoose(fenced);
    expect(value).toEqual({ a: 1 });
  });

  it('throws on invalid JSON', () => {
    const attempt = () => parseLlmJsonLoose('definitely not json');
    expect(attempt).toThrow();
  });
});
63 changes: 63 additions & 0 deletions src/lib/llm/parse.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import type { z } from 'zod';

/**
 * Extracts a JSON value from LLM output, tolerating ```json code fences,
 * leading prose, and trailing commentary.
 *
 * Every `{` or `[` is considered in order of appearance. For each one the
 * balanced `{...}` / `[...]` span starting there is located (brackets inside
 * double-quoted strings are ignored), and the first span that actually parses
 * as JSON is returned. Checking parseability means a stray bracket in the
 * surrounding prose (e.g. `Result [final]: {"a":1}`) no longer hides the real
 * payload. A prose bracket that happens to be valid JSON (e.g. `[1]`) still
 * wins, exactly as before.
 *
 * Fallbacks, chosen so the caller's `JSON.parse` raises on bad input:
 * - No span parses: returns the first candidate — its balanced span, or
 *   everything from that opener to the end of the input if it never closes.
 * - No `{` or `[` at all: returns the trimmed input.
 *
 * This is the single seam every agent parses LLM JSON through, replacing the
 * six bespoke variants that previously diverged (raw JSON.parse with no fence
 * handling, ad-hoc fence regexes, object-only extraction that broke on arrays).
 *
 * @param text Raw LLM response text.
 * @returns The substring to hand to `JSON.parse`; not guaranteed to be valid JSON.
 */
export function extractJson(text: string): string {
  // What the first opener alone would yield; kept as the last-resort result.
  let fallback: string | null = null;

  for (let start = 0; start < text.length; start++) {
    const c = text[start];
    if (c !== '{' && c !== '[') continue;

    const end = findBalancedEnd(text, start);
    const candidate = end === -1 ? text.slice(start) : text.slice(start, end + 1);
    if (fallback === null) fallback = candidate;

    // An opener that never closes cannot be valid JSON, so skip the parse.
    if (end !== -1 && isParseableJson(candidate)) return candidate;
  }

  return fallback ?? text.trim();
}

/**
 * Returns the index of the bracket that closes the `{` or `[` at `start`,
 * or -1 if the input ends first. Only the bracket kind found at `start` is
 * counted; brackets inside double-quoted strings are skipped.
 */
function findBalancedEnd(text: string, start: number): number {
  const open = text[start];
  const close = open === '{' ? '}' : ']';

  let depth = 0;
  let inString = false;
  let escaped = false;
  for (let i = start; i < text.length; i++) {
    const c = text[i];
    // The character after a backslash inside a string is consumed verbatim,
    // so an escaped quote does not end the string.
    if (escaped) { escaped = false; continue; }
    if (c === '\\' && inString) { escaped = true; continue; }
    if (c === '"') { inString = !inString; continue; }
    if (inString) continue;

    if (c === open) {
      depth++;
    } else if (c === close) {
      depth--;
      if (depth === 0) return i;
    }
  }
  return -1;
}

/** True when `candidate` is accepted by `JSON.parse`. */
function isParseableJson(candidate: string): boolean {
  try {
    JSON.parse(candidate);
    return true;
  } catch {
    return false;
  }
}

/**
 * Pulls the JSON payload out of `content`, parses it, and validates the
 * result against `schema`.
 *
 * @throws If the extracted text is not valid JSON, or the parsed value does
 *   not satisfy the schema.
 */
export function parseLlmJson<T>(content: string, schema: z.ZodType<T>): T {
  const jsonText = extractJson(content);
  const raw: unknown = JSON.parse(jsonText);
  return schema.parse(raw);
}

/**
 * Same as `parseLlmJson`, except that any failure — unparseable JSON or a
 * schema mismatch — yields `null` rather than an exception.
 */
export function tryParseLlmJson<T>(content: string, schema: z.ZodType<T>): T | null {
  let result: T | null = null;
  try {
    result = parseLlmJson(content, schema);
  } catch {
    // Deliberately swallowed: callers of this variant treat failure as "no value".
  }
  return result;
}

/**
 * Schema-free parse for callers that validate the shape themselves or accept
 * any shape. The result is typed `unknown`, so it must be narrowed or
 * validated before use.
 *
 * @throws If the extracted text is not valid JSON.
 */
export function parseLlmJsonLoose(content: string): unknown {
  const jsonText = extractJson(content);
  const value: unknown = JSON.parse(jsonText);
  return value;
}
3 changes: 2 additions & 1 deletion src/lib/signals/webScanner.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { extractKeywords, filterRelevant } from './relevanceFilter';
import { ingestSignals, type SignalInput } from './signalIngestor';
import { callLLM } from '@/lib/llm';
import { parseLlmJson } from '@/lib/llm/parse';
import { z } from 'zod';

const extractedSignalSchema = z.object({
Expand Down Expand Up @@ -114,7 +115,7 @@ export async function scanWebForTopics(userId: string): Promise<{ created: numbe

let extracted: z.infer<typeof extractedSignalSchema>[] = [];
try {
const parsed = extractionOutputSchema.parse(JSON.parse(extractionResult.content));
const parsed = parseLlmJson(extractionResult.content, extractionOutputSchema);
extracted = parsed.signals ?? [];
} catch {
// non-fatal: malformed or schema-invalid LLM output
Expand Down
Loading