From d71b83ae0d7ed894f5ea63f9e3f7cc937b357298 Mon Sep 17 00:00:00 2001 From: Jackson Date: Wed, 4 Mar 2026 15:29:32 +0000 Subject: [PATCH 01/13] feat: implement task 0x9a74dfdbbaaf87c2d7 via TaskMarket agent #24583 --- src/logic/brave.ts | 255 +++++++++++++++++++++++++++--------------- src/logic/cache.ts | 127 +++++++++++++-------- src/logic/rank.ts | 220 ++++++++++++++++++++++-------------- src/logic/synth.ts | 270 ++++++++++++++++++++++++++++++--------------- src/logic/types.ts | 11 ++ 5 files changed, 577 insertions(+), 306 deletions(-) create mode 100644 src/logic/types.ts diff --git a/src/logic/brave.ts b/src/logic/brave.ts index 69b4c70..d0102e3 100644 --- a/src/logic/brave.ts +++ b/src/logic/brave.ts @@ -1,127 +1,206 @@ -/** - * Brave Search API client. - * Wraps the Brave Web Search API and normalizes results. - */ - -export interface SearchResult { - title: string; - url: string; - snippet: string; - published?: string; - score?: number; -} +import type { Freshness, SearchResult } from "./types"; export interface BraveSearchOptions { - freshness?: "day" | "week" | "month"; + freshness?: Freshness; count?: number; - type?: "web" | "news"; + offset?: number; + signal?: AbortSignal; } -export class BraveApiError extends Error { - constructor( - public statusCode: number, - message: string, - ) { +export class BraveError extends Error { + status?: number; + + constructor(message: string, status?: number) { super(message); + this.name = "BraveError"; + this.status = status; + } +} + +export class BraveApiError extends BraveError { + constructor(message: string, status?: number) { + super(message, status); this.name = "BraveApiError"; } } export class BraveRateLimitError extends BraveApiError { - constructor() { - super(429, "Brave API rate limit exceeded"); + retryAfterSeconds?: number; + + constructor(message: string, retryAfterSeconds?: number) { + super(message, 429); this.name = "BraveRateLimitError"; + this.retryAfterSeconds = retryAfterSeconds; } } -export class BraveAuthError extends BraveApiError { - constructor() { - super(401, "Invalid Brave API key"); - this.name = "BraveAuthError"; +const FRESHNESS_MAP: Record = { + day: "pd", + week: "pw", + month: "pm", +}; + +const DEFAULT_COUNT = 10; +const MAX_COUNT = 20; + +function toStringValue(value: unknown): string { + return typeof value === "string" ? value.trim() : ""; +} + +function extractDomain(url: string): string | null { + try { + const parsed = new URL(url); + return parsed.hostname.toLowerCase(); + } catch { + return null; } } -const BRAVE_API_BASE = "https://api.search.brave.com/res/v1"; +function parsePublishedAt(value: unknown): string | null { + if (typeof value === "string") { + const trimmed = value.trim(); + if (!trimmed) return null; + const parsed = Date.parse(trimmed); + return Number.isNaN(parsed) ? null : new Date(parsed).toISOString(); + } -export async function braveSearch( - query: string, - opts?: BraveSearchOptions, -): Promise { - const apiKey = process.env.BRAVE_API_KEY; - if (!apiKey) { - throw new BraveAuthError(); + if (typeof value === "number") { + if (!Number.isFinite(value)) return null; + const asMs = value > 10_000_000_000 ? value : value * 1000; + const date = new Date(asMs); + return Number.isNaN(date.getTime()) ? null : date.toISOString(); } - const count = opts?.count ?? 10; - const searchType = opts?.type ?? "web"; - const endpoint = searchType === "news" ? "news/search" : "web/search"; + return null; +} + +function normalizeCount(count?: number): number { + if (typeof count !== "number" || !Number.isFinite(count)) return DEFAULT_COUNT; + if (count < 1) return 1; + if (count > MAX_COUNT) return MAX_COUNT; + return Math.floor(count); +} + +function normalizeOffset(offset?: number): number { + if (typeof offset !== "number" || !Number.isFinite(offset)) return 0; + return Math.max(0, Math.floor(offset)); +} + +function normalizeBraveResult(rawItem: unknown): SearchResult | null { + if (!rawItem || typeof rawItem !== "object") return null; + + const item = rawItem as Record; + const title = toStringValue(item.title); + const url = toStringValue(item.url); + const snippet = toStringValue(item.description) || toStringValue(item.snippet); + + if (!title || !url) return null; - const params = new URLSearchParams({ - q: query, - count: String(count), - }); + const metaUrl = item.meta_url && typeof item.meta_url === "object" + ? (item.meta_url as Record) + : undefined; - if (opts?.freshness) { - params.set("freshness", opts.freshness); + const domainFromMeta = toStringValue(metaUrl?.hostname); + const domain = (domainFromMeta || extractDomain(url) || "").toLowerCase(); + + if (!domain) return null; + + const publishedAt = parsePublishedAt(item.page_age ?? item.published ?? item.age); + + return { + title, + url, + snippet, + domain, + source: "brave", + publishedAt, + }; +} + +export function normaliseBraveResponse(payload: unknown): SearchResult[] { + const root = payload as { web?: { results?: unknown } }; + const results = root?.web?.results; + + if (!Array.isArray(results)) return []; + + const normalized: SearchResult[] = []; + for (const item of results) { + const parsed = normalizeBraveResult(item); + if (parsed) normalized.push(parsed); } + return normalized; +} - const url = `${BRAVE_API_BASE}/${endpoint}?${params}`; +export async function searchBrave( + query: string, + options: BraveSearchOptions = {}, +): Promise { + const trimmedQuery = query.trim(); + if (!trimmedQuery) return []; - const response = await fetch(url, { - headers: { - Accept: "application/json", - "Accept-Encoding": "gzip", - "X-Subscription-Token": apiKey, - }, - }); + const apiKey = process.env.BRAVE_API_KEY?.trim(); + const endpoint = process.env.BRAVE_API_URL?.trim(); - if (response.status === 429) throw new BraveRateLimitError(); - if (response.status === 401) throw new BraveAuthError(); - if (!response.ok) { - throw new BraveApiError( - response.status, - `Brave API error: ${response.status} ${response.statusText}`, - ); + if (!apiKey) { + throw new BraveApiError("BRAVE_API_KEY is not set"); + } + + if (!endpoint) { + throw new BraveApiError("BRAVE_API_URL is not set"); } - const body = await response.json(); + const params = new URLSearchParams(); + params.set("q", trimmedQuery); + params.set("count", String(normalizeCount(options.count))); + params.set("offset", String(normalizeOffset(options.offset))); - if (searchType === "news") { - return normalizeNewsResults(body); + if (options.freshness) { + params.set("freshness", FRESHNESS_MAP[options.freshness]); } - return normalizeWebResults(body); -} -function normalizeWebResults(body: any): SearchResult[] { - const results: SearchResult[] = []; - const webResults = body?.web?.results ?? []; - - for (const r of webResults) { - results.push({ - title: r.title ?? "", - url: r.url ?? "", - snippet: r.description ?? "", - published: r.page_age ?? undefined, - score: r.relevance_score ?? undefined, + const requestUrl = `${endpoint}${endpoint.includes("?") ? "&" : "?"}${params.toString()}`; + + let response: Response; + try { + response = await fetch(requestUrl, { + method: "GET", + headers: { + Accept: "application/json", + "X-Subscription-Token": apiKey, + }, + signal: options.signal, }); + } catch (error) { + const message = error instanceof Error ? error.message : "Unknown network error"; + throw new BraveApiError(`Failed to call Brave API: ${message}`); } - return results; -} + if (response.status === 429) { + const retryAfterHeader = response.headers.get("Retry-After"); + const retryAfterSeconds = retryAfterHeader ? Number.parseInt(retryAfterHeader, 10) : undefined; + throw new BraveRateLimitError("Brave API rate limit exceeded", retryAfterSeconds); + } -function normalizeNewsResults(body: any): SearchResult[] { - const results: SearchResult[] = []; - const newsResults = body?.results ?? []; - - for (const r of newsResults) { - results.push({ - title: r.title ?? "", - url: r.url ?? "", - snippet: r.description ?? "", - published: r.age ?? undefined, - score: r.relevance_score ?? undefined, - }); + if (!response.ok) { + let body = ""; + try { + body = await response.text(); + } catch { + body = ""; + } + const suffix = body ? `: ${body}` : ""; + throw new BraveApiError(`Brave API request failed (${response.status})${suffix}`, response.status); } - return results; + let payload: unknown; + try { + payload = await response.json(); + } catch { + throw new BraveApiError("Brave API returned invalid JSON", response.status); + } + + return normaliseBraveResponse(payload); } + +export type { SearchResult, Freshness } from "./types"; +export default searchBrave; \ No newline at end of file diff --git a/src/logic/cache.ts b/src/logic/cache.ts index cee17f5..6f41ed3 100644 --- a/src/logic/cache.ts +++ b/src/logic/cache.ts @@ -1,83 +1,95 @@ -/** - * In-memory cache with TTL. - * Reduces Brave API calls on repeated queries. - */ - -interface CacheEntry { - value: T; - expiresAt: number; -} +import { createHash } from "node:crypto"; export interface CacheStats { hits: number; misses: number; - size: number; hitRate: number; + size: number; +} + +interface CacheEntry { + value: T; + expiresAt: number; } -export class Cache { - private store = new Map>(); +function getDefaultTtlSeconds(): number { + const raw = process.env.CACHE_TTL_SECONDS; + const parsed = raw ? Number(raw) : 300; + if (!Number.isFinite(parsed) || parsed <= 0) return 300; + return parsed; +} + +export function normalizeQuery(query: string): string { + return query.trim().toLowerCase().replace(/\s+/g, " "); +} + +export function hashQuery(normalizedQuery: string): string { + return createHash("sha256").update(normalizedQuery).digest("hex"); +} + +export function getCacheKey(query: string): string { + return hashQuery(normalizeQuery(query)); +} + +export class InMemoryCache { + private readonly store = new Map(); + private readonly defaultTtlSeconds: number; private hits = 0; private misses = 0; - private ttlMs: number; - constructor(ttlSeconds?: number) { - this.ttlMs = (ttlSeconds ?? Number(process.env.CACHE_TTL_SECONDS) ?? 300) * 1000; + constructor(defaultTtlSeconds = getDefaultTtlSeconds()) { + this.defaultTtlSeconds = defaultTtlSeconds; } - /** - * Normalize a query string into a stable cache key. - */ - static normalizeKey(query: string, params?: Record): string { - const normalized = query.toLowerCase().trim().replace(/\s+/g, " "); - if (!params || Object.keys(params).length === 0) return normalized; - const sorted = Object.entries(params) - .sort(([a], [b]) => a.localeCompare(b)) - .map(([k, v]) => `${k}=${v}`) - .join("&"); - return `${normalized}|${sorted}`; + private purgeExpired(now = Date.now()): void { + for (const [key, entry] of this.store.entries()) { + if (entry.expiresAt <= now) { + this.store.delete(key); + } + } } - get(key: string): { value: T; stale: boolean } | null { + get(query: string): T | undefined { + const key = getCacheKey(query); const entry = this.store.get(key); + if (!entry) { - this.misses++; - return null; + this.misses += 1; + return undefined; } - const now = Date.now(); - if (now > entry.expiresAt) { + if (entry.expiresAt <= Date.now()) { this.store.delete(key); - this.misses++; - return null; + this.misses += 1; + return undefined; } - this.hits++; - return { value: entry.value, stale: false }; + this.hits += 1; + return entry.value as T; } - set(key: string, value: T): void { - this.store.set(key, { - value, - expiresAt: Date.now() + this.ttlMs, - }); + set(query: string, value: T, ttlSeconds?: number): void { + const resolvedTtlSeconds = + typeof ttlSeconds === "number" && Number.isFinite(ttlSeconds) && ttlSeconds > 0 + ? ttlSeconds + : this.defaultTtlSeconds; + + const expiresAt = Date.now() + resolvedTtlSeconds * 1000; + const key = getCacheKey(query); + + this.store.set(key, { value, expiresAt }); + this.purgeExpired(); } stats(): CacheStats { - // Clean expired entries - const now = Date.now(); - for (const [key, entry] of this.store) { - if (now > entry.expiresAt) { - this.store.delete(key); - } - } - + this.purgeExpired(); const total = this.hits + this.misses; + return { hits: this.hits, misses: this.misses, + hitRate: total === 0 ? 0 : this.hits / total, size: this.store.size, - hitRate: total > 0 ? this.hits / total : 0, }; } @@ -87,3 +99,20 @@ export class Cache { this.misses = 0; } } + +const singletonCache = new InMemoryCache(); + +export function get(query: string): T | undefined { + return singletonCache.get(query); +} + +export function set(query: string, value: T, ttlSeconds?: number): void { + singletonCache.set(query, value, ttlSeconds); +} + +export function stats(): CacheStats { + return singletonCache.stats(); +} + +export const cache = singletonCache; +export default singletonCache; \ No newline at end of file diff --git a/src/logic/rank.ts b/src/logic/rank.ts index 371b20c..389b9d3 100644 --- a/src/logic/rank.ts +++ b/src/logic/rank.ts @@ -1,107 +1,163 @@ -/** - * Source ranking and deduplication. - * Deduplicates by domain, boosts recency, filters low quality. - */ - -import type { SearchResult } from "./brave"; - -const SUSPICIOUS_DOMAINS = new Set([ - "pinterest.com", - "quora.com", - "slideshare.net", - "scribd.com", -]); +import type { SearchResult } from "./types"; + +export interface RankOptions { + maxPerDomain?: number; + minSnippetLength?: number; + minTitleLength?: number; + now?: Date; +} -const MAX_PER_DOMAIN = 2; +const TRACKING_PARAM_EXACT = new Set([ + "fbclid", + "gclid", + "ref", + "source", + "mc_cid", + "mc_eid", +]); -function extractDomain(url: string): string { +function extractDomain(url: string): string | null { try { - const hostname = new URL(url).hostname; - // Strip www. - return hostname.replace(/^www\./, ""); + const parsed = new URL(url); + return parsed.hostname.toLowerCase(); } catch { - return url; + return null; } } -function recencyScore(published?: string): number { - if (!published) return 0; - +function canonicalizeUrl(url: string): string | null { try { - const pubDate = new Date(published); - const now = new Date(); - const ageMs = now.getTime() - pubDate.getTime(); - const ageDays = ageMs / (1000 * 60 * 60 * 24); + const parsed = new URL(url); + if (parsed.protocol !== "http:" && parsed.protocol !== "https:") { + return null; + } + + parsed.hash = ""; + + const keys = [...parsed.searchParams.keys()]; + for (const key of keys) { + const lower = key.toLowerCase(); + if (lower.startsWith("utm_") || TRACKING_PARAM_EXACT.has(lower)) { + parsed.searchParams.delete(key); + } + } + + parsed.pathname = parsed.pathname.replace(/\/+$/, "") || "/"; - // More recent = higher score (max 1.0 for today, decays over 90 days) - return Math.max(0, 1 - ageDays / 90); + const query = parsed.searchParams.toString(); + return `${parsed.protocol}//${parsed.host}${parsed.pathname}${query ? `?${query}` : ""}`; } catch { - return 0; + return null; } } +function toTimestamp(value?: string | null): number | null { + if (!value) return null; + const parsed = Date.parse(value); + return Number.isNaN(parsed) ? null : parsed; +} + +function recencyBoost(publishedAt: string | null | undefined, now: Date): number { + const ts = toTimestamp(publishedAt); + if (ts === null) return 0; + + const ageMs = Math.max(0, now.getTime() - ts); + const days = ageMs / (1000 * 60 * 60 * 24); + + if (days <= 1) return 0.35; + if (days <= 7) return 0.25; + if (days <= 30) return 0.15; + if (days <= 90) return 0.08; + if (days <= 365) return 0.03; + return 0; +} + function qualityScore(result: SearchResult): number { - let score = 0; + const titleLen = result.title.trim().length; + const snippetLen = result.snippet.trim().length; - // Has snippet (essential) - if (result.snippet && result.snippet.length > 20) { - score += 0.4; - } else { - return 0; // No snippet = filtered out - } + const titleScore = Math.min(120, titleLen) / 120 * 0.3; + const snippetScore = Math.min(320, snippetLen) / 320 * 0.45; + const httpsBoost = result.url.startsWith("https://") ? 0.05 : 0; + const authorityBoost = /\.(gov|edu)$/.test(result.domain) ? 0.05 : 0; - // Snippet length bonus - score += Math.min(result.snippet.length / 500, 0.2); + return titleScore + snippetScore + httpsBoost + authorityBoost; +} - // Has title - if (result.title && result.title.length > 5) { - score += 0.1; - } +function isLowQuality( + result: SearchResult, + minTitleLength: number, + minSnippetLength: number, +): boolean { + if (!result.title || !result.url) return true; + if (result.title.trim().length < minTitleLength) return true; + if (result.snippet.trim().length < minSnippetLength) return true; + return canonicalizeUrl(result.url) === null; +} - // Recency - score += recencyScore(result.published) * 0.2; +function pickPreferred(existing: SearchResult, incoming: SearchResult): SearchResult { + const existingTs = toTimestamp(existing.publishedAt ?? null) ?? 0; + const incomingTs = toTimestamp(incoming.publishedAt ?? null) ?? 0; - // Existing relevance score from Brave - if (result.score) { - score += Math.min(result.score, 0.1); - } + if (incomingTs > existingTs) return incoming; + if (incomingTs < existingTs) return existing; - return score; + return incoming.snippet.length > existing.snippet.length ? incoming : existing; } -export function rank( - results: SearchResult[], - topN?: number, -): SearchResult[] { - const n = topN ?? 10; - - // Filter suspicious domains and low quality - const filtered = results.filter((r) => { - const domain = extractDomain(r.url); - if (SUSPICIOUS_DOMAINS.has(domain)) return false; - if (!r.snippet || r.snippet.length < 20) return false; - return true; - }); - - // Score and sort - const scored = filtered.map((r) => ({ - result: r, - score: qualityScore(r), - })); - scored.sort((a, b) => b.score - a.score); - - // Deduplicate by domain (max 2 per domain) - const domainCount = new Map(); - const deduped: SearchResult[] = []; - - for (const { result } of scored) { - const domain = extractDomain(result.url); - const count = domainCount.get(domain) ?? 0; - if (count >= MAX_PER_DOMAIN) continue; - domainCount.set(domain, count + 1); - deduped.push(result); - if (deduped.length >= n) break; +export function rankResults(results: SearchResult[], options: RankOptions = {}): SearchResult[] { + if (!Array.isArray(results) || results.length === 0) return []; + + const now = options.now ?? new Date(); + const maxPerDomain = options.maxPerDomain ?? 2; + const minSnippetLength = options.minSnippetLength ?? 40; + const minTitleLength = options.minTitleLength ?? 8; + + const byCanonicalUrl = new Map(); + + for (const result of results) { + if (isLowQuality(result, minTitleLength, minSnippetLength)) continue; + + const canonicalUrl = canonicalizeUrl(result.url); + if (!canonicalUrl) continue; + + const domain = (result.domain || extractDomain(canonicalUrl) || "").toLowerCase(); + if (!domain) continue; + + const normalized: SearchResult = { + ...result, + url: canonicalUrl, + domain, + }; + + const existing = byCanonicalUrl.get(canonicalUrl); + if (!existing) { + byCanonicalUrl.set(canonicalUrl, normalized); + } else { + byCanonicalUrl.set(canonicalUrl, pickPreferred(existing, normalized)); + } } - return deduped; + const scored = [...byCanonicalUrl.values()] + .map((result) => ({ + ...result, + score: qualityScore(result) + recencyBoost(result.publishedAt, now), + })) + .sort((a, b) => (b.score ?? 0) - (a.score ?? 0)); + + const domainCounts = new Map(); + const ranked: SearchResult[] = []; + + for (const result of scored) { + const count = domainCounts.get(result.domain) ?? 0; + if (count >= maxPerDomain) continue; + domainCounts.set(result.domain, count + 1); + ranked.push(result); + } + + return ranked; } + +export { canonicalizeUrl }; +export type { SearchResult } from "./types"; +export default rankResults; \ No newline at end of file diff --git a/src/logic/synth.ts b/src/logic/synth.ts index 63e006f..6e15cb9 100644 --- a/src/logic/synth.ts +++ b/src/logic/synth.ts @@ -1,118 +1,214 @@ -/** - * GPT-4o-mini synthesis layer. - * Takes query + search results, produces a concise answer with confidence scoring. - */ +import type { SearchResult } from "./types"; -import type { SearchResult } from "./brave"; +export interface SynthTokens { + in: number; + out: number; +} export interface SynthResult { answer: string; confidence: number; - tokens: { in: number; out: number }; + tokens: SynthTokens; model: string; } -const SYSTEM_PROMPT = `You are a search synthesis engine for AI agents. Given a query and search results, produce a concise, factual answer. - -Rules: -- Be direct and factual. No filler, no hedging. -- Cite information from the provided sources. -- If sources disagree, note the disagreement. -- If sources are insufficient, say so clearly. -- Keep answers under 300 words unless the query demands more. -- Use ISO 8601 dates when referencing time.`; - -function buildUserPrompt(query: string, sources: SearchResult[]): string { - const sourceBlock = sources - .map( - (s, i) => - `[${i + 1}] ${s.title}\n${s.url}\n${s.snippet}${s.published ? `\nPublished: ${s.published}` : ""}`, - ) - .join("\n\n"); - - return `Query: ${query}\n\nSources:\n${sourceBlock}\n\nSynthesize an answer from these sources.`; +export interface SynthOptions { + model?: string; + temperature?: number; + maxInputResults?: number; + signal?: AbortSignal; + endpoint?: string; } -export function scoreConfidence(sources: SearchResult[]): number { - if (sources.length === 0) return 0; - - let score = 0; - - // Base score from number of sources (max 0.4) - score += Math.min(sources.length / 10, 0.4); - - // Snippet quality (max 0.3) - const avgSnippetLen = - sources.reduce((sum, s) => sum + (s.snippet?.length ?? 0), 0) / - sources.length; - score += Math.min(avgSnippetLen / 500, 0.3); - - // Source agreement - check overlap in snippets (max 0.2) - if (sources.length >= 2) { - const words0 = new Set( - (sources[0].snippet ?? "").toLowerCase().split(/\s+/), - ); - const words1 = new Set( - (sources[1].snippet ?? "").toLowerCase().split(/\s+/), - ); - const overlap = [...words0].filter((w) => words1.has(w)).length; - const overlapRatio = overlap / Math.max(words0.size, 1); - score += Math.min(overlapRatio, 0.2); +export class SynthError extends Error { + status?: number; + + constructor(message: string, status?: number) { + super(message); + this.name = "SynthError"; + this.status = status; } +} + +const DEFAULT_MODEL = "gpt-4o-mini"; + +const SYSTEM_PROMPT = [ + "You are Queryx synthesis engine.", + "Return strict JSON only with shape:", + '{"answer":"string","confidence":number}.', + "Make answer concise and directly useful for agent consumers.", + "Use only provided sources, avoid speculation, no markdown.", +].join(" "); + +function clamp01(value: number): number { + if (!Number.isFinite(value)) return 0; + if (value < 0) return 0; + if (value > 1) return 1; + return value; +} - // Recency bonus (max 0.1) - const hasPublished = sources.some((s) => s.published); - if (hasPublished) score += 0.1; +function estimateTokens(text: string): number { + const normalized = text.trim(); + if (!normalized) return 0; + return Math.max(1, Math.ceil(normalized.length / 4)); +} - return Math.min(Math.max(score, 0), 1); +function safeJsonParse(value: string): T | null { + try { + return JSON.parse(value) as T; + } catch { + return null; + } } -export async function synthesise( +function heuristicConfidence(results: SearchResult[], answer: string): number { + const sourceFactor = Math.min(0.55, results.length * 0.1); + const averageSnippetLength = + results.length === 0 + ? 0 + : results.reduce((sum, item) => sum + item.snippet.trim().length, 0) / results.length; + const evidenceFactor = Math.min(0.25, averageSnippetLength / 220); + const answerPenalty = answer.trim().length < 30 ? 0.15 : 0; + return clamp01(0.2 + sourceFactor + evidenceFactor - answerPenalty); +} + +function buildUserPrompt(query: string, results: SearchResult[]): string { + const compactResults = results.map((result, index) => ({ + id: index + 1, + title: result.title, + url: result.url, + domain: result.domain, + snippet: result.snippet, + publishedAt: result.publishedAt ?? null, + })); + + return JSON.stringify({ + query, + results: compactResults, + instructions: "Synthesize a direct answer and confidence based only on these results.", + }); +} + +function toNumber(value: unknown): number | undefined { + return typeof value === "number" && Number.isFinite(value) ? value : undefined; +} + +export async function synthesizeAnswer( query: string, - sources: SearchResult[], + results: SearchResult[], + options: SynthOptions = {}, ): Promise { - const apiKey = process.env.OPENAI_API_KEY; + const model = options.model ?? DEFAULT_MODEL; + + if (results.length === 0) { + return { + answer: "No relevant sources were provided.", + confidence: 0, + tokens: { in: 0, out: 0 }, + model, + }; + } + + const endpoint = options.endpoint ?? process.env.OPENAI_API_URL?.trim(); + const apiKey = process.env.OPENAI_API_KEY?.trim(); + + if (!endpoint) { + throw new SynthError("OPENAI_API_URL is not set"); + } + if (!apiKey) { - throw new Error("OPENAI_API_KEY is required"); + throw new SynthError("OPENAI_API_KEY is not set"); } - const model = "gpt-4o-mini"; - const userPrompt = buildUserPrompt(query, sources); + const maxInputResults = options.maxInputResults ?? 8; + const selectedResults = results.slice(0, Math.max(1, maxInputResults)); + const userPrompt = buildUserPrompt(query, selectedResults); - const response = await fetch("https://api.openai.com/v1/chat/completions", { - method: "POST", - headers: { - "Content-Type": "application/json", - Authorization: `Bearer ${apiKey}`, - }, - body: JSON.stringify({ - model, - messages: [ - { role: "system", content: SYSTEM_PROMPT }, - { role: "user", content: userPrompt }, - ], - temperature: 0.3, - max_tokens: 800, - }), - }); + const requestBody = { + model, + temperature: options.temperature ?? 0.2, + response_format: { type: "json_object" }, + messages: [ + { role: "system", content: SYSTEM_PROMPT }, + { role: "user", content: userPrompt }, + ], + }; + + let response: Response; + try { + response = await fetch(endpoint, { + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: `Bearer ${apiKey}`, + }, + body: JSON.stringify(requestBody), + signal: options.signal, + }); + } catch (error) { + const message = error instanceof Error ? error.message : "Unknown network error"; + throw new SynthError(`Failed to call synthesis model: ${message}`); + } if (!response.ok) { - throw new Error( - `OpenAI API error: ${response.status} ${response.statusText}`, - ); + let body = ""; + try { + body = await response.text(); + } catch { + body = ""; + } + const suffix = body ? `: ${body}` : ""; + throw new SynthError(`Synthesis request failed (${response.status})${suffix}`, response.status); } - const body = await response.json(); - const choice = body.choices?.[0]; - const usage = body.usage ?? {}; + type CompletionPayload = { + model?: string; + usage?: { + prompt_tokens?: number; + completion_tokens?: number; + }; + choices?: Array<{ + message?: { + content?: string; + }; + }>; + }; + + let payload: CompletionPayload; + try { + payload = (await response.json()) as CompletionPayload; + } catch { + throw new SynthError("Synthesis model returned invalid JSON"); + } + + const content = payload.choices?.[0]?.message?.content; + if (typeof content !== "string" || !content.trim()) { + throw new SynthError("Synthesis response did not include message content"); + } + + const parsed = safeJsonParse>(content); + const answerFromJson = parsed && typeof parsed.answer === "string" ? parsed.answer.trim() : ""; + const answer = answerFromJson || content.trim(); + + const confidenceFromJson = parsed ? toNumber(parsed.confidence) : undefined; + const confidence = + confidenceFromJson === undefined + ? heuristicConfidence(selectedResults, answer) + : clamp01(confidenceFromJson); + + const promptTokens = toNumber(payload.usage?.prompt_tokens) ?? estimateTokens(SYSTEM_PROMPT + userPrompt); + const completionTokens = toNumber(payload.usage?.completion_tokens) ?? estimateTokens(answer); return { - answer: choice?.message?.content ?? "No answer generated.", - confidence: scoreConfidence(sources), + answer, + confidence, tokens: { - in: usage.prompt_tokens ?? 0, - out: usage.completion_tokens ?? 0, + in: promptTokens, + out: completionTokens, }, - model, + model: typeof payload.model === "string" && payload.model ? payload.model : model, }; } + +export type { SearchResult } from "./types"; +export default synthesizeAnswer; \ No newline at end of file diff --git a/src/logic/types.ts b/src/logic/types.ts new file mode 100644 index 0000000..85203c5 --- /dev/null +++ b/src/logic/types.ts @@ -0,0 +1,11 @@ +export type Freshness = "day" | "week" | "month"; + +export interface SearchResult { + title: string; + url: string; + snippet: string; + domain: string; + source?: string; + publishedAt?: string | null; + score?: number; +} \ No newline at end of file From 62df3f2666d36a0d9fe93b138d4a0b83bb96883b Mon Sep 17 00:00:00 2001 From: Jackson Date: Wed, 4 Mar 2026 15:44:36 +0000 Subject: [PATCH 02/13] feat: implement task 0x9a74dfdbbaaf87c2d7 via TaskMarket agent #24583 --- src/logic/brave.test.ts | 74 +++++++++++ src/logic/brave.ts | 268 +++++++++++++++++++++------------------- src/logic/cache.test.ts | 40 ++++++ src/logic/cache.ts | 101 +++++++-------- src/logic/rank.test.ts | 106 ++++++++++++++++ src/logic/rank.ts | 218 ++++++++++++++------------------ src/logic/synth.test.ts | 81 ++++++++++++ src/logic/synth.ts | 258 +++++++++++++++++++------------------- 8 files changed, 702 insertions(+), 444 deletions(-) create mode 100644 src/logic/brave.test.ts create mode 100644 src/logic/cache.test.ts create mode 100644 src/logic/rank.test.ts create mode 100644 src/logic/synth.test.ts diff --git a/src/logic/brave.test.ts b/src/logic/brave.test.ts new file mode 100644 index 0000000..46e545a --- /dev/null +++ b/src/logic/brave.test.ts @@ -0,0 +1,74 @@ +import { afterEach, describe, expect, it } from "bun:test"; +import { + BraveApiError, + BraveRateLimitError, + normalizeBraveResponse, + searchBrave, +} from "./brave"; + +const originalFetch = globalThis.fetch; + +afterEach(() => { + globalThis.fetch = originalFetch; +}); + +describe("brave.ts", () => { + it("normalizes Brave response into SearchResult[]", () => { + const results = normalizeBraveResponse({ + web: { + results: [ + { + title: "Example Title", + url: "https://www.example.com/path", + description: "Example description", + profile: { name: "Example Source" }, + published_at: "2026-03-01T12:00:00Z", + }, + { + title: "Second Result", + url: "https://docs.example.org/page", + snippet: "Second snippet", + }, + ], + }, + }); + + expect(results.length).toBe(2); + expect(results[0]).toEqual({ + title: "Example Title", + url: "https://www.example.com/path", + snippet: "Example description", + domain: "example.com", + source: "Example Source", + publishedAt: "2026-03-01T12:00:00.000Z", + }); + expect(results[1].domain).toBe("docs.example.org"); + }); + + it("throws BraveRateLimitError on 429", async () => { + globalThis.fetch = (async () => + new Response(JSON.stringify({ error: "too many requests" }), { + status: 429, + headers: { + "content-type": "application/json", + "retry-after": "7", + }, + })) as typeof fetch; + + await expect( + searchBrave("test query", { apiKey: "brave_test_key" }) + ).rejects.toBeInstanceOf(BraveRateLimitError); + }); + + it("throws BraveApiError on non-429 error", async () => { + globalThis.fetch = (async () => + new Response(JSON.stringify({ message: "server error" }), { + status: 500, + headers: { "content-type": "application/json" }, + })) as typeof fetch; + + await expect( + searchBrave("test query", { apiKey: "brave_test_key" }) + ).rejects.toBeInstanceOf(BraveApiError); + }); +}); \ No newline at end of file diff --git a/src/logic/brave.ts b/src/logic/brave.ts index d0102e3..65ab0ad 100644 --- a/src/logic/brave.ts +++ b/src/logic/brave.ts @@ -1,34 +1,42 @@ -import type { Freshness, SearchResult } from "./types"; +export type Freshness = "day" | "week" | "month"; + +export interface SearchResult { + title: string; + url: string; + snippet: string; + domain: string; + publishedAt?: string; + source?: string; + score?: number; +} export interface BraveSearchOptions { freshness?: Freshness; count?: number; offset?: number; signal?: AbortSignal; + apiKey?: string; + endpoint?: string; + fetchImpl?: typeof fetch; } -export class BraveError extends Error { - status?: number; +export class BraveApiError extends Error { + status: number; + details?: unknown; - constructor(message: string, status?: number) { + constructor(message: string, status: number, details?: unknown) { super(message); - this.name = "BraveError"; - this.status = status; - } -} - -export class BraveApiError extends BraveError { - constructor(message: string, status?: number) { - super(message, status); this.name = "BraveApiError"; + this.status = status; + this.details = details; } } export class BraveRateLimitError extends BraveApiError { retryAfterSeconds?: number; - constructor(message: string, retryAfterSeconds?: number) { - super(message, 429); + constructor(message: string, retryAfterSeconds?: number, details?: unknown) { + super(message, 429, details); this.name = "BraveRateLimitError"; this.retryAfterSeconds = retryAfterSeconds; } @@ -40,167 +48,167 @@ const FRESHNESS_MAP: Record = { month: "pm", }; -const DEFAULT_COUNT = 10; -const MAX_COUNT = 20; +function getApiKey(options: BraveSearchOptions): string { + return ( + options.apiKey ?? + process.env.BRAVE_API_KEY ?? + process.env.BRAVE_SEARCH_API_KEY ?? + "" + ); +} -function toStringValue(value: unknown): string { +function normalizeString(value: unknown): string { return typeof value === "string" ? value.trim() : ""; } -function extractDomain(url: string): string | null { +function safeDomain(url: string): string { try { const parsed = new URL(url); - return parsed.hostname.toLowerCase(); + return parsed.hostname.replace(/^www\./i, "").toLowerCase(); } catch { - return null; + return ""; } } -function parsePublishedAt(value: unknown): string | null { - if (typeof value === "string") { - const trimmed = value.trim(); - if (!trimmed) return null; - const parsed = Date.parse(trimmed); - return Number.isNaN(parsed) ? null : new Date(parsed).toISOString(); - } - - if (typeof value === "number") { - if (!Number.isFinite(value)) return null; - const asMs = value > 10_000_000_000 ? value : value * 1000; - const date = new Date(asMs); - return Number.isNaN(date.getTime()) ? null : date.toISOString(); - } - - return null; +function parsePublishedAt(value: unknown): string | undefined { + if (typeof value !== "string" || !value.trim()) return undefined; + const date = new Date(value); + if (Number.isNaN(date.getTime())) return undefined; + return date.toISOString(); } -function normalizeCount(count?: number): number { - if (typeof count !== "number" || !Number.isFinite(count)) return DEFAULT_COUNT; - if (count < 1) return 1; - if (count > MAX_COUNT) return MAX_COUNT; - return Math.floor(count); -} - -function normalizeOffset(offset?: number): number { - if (typeof offset !== "number" || !Number.isFinite(offset)) return 0; - return Math.max(0, Math.floor(offset)); +function firstNonEmpty(...values: unknown[]): string { + for (const value of values) { + const s = normalizeString(value); + if (s) return s; + } + return ""; } -function normalizeBraveResult(rawItem: unknown): SearchResult | null { - if (!rawItem || typeof rawItem !== "object") return null; - - const item = rawItem as Record; - const title = toStringValue(item.title); - const url = toStringValue(item.url); - const snippet = toStringValue(item.description) || toStringValue(item.snippet); - - if (!title || !url) return null; - - const metaUrl = item.meta_url && typeof item.meta_url === "object" - ? (item.meta_url as Record) - : undefined; +export function normalizeBraveResponse(payload: unknown): SearchResult[] { + const data = payload as { + web?: { results?: Array> }; + }; - const domainFromMeta = toStringValue(metaUrl?.hostname); - const domain = (domainFromMeta || extractDomain(url) || "").toLowerCase(); + const rows = data?.web?.results; + if (!Array.isArray(rows)) return []; - if (!domain) return null; + const normalized: SearchResult[] = []; - const publishedAt = parsePublishedAt(item.page_age ?? item.published ?? item.age); + for (const row of rows) { + const url = firstNonEmpty(row.url, (row as { profile?: { url?: string } }).profile?.url); + if (!url) continue; + + const domain = safeDomain(url); + if (!domain) continue; + + const title = firstNonEmpty(row.title); + const snippet = firstNonEmpty( + row.description, + row.snippet, + Array.isArray(row.extra_snippets) ? row.extra_snippets[0] : "" + ); + + normalized.push({ + title: title || domain, + url, + snippet, + domain, + source: firstNonEmpty( + (row as { profile?: { name?: string } }).profile?.name, + (row as { meta_url?: { hostname?: string } }).meta_url?.hostname, + domain + ), + publishedAt: parsePublishedAt( + firstNonEmpty( + row.page_age, + row.published, + row.published_at, + row.last_updated + ) + ), + }); + } - return { - title, - url, - snippet, - domain, - source: "brave", - publishedAt, - }; + return normalized; } -export function normaliseBraveResponse(payload: unknown): SearchResult[] { - const root = payload as { web?: { results?: unknown } }; - const results = root?.web?.results; - - if (!Array.isArray(results)) return []; - - const normalized: SearchResult[] = []; - for (const item of results) { - const parsed = normalizeBraveResult(item); - if (parsed) normalized.push(parsed); +async function parseErrorDetails(response: Response): Promise { + const contentType = response.headers.get("content-type") || ""; + try { + if (contentType.includes("application/json")) { + return await response.json(); + } + return await response.text(); + } catch { + return undefined; } - return normalized; } export async function searchBrave( query: string, - options: BraveSearchOptions = {}, + options: BraveSearchOptions = {} ): Promise { const trimmedQuery = query.trim(); if (!trimmedQuery) return []; - const apiKey = process.env.BRAVE_API_KEY?.trim(); - const endpoint = process.env.BRAVE_API_URL?.trim(); - + const apiKey = getApiKey(options); if (!apiKey) { - throw new BraveApiError("BRAVE_API_KEY is not set"); + throw new Error("Missing Brave API key. Set BRAVE_API_KEY."); } - if (!endpoint) { - throw new BraveApiError("BRAVE_API_URL is not set"); - } + const endpoint = + options.endpoint ?? + process.env.BRAVE_SEARCH_ENDPOINT ?? + "https://api.search.brave.com/res/v1/web/search"; - const params = new URLSearchParams(); - params.set("q", trimmedQuery); - params.set("count", String(normalizeCount(options.count))); - params.set("offset", String(normalizeOffset(options.offset))); + const requestUrl = new URL(endpoint); + requestUrl.searchParams.set("q", trimmedQuery); + requestUrl.searchParams.set("result_filter", "web"); + if (typeof options.count === "number") { + requestUrl.searchParams.set("count", String(options.count)); + } + if (typeof options.offset === "number") { + requestUrl.searchParams.set("offset", String(options.offset)); + } if (options.freshness) { - params.set("freshness", FRESHNESS_MAP[options.freshness]); + requestUrl.searchParams.set("freshness", FRESHNESS_MAP[options.freshness]); } - const requestUrl = `${endpoint}${endpoint.includes("?") ? "&" : "?"}${params.toString()}`; + const fetchImpl = options.fetchImpl ?? fetch; - let response: Response; - try { - response = await fetch(requestUrl, { - method: "GET", - headers: { - Accept: "application/json", - "X-Subscription-Token": apiKey, - }, - signal: options.signal, - }); - } catch (error) { - const message = error instanceof Error ? error.message : "Unknown network error"; - throw new BraveApiError(`Failed to call Brave API: ${message}`); - } + const response = await fetchImpl(requestUrl.toString(), { + method: "GET", + headers: { + Accept: "application/json", + "X-Subscription-Token": apiKey, + }, + signal: options.signal, + }); if (response.status === 429) { - const retryAfterHeader = response.headers.get("Retry-After"); - const retryAfterSeconds = retryAfterHeader ? Number.parseInt(retryAfterHeader, 10) : undefined; - throw new BraveRateLimitError("Brave API rate limit exceeded", retryAfterSeconds); + const retryAfterHeader = response.headers.get("retry-after"); + const retryAfterSeconds = retryAfterHeader ? Number(retryAfterHeader) : undefined; + const details = await parseErrorDetails(response); + throw new BraveRateLimitError( + "Brave API rate limit exceeded", + Number.isFinite(retryAfterSeconds) ? retryAfterSeconds : undefined, + details + ); } if (!response.ok) { - let body = ""; - try { - body = await response.text(); - } catch { - body = ""; - } - const suffix = body ? `: ${body}` : ""; - throw new BraveApiError(`Brave API request failed (${response.status})${suffix}`, response.status); - } - - let payload: unknown; - try { - payload = await response.json(); - } catch { - throw new BraveApiError("Brave API returned invalid JSON", response.status); + const details = await parseErrorDetails(response); + throw new BraveApiError( + `Brave API request failed with status ${response.status}`, + response.status, + details + ); } - return normaliseBraveResponse(payload); + const payload = await response.json(); + return normalizeBraveResponse(payload); } -export type { SearchResult, Freshness } from "./types"; export default searchBrave; \ No newline at end of file diff --git a/src/logic/cache.test.ts b/src/logic/cache.test.ts new file mode 100644 index 0000000..37e59b0 --- /dev/null +++ b/src/logic/cache.test.ts @@ -0,0 +1,40 @@ +import { describe, expect, it } from "bun:test"; +import { QueryCache } from "./cache"; + +describe("cache.ts", () => { + it("expires entries based on TTL", () => { + let now = 0; + const cache = new QueryCache({ + ttlSeconds: 1, + now: () => now, + }); + + cache.set("Hello World", "value"); + expect(cache.get("hello world")).toBe("value"); + + now = 1001; + expect(cache.get("hello world")).toBeUndefined(); + }); + + it("tracks hit/miss stats and normalizes query keys", () => { + let now = 0; + const cache = new QueryCache({ + ttlSeconds: 60, + now: () => now, + }); + + expect(cache.get("missing key")).toBeUndefined(); // miss + cache.set(" Query X ", "cached"); + expect(cache.get("query x")).toBe("cached"); // hit + expect(cache.get("QUERY X")).toBe("cached"); // hit + + now = 61_000; + expect(cache.get("query x")).toBeUndefined(); // expired miss + + const stats = cache.stats(); + expect(stats.hits).toBe(2); + expect(stats.misses).toBe(2); + expect(stats.hitRate).toBe(0.5); + expect(stats.size).toBe(0); + }); +}); \ No newline at end of file diff --git a/src/logic/cache.ts b/src/logic/cache.ts index 6f41ed3..331458d 100644 --- a/src/logic/cache.ts +++ b/src/logic/cache.ts @@ -7,50 +7,57 @@ export interface CacheStats { size: number; } -interface CacheEntry { +export interface CacheOptions { + ttlSeconds?: number; + now?: () => number; +} + +interface CacheEntry { value: T; expiresAt: number; } -function getDefaultTtlSeconds(): number { +const DEFAULT_TTL_SECONDS = 300; + +function readDefaultTtl(): number { const raw = process.env.CACHE_TTL_SECONDS; - const parsed = raw ? Number(raw) : 300; - if (!Number.isFinite(parsed) || parsed <= 0) return 300; - return parsed; + const parsed = raw ? Number(raw) : DEFAULT_TTL_SECONDS; + if (!Number.isFinite(parsed) || parsed <= 0) return DEFAULT_TTL_SECONDS; + return Math.floor(parsed); } export function normalizeQuery(query: string): string { return query.trim().toLowerCase().replace(/\s+/g, " "); } -export function hashQuery(normalizedQuery: string): string { - return createHash("sha256").update(normalizedQuery).digest("hex"); +export function hashQuery(query: string): string { + return createHash("sha256").update(normalizeQuery(query)).digest("hex"); } -export function getCacheKey(query: string): string { - return hashQuery(normalizeQuery(query)); -} +export class QueryCache { + private readonly store = new Map>(); + private readonly now: () => number; + private readonly defaultTtlMs: number; -export class InMemoryCache { - private readonly store = new Map(); - private readonly defaultTtlSeconds: number; private hits = 0; private misses = 0; - constructor(defaultTtlSeconds = getDefaultTtlSeconds()) { - this.defaultTtlSeconds = defaultTtlSeconds; + constructor(options: CacheOptions = {}) { + const ttlSeconds = options.ttlSeconds ?? readDefaultTtl(); + this.defaultTtlMs = Math.max(1, ttlSeconds * 1000); + this.now = options.now ?? (() => Date.now()); } - private purgeExpired(now = Date.now()): void { + private pruneExpired(): void { + const now = this.now(); for (const [key, entry] of this.store.entries()) { - if (entry.expiresAt <= now) { - this.store.delete(key); - } + if (entry.expiresAt <= now) this.store.delete(key); } } - get(query: string): T | undefined { - const key = getCacheKey(query); + get(query: string): T | undefined { + const key = hashQuery(query); + const now = this.now(); const entry = this.store.get(key); if (!entry) { @@ -58,33 +65,33 @@ export class InMemoryCache { return undefined; } - if (entry.expiresAt <= Date.now()) { + if (entry.expiresAt <= now) { this.store.delete(key); this.misses += 1; return undefined; } this.hits += 1; - return entry.value as T; + return entry.value; } - set(query: string, value: T, ttlSeconds?: number): void { - const resolvedTtlSeconds = - typeof ttlSeconds === "number" && Number.isFinite(ttlSeconds) && ttlSeconds > 0 + set(query: string, value: T, ttlSeconds?: number): void { + const key = hashQuery(query); + const ttlMs = Math.max( + 1, + (ttlSeconds && Number.isFinite(ttlSeconds) ? ttlSeconds - : this.defaultTtlSeconds; - - const expiresAt = Date.now() + resolvedTtlSeconds * 1000; - const key = getCacheKey(query); - - this.store.set(key, { value, expiresAt }); - this.purgeExpired(); + : this.defaultTtlMs / 1000) * 1000 + ); + this.store.set(key, { + value, + expiresAt: this.now() + ttlMs, + }); } stats(): CacheStats { - this.purgeExpired(); + this.pruneExpired(); const total = this.hits + this.misses; - return { hits: this.hits, misses: this.misses, @@ -92,27 +99,7 @@ export class InMemoryCache { size: this.store.size, }; } - - clear(): void { - this.store.clear(); - this.hits = 0; - this.misses = 0; - } -} - -const singletonCache = new InMemoryCache(); - -export function get(query: string): T | undefined { - return singletonCache.get(query); -} - -export function set(query: string, value: T, ttlSeconds?: number): void { - singletonCache.set(query, value, ttlSeconds); -} - -export function stats(): CacheStats { - return singletonCache.stats(); } -export const cache = singletonCache; -export default singletonCache; \ No newline at end of file +export const cache = new QueryCache(); +export default cache; \ No newline at end of file diff --git a/src/logic/rank.test.ts b/src/logic/rank.test.ts new file mode 100644 index 0000000..08042b2 --- /dev/null +++ b/src/logic/rank.test.ts @@ -0,0 +1,106 @@ +import { describe, expect, it } from "bun:test"; +import { rankSearchResults } from "./rank"; +import type { SearchResult } from "./brave"; + +describe("rank.ts", () => { + it("deduplicates and enforces max 2 results per domain", () => { + const input: SearchResult[] = [ + { + title: "A complete guide to Queryx", + url: "https://example.com/guide?utm_source=test", + snippet: "Long and useful article about using Queryx effectively in production.", + domain: "example.com", + publishedAt: "2026-03-01T00:00:00Z", + }, + { + title: "A complete guide to Queryx", + url: "https://example.com/guide", + snippet: "Duplicate of the same article with tracking params removed.", + domain: "example.com", + publishedAt: "2026-03-01T00:00:00Z", + }, + { + title: "Queryx architecture deep dive", + url: "https://example.com/architecture", + snippet: "Detailed architecture decisions and implementation details.", + domain: "example.com", + publishedAt: "2026-02-20T00:00:00Z", + }, + { + title: "Queryx API quickstart", + url: "https://example.com/quickstart", + snippet: "Quickstart tutorial for integrating Queryx APIs.", + domain: "example.com", + publishedAt: "2026-02-15T00:00:00Z", + }, + { + title: "Independent review of Queryx", + url: "https://another.com/review", + snippet: "Third-party review with practical examples and benchmark notes.", + domain: "another.com", + publishedAt: "2026-02-28T00:00:00Z", + }, + ]; + + const ranked = rankSearchResults(input, { + maxPerDomain: 2, + maxResults: 10, + now: new Date("2026-03-04T00:00:00Z"), + }); + + const exampleCount = ranked.filter((r) => r.domain === "example.com").length; + expect(exampleCount).toBe(2); + + const urls = ranked.map((r) => r.url); + const uniqueUrls = new Set(urls); + expect(uniqueUrls.size).toBe(urls.length); + }); + + it("boosts newer content over older content", () => { + const input: SearchResult[] = [ + { + title: "Queryx release today", + url: "https://fresh.dev/queryx-release", + snippet: "Today's update includes major improvements and migration notes.", + domain: "fresh.dev", + publishedAt: "2026-03-03T12:00:00Z", + }, + { + title: "Queryx release notes archive", + url: "https://old.dev/queryx-archive", + snippet: "Historical notes from previous years and legacy behavior.", + domain: "old.dev", + publishedAt: "2022-01-01T00:00:00Z", + }, + ]; + + const ranked = rankSearchResults(input, { + maxResults: 10, + now: new Date("2026-03-04T00:00:00Z"), + }); + + expect(ranked[0]?.url).toBe("https://fresh.dev/queryx-release"); + }); + + it("filters low-quality results", () => { + const input: SearchResult[] = [ + { + title: "ok", + url: "https://spam.dev/a", + snippet: "tiny", + domain: "spam.dev", + }, + { + title: "Useful Queryx guide for developers", + url: "https://good.dev/guide", + snippet: + "This guide explains setup, ranking behavior, and synthesis output patterns in depth.", + domain: "good.dev", + }, + ]; + + const ranked = rankSearchResults(input, { maxResults: 10 }); + expect(ranked.length).toBe(1); + expect(ranked[0].domain).toBe("good.dev"); + }); +}); \ No newline at end of file diff --git a/src/logic/rank.ts b/src/logic/rank.ts index 389b9d3..7f7304f 100644 --- a/src/logic/rank.ts +++ b/src/logic/rank.ts @@ -1,163 +1,133 @@ -import type { SearchResult } from "./types"; +import type { SearchResult } from "./brave"; export interface RankOptions { maxPerDomain?: number; - minSnippetLength?: number; - minTitleLength?: number; + maxResults?: number; + minQualityScore?: number; now?: Date; } -const TRACKING_PARAM_EXACT = new Set([ - "fbclid", - "gclid", - "ref", - "source", - "mc_cid", - "mc_eid", -]); +export interface RankedSearchResult extends SearchResult { + rankScore: number; +} -function extractDomain(url: string): string | null { - try { - const parsed = new URL(url); - return parsed.hostname.toLowerCase(); - } catch { - return null; - } +function clamp01(value: number): number { + if (!Number.isFinite(value)) return 0; + if (value < 0) return 0; + if (value > 1) return 1; + return value; } -function canonicalizeUrl(url: string): string | null { +function canonicalUrl(rawUrl: string): string { try { - const parsed = new URL(url); - if (parsed.protocol !== "http:" && parsed.protocol !== "https:") { - return null; - } - - parsed.hash = ""; - - const keys = [...parsed.searchParams.keys()]; - for (const key of keys) { - const lower = key.toLowerCase(); - if (lower.startsWith("utm_") || TRACKING_PARAM_EXACT.has(lower)) { - parsed.searchParams.delete(key); + const url = new URL(rawUrl); + url.hash = ""; + for (const key of [...url.searchParams.keys()]) { + const k = key.toLowerCase(); + if (k.startsWith("utm_") || k === "fbclid" || k === "gclid" || k === "ref") { + url.searchParams.delete(key); } } - - parsed.pathname = parsed.pathname.replace(/\/+$/, "") || "/"; - - const query = parsed.searchParams.toString(); - return `${parsed.protocol}//${parsed.host}${parsed.pathname}${query ? `?${query}` : ""}`; + url.pathname = url.pathname.replace(/\/+$/g, "") || "/"; + return url.toString(); } catch { - return null; + return rawUrl.trim(); } } -function toTimestamp(value?: string | null): number | null { - if (!value) return null; - const parsed = Date.parse(value); - return Number.isNaN(parsed) ? null : parsed; -} - -function recencyBoost(publishedAt: string | null | undefined, now: Date): number { - const ts = toTimestamp(publishedAt); - if (ts === null) return 0; - - const ageMs = Math.max(0, now.getTime() - ts); - const days = ageMs / (1000 * 60 * 60 * 24); - - if (days <= 1) return 0.35; - if (days <= 7) return 0.25; - if (days <= 30) return 0.15; - if (days <= 90) return 0.08; - if (days <= 365) return 0.03; +function recencyBoost(publishedAt: string | undefined, now: Date): number { + if (!publishedAt) return 0; + const date = new Date(publishedAt); + if (Number.isNaN(date.getTime())) return 0; + + const ageMs = now.getTime() - date.getTime(); + if (ageMs < 0) return 0.3; + const day = 24 * 60 * 60 * 1000; + if (ageMs <= day) return 0.35; + if (ageMs <= 7 * day) return 0.22; + if (ageMs <= 30 * day) return 0.12; + if (ageMs <= 180 * day) return 0.05; return 0; } -function qualityScore(result: SearchResult): number { - const titleLen = result.title.trim().length; - const snippetLen = result.snippet.trim().length; +function isLowQuality(result: SearchResult): boolean { + if (!result.url || !result.domain) return true; + const title = result.title?.trim() ?? ""; + const snippet = result.snippet?.trim() ?? ""; - const titleScore = Math.min(120, titleLen) / 120 * 0.3; - const snippetScore = Math.min(320, snippetLen) / 320 * 0.45; - const httpsBoost = result.url.startsWith("https://") ? 0.05 : 0; - const authorityBoost = /\.(gov|edu)$/.test(result.domain) ? 0.05 : 0; + if (title.length < 8) return true; + if (snippet.length < 20) return true; - return titleScore + snippetScore + httpsBoost + authorityBoost; + return false; } -function isLowQuality( - result: SearchResult, - minTitleLength: number, - minSnippetLength: number, -): boolean { - if (!result.title || !result.url) return true; - if (result.title.trim().length < minTitleLength) return true; - if (result.snippet.trim().length < minSnippetLength) return true; - return canonicalizeUrl(result.url) === null; +function qualityScore(result: SearchResult, now: Date): number { + const titleLen = (result.title ?? "").trim().length; + const snippetLen = (result.snippet ?? "").trim().length; + const titleScore = Math.min(titleLen, 120) / 120 * 0.35; + const snippetScore = Math.min(snippetLen, 300) / 300 * 0.45; + const sourceScore = result.domain ? 0.1 : 0; + const explicitScore = clamp01(Number(result.score ?? 0)) * 0.2; + const freshness = recencyBoost(result.publishedAt, now); + return titleScore + snippetScore + sourceScore + explicitScore + freshness; } -function pickPreferred(existing: SearchResult, incoming: SearchResult): SearchResult { - const existingTs = toTimestamp(existing.publishedAt ?? null) ?? 0; - const incomingTs = toTimestamp(incoming.publishedAt ?? null) ?? 0; - - if (incomingTs > existingTs) return incoming; - if (incomingTs < existingTs) return existing; - - return incoming.snippet.length > existing.snippet.length ? incoming : existing; -} - -export function rankResults(results: SearchResult[], options: RankOptions = {}): SearchResult[] { - if (!Array.isArray(results) || results.length === 0) return []; - - const now = options.now ?? new Date(); +export function rankSearchResults( + results: SearchResult[], + options: RankOptions = {} +): SearchResult[] { const maxPerDomain = options.maxPerDomain ?? 2; - const minSnippetLength = options.minSnippetLength ?? 40; - const minTitleLength = options.minTitleLength ?? 8; + const maxResults = options.maxResults ?? 10; + const minQualityScore = options.minQualityScore ?? 0.25; + const now = options.now ?? new Date(); - const byCanonicalUrl = new Map(); + const seenUrls = new Set(); + const seenTitleDomain = new Set(); + const ranked: RankedSearchResult[] = []; for (const result of results) { - if (isLowQuality(result, minTitleLength, minSnippetLength)) continue; + const canonical = canonicalUrl(result.url); + if (seenUrls.has(canonical)) continue; + seenUrls.add(canonical); + + const titleDomainKey = `${result.domain}|${result.title.trim().toLowerCase()}`; + if (seenTitleDomain.has(titleDomainKey)) continue; + seenTitleDomain.add(titleDomainKey); - const canonicalUrl = canonicalizeUrl(result.url); - if (!canonicalUrl) continue; + if (isLowQuality(result)) continue; - const domain = (result.domain || extractDomain(canonicalUrl) || "").toLowerCase(); - if (!domain) continue; + const rankScore = qualityScore(result, now); + if (rankScore < minQualityScore) continue; - const normalized: SearchResult = { + ranked.push({ ...result, - url: canonicalUrl, - domain, - }; - - const existing = byCanonicalUrl.get(canonicalUrl); - if (!existing) { - byCanonicalUrl.set(canonicalUrl, normalized); - } else { - byCanonicalUrl.set(canonicalUrl, pickPreferred(existing, normalized)); - } + rankScore, + }); } - const scored = [...byCanonicalUrl.values()] - .map((result) => ({ - ...result, - score: qualityScore(result) + recencyBoost(result.publishedAt, now), - })) - .sort((a, b) => (b.score ?? 0) - (a.score ?? 0)); - - const domainCounts = new Map(); - const ranked: SearchResult[] = []; - - for (const result of scored) { - const count = domainCounts.get(result.domain) ?? 0; - if (count >= maxPerDomain) continue; - domainCounts.set(result.domain, count + 1); - ranked.push(result); + ranked.sort((a, b) => { + if (b.rankScore !== a.rankScore) return b.rankScore - a.rankScore; + const ad = a.publishedAt ? new Date(a.publishedAt).getTime() : 0; + const bd = b.publishedAt ? new Date(b.publishedAt).getTime() : 0; + return bd - ad; + }); + + const perDomain = new Map(); + const output: SearchResult[] = []; + + for (const item of ranked) { + const domain = item.domain.toLowerCase(); + const current = perDomain.get(domain) ?? 0; + if (current >= maxPerDomain) continue; + + perDomain.set(domain, current + 1); + const { rankScore: _unused, ...result } = item; + output.push(result); + + if (output.length >= maxResults) break; } - return ranked; + return output; } -export { canonicalizeUrl }; -export type { SearchResult } from "./types"; -export default rankResults; \ No newline at end of file +export default rankSearchResults; \ No newline at end of file diff --git a/src/logic/synth.test.ts b/src/logic/synth.test.ts new file mode 100644 index 0000000..105cb92 --- /dev/null +++ b/src/logic/synth.test.ts @@ -0,0 +1,81 @@ +import { afterEach, describe, expect, it } from "bun:test"; +import { synthesizeAnswer } from "./synth"; +import type { SearchResult } from "./brave"; + +const originalFetch = globalThis.fetch; + +afterEach(() => { + globalThis.fetch = originalFetch; +}); + +const sampleResults: SearchResult[] = [ + { + title: "Queryx release notes", + url: "https://example.com/release", + snippet: "Queryx ships improvements to ranking and synthesis.", + domain: "example.com", + publishedAt: "2026-03-02T00:00:00Z", + }, +]; + +describe("synth.ts", () => { + it("clamps confidence to 0-1 and uses API token counts", async () => { + globalThis.fetch = (async () => + new Response( + JSON.stringify({ + model: "gpt-4o-mini", + choices: [ + { + message: { + content: JSON.stringify({ + answer: "Queryx added ranking and caching updates.", + confidence: 1.7, + }), + }, + }, + ], + usage: { + prompt_tokens: 42, + completion_tokens: 13, + }, + }), + { status: 200, headers: { "content-type": "application/json" } } + )) as typeof fetch; + + const output = await synthesizeAnswer("What changed in Queryx?", sampleResults, { + apiKey: "openai_test_key", + }); + + expect(output.confidence).toBe(1); + expect(output.tokens.in).toBe(42); + expect(output.tokens.out).toBe(13); + expect(output.model).toBe("gpt-4o-mini"); + }); + + it("falls back to estimated tokens when usage is absent", async () => { + globalThis.fetch = (async () => + new Response( + JSON.stringify({ + choices: [ + { + message: { + content: JSON.stringify({ + answer: "No definitive update found.", + confidence: -2, + }), + }, + }, + ], + }), + { status: 200, headers: { "content-type": "application/json" } } + )) as typeof fetch; + + const output = await synthesizeAnswer("Unknown query", sampleResults, { + apiKey: "openai_test_key", + }); + + expect(output.confidence).toBe(0); + expect(output.tokens.in).toBeGreaterThan(0); + expect(output.tokens.out).toBeGreaterThan(0); + }); +}); \ No newline at end of file diff --git a/src/logic/synth.ts b/src/logic/synth.ts index 6e15cb9..bccdd17 100644 --- a/src/logic/synth.ts +++ b/src/logic/synth.ts @@ -1,44 +1,31 @@ -import type { SearchResult } from "./types"; +import type { SearchResult } from "./brave"; -export interface SynthTokens { +export interface SynthesisTokens { in: number; out: number; } -export interface SynthResult { +export interface SynthesisResult { answer: string; confidence: number; - tokens: SynthTokens; + tokens: SynthesisTokens; model: string; } -export interface SynthOptions { +export interface SynthesisOptions { + apiKey?: string; model?: string; - temperature?: number; - maxInputResults?: number; - signal?: AbortSignal; endpoint?: string; + signal?: AbortSignal; + temperature?: number; + fetchImpl?: typeof fetch; } -export class SynthError extends Error { - status?: number; - - constructor(message: string, status?: number) { - super(message); - this.name = "SynthError"; - this.status = status; - } -} - -const DEFAULT_MODEL = "gpt-4o-mini"; - -const SYSTEM_PROMPT = [ - "You are Queryx synthesis engine.", - "Return strict JSON only with shape:", - '{"answer":"string","confidence":number}.', - "Make answer concise and directly useful for agent consumers.", - "Use only provided sources, avoid speculation, no markdown.", -].join(" "); +export const SYNTH_SYSTEM_PROMPT = + "You are a search synthesis engine for agents. Respond with strict JSON only: " + + '{"answer":"string","confidence":number}. ' + + "Rules: concise answer, no markdown, mention uncertainty when evidence is weak, " + + "ground claims in provided sources only, and keep confidence between 0 and 1."; function clamp01(value: number): number { if (!Number.isFinite(value)) return 0; @@ -48,167 +35,172 @@ function clamp01(value: number): number { } function estimateTokens(text: string): number { - const normalized = text.trim(); - if (!normalized) return 0; - return Math.max(1, Math.ceil(normalized.length / 4)); + const chars = text.length; + return Math.max(1, Math.ceil(chars / 4)); } -function safeJsonParse(value: string): T | null { +function extractMessageContent(content: unknown): string { + if (typeof content === "string") return content; + if (Array.isArray(content)) { + return content + .map((part) => { + if (typeof part === "string") return part; + if (part && typeof part === "object" && "text" in part) { + const text = (part as { text?: unknown }).text; + return typeof text === "string" ? text : ""; + } + return ""; + }) + .join(""); + } + return ""; +} + +function parseStructuredOutput(content: string): { answer: string; confidence?: number } { + const trimmed = content.trim(); + if (!trimmed) return { answer: "" }; + try { - return JSON.parse(value) as T; + const parsed = JSON.parse(trimmed) as { answer?: unknown; confidence?: unknown }; + const answer = + typeof parsed.answer === "string" ? parsed.answer.trim() : trimmed; + const confidence = + typeof parsed.confidence === "number" ? parsed.confidence : undefined; + return { answer, confidence }; } catch { - return null; + return { answer: trimmed }; } } function heuristicConfidence(results: SearchResult[], answer: string): number { - const sourceFactor = Math.min(0.55, results.length * 0.1); - const averageSnippetLength = - results.length === 0 - ? 0 - : results.reduce((sum, item) => sum + item.snippet.trim().length, 0) / results.length; - const evidenceFactor = Math.min(0.25, averageSnippetLength / 220); - const answerPenalty = answer.trim().length < 30 ? 0.15 : 0; - return clamp01(0.2 + sourceFactor + evidenceFactor - answerPenalty); -} - -function buildUserPrompt(query: string, results: SearchResult[]): string { - const compactResults = results.map((result, index) => ({ - id: index + 1, - title: result.title, - url: result.url, - domain: result.domain, - snippet: result.snippet, - publishedAt: result.publishedAt ?? null, - })); - - return JSON.stringify({ - query, - results: compactResults, - instructions: "Synthesize a direct answer and confidence based only on these results.", - }); + let score = 0.2; + score += Math.min(results.length, 8) * 0.08; + if (answer.length > 80) score += 0.1; + if (/not enough|uncertain|unclear|insufficient/i.test(answer)) score -= 0.15; + return clamp01(score); } -function toNumber(value: unknown): number | undefined { - return typeof value === "number" && Number.isFinite(value) ? value : undefined; +function getApiKey(options: SynthesisOptions): string { + return options.apiKey ?? process.env.OPENAI_API_KEY ?? ""; } export async function synthesizeAnswer( query: string, results: SearchResult[], - options: SynthOptions = {}, -): Promise { - const model = options.model ?? DEFAULT_MODEL; + options: SynthesisOptions = {} +): Promise { + const model = options.model ?? process.env.SYNTH_MODEL ?? "gpt-4o-mini"; + const trimmedQuery = query.trim(); - if (results.length === 0) { + if (!trimmedQuery) { return { - answer: "No relevant sources were provided.", + answer: "", confidence: 0, tokens: { in: 0, out: 0 }, model, }; } - const endpoint = options.endpoint ?? process.env.OPENAI_API_URL?.trim(); - const apiKey = process.env.OPENAI_API_KEY?.trim(); - - if (!endpoint) { - throw new SynthError("OPENAI_API_URL is not set"); - } - + const apiKey = getApiKey(options); if (!apiKey) { - throw new SynthError("OPENAI_API_KEY is not set"); + throw new Error("Missing OpenAI API key. Set OPENAI_API_KEY."); } - const maxInputResults = options.maxInputResults ?? 8; - const selectedResults = results.slice(0, Math.max(1, maxInputResults)); - const userPrompt = buildUserPrompt(query, selectedResults); + const endpoint = + options.endpoint ?? + process.env.OPENAI_CHAT_COMPLETIONS_ENDPOINT ?? + "https://api.openai.com/v1/chat/completions"; + + const sourcePayload = results.slice(0, 8).map((r) => ({ + title: r.title, + url: r.url, + snippet: r.snippet, + domain: r.domain, + publishedAt: r.publishedAt, + })); + + const userPayload = JSON.stringify({ + query: trimmedQuery, + sources: sourcePayload, + }); const requestBody = { model, temperature: options.temperature ?? 0.2, - response_format: { type: "json_object" }, + response_format: { type: "json_object" as const }, messages: [ - { role: "system", content: SYSTEM_PROMPT }, - { role: "user", content: userPrompt }, + { role: "system" as const, content: SYNTH_SYSTEM_PROMPT }, + { role: "user" as const, content: userPayload }, ], }; - let response: Response; - try { - response = await fetch(endpoint, { - method: "POST", - headers: { - "Content-Type": "application/json", - Authorization: `Bearer ${apiKey}`, - }, - body: JSON.stringify(requestBody), - signal: options.signal, - }); - } catch (error) { - const message = error instanceof Error ? error.message : "Unknown network error"; - throw new SynthError(`Failed to call synthesis model: ${message}`); - } + const fetchImpl = options.fetchImpl ?? fetch; + const response = await fetchImpl(endpoint, { + method: "POST", + headers: { + Authorization: `Bearer ${apiKey}`, + "Content-Type": "application/json", + }, + body: JSON.stringify(requestBody), + signal: options.signal, + }); if (!response.ok) { - let body = ""; + let details: unknown; try { - body = await response.text(); + details = await response.json(); } catch { - body = ""; + details = await response.text().catch(() => undefined); } - const suffix = body ? `: ${body}` : ""; - throw new SynthError(`Synthesis request failed (${response.status})${suffix}`, response.status); + throw new Error( + `Synthesis request failed (${response.status}): ${JSON.stringify(details)}` + ); } - type CompletionPayload = { + const payload = (await response.json()) as { model?: string; + choices?: Array<{ message?: { content?: unknown } }>; usage?: { prompt_tokens?: number; completion_tokens?: number; + input_tokens?: number; + output_tokens?: number; }; - choices?: Array<{ - message?: { - content?: string; - }; - }>; }; - let payload: CompletionPayload; - try { - payload = (await response.json()) as CompletionPayload; - } catch { - throw new SynthError("Synthesis model returned invalid JSON"); - } - - const content = payload.choices?.[0]?.message?.content; - if (typeof content !== "string" || !content.trim()) { - throw new SynthError("Synthesis response did not include message content"); - } - - const parsed = safeJsonParse>(content); - const answerFromJson = parsed && typeof parsed.answer === "string" ? parsed.answer.trim() : ""; - const answer = answerFromJson || content.trim(); - - const confidenceFromJson = parsed ? toNumber(parsed.confidence) : undefined; - const confidence = - confidenceFromJson === undefined - ? heuristicConfidence(selectedResults, answer) - : clamp01(confidenceFromJson); - - const promptTokens = toNumber(payload.usage?.prompt_tokens) ?? estimateTokens(SYSTEM_PROMPT + userPrompt); - const completionTokens = toNumber(payload.usage?.completion_tokens) ?? estimateTokens(answer); + const rawContent = extractMessageContent( + payload.choices?.[0]?.message?.content + ); + const parsed = parseStructuredOutput(rawContent); + + const answer = parsed.answer || "No synthesis available."; + const confidence = clamp01( + typeof parsed.confidence === "number" + ? parsed.confidence + : heuristicConfidence(results, answer) + ); + + const estimatedPrompt = estimateTokens(SYNTH_SYSTEM_PROMPT + userPayload); + const estimatedCompletion = estimateTokens(answer); + + const tokensIn = + payload.usage?.prompt_tokens ?? + payload.usage?.input_tokens ?? + estimatedPrompt; + const tokensOut = + payload.usage?.completion_tokens ?? + payload.usage?.output_tokens ?? + estimatedCompletion; return { answer, confidence, tokens: { - in: promptTokens, - out: completionTokens, + in: Math.max(0, Number(tokensIn) || 0), + out: Math.max(0, Number(tokensOut) || 0), }, - model: typeof payload.model === "string" && payload.model ? payload.model : model, + model: payload.model ?? model, }; } -export type { SearchResult } from "./types"; export default synthesizeAnswer; \ No newline at end of file From 18e9a7dd0f056ea45b1260c75f840807b3ae91f1 Mon Sep 17 00:00:00 2001 From: Jackson Date: Wed, 4 Mar 2026 15:59:16 +0000 Subject: [PATCH 03/13] feat: implement task 0x9a74dfdbbaaf87c2d7 via TaskMarket agent #24583 --- src/logic/brave.ts | 275 +++++++++++++++++++----------------------- src/logic/cache.ts | 101 ++++++++++------ src/logic/index.ts | 5 + src/logic/rank.ts | 170 ++++++++++++++------------ src/logic/synth.ts | 295 +++++++++++++++++++++++---------------------- src/logic/types.ts | 6 +- test/brave.test.ts | 97 +++++++++++++++ test/cache.test.ts | 36 ++++++ test/rank.test.ts | 88 ++++++++++++++ test/synth.test.ts | 97 +++++++++++++++ 10 files changed, 751 insertions(+), 419 deletions(-) create mode 100644 src/logic/index.ts create mode 100644 test/brave.test.ts create mode 100644 test/cache.test.ts create mode 100644 test/rank.test.ts create mode 100644 test/synth.test.ts diff --git a/src/logic/brave.ts b/src/logic/brave.ts index 65ab0ad..18a2b4a 100644 --- a/src/logic/brave.ts +++ b/src/logic/brave.ts @@ -1,214 +1,181 @@ -export type Freshness = "day" | "week" | "month"; - -export interface SearchResult { - title: string; - url: string; - snippet: string; - domain: string; - publishedAt?: string; - source?: string; - score?: number; -} +import { SearchFreshness, SearchResult } from "./types"; export interface BraveSearchOptions { - freshness?: Freshness; + freshness?: SearchFreshness; count?: number; offset?: number; signal?: AbortSignal; apiKey?: string; endpoint?: string; - fetchImpl?: typeof fetch; } +interface BraveResultRow { + title?: string; + url?: string; + description?: string; + page_age?: string; + age?: string; +} + +interface BraveSearchResponse { + web?: { + results?: BraveResultRow[]; + }; +} + +const FRESHNESS_MAP: Record = { + day: "pd", + week: "pw", + month: "pm", +}; + export class BraveApiError extends Error { - status: number; - details?: unknown; + status?: number; + cause?: unknown; - constructor(message: string, status: number, details?: unknown) { + constructor(message: string, status?: number, cause?: unknown) { super(message); this.name = "BraveApiError"; this.status = status; - this.details = details; + this.cause = cause; } } export class BraveRateLimitError extends BraveApiError { retryAfterSeconds?: number; - constructor(message: string, retryAfterSeconds?: number, details?: unknown) { - super(message, 429, details); + constructor(message: string, retryAfterSeconds?: number, cause?: unknown) { + super(message, 429, cause); this.name = "BraveRateLimitError"; this.retryAfterSeconds = retryAfterSeconds; } } -const FRESHNESS_MAP: Record = { - day: "pd", - week: "pw", - month: "pm", -}; - -function getApiKey(options: BraveSearchOptions): string { - return ( - options.apiKey ?? - process.env.BRAVE_API_KEY ?? - process.env.BRAVE_SEARCH_API_KEY ?? - "" - ); +export function normalizeBraveResponse(payload: BraveSearchResponse): SearchResult[] { + const rows = payload?.web?.results ?? []; + const total = rows.length || 1; + + return rows + .map((row, index) => { + const title = (row.title ?? "").trim(); + const url = (row.url ?? "").trim(); + const snippet = (row.description ?? "").trim(); + const domain = extractDomain(url); + const publishedAt = toIsoDate(row.page_age) ?? toIsoDate(row.age); + + if (!title || !url || !domain) { + return null; + } + + return { + title, + url, + snippet, + domain, + publishedAt, + source: "brave", + score: clamp(1 - index / total), + } satisfies SearchResult; + }) + .filter((item): item is SearchResult => item !== null); } -function normalizeString(value: unknown): string { - return typeof value === "string" ? value.trim() : ""; +function toIsoDate(input?: string): string | undefined { + if (!input) return undefined; + const dt = new Date(input); + if (Number.isNaN(dt.getTime())) return undefined; + return dt.toISOString(); } -function safeDomain(url: string): string { +function extractDomain(url: string): string { try { - const parsed = new URL(url); - return parsed.hostname.replace(/^www\./i, "").toLowerCase(); + const host = new URL(url).hostname.toLowerCase(); + return host.startsWith("www.") ? host.slice(4) : host; } catch { return ""; } } -function parsePublishedAt(value: unknown): string | undefined { - if (typeof value !== "string" || !value.trim()) return undefined; - const date = new Date(value); - if (Number.isNaN(date.getTime())) return undefined; - return date.toISOString(); -} - -function firstNonEmpty(...values: unknown[]): string { - for (const value of values) { - const s = normalizeString(value); - if (s) return s; - } - return ""; -} - -export function normalizeBraveResponse(payload: unknown): SearchResult[] { - const data = payload as { - web?: { results?: Array> }; - }; - - const rows = data?.web?.results; - if (!Array.isArray(rows)) return []; - - const normalized: SearchResult[] = []; - - for (const row of rows) { - const url = firstNonEmpty(row.url, (row as { profile?: { url?: string } }).profile?.url); - if (!url) continue; - - const domain = safeDomain(url); - if (!domain) continue; - - const title = firstNonEmpty(row.title); - const snippet = firstNonEmpty( - row.description, - row.snippet, - Array.isArray(row.extra_snippets) ? row.extra_snippets[0] : "" - ); - - normalized.push({ - title: title || domain, - url, - snippet, - domain, - source: firstNonEmpty( - (row as { profile?: { name?: string } }).profile?.name, - (row as { meta_url?: { hostname?: string } }).meta_url?.hostname, - domain - ), - publishedAt: parsePublishedAt( - firstNonEmpty( - row.page_age, - row.published, - row.published_at, - row.last_updated - ) - ), - }); +async function getErrorMessage(res: Response): Promise { + const text = await res.text(); + if (!text) { + return `Brave API error (${res.status})`; } - return normalized; -} - -async function parseErrorDetails(response: Response): Promise { - const contentType = response.headers.get("content-type") || ""; try { - if (contentType.includes("application/json")) { - return await response.json(); - } - return await response.text(); + const json = JSON.parse(text) as { error?: { message?: string }; message?: string }; + return json.error?.message ?? json.message ?? text; } catch { - return undefined; + return text; } } -export async function searchBrave( - query: string, - options: BraveSearchOptions = {} -): Promise { - const trimmedQuery = query.trim(); - if (!trimmedQuery) return []; +function clamp(value: number): number { + if (!Number.isFinite(value)) return 0; + if (value < 0) return 0; + if (value > 1) return 1; + return value; +} - const apiKey = getApiKey(options); - if (!apiKey) { - throw new Error("Missing Brave API key. Set BRAVE_API_KEY."); +export async function searchBrave(query: string, options: BraveSearchOptions = {}): Promise { + const normalizedQuery = query.trim(); + if (!normalizedQuery) { + return []; } - const endpoint = - options.endpoint ?? - process.env.BRAVE_SEARCH_ENDPOINT ?? - "https://api.search.brave.com/res/v1/web/search"; + const apiKey = options.apiKey ?? process.env.BRAVE_API_KEY; + if (!apiKey) { + throw new BraveApiError("Missing BRAVE_API_KEY"); + } - const requestUrl = new URL(endpoint); - requestUrl.searchParams.set("q", trimmedQuery); - requestUrl.searchParams.set("result_filter", "web"); + const endpoint = options.endpoint ?? process.env.BRAVE_SEARCH_ENDPOINT ?? "https://api.search.brave.com/res/v1/web/search"; + const params = new URLSearchParams({ + q: normalizedQuery, + count: String(options.count ?? 10), + offset: String(options.offset ?? 0), + }); - if (typeof options.count === "number") { - requestUrl.searchParams.set("count", String(options.count)); - } - if (typeof options.offset === "number") { - requestUrl.searchParams.set("offset", String(options.offset)); - } if (options.freshness) { - requestUrl.searchParams.set("freshness", FRESHNESS_MAP[options.freshness]); + params.set("freshness", FRESHNESS_MAP[options.freshness]); } - const fetchImpl = options.fetchImpl ?? fetch; + const url = `${endpoint}?${params.toString()}`; - const response = await fetchImpl(requestUrl.toString(), { - method: "GET", - headers: { - Accept: "application/json", - "X-Subscription-Token": apiKey, - }, - signal: options.signal, - }); + let res: Response; + try { + res = await fetch(url, { + method: "GET", + signal: options.signal, + headers: { + Accept: "application/json", + "X-Subscription-Token": apiKey, + }, + }); + } catch (error) { + throw new BraveApiError("Network error while calling Brave Search API", undefined, error); + } - if (response.status === 429) { - const retryAfterHeader = response.headers.get("retry-after"); - const retryAfterSeconds = retryAfterHeader ? Number(retryAfterHeader) : undefined; - const details = await parseErrorDetails(response); - throw new BraveRateLimitError( - "Brave API rate limit exceeded", - Number.isFinite(retryAfterSeconds) ? retryAfterSeconds : undefined, - details - ); + if (res.status === 429) { + const retryAfterRaw = res.headers.get("retry-after"); + const retryAfterSeconds = retryAfterRaw ? Number.parseInt(retryAfterRaw, 10) : undefined; + const message = await getErrorMessage(res); + throw new BraveRateLimitError(message || "Brave Search API rate limit exceeded", retryAfterSeconds); } - if (!response.ok) { - const details = await parseErrorDetails(response); - throw new BraveApiError( - `Brave API request failed with status ${response.status}`, - response.status, - details - ); + if (!res.ok) { + const message = await getErrorMessage(res); + throw new BraveApiError(message, res.status); + } + + let payload: BraveSearchResponse; + try { + payload = (await res.json()) as BraveSearchResponse; + } catch (error) { + throw new BraveApiError("Invalid JSON response from Brave Search API", res.status, error); } - const payload = await response.json(); return normalizeBraveResponse(payload); } +export const search = searchBrave; export default searchBrave; \ No newline at end of file diff --git a/src/logic/cache.ts b/src/logic/cache.ts index 331458d..48ae16c 100644 --- a/src/logic/cache.ts +++ b/src/logic/cache.ts @@ -5,11 +5,7 @@ export interface CacheStats { misses: number; hitRate: number; size: number; -} - -export interface CacheOptions { - ttlSeconds?: number; - now?: () => number; + ttlSeconds: number; } interface CacheEntry { @@ -17,13 +13,10 @@ interface CacheEntry { expiresAt: number; } -const DEFAULT_TTL_SECONDS = 300; - -function readDefaultTtl(): number { - const raw = process.env.CACHE_TTL_SECONDS; - const parsed = raw ? Number(raw) : DEFAULT_TTL_SECONDS; - if (!Number.isFinite(parsed) || parsed <= 0) return DEFAULT_TTL_SECONDS; - return Math.floor(parsed); +function parseTtlSeconds(raw: string | undefined, fallback: number): number { + const parsed = Number.parseInt(raw ?? "", 10); + if (!Number.isFinite(parsed) || parsed <= 0) return fallback; + return parsed; } export function normalizeQuery(query: string): string { @@ -31,33 +24,25 @@ export function normalizeQuery(query: string): string { } export function hashQuery(query: string): string { - return createHash("sha256").update(normalizeQuery(query)).digest("hex"); + const normalized = normalizeQuery(query); + return createHash("sha256").update(normalized).digest("hex"); } -export class QueryCache { +export class InMemoryTTLCache { + private readonly ttlMs: number; + private readonly ttlSeconds: number; private readonly store = new Map>(); - private readonly now: () => number; - private readonly defaultTtlMs: number; - private hits = 0; private misses = 0; - constructor(options: CacheOptions = {}) { - const ttlSeconds = options.ttlSeconds ?? readDefaultTtl(); - this.defaultTtlMs = Math.max(1, ttlSeconds * 1000); - this.now = options.now ?? (() => Date.now()); - } - - private pruneExpired(): void { - const now = this.now(); - for (const [key, entry] of this.store.entries()) { - if (entry.expiresAt <= now) this.store.delete(key); - } + constructor(ttlSeconds = 300) { + this.ttlSeconds = ttlSeconds; + this.ttlMs = ttlSeconds * 1000; } get(query: string): T | undefined { + this.pruneExpired(); const key = hashQuery(query); - const now = this.now(); const entry = this.store.get(key); if (!entry) { @@ -65,7 +50,7 @@ export class QueryCache { return undefined; } - if (entry.expiresAt <= now) { + if (entry.expiresAt <= Date.now()) { this.store.delete(key); this.misses += 1; return undefined; @@ -75,31 +60,67 @@ export class QueryCache { return entry.value; } - set(query: string, value: T, ttlSeconds?: number): void { + set(query: string, value: T): string { + this.pruneExpired(); const key = hashQuery(query); - const ttlMs = Math.max( - 1, - (ttlSeconds && Number.isFinite(ttlSeconds) - ? ttlSeconds - : this.defaultTtlMs / 1000) * 1000 - ); this.store.set(key, { value, - expiresAt: this.now() + ttlMs, + expiresAt: Date.now() + this.ttlMs, }); + return key; } stats(): CacheStats { this.pruneExpired(); const total = this.hits + this.misses; + return { hits: this.hits, misses: this.misses, hitRate: total === 0 ? 0 : this.hits / total, size: this.store.size, + ttlSeconds: this.ttlSeconds, }; } + + clear(): void { + this.store.clear(); + this.hits = 0; + this.misses = 0; + } + + private pruneExpired(): void { + const now = Date.now(); + for (const [key, value] of this.store.entries()) { + if (value.expiresAt <= now) { + this.store.delete(key); + } + } + } +} + +export function createCache(ttlSeconds = parseTtlSeconds(process.env.CACHE_TTL_SECONDS, 300)): InMemoryTTLCache { + return new InMemoryTTLCache(ttlSeconds); +} + +const defaultCache = createCache(); + +export function get(query: string): T | undefined { + return defaultCache.get(query) as T | undefined; +} + +export function set(query: string, value: T): string { + return defaultCache.set(query, value); +} + +export function stats(): CacheStats { + return defaultCache.stats(); } -export const cache = new QueryCache(); -export default cache; \ No newline at end of file +export default { + get, + set, + stats, + normalizeQuery, + hashQuery, +}; \ No newline at end of file diff --git a/src/logic/index.ts b/src/logic/index.ts new file mode 100644 index 0000000..4ac3f4a --- /dev/null +++ b/src/logic/index.ts @@ -0,0 +1,5 @@ +export * from "./types"; +export * from "./brave"; +export * from "./synth"; +export * from "./cache"; +export * from "./rank"; \ No newline at end of file diff --git a/src/logic/rank.ts b/src/logic/rank.ts index 7f7304f..8ab5c0f 100644 --- a/src/logic/rank.ts +++ b/src/logic/rank.ts @@ -1,133 +1,145 @@ -import type { SearchResult } from "./brave"; +import { SearchResult } from "./types"; export interface RankOptions { maxPerDomain?: number; - maxResults?: number; minQualityScore?: number; now?: Date; } export interface RankedSearchResult extends SearchResult { + qualityScore: number; + recencyScore: number; rankScore: number; } -function clamp01(value: number): number { +function clamp(value: number): number { if (!Number.isFinite(value)) return 0; if (value < 0) return 0; if (value > 1) return 1; return value; } -function canonicalUrl(rawUrl: string): string { +function extractDomain(url: string): string { try { - const url = new URL(rawUrl); - url.hash = ""; - for (const key of [...url.searchParams.keys()]) { - const k = key.toLowerCase(); - if (k.startsWith("utm_") || k === "fbclid" || k === "gclid" || k === "ref") { - url.searchParams.delete(key); + const hostname = new URL(url).hostname.toLowerCase(); + return hostname.startsWith("www.") ? hostname.slice(4) : hostname; + } catch { + return ""; + } +} + +function canonicalizeUrl(url: string): string { + try { + const parsed = new URL(url); + parsed.hash = ""; + for (const key of [...parsed.searchParams.keys()]) { + if (key.toLowerCase().startsWith("utm_")) { + parsed.searchParams.delete(key); } } - url.pathname = url.pathname.replace(/\/+$/g, "") || "/"; - return url.toString(); + const pathname = parsed.pathname.replace(/\/+$/, ""); + const query = parsed.searchParams.toString(); + return `${parsed.origin}${pathname}${query ? `?${query}` : ""}`; } catch { - return rawUrl.trim(); + return url.trim(); } } -function recencyBoost(publishedAt: string | undefined, now: Date): number { - if (!publishedAt) return 0; - const date = new Date(publishedAt); - if (Number.isNaN(date.getTime())) return 0; - - const ageMs = now.getTime() - date.getTime(); - if (ageMs < 0) return 0.3; - const day = 24 * 60 * 60 * 1000; - if (ageMs <= day) return 0.35; - if (ageMs <= 7 * day) return 0.22; - if (ageMs <= 30 * day) return 0.12; - if (ageMs <= 180 * day) return 0.05; - return 0; -} +export function qualityScore(result: SearchResult): number { + let score = 0; + + const titleLength = result.title.trim().length; + const snippetLength = result.snippet.trim().length; + + if (titleLength >= 12) score += 0.35; + else if (titleLength >= 6) score += 0.2; + + if (snippetLength >= 80) score += 0.35; + else if (snippetLength >= 30) score += 0.2; -function isLowQuality(result: SearchResult): boolean { - if (!result.url || !result.domain) return true; - const title = result.title?.trim() ?? ""; - const snippet = result.snippet?.trim() ?? ""; + if (/^https?:\/\//i.test(result.url)) score += 0.15; + if (result.domain.includes(".")) score += 0.1; - if (title.length < 8) return true; - if (snippet.length < 20) return true; + if (/(login|signup|privacy|terms|cookie)/i.test(result.url)) { + score -= 0.2; + } + + if (typeof result.score === "number") { + score += 0.05 * clamp(result.score); + } - return false; + return clamp(score); } -function qualityScore(result: SearchResult, now: Date): number { - const titleLen = (result.title ?? "").trim().length; - const snippetLen = (result.snippet ?? "").trim().length; - const titleScore = Math.min(titleLen, 120) / 120 * 0.35; - const snippetScore = Math.min(snippetLen, 300) / 300 * 0.45; - const sourceScore = result.domain ? 0.1 : 0; - const explicitScore = clamp01(Number(result.score ?? 0)) * 0.2; - const freshness = recencyBoost(result.publishedAt, now); - return titleScore + snippetScore + sourceScore + explicitScore + freshness; +export function recencyScore(publishedAt: string | undefined, now = new Date()): number { + if (!publishedAt) return 0.3; + + const ts = new Date(publishedAt).getTime(); + if (Number.isNaN(ts)) return 0.3; + + const ageDays = (now.getTime() - ts) / 86_400_000; + if (ageDays <= 0) return 1; + + return clamp(Math.exp(-ageDays / 30)); } -export function rankSearchResults( - results: SearchResult[], - options: RankOptions = {} -): SearchResult[] { +export function rankResults(results: SearchResult[], options: RankOptions = {}): RankedSearchResult[] { const maxPerDomain = options.maxPerDomain ?? 2; - const maxResults = options.maxResults ?? 10; - const minQualityScore = options.minQualityScore ?? 0.25; + const minQualityScore = options.minQualityScore ?? 0.35; const now = options.now ?? new Date(); const seenUrls = new Set(); - const seenTitleDomain = new Set(); - const ranked: RankedSearchResult[] = []; + const candidates: RankedSearchResult[] = []; + + for (const input of results) { + if (!input?.title || !input?.url) continue; - for (const result of results) { - const canonical = canonicalUrl(result.url); - if (seenUrls.has(canonical)) continue; - seenUrls.add(canonical); + const normalizedUrl = canonicalizeUrl(input.url); + if (!normalizedUrl || seenUrls.has(normalizedUrl)) continue; + seenUrls.add(normalizedUrl); - const titleDomainKey = `${result.domain}|${result.title.trim().toLowerCase()}`; - if (seenTitleDomain.has(titleDomainKey)) continue; - seenTitleDomain.add(titleDomainKey); + const domain = (input.domain || extractDomain(input.url)).toLowerCase(); + if (!domain) continue; - if (isLowQuality(result)) continue; + const materialized: SearchResult = { + ...input, + domain, + url: normalizedUrl, + }; - const rankScore = qualityScore(result, now); - if (rankScore < minQualityScore) continue; + const qScore = qualityScore(materialized); + if (qScore < minQualityScore) continue; - ranked.push({ - ...result, + const rScore = recencyScore(materialized.publishedAt, now); + const sourceScore = typeof materialized.score === "number" ? clamp(materialized.score) : 0.5; + const rankScore = clamp(0.6 * qScore + 0.25 * rScore + 0.15 * sourceScore); + + candidates.push({ + ...materialized, + qualityScore: qScore, + recencyScore: rScore, rankScore, }); } - ranked.sort((a, b) => { + candidates.sort((a, b) => { if (b.rankScore !== a.rankScore) return b.rankScore - a.rankScore; - const ad = a.publishedAt ? new Date(a.publishedAt).getTime() : 0; - const bd = b.publishedAt ? new Date(b.publishedAt).getTime() : 0; - return bd - ad; + return b.recencyScore - a.recencyScore; }); - const perDomain = new Map(); - const output: SearchResult[] = []; - - for (const item of ranked) { - const domain = item.domain.toLowerCase(); - const current = perDomain.get(domain) ?? 0; - if (current >= maxPerDomain) continue; + const perDomainCount = new Map(); + const output: RankedSearchResult[] = []; - perDomain.set(domain, current + 1); - const { rankScore: _unused, ...result } = item; - output.push(result); + for (const candidate of candidates) { + const used = perDomainCount.get(candidate.domain) ?? 0; + if (used >= maxPerDomain) continue; - if (output.length >= maxResults) break; + perDomainCount.set(candidate.domain, used + 1); + output.push(candidate); } return output; } -export default rankSearchResults; \ No newline at end of file +export const rank = rankResults; +export default rankResults; \ No newline at end of file diff --git a/src/logic/synth.ts b/src/logic/synth.ts index bccdd17..7f4fc84 100644 --- a/src/logic/synth.ts +++ b/src/logic/synth.ts @@ -1,206 +1,215 @@ -import type { SearchResult } from "./brave"; +import { SearchResult } from "./types"; -export interface SynthesisTokens { +export interface SynthTokens { in: number; out: number; } -export interface SynthesisResult { +export interface SynthResult { answer: string; confidence: number; - tokens: SynthesisTokens; + tokens: SynthTokens; model: string; } -export interface SynthesisOptions { - apiKey?: string; +export interface SynthOptions { model?: string; - endpoint?: string; signal?: AbortSignal; - temperature?: number; - fetchImpl?: typeof fetch; + apiKey?: string; + baseUrl?: string; } -export const SYNTH_SYSTEM_PROMPT = - "You are a search synthesis engine for agents. Respond with strict JSON only: " + - '{"answer":"string","confidence":number}. ' + - "Rules: concise answer, no markdown, mention uncertainty when evidence is weak, " + - "ground claims in provided sources only, and keep confidence between 0 and 1."; +interface ChatCompletionResponse { + model?: string; + choices?: Array<{ + message?: { + content?: string; + }; + }>; + usage?: { + prompt_tokens?: number; + completion_tokens?: number; + total_tokens?: number; + }; +} -function clamp01(value: number): number { +const SYSTEM_PROMPT = [ + "You are Queryx synthesis for agent consumers.", + "Return STRICT JSON only with shape:", + '{"answer":"string","confidence":0.0}', + "Keep the answer concise, factual, and directly useful for downstream automation.", + "Confidence must be a number between 0 and 1 based on evidence quality and source agreement.", +].join(" "); + +export class SynthError extends Error { + status?: number; + cause?: unknown; + + constructor(message: string, status?: number, cause?: unknown) { + super(message); + this.name = "SynthError"; + this.status = status; + this.cause = cause; + } +} + +export function clampConfidence(value: number): number { if (!Number.isFinite(value)) return 0; if (value < 0) return 0; if (value > 1) return 1; return value; } +export function computeConfidence(query: string, results: SearchResult[], answer: string): number { + const evidenceDepth = Math.min(results.length / 8, 1); + const sourceDiversity = Math.min(new Set(results.map((r) => r.domain)).size / 5, 1); + const answerLength = Math.min(answer.trim().split(/\s+/).filter(Boolean).length / 80, 1); + + const uncertaintyPenalty = + /unclear|unknown|insufficient|not enough|cannot determine/i.test(answer) ? 0.2 : 0; + + const score = 0.5 * evidenceDepth + 0.3 * sourceDiversity + 0.2 * answerLength - uncertaintyPenalty; + void query; + return clampConfidence(score); +} + function estimateTokens(text: string): number { - const chars = text.length; - return Math.max(1, Math.ceil(chars / 4)); + const cleaned = text.trim(); + if (!cleaned) return 0; + return Math.max(1, Math.ceil(cleaned.length / 4)); } -function extractMessageContent(content: unknown): string { - if (typeof content === "string") return content; - if (Array.isArray(content)) { - return content - .map((part) => { - if (typeof part === "string") return part; - if (part && typeof part === "object" && "text" in part) { - const text = (part as { text?: unknown }).text; - return typeof text === "string" ? text : ""; - } - return ""; - }) - .join(""); - } - return ""; +function buildUserPrompt(query: string, results: SearchResult[]): string { + const compact = results.slice(0, 8).map((r) => ({ + title: r.title, + url: r.url, + snippet: r.snippet, + domain: r.domain, + publishedAt: r.publishedAt, + })); + + return JSON.stringify({ + query, + results: compact, + instruction: "Synthesize an answer for agents. No markdown. JSON only.", + }); } -function parseStructuredOutput(content: string): { answer: string; confidence?: number } { - const trimmed = content.trim(); - if (!trimmed) return { answer: "" }; +function parseJsonObject(text: string): Record | null { + const trimmed = text.trim(); + if (!trimmed) return null; try { - const parsed = JSON.parse(trimmed) as { answer?: unknown; confidence?: unknown }; - const answer = - typeof parsed.answer === "string" ? parsed.answer.trim() : trimmed; - const confidence = - typeof parsed.confidence === "number" ? parsed.confidence : undefined; - return { answer, confidence }; + const direct = JSON.parse(trimmed); + if (direct && typeof direct === "object") return direct as Record; + return null; } catch { - return { answer: trimmed }; + const match = trimmed.match(/\{[\s\S]*\}/); + if (!match) return null; + try { + const parsed = JSON.parse(match[0]); + if (parsed && typeof parsed === "object") return parsed as Record; + return null; + } catch { + return null; + } } } -function heuristicConfidence(results: SearchResult[], answer: string): number { - let score = 0.2; - score += Math.min(results.length, 8) * 0.08; - if (answer.length > 80) score += 0.1; - if (/not enough|uncertain|unclear|insufficient/i.test(answer)) score -= 0.15; - return clamp01(score); -} - -function getApiKey(options: SynthesisOptions): string { - return options.apiKey ?? process.env.OPENAI_API_KEY ?? ""; +async function readErrorMessage(res: Response): Promise { + const text = await res.text(); + if (!text) return `Synthesis API error (${res.status})`; + try { + const parsed = JSON.parse(text) as { error?: { message?: string }; message?: string }; + return parsed.error?.message ?? parsed.message ?? text; + } catch { + return text; + } } -export async function synthesizeAnswer( - query: string, - results: SearchResult[], - options: SynthesisOptions = {} -): Promise { - const model = options.model ?? process.env.SYNTH_MODEL ?? "gpt-4o-mini"; - const trimmedQuery = query.trim(); - - if (!trimmedQuery) { +export async function synthesize(query: string, results: SearchResult[], options: SynthOptions = {}): Promise { + const normalizedQuery = query.trim(); + if (!normalizedQuery) { return { answer: "", confidence: 0, tokens: { in: 0, out: 0 }, - model, + model: options.model ?? process.env.SYNTH_MODEL ?? "gpt-4o-mini", }; } - const apiKey = getApiKey(options); + const apiKey = options.apiKey ?? process.env.OPENAI_API_KEY; if (!apiKey) { - throw new Error("Missing OpenAI API key. Set OPENAI_API_KEY."); + throw new SynthError("Missing OPENAI_API_KEY"); } - const endpoint = - options.endpoint ?? - process.env.OPENAI_CHAT_COMPLETIONS_ENDPOINT ?? - "https://api.openai.com/v1/chat/completions"; - - const sourcePayload = results.slice(0, 8).map((r) => ({ - title: r.title, - url: r.url, - snippet: r.snippet, - domain: r.domain, - publishedAt: r.publishedAt, - })); - - const userPayload = JSON.stringify({ - query: trimmedQuery, - sources: sourcePayload, - }); + const model = options.model ?? process.env.SYNTH_MODEL ?? "gpt-4o-mini"; + const baseUrl = (options.baseUrl ?? process.env.OPENAI_BASE_URL ?? "https://api.openai.com/v1").replace(/\/+$/, ""); + const userPrompt = buildUserPrompt(normalizedQuery, results); - const requestBody = { - model, - temperature: options.temperature ?? 0.2, - response_format: { type: "json_object" as const }, - messages: [ - { role: "system" as const, content: SYNTH_SYSTEM_PROMPT }, - { role: "user" as const, content: userPayload }, - ], - }; + let res: Response; + try { + res = await fetch(`${baseUrl}/chat/completions`, { + method: "POST", + signal: options.signal, + headers: { + "Content-Type": "application/json", + Authorization: `Bearer ${apiKey}`, + }, + body: JSON.stringify({ + model, + temperature: 0.2, + response_format: { type: "json_object" }, + messages: [ + { role: "system", content: SYSTEM_PROMPT }, + { role: "user", content: userPrompt }, + ], + }), + }); + } catch (error) { + throw new SynthError("Network error while calling synthesis model", undefined, error); + } - const fetchImpl = options.fetchImpl ?? fetch; - const response = await fetchImpl(endpoint, { - method: "POST", - headers: { - Authorization: `Bearer ${apiKey}`, - "Content-Type": "application/json", - }, - body: JSON.stringify(requestBody), - signal: options.signal, - }); + if (!res.ok) { + const message = await readErrorMessage(res); + throw new SynthError(message, res.status); + } - if (!response.ok) { - let details: unknown; - try { - details = await response.json(); - } catch { - details = await response.text().catch(() => undefined); - } - throw new Error( - `Synthesis request failed (${response.status}): ${JSON.stringify(details)}` - ); + let payload: ChatCompletionResponse; + try { + payload = (await res.json()) as ChatCompletionResponse; + } catch (error) { + throw new SynthError("Invalid JSON from synthesis model", res.status, error); } - const payload = (await response.json()) as { - model?: string; - choices?: Array<{ message?: { content?: unknown } }>; - usage?: { - prompt_tokens?: number; - completion_tokens?: number; - input_tokens?: number; - output_tokens?: number; - }; - }; + const content = payload.choices?.[0]?.message?.content?.trim() ?? ""; + const parsed = parseJsonObject(content); - const rawContent = extractMessageContent( - payload.choices?.[0]?.message?.content - ); - const parsed = parseStructuredOutput(rawContent); + const answer = + typeof parsed?.answer === "string" + ? parsed.answer.trim() + : content; - const answer = parsed.answer || "No synthesis available."; - const confidence = clamp01( - typeof parsed.confidence === "number" - ? parsed.confidence - : heuristicConfidence(results, answer) - ); + const parsedConfidence = + typeof parsed?.confidence === "number" ? parsed.confidence : undefined; - const estimatedPrompt = estimateTokens(SYNTH_SYSTEM_PROMPT + userPayload); - const estimatedCompletion = estimateTokens(answer); + const confidence = clampConfidence( + parsedConfidence ?? computeConfidence(normalizedQuery, results, answer), + ); - const tokensIn = - payload.usage?.prompt_tokens ?? - payload.usage?.input_tokens ?? - estimatedPrompt; - const tokensOut = - payload.usage?.completion_tokens ?? - payload.usage?.output_tokens ?? - estimatedCompletion; + const inTokens = payload.usage?.prompt_tokens ?? estimateTokens(`${SYSTEM_PROMPT}\n${userPrompt}`); + const outTokens = payload.usage?.completion_tokens ?? estimateTokens(answer); return { answer, confidence, tokens: { - in: Math.max(0, Number(tokensIn) || 0), - out: Math.max(0, Number(tokensOut) || 0), + in: Math.max(0, Math.round(inTokens)), + out: Math.max(0, Math.round(outTokens)), }, model: payload.model ?? model, }; } -export default synthesizeAnswer; \ No newline at end of file +export const synth = synthesize; +export default synthesize; \ No newline at end of file diff --git a/src/logic/types.ts b/src/logic/types.ts index 85203c5..595ab68 100644 --- a/src/logic/types.ts +++ b/src/logic/types.ts @@ -1,11 +1,11 @@ -export type Freshness = "day" | "week" | "month"; +export type SearchFreshness = "day" | "week" | "month"; export interface SearchResult { title: string; url: string; snippet: string; domain: string; - source?: string; - publishedAt?: string | null; + publishedAt?: string; score?: number; + source?: string; } \ No newline at end of file diff --git a/test/brave.test.ts b/test/brave.test.ts new file mode 100644 index 0000000..02d7449 --- /dev/null +++ b/test/brave.test.ts @@ -0,0 +1,97 @@ +import { afterEach, beforeEach, describe, expect, it } from "bun:test"; +import { BraveApiError, BraveRateLimitError, normalizeBraveResponse, searchBrave } from "../src/logic/brave"; + +const originalFetch = globalThis.fetch; +const originalApiKey = process.env.BRAVE_API_KEY; + +describe("brave.ts", () => { + beforeEach(() => { + process.env.BRAVE_API_KEY = "test-brave-key"; + }); + + afterEach(() => { + (globalThis as { fetch: typeof fetch }).fetch = originalFetch; + if (originalApiKey === undefined) { + delete process.env.BRAVE_API_KEY; + } else { + process.env.BRAVE_API_KEY = originalApiKey; + } + }); + + it("normalises results and maps freshness parameter", async () => { + let requestedUrl = ""; + + (globalThis as { fetch: typeof fetch }).fetch = (async (input: RequestInfo | URL) => { + requestedUrl = String(input); + return new Response( + JSON.stringify({ + web: { + results: [ + { + title: "Example Result", + url: "https://www.example.com/post", + description: "Useful snippet", + page_age: "2026-03-03T12:00:00.000Z", + }, + { + title: "Invalid Row", + url: "not-a-url", + description: "Should be filtered", + }, + ], + }, + }), + { status: 200, headers: { "Content-Type": "application/json" } }, + ); + }) as typeof fetch; + + const results = await searchBrave(" test query ", { freshness: "week" }); + + expect(requestedUrl).toContain("freshness=pw"); + expect(results).toHaveLength(1); + expect(results[0].domain).toBe("example.com"); + expect(results[0].snippet).toBe("Useful snippet"); + expect(results[0].source).toBe("brave"); + }); + + it("throws rate-limit errors with retry-after metadata", async () => { + (globalThis as { fetch: typeof fetch }).fetch = (async () => { + return new Response(JSON.stringify({ error: { message: "Rate limit exceeded" } }), { + status: 429, + headers: { "Retry-After": "12" }, + }); + }) as typeof fetch; + + let thrown: unknown; + try { + await searchBrave("rate limited"); + } catch (error) { + thrown = error; + } + + expect(thrown).toBeInstanceOf(BraveRateLimitError); + expect((thrown as BraveRateLimitError).retryAfterSeconds).toBe(12); + }); + + it("throws API errors for non-429 failures", async () => { + (globalThis as { fetch: typeof fetch }).fetch = (async () => { + return new Response(JSON.stringify({ message: "Internal server error" }), { + status: 500, + }); + }) as typeof fetch; + + let thrown: unknown; + try { + await searchBrave("server fail"); + } catch (error) { + thrown = error; + } + + expect(thrown).toBeInstanceOf(BraveApiError); + expect((thrown as BraveApiError).status).toBe(500); + }); + + it("normalise helper returns empty array safely", () => { + expect(normalizeBraveResponse({})).toEqual([]); + }); +}); \ No newline at end of file diff --git a/test/cache.test.ts b/test/cache.test.ts new file mode 100644 index 0000000..0ff0b47 --- /dev/null +++ b/test/cache.test.ts @@ -0,0 +1,36 @@ +import { describe, expect, it } from "bun:test"; +import { createCache } from "../src/logic/cache"; + +function sleep(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); +} + +describe("cache.ts", () => { + it("expires entries after TTL", async () => { + const cache = createCache(0.05); + + cache.set(" Hello World ", "value"); + expect(cache.get("hello world")).toBe("value"); + + await sleep(70); + expect(cache.get("hello world")).toBeUndefined(); + + const s = cache.stats(); + expect(s.hits).toBe(1); + expect(s.misses).toBe(1); + }); + + it("tracks hit/miss and hit rate", () => { + const cache = createCache(60); + + cache.set("foo", 123); + expect(cache.get("foo")).toBe(123); + expect(cache.get("bar")).toBeUndefined(); + + const s = cache.stats(); + expect(s.size).toBe(1); + expect(s.hits).toBe(1); + expect(s.misses).toBe(1); + expect(s.hitRate).toBeCloseTo(0.5, 5); + }); +}); \ No newline at end of file diff --git a/test/rank.test.ts b/test/rank.test.ts new file mode 100644 index 0000000..938b5a8 --- /dev/null +++ b/test/rank.test.ts @@ -0,0 +1,88 @@ +import { describe, expect, it } from "bun:test"; +import { rankResults } from "../src/logic/rank"; +import type { SearchResult } from "../src/logic/types"; + +describe("rank.ts", () => { + it("deduplicates by canonical URL and caps results per domain", () => { + const now = new Date("2026-03-04T00:00:00.000Z"); + const input: SearchResult[] = [ + { + title: "A1", + url: "https://example.com/post?id=1&utm_source=x", + snippet: "A long, useful snippet with enough context to pass quality thresholds.", + domain: "example.com", + publishedAt: "2026-03-03T00:00:00.000Z", + }, + { + title: "A1 duplicate canonical", + url: "https://example.com/post?id=1&utm_source=y", + snippet: "Duplicate URL once utm params are removed.", + domain: "example.com", + publishedAt: "2026-03-02T00:00:00.000Z", + }, + { + title: "A2", + url: "https://example.com/post-2", + snippet: "Another strong snippet that should be retained.", + domain: "example.com", + publishedAt: "2026-03-01T00:00:00.000Z", + }, + { + title: "A3", + url: "https://example.com/post-3", + snippet: "Would exceed the per-domain cap.", + domain: "example.com", + publishedAt: "2026-02-28T00:00:00.000Z", + }, + { + title: "B1", + url: "https://other.net/news", + snippet: "Different domain with quality content and unique perspective.", + domain: "other.net", + publishedAt: "2026-03-02T00:00:00.000Z", + }, + ]; + + const ranked = rankResults(input, { + now, + maxPerDomain: 2, + minQualityScore: 0.1, + }); + + const uniqueUrls = new Set(ranked.map((r) => r.url)); + const fromExample = ranked.filter((r) => r.domain === "example.com"); + + expect(uniqueUrls.size).toBe(ranked.length); + expect(fromExample.length).toBeLessThanOrEqual(2); + }); + + it("boosts newer results when quality is similar", () => { + const now = new Date("2026-03-04T00:00:00.000Z"); + + const input: SearchResult[] = [ + { + title: "Fresh result", + url: "https://fresh.io/a", + snippet: "Detailed and high-quality context for the same topic.", + domain: "fresh.io", + publishedAt: "2026-03-03T00:00:00.000Z", + }, + { + title: "Old result", + url: "https://archive.io/b", + snippet: "Detailed and high-quality context for the same topic.", + domain: "archive.io", + publishedAt: "2024-01-01T00:00:00.000Z", + }, + ]; + + const ranked = rankResults(input, { + now, + maxPerDomain: 2, + minQualityScore: 0.1, + }); + + expect(ranked[0].title).toBe("Fresh result"); + expect(ranked[0].recencyScore).toBeGreaterThan(ranked[1].recencyScore); + }); +}); \ No newline at end of file diff --git a/test/synth.test.ts b/test/synth.test.ts new file mode 100644 index 0000000..84087fb --- /dev/null +++ b/test/synth.test.ts @@ -0,0 +1,97 @@ +import { afterEach, beforeEach, describe, expect, it } from "bun:test"; +import { synthesize } from "../src/logic/synth"; +import type { SearchResult } from "../src/logic/types"; + +const originalFetch = globalThis.fetch; +const originalApiKey = process.env.OPENAI_API_KEY; + +const sampleResults: SearchResult[] = [ + { + title: "Bun 1.2 Released", + url: "https://example.com/bun-release", + snippet: "Bun ships speed improvements and runtime updates.", + domain: "example.com", + publishedAt: "2026-03-01T00:00:00.000Z", + }, + { + title: "Runtime Benchmarks", + url: "https://another.com/bench", + snippet: "Independent benchmark data for modern runtimes.", + domain: "another.com", + publishedAt: "2026-02-28T00:00:00.000Z", + }, +]; + +describe("synth.ts", () => { + beforeEach(() => { + process.env.OPENAI_API_KEY = "test-openai-key"; + }); + + afterEach(() => { + (globalThis as { fetch: typeof fetch }).fetch = originalFetch; + if (originalApiKey === undefined) { + delete process.env.OPENAI_API_KEY; + } else { + process.env.OPENAI_API_KEY = originalApiKey; + } + }); + + it("clamps confidence and uses returned token usage", async () => { + (globalThis as { fetch: typeof fetch }).fetch = (async () => { + return new Response( + JSON.stringify({ + model: "gpt-4o-mini", + choices: [ + { + message: { + content: JSON.stringify({ + answer: "Bun delivers runtime and tooling improvements.", + confidence: 1.8, + }), + }, + }, + ], + usage: { + prompt_tokens: 120, + completion_tokens: 25, + }, + }), + { status: 200, headers: { "Content-Type": "application/json" } }, + ); + }) as typeof fetch; + + const output = await synthesize("What changed in Bun?", sampleResults); + + expect(output.answer).toContain("Bun"); + expect(output.confidence).toBe(1); + expect(output.tokens).toEqual({ in: 120, out: 25 }); + expect(output.model).toBe("gpt-4o-mini"); + }); + + it("estimates tokens when usage is missing and keeps confidence in range", async () => { + (globalThis as { fetch: typeof fetch }).fetch = (async () => { + return new Response( + JSON.stringify({ + model: "gpt-4o-mini", + choices: [ + { + message: { + content: JSON.stringify({ + answer: "Evidence indicates improved runtime performance and DX updates.", + }), + }, + }, + ], + }), + { status: 200, headers: { "Content-Type": "application/json" } }, + ); + }) as typeof fetch; + + const output = await synthesize("Summarize the updates", sampleResults); + + expect(output.tokens.in).toBeGreaterThan(0); + expect(output.tokens.out).toBeGreaterThan(0); + expect(output.confidence).toBeGreaterThanOrEqual(0); + expect(output.confidence).toBeLessThanOrEqual(1); + }); +}); \ No newline at end of file From 9c647a066b619ab391562ca1e89024a9b3492635 Mon Sep 17 00:00:00 2001 From: Jackson Date: Wed, 4 Mar 2026 16:05:28 +0000 Subject: [PATCH 04/13] feat: implement task 0x9a74dfdbbaaf87c2d7 via TaskMarket agent #24583 --- src/logic/brave.ts | 372 +++++++++++++++++++++++++++++------------- src/logic/cache.ts | 117 +++++++------- src/logic/synth.ts | 390 ++++++++++++++++++++++++++++----------------- 3 files changed, 558 insertions(+), 321 deletions(-) diff --git a/src/logic/brave.ts b/src/logic/brave.ts index 18a2b4a..2136e17 100644 --- a/src/logic/brave.ts +++ b/src/logic/brave.ts @@ -1,181 +1,329 @@ -import { SearchFreshness, SearchResult } from "./types"; +export type Freshness = "day" | "week" | "month"; + +export interface SearchResult { + title: string; + url: string; + description: string; + domain: string; + publishedAt?: string; + score?: number; + source: "brave"; +} export interface BraveSearchOptions { - freshness?: SearchFreshness; + apiKey?: string; + endpoint?: string; + freshness?: Freshness; count?: number; offset?: number; + country?: string; + language?: string; + safeSearch?: "off" | "moderate" | "strict"; + timeoutMs?: number; signal?: AbortSignal; - apiKey?: string; - endpoint?: string; } -interface BraveResultRow { +export type BraveErrorCode = + | "CONFIGURATION_ERROR" + | "BAD_REQUEST" + | "UNAUTHORIZED" + | "FORBIDDEN" + | "RATE_LIMITED" + | "UPSTREAM_ERROR" + | "TIMEOUT" + | "NETWORK_ERROR" + | "INVALID_RESPONSE"; + +export class BraveApiError extends Error { + readonly status: number; + readonly code: BraveErrorCode; + readonly retryAfterSeconds?: number; + readonly details?: unknown; + + constructor( + message: string, + options: { + status: number; + code: BraveErrorCode; + retryAfterSeconds?: number; + details?: unknown; + }, + ) { + super(message); + this.name = "BraveApiError"; + this.status = options.status; + this.code = options.code; + this.retryAfterSeconds = options.retryAfterSeconds; + this.details = options.details; + } +} + +interface BraveRawResult { title?: string; url?: string; description?: string; - page_age?: string; - age?: string; + extra_snippets?: unknown; + page_age?: unknown; + age?: unknown; } -interface BraveSearchResponse { +interface BraveRawResponse { web?: { - results?: BraveResultRow[]; + results?: BraveRawResult[]; }; } -const FRESHNESS_MAP: Record = { +const FRESHNESS_MAP: Record = { day: "pd", week: "pw", month: "pm", }; -export class BraveApiError extends Error { - status?: number; - cause?: unknown; +function normalizeWhitespace(value: string): string { + return value.replace(/\s+/g, " ").trim(); +} - constructor(message: string, status?: number, cause?: unknown) { - super(message); - this.name = "BraveApiError"; - this.status = status; - this.cause = cause; +function extractDomain(inputUrl: string): string { + try { + return new URL(inputUrl).hostname.toLowerCase(); + } catch { + return ""; } } -export class BraveRateLimitError extends BraveApiError { - retryAfterSeconds?: number; +function parsePublishedAt(value: unknown): string | undefined { + if (!value) return undefined; - constructor(message: string, retryAfterSeconds?: number, cause?: unknown) { - super(message, 429, cause); - this.name = "BraveRateLimitError"; - this.retryAfterSeconds = retryAfterSeconds; + if (typeof value === "string") { + const ms = Date.parse(value); + return Number.isNaN(ms) ? undefined : new Date(ms).toISOString(); } -} - -export function normalizeBraveResponse(payload: BraveSearchResponse): SearchResult[] { - const rows = payload?.web?.results ?? []; - const total = rows.length || 1; - return rows - .map((row, index) => { - const title = (row.title ?? "").trim(); - const url = (row.url ?? "").trim(); - const snippet = (row.description ?? "").trim(); - const domain = extractDomain(url); - const publishedAt = toIsoDate(row.page_age) ?? toIsoDate(row.age); + if (typeof value === "number") { + const ms = value > 1e12 ? value : value * 1000; + return Number.isNaN(ms) ? undefined : new Date(ms).toISOString(); + } - if (!title || !url || !domain) { - return null; - } + if (typeof value === "object") { + const maybeDate = (value as { date?: unknown }).date; + if (typeof maybeDate === "string") { + const ms = Date.parse(maybeDate); + return Number.isNaN(ms) ? undefined : new Date(ms).toISOString(); + } + } - return { - title, - url, - snippet, - domain, - publishedAt, - source: "brave", - score: clamp(1 - index / total), - } satisfies SearchResult; - }) - .filter((item): item is SearchResult => item !== null); + return undefined; } -function toIsoDate(input?: string): string | undefined { - if (!input) return undefined; - const dt = new Date(input); - if (Number.isNaN(dt.getTime())) return undefined; - return dt.toISOString(); -} +function parseDescription(raw: BraveRawResult): string { + if (typeof raw.description === "string" && raw.description.trim()) { + return normalizeWhitespace(raw.description); + } -function extractDomain(url: string): string { - try { - const host = new URL(url).hostname.toLowerCase(); - return host.startsWith("www.") ? host.slice(4) : host; - } catch { - return ""; + if (Array.isArray(raw.extra_snippets)) { + const first = raw.extra_snippets.find((s) => typeof s === "string"); + if (typeof first === "string") { + return normalizeWhitespace(first); + } } + + return ""; } -async function getErrorMessage(res: Response): Promise { - const text = await res.text(); - if (!text) { - return `Brave API error (${res.status})`; +export function normalizeBraveResponse(payload: unknown): SearchResult[] { + const data = payload as BraveRawResponse; + const rawResults = data?.web?.results; + if (!Array.isArray(rawResults)) return []; + + const normalized: SearchResult[] = []; + + for (const item of rawResults) { + if (!item || typeof item.url !== "string") continue; + + const url = item.url.trim(); + const domain = extractDomain(url); + if (!url || !domain) continue; + + const title = + typeof item.title === "string" && item.title.trim() + ? normalizeWhitespace(item.title) + : "Untitled"; + + const description = parseDescription(item); + const publishedAt = parsePublishedAt(item.page_age ?? item.age); + + normalized.push({ + title, + url, + description, + domain, + publishedAt, + source: "brave", + }); } + return normalized; +} + +async function parseResponseDetails(response: Response): Promise { + const contentType = response.headers.get("content-type") ?? ""; try { - const json = JSON.parse(text) as { error?: { message?: string }; message?: string }; - return json.error?.message ?? json.message ?? text; + if (contentType.includes("application/json")) { + return await response.json(); + } + const text = await response.text(); + return text || undefined; } catch { - return text; + return undefined; } } -function clamp(value: number): number { - if (!Number.isFinite(value)) return 0; - if (value < 0) return 0; - if (value > 1) return 1; - return value; +function buildQueryParams(query: string, options: BraveSearchOptions): URLSearchParams { + const params = new URLSearchParams(); + params.set("q", query); + + params.set("count", String(options.count ?? 10)); + params.set("offset", String(options.offset ?? 0)); + + if (options.country) params.set("country", options.country); + if (options.language) params.set("search_lang", options.language); + if (options.safeSearch) params.set("safesearch", options.safeSearch); + if (options.freshness) params.set("freshness", FRESHNESS_MAP[options.freshness]); + + return params; +} + +function buildRequestUrl(endpoint: string, params: URLSearchParams): string { + const separator = endpoint.includes("?") ? "&" : "?"; + return `${endpoint}${separator}${params.toString()}`; } -export async function searchBrave(query: string, options: BraveSearchOptions = {}): Promise { - const normalizedQuery = query.trim(); +export async function searchBrave( + query: string, + options: BraveSearchOptions = {}, +): Promise { + const normalizedQuery = normalizeWhitespace(query); if (!normalizedQuery) { - return []; + throw new BraveApiError("Query is required", { + status: 400, + code: "BAD_REQUEST", + }); } const apiKey = options.apiKey ?? process.env.BRAVE_API_KEY; if (!apiKey) { - throw new BraveApiError("Missing BRAVE_API_KEY"); + throw new BraveApiError("Missing Brave API key", { + status: 500, + code: "CONFIGURATION_ERROR", + }); } - const endpoint = options.endpoint ?? process.env.BRAVE_SEARCH_ENDPOINT ?? "https://api.search.brave.com/res/v1/web/search"; - const params = new URLSearchParams({ - q: normalizedQuery, - count: String(options.count ?? 10), - offset: String(options.offset ?? 0), - }); + const endpoint = options.endpoint ?? process.env.BRAVE_API_ENDPOINT; + if (!endpoint) { + throw new BraveApiError("Missing Brave API endpoint", { + status: 500, + code: "CONFIGURATION_ERROR", + }); + } + + const timeoutMs = options.timeoutMs ?? 8_000; + const controller = new AbortController(); + const timeoutHandle = setTimeout(() => controller.abort(), timeoutMs); - if (options.freshness) { - params.set("freshness", FRESHNESS_MAP[options.freshness]); + if (options.signal) { + if (options.signal.aborted) controller.abort(); + options.signal.addEventListener("abort", () => controller.abort(), { once: true }); } - const url = `${endpoint}?${params.toString()}`; + const requestUrl = buildRequestUrl(endpoint, buildQueryParams(normalizedQuery, options)); - let res: Response; try { - res = await fetch(url, { + const response = await fetch(requestUrl, { method: "GET", - signal: options.signal, headers: { Accept: "application/json", "X-Subscription-Token": apiKey, }, + signal: controller.signal, }); - } catch (error) { - throw new BraveApiError("Network error while calling Brave Search API", undefined, error); - } - if (res.status === 429) { - const retryAfterRaw = res.headers.get("retry-after"); - const retryAfterSeconds = retryAfterRaw ? Number.parseInt(retryAfterRaw, 10) : undefined; - const message = await getErrorMessage(res); - throw new BraveRateLimitError(message || "Brave Search API rate limit exceeded", retryAfterSeconds); - } + if (!response.ok) { + const details = await parseResponseDetails(response); - if (!res.ok) { - const message = await getErrorMessage(res); - throw new BraveApiError(message, res.status); - } + if (response.status === 429) { + const retryAfterHeader = response.headers.get("retry-after"); + const retryAfterSeconds = retryAfterHeader + ? Number.parseInt(retryAfterHeader, 10) + : undefined; - let payload: BraveSearchResponse; - try { - payload = (await res.json()) as BraveSearchResponse; + throw new BraveApiError("Brave API rate limit exceeded", { + status: 429, + code: "RATE_LIMITED", + retryAfterSeconds: Number.isFinite(retryAfterSeconds) + ? retryAfterSeconds + : undefined, + details, + }); + } + + if (response.status === 400) { + throw new BraveApiError("Brave API rejected request", { + status: 400, + code: "BAD_REQUEST", + details, + }); + } + + if (response.status === 401) { + throw new BraveApiError("Brave API unauthorized", { + status: 401, + code: "UNAUTHORIZED", + details, + }); + } + + if (response.status === 403) { + throw new BraveApiError("Brave API forbidden", { + status: 403, + code: "FORBIDDEN", + details, + }); + } + + throw new BraveApiError("Brave API upstream error", { + status: response.status, + code: "UPSTREAM_ERROR", + details, + }); + } + + let payload: unknown; + try { + payload = await response.json(); + } catch { + throw new BraveApiError("Invalid JSON from Brave API", { + status: 502, + code: "INVALID_RESPONSE", + }); + } + + return normalizeBraveResponse(payload); } catch (error) { - throw new BraveApiError("Invalid JSON response from Brave Search API", res.status, error); - } + if (error instanceof BraveApiError) throw error; - return normalizeBraveResponse(payload); -} + if (error instanceof Error && error.name === "AbortError") { + throw new BraveApiError("Brave API request timed out", { + status: 504, + code: "TIMEOUT", + }); + } -export const search = searchBrave; -export default searchBrave; \ No newline at end of file + throw new BraveApiError("Brave API network error", { + status: 502, + code: "NETWORK_ERROR", + details: error, + }); + } finally { + clearTimeout(timeoutHandle); + } +} \ No newline at end of file diff --git a/src/logic/cache.ts b/src/logic/cache.ts index 48ae16c..f2d57b7 100644 --- a/src/logic/cache.ts +++ b/src/logic/cache.ts @@ -3,46 +3,72 @@ import { createHash } from "node:crypto"; export interface CacheStats { hits: number; misses: number; + requests: number; hitRate: number; size: number; ttlSeconds: number; } +export interface CacheOptions { + ttlSeconds?: number; +} + interface CacheEntry { value: T; expiresAt: number; } -function parseTtlSeconds(raw: string | undefined, fallback: number): number { - const parsed = Number.parseInt(raw ?? "", 10); - if (!Number.isFinite(parsed) || parsed <= 0) return fallback; - return parsed; -} +const DEFAULT_TTL_SECONDS = 300; export function normalizeQuery(query: string): string { return query.trim().toLowerCase().replace(/\s+/g, " "); } -export function hashQuery(query: string): string { - const normalized = normalizeQuery(query); - return createHash("sha256").update(normalized).digest("hex"); +export function hashQuery(normalizedQuery: string): string { + return createHash("sha256").update(normalizedQuery).digest("hex"); } -export class InMemoryTTLCache { - private readonly ttlMs: number; - private readonly ttlSeconds: number; +export function makeCacheKey(query: string): string { + return hashQuery(normalizeQuery(query)); +} + +export function getDefaultCacheTtlSeconds(): number { + const raw = process.env.CACHE_TTL_SECONDS; + const parsed = raw ? Number(raw) : NaN; + if (!Number.isFinite(parsed) || parsed <= 0) return DEFAULT_TTL_SECONDS; + return parsed; +} + +function sanitizeTtl(ttlSeconds: number): number { + if (!Number.isFinite(ttlSeconds) || ttlSeconds <= 0) return DEFAULT_TTL_SECONDS; + return ttlSeconds; +} + +export class QueryCache { private readonly store = new Map>(); private hits = 0; private misses = 0; + private readonly ttlSeconds: number; - constructor(ttlSeconds = 300) { - this.ttlSeconds = ttlSeconds; - this.ttlMs = ttlSeconds * 1000; + constructor(options: CacheOptions = {}) { + this.ttlSeconds = sanitizeTtl(options.ttlSeconds ?? getDefaultCacheTtlSeconds()); + } + + private pruneExpired(): void { + const now = Date.now(); + for (const [key, entry] of this.store.entries()) { + if (entry.expiresAt <= now) { + this.store.delete(key); + } + } } get(query: string): T | undefined { - this.pruneExpired(); - const key = hashQuery(query); + return this.getByKey(makeCacheKey(query)); + } + + getByKey(key: string): T | undefined { + const now = Date.now(); const entry = this.store.get(key); if (!entry) { @@ -50,7 +76,7 @@ export class InMemoryTTLCache { return undefined; } - if (entry.expiresAt <= Date.now()) { + if (entry.expiresAt <= now) { this.store.delete(key); this.misses += 1; return undefined; @@ -60,24 +86,26 @@ export class InMemoryTTLCache { return entry.value; } - set(query: string, value: T): string { - this.pruneExpired(); - const key = hashQuery(query); - this.store.set(key, { - value, - expiresAt: Date.now() + this.ttlMs, - }); + set(query: string, value: T, ttlSeconds?: number): string { + const key = makeCacheKey(query); + this.setByKey(key, value, ttlSeconds); return key; } + setByKey(key: string, value: T, ttlSeconds?: number): void { + const effectiveTtl = sanitizeTtl(ttlSeconds ?? this.ttlSeconds); + const expiresAt = Date.now() + effectiveTtl * 1000; + this.store.set(key, { value, expiresAt }); + } + stats(): CacheStats { this.pruneExpired(); - const total = this.hits + this.misses; - + const requests = this.hits + this.misses; return { hits: this.hits, misses: this.misses, - hitRate: total === 0 ? 0 : this.hits / total, + requests, + hitRate: requests === 0 ? 0 : this.hits / requests, size: this.store.size, ttlSeconds: this.ttlSeconds, }; @@ -88,39 +116,6 @@ export class InMemoryTTLCache { this.hits = 0; this.misses = 0; } - - private pruneExpired(): void { - const now = Date.now(); - for (const [key, value] of this.store.entries()) { - if (value.expiresAt <= now) { - this.store.delete(key); - } - } - } -} - -export function createCache(ttlSeconds = parseTtlSeconds(process.env.CACHE_TTL_SECONDS, 300)): InMemoryTTLCache { - return new InMemoryTTLCache(ttlSeconds); -} - -const defaultCache = createCache(); - -export function get(query: string): T | undefined { - return defaultCache.get(query) as T | undefined; -} - -export function set(query: string, value: T): string { - return defaultCache.set(query, value); -} - -export function stats(): CacheStats { - return defaultCache.stats(); } -export default { - get, - set, - stats, - normalizeQuery, - hashQuery, -}; \ No newline at end of file +export const cache = new QueryCache(); \ No newline at end of file diff --git a/src/logic/synth.ts b/src/logic/synth.ts index 7f4fc84..8c87b34 100644 --- a/src/logic/synth.ts +++ b/src/logic/synth.ts @@ -1,215 +1,309 @@ -import { SearchResult } from "./types"; +import type { SearchResult } from "./brave"; -export interface SynthTokens { +export interface SynthesisTokens { in: number; out: number; } -export interface SynthResult { +export interface SynthesisResult { answer: string; confidence: number; - tokens: SynthTokens; + tokens: SynthesisTokens; model: string; } -export interface SynthOptions { +export interface SynthesisOptions { + apiKey?: string; + endpoint?: string; model?: string; + timeoutMs?: number; + temperature?: number; signal?: AbortSignal; - apiKey?: string; - baseUrl?: string; } -interface ChatCompletionResponse { - model?: string; - choices?: Array<{ - message?: { - content?: string; - }; - }>; - usage?: { - prompt_tokens?: number; - completion_tokens?: number; - total_tokens?: number; - }; -} +export type SynthesisErrorCode = + | "CONFIGURATION_ERROR" + | "BAD_REQUEST" + | "UNAUTHORIZED" + | "RATE_LIMITED" + | "UPSTREAM_ERROR" + | "TIMEOUT" + | "NETWORK_ERROR" + | "INVALID_RESPONSE"; -const SYSTEM_PROMPT = [ - "You are Queryx synthesis for agent consumers.", - "Return STRICT JSON only with shape:", - '{"answer":"string","confidence":0.0}', - "Keep the answer concise, factual, and directly useful for downstream automation.", - "Confidence must be a number between 0 and 1 based on evidence quality and source agreement.", -].join(" "); - -export class SynthError extends Error { - status?: number; - cause?: unknown; - - constructor(message: string, status?: number, cause?: unknown) { +export class SynthesisError extends Error { + readonly status: number; + readonly code: SynthesisErrorCode; + readonly details?: unknown; + + constructor( + message: string, + options: { status: number; code: SynthesisErrorCode; details?: unknown }, + ) { super(message); - this.name = "SynthError"; - this.status = status; - this.cause = cause; + this.name = "SynthesisError"; + this.status = options.status; + this.code = options.code; + this.details = options.details; } } -export function clampConfidence(value: number): number { - if (!Number.isFinite(value)) return 0; - if (value < 0) return 0; - if (value > 1) return 1; - return value; -} +const DEFAULT_MODEL = "gpt-4o-mini"; -export function computeConfidence(query: string, results: SearchResult[], answer: string): number { - const evidenceDepth = Math.min(results.length / 8, 1); - const sourceDiversity = Math.min(new Set(results.map((r) => r.domain)).size / 5, 1); - const answerLength = Math.min(answer.trim().split(/\s+/).filter(Boolean).length / 80, 1); +const SYSTEM_PROMPT = [ + "You are a synthesis engine for downstream agents.", + "Return strict JSON only with this schema:", + '{"answer":"string","confidence":0..1}', + "Rules:", + "- concise, factual, no fluff", + "- mention uncertainty briefly when evidence is weak", + "- confidence must reflect source support and consistency", +].join("\n"); - const uncertaintyPenalty = - /unclear|unknown|insufficient|not enough|cannot determine/i.test(answer) ? 0.2 : 0; +function clamp01(value: number): number { + if (!Number.isFinite(value)) return 0; + return Math.max(0, Math.min(1, value)); +} - const score = 0.5 * evidenceDepth + 0.3 * sourceDiversity + 0.2 * answerLength - uncertaintyPenalty; - void query; - return clampConfidence(score); +function normalizeText(value: string): string { + return value.replace(/\s+/g, " ").trim(); } -function estimateTokens(text: string): number { +function estimateTokenCount(text: string): number { const cleaned = text.trim(); if (!cleaned) return 0; return Math.max(1, Math.ceil(cleaned.length / 4)); } -function buildUserPrompt(query: string, results: SearchResult[]): string { - const compact = results.slice(0, 8).map((r) => ({ - title: r.title, - url: r.url, - snippet: r.snippet, - domain: r.domain, - publishedAt: r.publishedAt, - })); - - return JSON.stringify({ - query, - results: compact, - instruction: "Synthesize an answer for agents. No markdown. JSON only.", - }); +function buildSourceBlock(results: SearchResult[]): string { + return results + .slice(0, 10) + .map((result, index) => { + return [ + `[${index + 1}] ${normalizeText(result.title)}`, + `URL: ${result.url}`, + `Domain: ${result.domain}`, + `Snippet: ${normalizeText(result.description || "")}`, + result.publishedAt ? `Published: ${result.publishedAt}` : "", + ] + .filter(Boolean) + .join("\n"); + }) + .join("\n\n"); +} + +function extractMessageContent(payload: unknown): string { + const content = (payload as { choices?: Array<{ message?: { content?: unknown } }> }) + ?.choices?.[0]?.message?.content; + + if (typeof content === "string") return content; + + if (Array.isArray(content)) { + return content + .map((part) => { + if (typeof part === "string") return part; + if (part && typeof part === "object" && "text" in part) { + const text = (part as { text?: unknown }).text; + return typeof text === "string" ? text : ""; + } + return ""; + }) + .join(""); + } + + return ""; } -function parseJsonObject(text: string): Record | null { - const trimmed = text.trim(); - if (!trimmed) return null; +function parseJsonObject(raw: string): Record { + const trimmed = raw.trim(); + if (!trimmed) return {}; try { - const direct = JSON.parse(trimmed); - if (direct && typeof direct === "object") return direct as Record; - return null; + return JSON.parse(trimmed) as Record; } catch { const match = trimmed.match(/\{[\s\S]*\}/); - if (!match) return null; + if (!match) return {}; try { - const parsed = JSON.parse(match[0]); - if (parsed && typeof parsed === "object") return parsed as Record; - return null; + return JSON.parse(match[0]) as Record; } catch { - return null; + return {}; } } } -async function readErrorMessage(res: Response): Promise { - const text = await res.text(); - if (!text) return `Synthesis API error (${res.status})`; +async function parseErrorDetails(response: Response): Promise { + const contentType = response.headers.get("content-type") ?? ""; try { - const parsed = JSON.parse(text) as { error?: { message?: string }; message?: string }; - return parsed.error?.message ?? parsed.message ?? text; + if (contentType.includes("application/json")) return await response.json(); + const text = await response.text(); + return text || undefined; } catch { - return text; + return undefined; } } -export async function synthesize(query: string, results: SearchResult[], options: SynthOptions = {}): Promise { - const normalizedQuery = query.trim(); - if (!normalizedQuery) { - return { - answer: "", - confidence: 0, - tokens: { in: 0, out: 0 }, - model: options.model ?? process.env.SYNTH_MODEL ?? "gpt-4o-mini", - }; - } - +export async function synthesizeAnswer( + query: string, + results: SearchResult[], + options: SynthesisOptions = {}, +): Promise { const apiKey = options.apiKey ?? process.env.OPENAI_API_KEY; if (!apiKey) { - throw new SynthError("Missing OPENAI_API_KEY"); + throw new SynthesisError("Missing OpenAI API key", { + status: 500, + code: "CONFIGURATION_ERROR", + }); + } + + const endpoint = options.endpoint ?? process.env.OPENAI_CHAT_COMPLETIONS_ENDPOINT; + if (!endpoint) { + throw new SynthesisError("Missing OpenAI completions endpoint", { + status: 500, + code: "CONFIGURATION_ERROR", + }); } - const model = options.model ?? process.env.SYNTH_MODEL ?? "gpt-4o-mini"; - const baseUrl = (options.baseUrl ?? process.env.OPENAI_BASE_URL ?? "https://api.openai.com/v1").replace(/\/+$/, ""); - const userPrompt = buildUserPrompt(normalizedQuery, results); + const model = options.model ?? DEFAULT_MODEL; + const timeoutMs = options.timeoutMs ?? 10_000; + const temperature = options.temperature ?? 0.2; + + const sourceBlock = buildSourceBlock(results); + const userPrompt = [ + `Query: ${normalizeText(query)}`, + "", + "Sources:", + sourceBlock || "(no sources)", + "", + "Return JSON only.", + ].join("\n"); + + const body = { + model, + temperature, + response_format: { type: "json_object" }, + messages: [ + { role: "system", content: SYSTEM_PROMPT }, + { role: "user", content: userPrompt }, + ], + }; + + const controller = new AbortController(); + const timeoutHandle = setTimeout(() => controller.abort(), timeoutMs); + + if (options.signal) { + if (options.signal.aborted) controller.abort(); + options.signal.addEventListener("abort", () => controller.abort(), { once: true }); + } - let res: Response; try { - res = await fetch(`${baseUrl}/chat/completions`, { + const response = await fetch(endpoint, { method: "POST", - signal: options.signal, headers: { - "Content-Type": "application/json", + "content-type": "application/json", Authorization: `Bearer ${apiKey}`, }, - body: JSON.stringify({ - model, - temperature: 0.2, - response_format: { type: "json_object" }, - messages: [ - { role: "system", content: SYSTEM_PROMPT }, - { role: "user", content: userPrompt }, - ], - }), + body: JSON.stringify(body), + signal: controller.signal, }); - } catch (error) { - throw new SynthError("Network error while calling synthesis model", undefined, error); - } - if (!res.ok) { - const message = await readErrorMessage(res); - throw new SynthError(message, res.status); - } + if (!response.ok) { + const details = await parseErrorDetails(response); - let payload: ChatCompletionResponse; - try { - payload = (await res.json()) as ChatCompletionResponse; - } catch (error) { - throw new SynthError("Invalid JSON from synthesis model", res.status, error); - } + if (response.status === 400) { + throw new SynthesisError("Synthesis request rejected", { + status: 400, + code: "BAD_REQUEST", + details, + }); + } - const content = payload.choices?.[0]?.message?.content?.trim() ?? ""; - const parsed = parseJsonObject(content); + if (response.status === 401 || response.status === 403) { + throw new SynthesisError("Synthesis unauthorized", { + status: response.status, + code: "UNAUTHORIZED", + details, + }); + } - const answer = - typeof parsed?.answer === "string" - ? parsed.answer.trim() - : content; + if (response.status === 429) { + throw new SynthesisError("Synthesis rate limited", { + status: 429, + code: "RATE_LIMITED", + details, + }); + } - const parsedConfidence = - typeof parsed?.confidence === "number" ? parsed.confidence : undefined; + throw new SynthesisError("Synthesis upstream error", { + status: response.status, + code: "UPSTREAM_ERROR", + details, + }); + } - const confidence = clampConfidence( - parsedConfidence ?? computeConfidence(normalizedQuery, results, answer), - ); + let payload: unknown; + try { + payload = await response.json(); + } catch { + throw new SynthesisError("Invalid JSON from synthesis model", { + status: 502, + code: "INVALID_RESPONSE", + }); + } - const inTokens = payload.usage?.prompt_tokens ?? estimateTokens(`${SYSTEM_PROMPT}\n${userPrompt}`); - const outTokens = payload.usage?.completion_tokens ?? estimateTokens(answer); + const rawContent = extractMessageContent(payload); + const parsed = parseJsonObject(rawContent); - return { - answer, - confidence, - tokens: { - in: Math.max(0, Math.round(inTokens)), - out: Math.max(0, Math.round(outTokens)), - }, - model: payload.model ?? model, - }; + const answerRaw = + typeof parsed.answer === "string" && parsed.answer.trim() + ? parsed.answer + : "Insufficient evidence in retrieved sources."; + + const confidenceRaw = + typeof parsed.confidence === "number" + ? parsed.confidence + : Number.parseFloat(String(parsed.confidence ?? "0")); + + const confidence = clamp01(confidenceRaw); + + const usage = payload as { + usage?: { prompt_tokens?: number; completion_tokens?: number }; + model?: string; + }; + + const inTokens = + typeof usage.usage?.prompt_tokens === "number" + ? usage.usage.prompt_tokens + : estimateTokenCount(`${SYSTEM_PROMPT}\n${userPrompt}`); + + const outTokens = + typeof usage.usage?.completion_tokens === "number" + ? usage.usage.completion_tokens + : estimateTokenCount(answerRaw); + + return { + answer: normalizeText(answerRaw), + confidence, + tokens: { in: Math.max(0, inTokens), out: Math.max(0, outTokens) }, + model: usage.model ?? model, + }; + } catch (error) { + if (error instanceof SynthesisError) throw error; + + if (error instanceof Error && error.name === "AbortError") { + throw new SynthesisError("Synthesis request timed out", { + status: 504, + code: "TIMEOUT", + }); + } + + throw new SynthesisError("Synthesis network error", { + status: 502, + code: "NETWORK_ERROR", + details: error, + }); + } finally { + clearTimeout(timeoutHandle); + } } -export const synth = synthesize; -export default synthesize; \ No newline at end of file +export type { SearchResult }; \ No newline at end of file From 11a01ce4483c88b5f355b284bbe6f2c21c67d954 Mon Sep 17 00:00:00 2001 From: Jackson Date: Wed, 4 Mar 2026 16:11:39 +0000 Subject: [PATCH 05/13] feat: implement task 0x9a74dfdbbaaf87c2d7 via TaskMarket agent #24583 --- src/logic/types.ts | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/src/logic/types.ts b/src/logic/types.ts index 595ab68..185d3d3 100644 --- a/src/logic/types.ts +++ b/src/logic/types.ts @@ -1,11 +1,23 @@ -export type SearchFreshness = "day" | "week" | "month"; +export type Freshness = "day" | "week" | "month"; export interface SearchResult { title: string; url: string; - snippet: string; + description: string; domain: string; + source: string; publishedAt?: string; score?: number; - source?: string; +} + +export interface TokenUsage { + in: number; + out: number; +} + +export interface SynthesisResult { + answer: string; + confidence: number; + tokens: TokenUsage; + model: string; } \ No newline at end of file From 86215456f77121dc592e989550a4d55fbcafbed0 Mon Sep 17 00:00:00 2001 From: Jackson Date: Wed, 4 Mar 2026 16:17:24 +0000 Subject: [PATCH 06/13] feat: implement task 0x9a74dfdbbaaf87c2d7 via TaskMarket agent #24583 --- src/logic/brave.ts | 394 ++++++++++++++++----------------------------- src/logic/cache.ts | 86 +++++----- src/logic/rank.ts | 207 +++++++++++++----------- src/logic/types.ts | 15 +- 4 files changed, 294 insertions(+), 408 deletions(-) diff --git a/src/logic/brave.ts b/src/logic/brave.ts index 2136e17..eac5b9d 100644 --- a/src/logic/brave.ts +++ b/src/logic/brave.ts @@ -1,244 +1,162 @@ -export type Freshness = "day" | "week" | "month"; - -export interface SearchResult { - title: string; - url: string; - description: string; - domain: string; - publishedAt?: string; - score?: number; - source: "brave"; -} - -export interface BraveSearchOptions { - apiKey?: string; - endpoint?: string; - freshness?: Freshness; - count?: number; - offset?: number; - country?: string; - language?: string; - safeSearch?: "off" | "moderate" | "strict"; - timeoutMs?: number; - signal?: AbortSignal; -} +import type { Freshness, SearchResult } from "./types"; -export type BraveErrorCode = - | "CONFIGURATION_ERROR" - | "BAD_REQUEST" - | "UNAUTHORIZED" - | "FORBIDDEN" - | "RATE_LIMITED" - | "UPSTREAM_ERROR" - | "TIMEOUT" - | "NETWORK_ERROR" - | "INVALID_RESPONSE"; +const DEFAULT_BRAVE_ENDPOINT = "https://api.search.brave.com/res/v1/web/search"; +const DEFAULT_TIMEOUT_MS = 10_000; -export class BraveApiError extends Error { - readonly status: number; - readonly code: BraveErrorCode; - readonly retryAfterSeconds?: number; - readonly details?: unknown; - - constructor( - message: string, - options: { - status: number; - code: BraveErrorCode; - retryAfterSeconds?: number; - details?: unknown; - }, - ) { - super(message); - this.name = "BraveApiError"; - this.status = options.status; - this.code = options.code; - this.retryAfterSeconds = options.retryAfterSeconds; - this.details = options.details; - } -} +const FRESHNESS_MAP: Record = { + day: "pd", + week: "pw", + month: "pm", +}; -interface BraveRawResult { +interface BraveWebResult { title?: string; url?: string; description?: string; - extra_snippets?: unknown; - page_age?: unknown; - age?: unknown; + age?: string; + page_age?: string; + extra_snippets?: string[]; + profile?: { + name?: string; + }; + score?: number; } -interface BraveRawResponse { +interface BraveSearchResponse { web?: { - results?: BraveRawResult[]; + results?: BraveWebResult[]; }; } -const FRESHNESS_MAP: Record = { - day: "pd", - week: "pw", - month: "pm", -}; - -function normalizeWhitespace(value: string): string { - return value.replace(/\s+/g, " ").trim(); +export interface BraveSearchParams { + freshness?: Freshness; + count?: number; + offset?: number; } -function extractDomain(inputUrl: string): string { - try { - return new URL(inputUrl).hostname.toLowerCase(); - } catch { - return ""; - } +export interface BraveClientOptions { + apiKey?: string; + endpoint?: string; + timeoutMs?: number; + fetchImpl?: typeof fetch; } -function parsePublishedAt(value: unknown): string | undefined { - if (!value) return undefined; +export class BraveApiError extends Error { + public readonly status: number; + public readonly body?: string; - if (typeof value === "string") { - const ms = Date.parse(value); - return Number.isNaN(ms) ? undefined : new Date(ms).toISOString(); + constructor(message: string, status: number, body?: string) { + super(message); + this.name = "BraveApiError"; + this.status = status; + this.body = body; } +} - if (typeof value === "number") { - const ms = value > 1e12 ? value : value * 1000; - return Number.isNaN(ms) ? undefined : new Date(ms).toISOString(); - } +export class BraveRateLimitError extends BraveApiError { + public readonly retryAfterSeconds?: number; - if (typeof value === "object") { - const maybeDate = (value as { date?: unknown }).date; - if (typeof maybeDate === "string") { - const ms = Date.parse(maybeDate); - return Number.isNaN(ms) ? undefined : new Date(ms).toISOString(); - } + constructor(message: string, retryAfterSeconds?: number, body?: string) { + super(message, 429, body); + this.name = "BraveRateLimitError"; + this.retryAfterSeconds = retryAfterSeconds; } - - return undefined; } -function parseDescription(raw: BraveRawResult): string { - if (typeof raw.description === "string" && raw.description.trim()) { - return normalizeWhitespace(raw.description); +function extractDomain(url: string): string { + try { + return new URL(url).hostname.replace(/^www\./i, "").toLowerCase(); + } catch { + return ""; } +} - if (Array.isArray(raw.extra_snippets)) { - const first = raw.extra_snippets.find((s) => typeof s === "string"); - if (typeof first === "string") { - return normalizeWhitespace(first); - } +function toIsoDate(value: unknown): string | undefined { + if (typeof value !== "string" || !value.trim()) { + return undefined; } - - return ""; + const parsed = new Date(value); + if (Number.isNaN(parsed.getTime())) { + return undefined; + } + return parsed.toISOString(); } -export function normalizeBraveResponse(payload: unknown): SearchResult[] { - const data = payload as BraveRawResponse; - const rawResults = data?.web?.results; - if (!Array.isArray(rawResults)) return []; - - const normalized: SearchResult[] = []; +function normalizeResult(result: BraveWebResult): SearchResult | null { + const title = (result.title ?? "").trim(); + const url = (result.url ?? "").trim(); - for (const item of rawResults) { - if (!item || typeof item.url !== "string") continue; + const description = + (result.description ?? "").trim() || + (result.extra_snippets ?? []).map((item) => item.trim()).filter(Boolean).join(" "); - const url = item.url.trim(); - const domain = extractDomain(url); - if (!url || !domain) continue; - - const title = - typeof item.title === "string" && item.title.trim() - ? normalizeWhitespace(item.title) - : "Untitled"; - - const description = parseDescription(item); - const publishedAt = parsePublishedAt(item.page_age ?? item.age); + if (!title || !url || !description) { + return null; + } - normalized.push({ - title, - url, - description, - domain, - publishedAt, - source: "brave", - }); + const domain = extractDomain(url); + if (!domain) { + return null; } - return normalized; + const source = (result.profile?.name ?? "").trim() || domain; + const publishedAt = toIsoDate(result.page_age ?? result.age); + + return { + title, + url, + description, + source, + domain, + publishedAt, + score: typeof result.score === "number" ? result.score : undefined, + }; } -async function parseResponseDetails(response: Response): Promise { - const contentType = response.headers.get("content-type") ?? ""; +async function safeReadText(response: Response): Promise { try { - if (contentType.includes("application/json")) { - return await response.json(); - } - const text = await response.text(); - return text || undefined; + return await response.text(); } catch { - return undefined; + return ""; } } -function buildQueryParams(query: string, options: BraveSearchOptions): URLSearchParams { - const params = new URLSearchParams(); - params.set("q", query); - - params.set("count", String(options.count ?? 10)); - params.set("offset", String(options.offset ?? 0)); - - if (options.country) params.set("country", options.country); - if (options.language) params.set("search_lang", options.language); - if (options.safeSearch) params.set("safesearch", options.safeSearch); - if (options.freshness) params.set("freshness", FRESHNESS_MAP[options.freshness]); - - return params; -} - -function buildRequestUrl(endpoint: string, params: URLSearchParams): string { - const separator = endpoint.includes("?") ? "&" : "?"; - return `${endpoint}${separator}${params.toString()}`; -} - export async function searchBrave( query: string, - options: BraveSearchOptions = {}, + params: BraveSearchParams = {}, + options: BraveClientOptions = {}, ): Promise { - const normalizedQuery = normalizeWhitespace(query); - if (!normalizedQuery) { - throw new BraveApiError("Query is required", { - status: 400, - code: "BAD_REQUEST", - }); + const trimmedQuery = query.trim(); + if (!trimmedQuery) { + return []; } const apiKey = options.apiKey ?? process.env.BRAVE_API_KEY; if (!apiKey) { - throw new BraveApiError("Missing Brave API key", { - status: 500, - code: "CONFIGURATION_ERROR", - }); + throw new BraveApiError("Missing BRAVE_API_KEY.", 500); } - const endpoint = options.endpoint ?? process.env.BRAVE_API_ENDPOINT; - if (!endpoint) { - throw new BraveApiError("Missing Brave API endpoint", { - status: 500, - code: "CONFIGURATION_ERROR", - }); - } + const endpoint = options.endpoint ?? process.env.BRAVE_API_ENDPOINT ?? DEFAULT_BRAVE_ENDPOINT; + const fetchImpl = options.fetchImpl ?? fetch; - const timeoutMs = options.timeoutMs ?? 8_000; - const controller = new AbortController(); - const timeoutHandle = setTimeout(() => controller.abort(), timeoutMs); + const url = new URL(endpoint); + url.searchParams.set("q", trimmedQuery); - if (options.signal) { - if (options.signal.aborted) controller.abort(); - options.signal.addEventListener("abort", () => controller.abort(), { once: true }); + if (typeof params.count === "number" && params.count > 0) { + url.searchParams.set("count", String(Math.floor(params.count))); + } + if (typeof params.offset === "number" && params.offset >= 0) { + url.searchParams.set("offset", String(Math.floor(params.offset))); + } + if (params.freshness) { + url.searchParams.set("freshness", FRESHNESS_MAP[params.freshness]); } - const requestUrl = buildRequestUrl(endpoint, buildQueryParams(normalizedQuery, options)); + const controller = new AbortController(); + const timeoutId = setTimeout(() => controller.abort(), options.timeoutMs ?? DEFAULT_TIMEOUT_MS); try { - const response = await fetch(requestUrl, { + const response = await fetchImpl(url.toString(), { method: "GET", headers: { Accept: "application/json", @@ -247,83 +165,53 @@ export async function searchBrave( signal: controller.signal, }); - if (!response.ok) { - const details = await parseResponseDetails(response); - - if (response.status === 429) { - const retryAfterHeader = response.headers.get("retry-after"); - const retryAfterSeconds = retryAfterHeader - ? Number.parseInt(retryAfterHeader, 10) - : undefined; - - throw new BraveApiError("Brave API rate limit exceeded", { - status: 429, - code: "RATE_LIMITED", - retryAfterSeconds: Number.isFinite(retryAfterSeconds) - ? retryAfterSeconds - : undefined, - details, - }); - } + if (response.status === 429) { + const retryAfterHeader = response.headers.get("retry-after"); + const retryAfterSeconds = retryAfterHeader ? Number(retryAfterHeader) : undefined; + const body = await safeReadText(response); + throw new BraveRateLimitError( + "Brave API rate limit exceeded.", + Number.isFinite(retryAfterSeconds) ? retryAfterSeconds : undefined, + body, + ); + } - if (response.status === 400) { - throw new BraveApiError("Brave API rejected request", { - status: 400, - code: "BAD_REQUEST", - details, - }); - } + if (!response.ok) { + const body = await safeReadText(response); + throw new BraveApiError(`Brave API request failed with status ${response.status}.`, response.status, body); + } - if (response.status === 401) { - throw new BraveApiError("Brave API unauthorized", { - status: 401, - code: "UNAUTHORIZED", - details, - }); - } + const payload = (await response.json()) as BraveSearchResponse; + const rawResults = payload.web?.results ?? []; - if (response.status === 403) { - throw new BraveApiError("Brave API forbidden", { - status: 403, - code: "FORBIDDEN", - details, - }); + const normalized: SearchResult[] = []; + for (const rawResult of rawResults) { + const item = normalizeResult(rawResult); + if (item) { + normalized.push(item); } - - throw new BraveApiError("Brave API upstream error", { - status: response.status, - code: "UPSTREAM_ERROR", - details, - }); } - let payload: unknown; - try { - payload = await response.json(); - } catch { - throw new BraveApiError("Invalid JSON from Brave API", { - status: 502, - code: "INVALID_RESPONSE", - }); + return normalized; + } catch (error: unknown) { + if (error instanceof BraveApiError) { + throw error; } - return normalizeBraveResponse(payload); - } catch (error) { - if (error instanceof BraveApiError) throw error; - if (error instanceof Error && error.name === "AbortError") { - throw new BraveApiError("Brave API request timed out", { - status: 504, - code: "TIMEOUT", - }); + throw new BraveApiError("Brave API request timed out.", 408); } - throw new BraveApiError("Brave API network error", { - status: 502, - code: "NETWORK_ERROR", - details: error, - }); + throw new BraveApiError( + `Brave API request failed: ${error instanceof Error ? error.message : String(error)}`, + 500, + ); } finally { - clearTimeout(timeoutHandle); + clearTimeout(timeoutId); } -} \ No newline at end of file +} + +export const getBraveSearchResults = searchBrave; + +export type { Freshness, SearchResult }; +export default searchBrave; \ No newline at end of file diff --git a/src/logic/cache.ts b/src/logic/cache.ts index f2d57b7..10ffd2c 100644 --- a/src/logic/cache.ts +++ b/src/logic/cache.ts @@ -3,7 +3,6 @@ import { createHash } from "node:crypto"; export interface CacheStats { hits: number; misses: number; - requests: number; hitRate: number; size: number; ttlSeconds: number; @@ -11,6 +10,7 @@ export interface CacheStats { export interface CacheOptions { ttlSeconds?: number; + now?: () => number; } interface CacheEntry { @@ -18,7 +18,10 @@ interface CacheEntry { expiresAt: number; } -const DEFAULT_TTL_SECONDS = 300; +const DEFAULT_TTL_SECONDS = (() => { + const raw = Number(process.env.CACHE_TTL_SECONDS ?? 300); + return Number.isFinite(raw) && raw > 0 ? raw : 300; +})(); export function normalizeQuery(query: string): string { return query.trim().toLowerCase().replace(/\s+/g, " "); @@ -28,47 +31,25 @@ export function hashQuery(normalizedQuery: string): string { return createHash("sha256").update(normalizedQuery).digest("hex"); } -export function makeCacheKey(query: string): string { - return hashQuery(normalizeQuery(query)); -} - -export function getDefaultCacheTtlSeconds(): number { - const raw = process.env.CACHE_TTL_SECONDS; - const parsed = raw ? Number(raw) : NaN; - if (!Number.isFinite(parsed) || parsed <= 0) return DEFAULT_TTL_SECONDS; - return parsed; -} - -function sanitizeTtl(ttlSeconds: number): number { - if (!Number.isFinite(ttlSeconds) || ttlSeconds <= 0) return DEFAULT_TTL_SECONDS; - return ttlSeconds; -} - -export class QueryCache { +export class InMemoryCache { private readonly store = new Map>(); + private readonly now: () => number; + private readonly ttlSeconds: number; private hits = 0; private misses = 0; - private readonly ttlSeconds: number; constructor(options: CacheOptions = {}) { - this.ttlSeconds = sanitizeTtl(options.ttlSeconds ?? getDefaultCacheTtlSeconds()); + this.now = options.now ?? (() => Date.now()); + this.ttlSeconds = options.ttlSeconds ?? DEFAULT_TTL_SECONDS; } - private pruneExpired(): void { - const now = Date.now(); - for (const [key, entry] of this.store.entries()) { - if (entry.expiresAt <= now) { - this.store.delete(key); - } - } + keyForQuery(query: string): string { + return hashQuery(normalizeQuery(query)); } get(query: string): T | undefined { - return this.getByKey(makeCacheKey(query)); - } - - getByKey(key: string): T | undefined { - const now = Date.now(); + this.purgeExpired(); + const key = this.keyForQuery(query); const entry = this.store.get(key); if (!entry) { @@ -76,7 +57,7 @@ export class QueryCache { return undefined; } - if (entry.expiresAt <= now) { + if (entry.expiresAt <= this.now()) { this.store.delete(key); this.misses += 1; return undefined; @@ -86,26 +67,25 @@ export class QueryCache { return entry.value; } - set(query: string, value: T, ttlSeconds?: number): string { - const key = makeCacheKey(query); - this.setByKey(key, value, ttlSeconds); - return key; - } + set(query: string, value: T, ttlSeconds = this.ttlSeconds): string { + if (!Number.isFinite(ttlSeconds) || ttlSeconds <= 0) { + throw new Error("Cache TTL must be a positive number."); + } - setByKey(key: string, value: T, ttlSeconds?: number): void { - const effectiveTtl = sanitizeTtl(ttlSeconds ?? this.ttlSeconds); - const expiresAt = Date.now() + effectiveTtl * 1000; + const key = this.keyForQuery(query); + const expiresAt = this.now() + ttlSeconds * 1000; this.store.set(key, { value, expiresAt }); + return key; } stats(): CacheStats { - this.pruneExpired(); - const requests = this.hits + this.misses; + this.purgeExpired(); + const total = this.hits + this.misses; + return { hits: this.hits, misses: this.misses, - requests, - hitRate: requests === 0 ? 0 : this.hits / requests, + hitRate: total === 0 ? 0 : this.hits / total, size: this.store.size, ttlSeconds: this.ttlSeconds, }; @@ -116,6 +96,18 @@ export class QueryCache { this.hits = 0; this.misses = 0; } + + private purgeExpired(): void { + const now = this.now(); + for (const [key, entry] of this.store.entries()) { + if (entry.expiresAt <= now) { + this.store.delete(key); + } + } + } } -export const cache = new QueryCache(); \ No newline at end of file +export const cache = new InMemoryCache(); +export const queryCache = cache; + +export default cache; \ No newline at end of file diff --git a/src/logic/rank.ts b/src/logic/rank.ts index 8ab5c0f..f395caa 100644 --- a/src/logic/rank.ts +++ b/src/logic/rank.ts @@ -1,145 +1,162 @@ -import { SearchResult } from "./types"; +import type { SearchResult } from "./types"; + +const TRACKING_QUERY_PARAMS = new Set([ + "utm_source", + "utm_medium", + "utm_campaign", + "utm_term", + "utm_content", + "gclid", + "fbclid", + "mc_cid", + "mc_eid", + "ref", + "source", +]); export interface RankOptions { maxPerDomain?: number; - minQualityScore?: number; + minDescriptionLength?: number; now?: Date; } -export interface RankedSearchResult extends SearchResult { - qualityScore: number; - recencyScore: number; - rankScore: number; -} - -function clamp(value: number): number { - if (!Number.isFinite(value)) return 0; - if (value < 0) return 0; - if (value > 1) return 1; - return value; -} +export type RankedSearchResult = SearchResult & { rankScore: number }; function extractDomain(url: string): string { try { - const hostname = new URL(url).hostname.toLowerCase(); - return hostname.startsWith("www.") ? hostname.slice(4) : hostname; + return new URL(url).hostname.replace(/^www\./i, "").toLowerCase(); } catch { return ""; } } -function canonicalizeUrl(url: string): string { +export function canonicalizeUrl(input: string): string { try { - const parsed = new URL(url); - parsed.hash = ""; - for (const key of [...parsed.searchParams.keys()]) { - if (key.toLowerCase().startsWith("utm_")) { - parsed.searchParams.delete(key); + const url = new URL(input); + url.hash = ""; + + for (const key of [...url.searchParams.keys()]) { + if (TRACKING_QUERY_PARAMS.has(key.toLowerCase())) { + url.searchParams.delete(key); } } - const pathname = parsed.pathname.replace(/\/+$/, ""); - const query = parsed.searchParams.toString(); - return `${parsed.origin}${pathname}${query ? `?${query}` : ""}`; + + const trimmedPath = url.pathname.replace(/\/+$/, ""); + url.pathname = trimmedPath || "/"; + return url.toString(); } catch { - return url.trim(); + return input.trim(); } } -export function qualityScore(result: SearchResult): number { - let score = 0; - - const titleLength = result.title.trim().length; - const snippetLength = result.snippet.trim().length; - - if (titleLength >= 12) score += 0.35; - else if (titleLength >= 6) score += 0.2; - - if (snippetLength >= 80) score += 0.35; - else if (snippetLength >= 30) score += 0.2; - - if (/^https?:\/\//i.test(result.url)) score += 0.15; - if (result.domain.includes(".")) score += 0.1; +export function recencyBoost(publishedAt: string | undefined, now: Date = new Date()): number { + if (!publishedAt) { + return 0; + } - if (/(login|signup|privacy|terms|cookie)/i.test(result.url)) { - score -= 0.2; + const date = new Date(publishedAt); + if (Number.isNaN(date.getTime())) { + return 0; } - if (typeof result.score === "number") { - score += 0.05 * clamp(result.score); + const ageMs = now.getTime() - date.getTime(); + if (ageMs < 0) { + return 0.2; } - return clamp(score); -} + const ageDays = ageMs / (24 * 60 * 60 * 1000); -export function recencyScore(publishedAt: string | undefined, now = new Date()): number { - if (!publishedAt) return 0.3; + if (ageDays <= 1) return 0.25; + if (ageDays <= 7) return 0.16; + if (ageDays <= 30) return 0.08; + if (ageDays <= 90) return 0.03; + return 0; +} - const ts = new Date(publishedAt).getTime(); - if (Number.isNaN(ts)) return 0.3; +export function isLowQualityResult(result: SearchResult, minDescriptionLength = 40): boolean { + if (!result.title?.trim()) return true; + if (!result.url?.trim()) return true; + if (!result.description?.trim()) return true; + if (result.title.trim().length < 4) return true; + if (result.description.trim().length < minDescriptionLength) return true; + if (!/^https?:\/\//i.test(result.url.trim())) return true; + if (!extractDomain(result.url)) return true; + return false; +} - const ageDays = (now.getTime() - ts) / 86_400_000; - if (ageDays <= 0) return 1; +function qualityScore(result: SearchResult, now: Date): number { + const base = typeof result.score === "number" ? result.score : 0.5; + const descriptionBoost = Math.min(0.2, result.description.trim().length / 500); + const titleBoost = Math.min(0.1, result.title.trim().length / 120); + const httpsBoost = result.url.startsWith("https://") ? 0.03 : 0; + return base + descriptionBoost + titleBoost + httpsBoost + recencyBoost(result.publishedAt, now); +} - return clamp(Math.exp(-ageDays / 30)); +function normalizeResult(result: SearchResult): SearchResult { + const domain = result.domain?.trim().toLowerCase() || extractDomain(result.url); + const source = result.source?.trim() || domain; + return { + ...result, + domain, + source, + }; } export function rankResults(results: SearchResult[], options: RankOptions = {}): RankedSearchResult[] { const maxPerDomain = options.maxPerDomain ?? 2; - const minQualityScore = options.minQualityScore ?? 0.35; + const minDescriptionLength = options.minDescriptionLength ?? 40; const now = options.now ?? new Date(); - const seenUrls = new Set(); - const candidates: RankedSearchResult[] = []; - - for (const input of results) { - if (!input?.title || !input?.url) continue; + const deduped = new Map(); - const normalizedUrl = canonicalizeUrl(input.url); - if (!normalizedUrl || seenUrls.has(normalizedUrl)) continue; - seenUrls.add(normalizedUrl); - - const domain = (input.domain || extractDomain(input.url)).toLowerCase(); - if (!domain) continue; - - const materialized: SearchResult = { - ...input, - domain, - url: normalizedUrl, - }; + for (const raw of results) { + const result = normalizeResult(raw); + if (isLowQualityResult(result, minDescriptionLength)) { + continue; + } - const qScore = qualityScore(materialized); - if (qScore < minQualityScore) continue; + const canonicalUrl = canonicalizeUrl(result.url); + const existing = deduped.get(canonicalUrl); - const rScore = recencyScore(materialized.publishedAt, now); - const sourceScore = typeof materialized.score === "number" ? clamp(materialized.score) : 0.5; - const rankScore = clamp(0.6 * qScore + 0.25 * rScore + 0.15 * sourceScore); + if (!existing) { + deduped.set(canonicalUrl, result); + continue; + } - candidates.push({ - ...materialized, - qualityScore: qScore, - recencyScore: rScore, - rankScore, - }); + if (qualityScore(result, now) > qualityScore(existing, now)) { + deduped.set(canonicalUrl, result); + } } - candidates.sort((a, b) => { - if (b.rankScore !== a.rankScore) return b.rankScore - a.rankScore; - return b.recencyScore - a.recencyScore; - }); + const ranked = [...deduped.values()] + .map((result) => ({ + ...result, + rankScore: qualityScore(result, now), + })) + .sort((a, b) => b.rankScore - a.rankScore); + + const domainCounts = new Map(); + const limited: RankedSearchResult[] = []; - const perDomainCount = new Map(); - const output: RankedSearchResult[] = []; + for (const result of ranked) { + const domain = result.domain || extractDomain(result.url); + if (!domain) { + continue; + } - for (const candidate of candidates) { - const used = perDomainCount.get(candidate.domain) ?? 0; - if (used >= maxPerDomain) continue; + const count = domainCounts.get(domain) ?? 0; + if (count >= maxPerDomain) { + continue; + } - perDomainCount.set(candidate.domain, used + 1); - output.push(candidate); + domainCounts.set(domain, count + 1); + limited.push(result); } - return output; + return limited; } -export const rank = rankResults; +export const rankAndDedupResults = rankResults; + +export type { SearchResult }; export default rankResults; \ No newline at end of file diff --git a/src/logic/types.ts b/src/logic/types.ts index 185d3d3..e191a36 100644 --- a/src/logic/types.ts +++ b/src/logic/types.ts @@ -4,20 +4,9 @@ export interface SearchResult { title: string; url: string; description: string; - domain: string; source: string; + domain: string; publishedAt?: string; + favicon?: string; score?: number; -} - -export interface TokenUsage { - in: number; - out: number; -} - -export interface SynthesisResult { - answer: string; - confidence: number; - tokens: TokenUsage; - model: string; } \ No newline at end of file From 61074ab1ed96934aab73360d9fe1674a72aca895 Mon Sep 17 00:00:00 2001 From: Jackson Date: Wed, 4 Mar 2026 16:23:04 +0000 Subject: [PATCH 07/13] feat: implement task 0x9a74dfdbbaaf87c2d7 via TaskMarket agent #24583 --- src/logic/brave.ts | 313 +++++++++++++++++--------------- src/logic/cache.ts | 106 ++++++----- src/logic/synth.ts | 435 ++++++++++++++++++++++----------------------- 3 files changed, 435 insertions(+), 419 deletions(-) diff --git a/src/logic/brave.ts b/src/logic/brave.ts index eac5b9d..9d183fb 100644 --- a/src/logic/brave.ts +++ b/src/logic/brave.ts @@ -1,217 +1,236 @@ -import type { Freshness, SearchResult } from "./types"; - -const DEFAULT_BRAVE_ENDPOINT = "https://api.search.brave.com/res/v1/web/search"; -const DEFAULT_TIMEOUT_MS = 10_000; - -const FRESHNESS_MAP: Record = { - day: "pd", - week: "pw", - month: "pm", -}; - -interface BraveWebResult { - title?: string; - url?: string; - description?: string; - age?: string; - page_age?: string; - extra_snippets?: string[]; - profile?: { - name?: string; - }; +export type Freshness = "day" | "week" | "month"; + +export interface SearchResult { + title: string; + url: string; + description: string; + sourceDomain: string; + publishedAt?: string; score?: number; } -interface BraveSearchResponse { - web?: { - results?: BraveWebResult[]; - }; -} - -export interface BraveSearchParams { - freshness?: Freshness; - count?: number; - offset?: number; -} - -export interface BraveClientOptions { +export interface BraveSearchOptions { apiKey?: string; endpoint?: string; - timeoutMs?: number; fetchImpl?: typeof fetch; + signal?: AbortSignal; + count?: number; + offset?: number; + country?: string; + language?: string; + freshness?: Freshness; } -export class BraveApiError extends Error { +type JsonRecord = Record; + +const FRESHNESS_PARAM_MAP: Record = { + day: "pd", + week: "pw", + month: "pm", +}; + +export class BraveAPIError extends Error { public readonly status: number; - public readonly body?: string; + public readonly details?: unknown; - constructor(message: string, status: number, body?: string) { + constructor(message: string, status: number, details?: unknown) { super(message); - this.name = "BraveApiError"; + this.name = "BraveAPIError"; this.status = status; - this.body = body; + this.details = details; } } -export class BraveRateLimitError extends BraveApiError { +export class BraveRateLimitError extends BraveAPIError { public readonly retryAfterSeconds?: number; - constructor(message: string, retryAfterSeconds?: number, body?: string) { - super(message, 429, body); + constructor(message: string, retryAfterSeconds?: number, details?: unknown) { + super(message, 429, details); this.name = "BraveRateLimitError"; this.retryAfterSeconds = retryAfterSeconds; } } -function extractDomain(url: string): string { - try { - return new URL(url).hostname.replace(/^www\./i, "").toLowerCase(); - } catch { - return ""; +function isRecord(value: unknown): value is JsonRecord { + return typeof value === "object" && value !== null; +} + +function parsePublishedAt(raw: unknown): string | undefined { + if (typeof raw === "number" && Number.isFinite(raw)) { + const milliseconds = raw > 1_000_000_000_000 ? raw : raw * 1000; + const date = new Date(milliseconds); + return Number.isNaN(date.getTime()) ? undefined : date.toISOString(); + } + + if (typeof raw === "string" && raw.trim().length > 0) { + const date = new Date(raw); + return Number.isNaN(date.getTime()) ? undefined : date.toISOString(); } + + return undefined; } -function toIsoDate(value: unknown): string | undefined { - if (typeof value !== "string" || !value.trim()) { +function getDomainFromUrl(rawUrl: string): string | undefined { + try { + const parsed = new URL(rawUrl); + return parsed.hostname.replace(/^www\./i, "").toLowerCase(); + } catch { return undefined; } - const parsed = new Date(value); - if (Number.isNaN(parsed.getTime())) { +} + +function clamp01(value: number): number { + if (!Number.isFinite(value)) return 0; + return Math.max(0, Math.min(1, value)); +} + +async function parseErrorBody(response: Response): Promise { + const contentType = response.headers.get("content-type") ?? ""; + try { + if (contentType.includes("application/json")) { + return await response.json(); + } + return await response.text(); + } catch { return undefined; } - return parsed.toISOString(); } -function normalizeResult(result: BraveWebResult): SearchResult | null { - const title = (result.title ?? "").trim(); - const url = (result.url ?? "").trim(); +export function mapFreshnessParam(freshness: Freshness): string { + return FRESHNESS_PARAM_MAP[freshness]; +} - const description = - (result.description ?? "").trim() || - (result.extra_snippets ?? []).map((item) => item.trim()).filter(Boolean).join(" "); +export function normalizeBraveResults(payload: unknown): SearchResult[] { + if (!isRecord(payload)) return []; - if (!title || !url || !description) { - return null; - } + const web = payload.web; + if (!isRecord(web)) return []; - const domain = extractDomain(url); - if (!domain) { - return null; - } + const results = web.results; + if (!Array.isArray(results)) return []; - const source = (result.profile?.name ?? "").trim() || domain; - const publishedAt = toIsoDate(result.page_age ?? result.age); + const normalized: SearchResult[] = []; - return { - title, - url, - description, - source, - domain, - publishedAt, - score: typeof result.score === "number" ? result.score : undefined, - }; -} + for (const item of results) { + if (!isRecord(item)) continue; -async function safeReadText(response: Response): Promise { - try { - return await response.text(); - } catch { - return ""; + const url = typeof item.url === "string" ? item.url.trim() : ""; + const domain = getDomainFromUrl(url); + if (!domain) continue; + + const title = typeof item.title === "string" ? item.title.trim() : ""; + if (!title) continue; + + const description = + typeof item.description === "string" ? item.description.trim() : ""; + + const publishedAt = + parsePublishedAt(item.page_age) ?? + parsePublishedAt(item.published) ?? + parsePublishedAt(item.date) ?? + parsePublishedAt(item.age); + + const scoreRaw = + typeof item.score === "number" ? item.score : Number(item.score); + const score = Number.isFinite(scoreRaw) ? clamp01(scoreRaw) : undefined; + + normalized.push({ + title, + url, + description, + sourceDomain: domain, + ...(publishedAt ? { publishedAt } : {}), + ...(score !== undefined ? { score } : {}), + }); } + + return normalized; } export async function searchBrave( query: string, - params: BraveSearchParams = {}, - options: BraveClientOptions = {}, + options: BraveSearchOptions = {}, ): Promise { - const trimmedQuery = query.trim(); - if (!trimmedQuery) { - return []; - } + const q = query.trim(); + if (!q) return []; const apiKey = options.apiKey ?? process.env.BRAVE_API_KEY; if (!apiKey) { - throw new BraveApiError("Missing BRAVE_API_KEY.", 500); + throw new BraveAPIError("Missing BRAVE_API_KEY", 0); } - const endpoint = options.endpoint ?? process.env.BRAVE_API_ENDPOINT ?? DEFAULT_BRAVE_ENDPOINT; - const fetchImpl = options.fetchImpl ?? fetch; + const endpoint = options.endpoint ?? process.env.BRAVE_API_URL; + if (!endpoint) { + throw new BraveAPIError("Missing BRAVE_API_URL", 0); + } - const url = new URL(endpoint); - url.searchParams.set("q", trimmedQuery); + const params = new URLSearchParams(); + params.set("q", q); + params.set("source", "web"); + params.set("count", String(options.count ?? 10)); - if (typeof params.count === "number" && params.count > 0) { - url.searchParams.set("count", String(Math.floor(params.count))); + if (typeof options.offset === "number" && options.offset >= 0) { + params.set("offset", String(options.offset)); } - if (typeof params.offset === "number" && params.offset >= 0) { - url.searchParams.set("offset", String(Math.floor(params.offset))); + if (options.country) { + params.set("country", options.country); } - if (params.freshness) { - url.searchParams.set("freshness", FRESHNESS_MAP[params.freshness]); + if (options.language) { + params.set("search_lang", options.language); + } + if (options.freshness) { + params.set("freshness", mapFreshnessParam(options.freshness)); } - const controller = new AbortController(); - const timeoutId = setTimeout(() => controller.abort(), options.timeoutMs ?? DEFAULT_TIMEOUT_MS); + const target = `${endpoint}?${params.toString()}`; + const fetchImpl = options.fetchImpl ?? fetch; + let response: Response; try { - const response = await fetchImpl(url.toString(), { + response = await fetchImpl(target, { method: "GET", + signal: options.signal, headers: { Accept: "application/json", "X-Subscription-Token": apiKey, }, - signal: controller.signal, }); + } catch (error) { + throw new BraveAPIError("Failed to reach Brave Search API", 0, error); + } - if (response.status === 429) { - const retryAfterHeader = response.headers.get("retry-after"); - const retryAfterSeconds = retryAfterHeader ? Number(retryAfterHeader) : undefined; - const body = await safeReadText(response); - throw new BraveRateLimitError( - "Brave API rate limit exceeded.", - Number.isFinite(retryAfterSeconds) ? retryAfterSeconds : undefined, - body, - ); - } - - if (!response.ok) { - const body = await safeReadText(response); - throw new BraveApiError(`Brave API request failed with status ${response.status}.`, response.status, body); - } - - const payload = (await response.json()) as BraveSearchResponse; - const rawResults = payload.web?.results ?? []; - - const normalized: SearchResult[] = []; - for (const rawResult of rawResults) { - const item = normalizeResult(rawResult); - if (item) { - normalized.push(item); - } - } - - return normalized; - } catch (error: unknown) { - if (error instanceof BraveApiError) { - throw error; - } + if (response.status === 429) { + const retryAfterHeader = response.headers.get("retry-after"); + const retryAfterSeconds = retryAfterHeader + ? Number(retryAfterHeader) + : undefined; + throw new BraveRateLimitError( + "Brave Search API rate limit exceeded", + Number.isFinite(retryAfterSeconds) ? retryAfterSeconds : undefined, + await parseErrorBody(response), + ); + } - if (error instanceof Error && error.name === "AbortError") { - throw new BraveApiError("Brave API request timed out.", 408); - } + if (!response.ok) { + throw new BraveAPIError( + `Brave Search API request failed with status ${response.status}`, + response.status, + await parseErrorBody(response), + ); + } - throw new BraveApiError( - `Brave API request failed: ${error instanceof Error ? error.message : String(error)}`, - 500, + let jsonPayload: unknown; + try { + jsonPayload = await response.json(); + } catch (error) { + throw new BraveAPIError( + "Brave Search API returned invalid JSON", + response.status, + error, ); - } finally { - clearTimeout(timeoutId); } -} -export const getBraveSearchResults = searchBrave; + return normalizeBraveResults(jsonPayload); +} -export type { Freshness, SearchResult }; export default searchBrave; \ No newline at end of file diff --git a/src/logic/cache.ts b/src/logic/cache.ts index 10ffd2c..7839c84 100644 --- a/src/logic/cache.ts +++ b/src/logic/cache.ts @@ -8,56 +8,56 @@ export interface CacheStats { ttlSeconds: number; } -export interface CacheOptions { - ttlSeconds?: number; - now?: () => number; -} - interface CacheEntry { value: T; expiresAt: number; } -const DEFAULT_TTL_SECONDS = (() => { - const raw = Number(process.env.CACHE_TTL_SECONDS ?? 300); - return Number.isFinite(raw) && raw > 0 ? raw : 300; -})(); +function parseTtlSeconds(raw: string | undefined, fallback: number): number { + const n = Number(raw); + return Number.isFinite(n) && n > 0 ? n : fallback; +} + +export const DEFAULT_CACHE_TTL_SECONDS = parseTtlSeconds( + process.env.CACHE_TTL_SECONDS, + 300, +); export function normalizeQuery(query: string): string { return query.trim().toLowerCase().replace(/\s+/g, " "); } -export function hashQuery(normalizedQuery: string): string { - return createHash("sha256").update(normalizedQuery).digest("hex"); +export function getCacheKey(query: string): string { + return createHash("sha256").update(normalizeQuery(query)).digest("hex"); } export class InMemoryCache { private readonly store = new Map>(); - private readonly now: () => number; - private readonly ttlSeconds: number; private hits = 0; private misses = 0; + private readonly defaultTtlSeconds: number; - constructor(options: CacheOptions = {}) { - this.now = options.now ?? (() => Date.now()); - this.ttlSeconds = options.ttlSeconds ?? DEFAULT_TTL_SECONDS; + constructor(defaultTtlSeconds: number = DEFAULT_CACHE_TTL_SECONDS) { + this.defaultTtlSeconds = defaultTtlSeconds > 0 ? defaultTtlSeconds : 300; } - keyForQuery(query: string): string { - return hashQuery(normalizeQuery(query)); + private purgeExpired(): void { + const now = Date.now(); + for (const [key, entry] of this.store.entries()) { + if (entry.expiresAt <= now) { + this.store.delete(key); + } + } } - get(query: string): T | undefined { - this.purgeExpired(); - const key = this.keyForQuery(query); + get(key: string): T | undefined { const entry = this.store.get(key); - if (!entry) { this.misses += 1; return undefined; } - if (entry.expiresAt <= this.now()) { + if (entry.expiresAt <= Date.now()) { this.store.delete(key); this.misses += 1; return undefined; @@ -67,47 +67,55 @@ export class InMemoryCache { return entry.value; } - set(query: string, value: T, ttlSeconds = this.ttlSeconds): string { - if (!Number.isFinite(ttlSeconds) || ttlSeconds <= 0) { - throw new Error("Cache TTL must be a positive number."); - } - - const key = this.keyForQuery(query); - const expiresAt = this.now() + ttlSeconds * 1000; + set(key: string, value: T, ttlSeconds: number = this.defaultTtlSeconds): void { + const safeTtl = Number.isFinite(ttlSeconds) && ttlSeconds > 0 ? ttlSeconds : 0; + const expiresAt = Date.now() + safeTtl * 1000; this.store.set(key, { value, expiresAt }); - return key; + } + + clear(): void { + this.store.clear(); + this.hits = 0; + this.misses = 0; } stats(): CacheStats { this.purgeExpired(); const total = this.hits + this.misses; - return { hits: this.hits, misses: this.misses, hitRate: total === 0 ? 0 : this.hits / total, size: this.store.size, - ttlSeconds: this.ttlSeconds, + ttlSeconds: this.defaultTtlSeconds, }; } +} - clear(): void { - this.store.clear(); - this.hits = 0; - this.misses = 0; - } +const cache = new InMemoryCache(DEFAULT_CACHE_TTL_SECONDS); - private purgeExpired(): void { - const now = this.now(); - for (const [key, entry] of this.store.entries()) { - if (entry.expiresAt <= now) { - this.store.delete(key); - } - } - } +export function get(key: string): T | undefined { + return cache.get(key) as T | undefined; } -export const cache = new InMemoryCache(); -export const queryCache = cache; +export function set(key: string, value: T, ttlSeconds?: number): void { + cache.set(key, value, ttlSeconds); +} + +export function stats(): CacheStats { + return cache.stats(); +} + +export function clear(): void { + cache.clear(); +} -export default cache; \ No newline at end of file +export default { + get, + set, + stats, + clear, + getCacheKey, + normalizeQuery, + InMemoryCache, +}; \ No newline at end of file diff --git a/src/logic/synth.ts b/src/logic/synth.ts index 8c87b34..af57c49 100644 --- a/src/logic/synth.ts +++ b/src/logic/synth.ts @@ -1,142 +1,174 @@ import type { SearchResult } from "./brave"; -export interface SynthesisTokens { +export interface SynthTokens { in: number; out: number; } -export interface SynthesisResult { +export interface SynthResponse { answer: string; confidence: number; - tokens: SynthesisTokens; + tokens: SynthTokens; model: string; } -export interface SynthesisOptions { +export interface SynthOptions { apiKey?: string; endpoint?: string; model?: string; - timeoutMs?: number; temperature?: number; + fetchImpl?: typeof fetch; signal?: AbortSignal; } -export type SynthesisErrorCode = - | "CONFIGURATION_ERROR" - | "BAD_REQUEST" - | "UNAUTHORIZED" - | "RATE_LIMITED" - | "UPSTREAM_ERROR" - | "TIMEOUT" - | "NETWORK_ERROR" - | "INVALID_RESPONSE"; - -export class SynthesisError extends Error { - readonly status: number; - readonly code: SynthesisErrorCode; - readonly details?: unknown; - - constructor( - message: string, - options: { status: number; code: SynthesisErrorCode; details?: unknown }, - ) { +export class SynthAPIError extends Error { + public readonly status: number; + public readonly details?: unknown; + + constructor(message: string, status: number, details?: unknown) { super(message); - this.name = "SynthesisError"; - this.status = options.status; - this.code = options.code; - this.details = options.details; + this.name = "SynthAPIError"; + this.status = status; + this.details = details; } } +type JsonRecord = Record; + const DEFAULT_MODEL = "gpt-4o-mini"; const SYSTEM_PROMPT = [ - "You are a synthesis engine for downstream agents.", - "Return strict JSON only with this schema:", - '{"answer":"string","confidence":0..1}', + "You are Queryx synthesis engine.", + "Return strictly valid JSON only.", + 'Schema: {"answer": string, "confidence": number}', "Rules:", - "- concise, factual, no fluff", - "- mention uncertainty briefly when evidence is weak", - "- confidence must reflect source support and consistency", + "- concise answer, 2-6 sentences", + "- no markdown", + "- confidence in [0,1]", + "- if sources conflict, acknowledge uncertainty briefly", ].join("\n"); -function clamp01(value: number): number { - if (!Number.isFinite(value)) return 0; - return Math.max(0, Math.min(1, value)); +function isRecord(value: unknown): value is JsonRecord { + return typeof value === "object" && value !== null; } -function normalizeText(value: string): string { - return value.replace(/\s+/g, " ").trim(); +export function clampConfidence(value: number): number { + if (!Number.isFinite(value)) return 0; + return Math.max(0, Math.min(1, value)); } -function estimateTokenCount(text: string): number { - const cleaned = text.trim(); - if (!cleaned) return 0; - return Math.max(1, Math.ceil(cleaned.length / 4)); +export function estimateTokenCount(text: string): number { + const trimmed = text.trim(); + if (!trimmed) return 0; + return Math.ceil(trimmed.length / 4); } -function buildSourceBlock(results: SearchResult[]): string { - return results - .slice(0, 10) - .map((result, index) => { - return [ - `[${index + 1}] ${normalizeText(result.title)}`, - `URL: ${result.url}`, - `Domain: ${result.domain}`, - `Snippet: ${normalizeText(result.description || "")}`, - result.publishedAt ? `Published: ${result.publishedAt}` : "", - ] - .filter(Boolean) - .join("\n"); - }) - .join("\n\n"); -} +function extractMessageContent(raw: unknown): string { + if (typeof raw === "string") return raw; -function extractMessageContent(payload: unknown): string { - const content = (payload as { choices?: Array<{ message?: { content?: unknown } }> }) - ?.choices?.[0]?.message?.content; - - if (typeof content === "string") return content; - - if (Array.isArray(content)) { - return content - .map((part) => { - if (typeof part === "string") return part; - if (part && typeof part === "object" && "text" in part) { - const text = (part as { text?: unknown }).text; - return typeof text === "string" ? text : ""; - } - return ""; - }) - .join(""); + if (Array.isArray(raw)) { + const parts: string[] = []; + for (const item of raw) { + if (!isRecord(item)) continue; + const text = item.text; + if (typeof text === "string") parts.push(text); + } + return parts.join("\n").trim(); } return ""; } -function parseJsonObject(raw: string): Record { - const trimmed = raw.trim(); - if (!trimmed) return {}; +function tryParseJsonObject(raw: string): JsonRecord | undefined { + const text = raw.trim(); + if (!text) return undefined; try { - return JSON.parse(trimmed) as Record; + const parsed = JSON.parse(text); + return isRecord(parsed) ? parsed : undefined; } catch { - const match = trimmed.match(/\{[\s\S]*\}/); - if (!match) return {}; + const start = text.indexOf("{"); + const end = text.lastIndexOf("}"); + if (start < 0 || end <= start) return undefined; try { - return JSON.parse(match[0]) as Record; + const parsed = JSON.parse(text.slice(start, end + 1)); + return isRecord(parsed) ? parsed : undefined; } catch { - return {}; + return undefined; } } } -async function parseErrorDetails(response: Response): Promise { +function parseUsageTokens(payload: unknown): SynthTokens | undefined { + if (!isRecord(payload)) return undefined; + const usage = payload.usage; + if (!isRecord(usage)) return undefined; + + const promptTokens = + typeof usage.prompt_tokens === "number" + ? usage.prompt_tokens + : Number(usage.prompt_tokens); + const completionTokens = + typeof usage.completion_tokens === "number" + ? usage.completion_tokens + : Number(usage.completion_tokens); + + if (!Number.isFinite(promptTokens) || !Number.isFinite(completionTokens)) { + return undefined; + } + + return { + in: Math.max(0, Math.round(promptTokens)), + out: Math.max(0, Math.round(completionTokens)), + }; +} + +export function computeConfidence( + _query: string, + results: SearchResult[], + answer: string, +): number { + if (results.length === 0) return 0.1; + + const uniqueDomains = new Set( + results.map((r) => r.sourceDomain).filter(Boolean), + ).size; + + let confidence = 0.25; + confidence += Math.min(results.length, 8) * 0.06; + confidence += Math.min(uniqueDomains, 5) * 0.035; + + const hasRecentSource = results.some((r) => { + if (!r.publishedAt) return false; + const t = new Date(r.publishedAt).getTime(); + if (Number.isNaN(t)) return false; + const ageDays = (Date.now() - t) / (1000 * 60 * 60 * 24); + return ageDays >= 0 && ageDays <= 30; + }); + + if (hasRecentSource) confidence += 0.08; + if (answer.trim().length < 80) confidence -= 0.05; + + return clampConfidence(confidence); +} + +function buildSourcePayload(results: SearchResult[]): unknown[] { + return results.slice(0, 12).map((r) => ({ + title: r.title, + url: r.url, + domain: r.sourceDomain, + description: r.description, + publishedAt: r.publishedAt, + })); +} + +async function parseErrorBody(response: Response): Promise { const contentType = response.headers.get("content-type") ?? ""; try { - if (contentType.includes("application/json")) return await response.json(); - const text = await response.text(); - return text || undefined; + if (contentType.includes("application/json")) { + return await response.json(); + } + return await response.text(); } catch { return undefined; } @@ -145,165 +177,122 @@ async function parseErrorDetails(response: Response): Promise { export async function synthesizeAnswer( query: string, results: SearchResult[], - options: SynthesisOptions = {}, -): Promise { + options: SynthOptions = {}, +): Promise { + const normalizedQuery = query.trim(); + if (!normalizedQuery) { + return { + answer: "", + confidence: 0, + tokens: { in: 0, out: 0 }, + model: options.model ?? DEFAULT_MODEL, + }; + } + const apiKey = options.apiKey ?? process.env.OPENAI_API_KEY; if (!apiKey) { - throw new SynthesisError("Missing OpenAI API key", { - status: 500, - code: "CONFIGURATION_ERROR", - }); + throw new SynthAPIError("Missing OPENAI_API_KEY", 0); } - const endpoint = options.endpoint ?? process.env.OPENAI_CHAT_COMPLETIONS_ENDPOINT; + const endpoint = options.endpoint ?? process.env.OPENAI_CHAT_COMPLETIONS_URL; if (!endpoint) { - throw new SynthesisError("Missing OpenAI completions endpoint", { - status: 500, - code: "CONFIGURATION_ERROR", - }); + throw new SynthAPIError("Missing OPENAI_CHAT_COMPLETIONS_URL", 0); } - const model = options.model ?? DEFAULT_MODEL; - const timeoutMs = options.timeoutMs ?? 10_000; - const temperature = options.temperature ?? 0.2; - - const sourceBlock = buildSourceBlock(results); - const userPrompt = [ - `Query: ${normalizeText(query)}`, - "", - "Sources:", - sourceBlock || "(no sources)", - "", - "Return JSON only.", - ].join("\n"); - - const body = { + const model = options.model ?? process.env.OPENAI_MODEL ?? DEFAULT_MODEL; + + const requestBody = { model, - temperature, + temperature: options.temperature ?? 0.2, response_format: { type: "json_object" }, messages: [ { role: "system", content: SYSTEM_PROMPT }, - { role: "user", content: userPrompt }, + { + role: "user", + content: JSON.stringify({ + query: normalizedQuery, + sources: buildSourcePayload(results), + }), + }, ], }; - const controller = new AbortController(); - const timeoutHandle = setTimeout(() => controller.abort(), timeoutMs); - - if (options.signal) { - if (options.signal.aborted) controller.abort(); - options.signal.addEventListener("abort", () => controller.abort(), { once: true }); - } + const fetchImpl = options.fetchImpl ?? fetch; + let response: Response; try { - const response = await fetch(endpoint, { + response = await fetchImpl(endpoint, { method: "POST", + signal: options.signal, headers: { "content-type": "application/json", - Authorization: `Bearer ${apiKey}`, + authorization: `Bearer ${apiKey}`, }, - body: JSON.stringify(body), - signal: controller.signal, + body: JSON.stringify(requestBody), }); - - if (!response.ok) { - const details = await parseErrorDetails(response); - - if (response.status === 400) { - throw new SynthesisError("Synthesis request rejected", { - status: 400, - code: "BAD_REQUEST", - details, - }); - } - - if (response.status === 401 || response.status === 403) { - throw new SynthesisError("Synthesis unauthorized", { - status: response.status, - code: "UNAUTHORIZED", - details, - }); - } - - if (response.status === 429) { - throw new SynthesisError("Synthesis rate limited", { - status: 429, - code: "RATE_LIMITED", - details, - }); - } - - throw new SynthesisError("Synthesis upstream error", { - status: response.status, - code: "UPSTREAM_ERROR", - details, - }); - } - - let payload: unknown; - try { - payload = await response.json(); - } catch { - throw new SynthesisError("Invalid JSON from synthesis model", { - status: 502, - code: "INVALID_RESPONSE", - }); - } - - const rawContent = extractMessageContent(payload); - const parsed = parseJsonObject(rawContent); - - const answerRaw = - typeof parsed.answer === "string" && parsed.answer.trim() - ? parsed.answer - : "Insufficient evidence in retrieved sources."; - - const confidenceRaw = - typeof parsed.confidence === "number" - ? parsed.confidence - : Number.parseFloat(String(parsed.confidence ?? "0")); - - const confidence = clamp01(confidenceRaw); - - const usage = payload as { - usage?: { prompt_tokens?: number; completion_tokens?: number }; - model?: string; - }; - - const inTokens = - typeof usage.usage?.prompt_tokens === "number" - ? usage.usage.prompt_tokens - : estimateTokenCount(`${SYSTEM_PROMPT}\n${userPrompt}`); - - const outTokens = - typeof usage.usage?.completion_tokens === "number" - ? usage.usage.completion_tokens - : estimateTokenCount(answerRaw); - - return { - answer: normalizeText(answerRaw), - confidence, - tokens: { in: Math.max(0, inTokens), out: Math.max(0, outTokens) }, - model: usage.model ?? model, - }; } catch (error) { - if (error instanceof SynthesisError) throw error; + throw new SynthAPIError("Failed to reach synthesis model endpoint", 0, error); + } - if (error instanceof Error && error.name === "AbortError") { - throw new SynthesisError("Synthesis request timed out", { - status: 504, - code: "TIMEOUT", - }); - } + if (!response.ok) { + throw new SynthAPIError( + `Synthesis request failed with status ${response.status}`, + response.status, + await parseErrorBody(response), + ); + } - throw new SynthesisError("Synthesis network error", { - status: 502, - code: "NETWORK_ERROR", - details: error, - }); - } finally { - clearTimeout(timeoutHandle); + let payload: unknown; + try { + payload = await response.json(); + } catch (error) { + throw new SynthAPIError("Synthesis endpoint returned invalid JSON", 0, error); } + + const modelName = + isRecord(payload) && typeof payload.model === "string" ? payload.model : model; + + const rawContent = (() => { + if (!isRecord(payload)) return ""; + const choices = payload.choices; + if (!Array.isArray(choices) || choices.length === 0) return ""; + const first = choices[0]; + if (!isRecord(first)) return ""; + const message = first.message; + if (!isRecord(message)) return ""; + return extractMessageContent(message.content); + })(); + + const parsed = tryParseJsonObject(rawContent); + const answer = + parsed && typeof parsed.answer === "string" && parsed.answer.trim().length > 0 + ? parsed.answer.trim() + : rawContent.trim() || "No synthesis available."; + + const modelConfidenceRaw = parsed?.confidence; + const modelConfidence = + typeof modelConfidenceRaw === "number" + ? modelConfidenceRaw + : Number(modelConfidenceRaw); + + const confidence = Number.isFinite(modelConfidence) + ? clampConfidence(modelConfidence) + : computeConfidence(normalizedQuery, results, answer); + + const usageTokens = parseUsageTokens(payload); + const tokens = + usageTokens ?? + ({ + in: estimateTokenCount(JSON.stringify(requestBody)), + out: estimateTokenCount(answer), + } satisfies SynthTokens); + + return { + answer, + confidence, + tokens, + model: modelName, + }; } -export type { SearchResult }; \ No newline at end of file +export default synthesizeAnswer; \ No newline at end of file From 8dcc5d9fee23d6ba14fde2a38b7ba13d690438d6 Mon Sep 17 00:00:00 2001 From: Jackson Date: Wed, 4 Mar 2026 16:29:31 +0000 Subject: [PATCH 08/13] feat: implement task 0x9a74dfdbbaaf87c2d7 via TaskMarket agent #24583 --- src/logic/brave.ts | 307 +++++++++++++++++----------------------- src/logic/synth.ts | 341 +++++++++++++++++++-------------------------- src/logic/types.ts | 17 ++- 3 files changed, 286 insertions(+), 379 deletions(-) diff --git a/src/logic/brave.ts b/src/logic/brave.ts index 9d183fb..7f5d81b 100644 --- a/src/logic/brave.ts +++ b/src/logic/brave.ts @@ -1,236 +1,189 @@ -export type Freshness = "day" | "week" | "month"; - -export interface SearchResult { - title: string; - url: string; - description: string; - sourceDomain: string; - publishedAt?: string; - score?: number; -} +import type { Freshness, SearchResult } from "./types"; -export interface BraveSearchOptions { - apiKey?: string; - endpoint?: string; - fetchImpl?: typeof fetch; - signal?: AbortSignal; +export interface BraveSearchParams { + query: string; count?: number; - offset?: number; + freshness?: Freshness; country?: string; language?: string; - freshness?: Freshness; + safeSearch?: "off" | "moderate" | "strict"; + timeoutMs?: number; + signal?: AbortSignal; } -type JsonRecord = Record; +export class BraveApiError extends Error { + status: number; + body?: string; -const FRESHNESS_PARAM_MAP: Record = { - day: "pd", - week: "pw", - month: "pm", -}; - -export class BraveAPIError extends Error { - public readonly status: number; - public readonly details?: unknown; - - constructor(message: string, status: number, details?: unknown) { + constructor(message: string, status = 500, body?: string) { super(message); - this.name = "BraveAPIError"; + this.name = "BraveApiError"; this.status = status; - this.details = details; + this.body = body; } } -export class BraveRateLimitError extends BraveAPIError { - public readonly retryAfterSeconds?: number; +export class BraveRateLimitError extends BraveApiError { + retryAfterSeconds: number | null; - constructor(message: string, retryAfterSeconds?: number, details?: unknown) { - super(message, 429, details); + constructor(retryAfterSeconds: number | null, body?: string) { + super("Brave API rate limit exceeded", 429, body); this.name = "BraveRateLimitError"; this.retryAfterSeconds = retryAfterSeconds; } } -function isRecord(value: unknown): value is JsonRecord { - return typeof value === "object" && value !== null; -} - -function parsePublishedAt(raw: unknown): string | undefined { - if (typeof raw === "number" && Number.isFinite(raw)) { - const milliseconds = raw > 1_000_000_000_000 ? raw : raw * 1000; - const date = new Date(milliseconds); - return Number.isNaN(date.getTime()) ? undefined : date.toISOString(); - } - - if (typeof raw === "string" && raw.trim().length > 0) { - const date = new Date(raw); - return Number.isNaN(date.getTime()) ? undefined : date.toISOString(); - } +const FRESHNESS_MAP: Record = { + day: "pd", + week: "pw", + month: "pm", +}; - return undefined; -} +function normalizeDomain(input: string): string { + const cleaned = input.trim().toLowerCase(); + if (!cleaned) return ""; -function getDomainFromUrl(rawUrl: string): string | undefined { try { - const parsed = new URL(rawUrl); - return parsed.hostname.replace(/^www\./i, "").toLowerCase(); + const parsed = new URL(cleaned.startsWith("http") ? cleaned : `https://${cleaned}`); + return parsed.hostname.replace(/^www\./, ""); } catch { - return undefined; + return cleaned.replace(/^www\./, ""); } } -function clamp01(value: number): number { - if (!Number.isFinite(value)) return 0; - return Math.max(0, Math.min(1, value)); -} - -async function parseErrorBody(response: Response): Promise { - const contentType = response.headers.get("content-type") ?? ""; - try { - if (contentType.includes("application/json")) { - return await response.json(); - } - return await response.text(); - } catch { - return undefined; - } +function toIsoDate(input: unknown): string | undefined { + if (typeof input !== "string" || input.trim().length === 0) return undefined; + const timestamp = Date.parse(input); + if (Number.isNaN(timestamp)) return undefined; + return new Date(timestamp).toISOString(); } -export function mapFreshnessParam(freshness: Freshness): string { - return FRESHNESS_PARAM_MAP[freshness]; +function parseResult(item: any): SearchResult | null { + const url = typeof item?.url === "string" ? item.url.trim() : ""; + const title = typeof item?.title === "string" ? item.title.trim() : ""; + const snippet = typeof item?.description === "string" ? item.description.trim() : ""; + + if (!url || !title) return null; + + const domainInput = + typeof item?.meta_url?.hostname === "string" && item.meta_url.hostname.trim().length > 0 + ? item.meta_url.hostname + : url; + + return { + title, + url, + snippet, + domain: normalizeDomain(domainInput), + publishedAt: toIsoDate(item?.page_fetched) ?? toIsoDate(item?.published) ?? toIsoDate(item?.date), + source: "brave", + }; } -export function normalizeBraveResults(payload: unknown): SearchResult[] { - if (!isRecord(payload)) return []; - - const web = payload.web; - if (!isRecord(web)) return []; - - const results = web.results; - if (!Array.isArray(results)) return []; +export function normalizeBraveResponse(payload: unknown): SearchResult[] { + const rawResults = (payload as any)?.web?.results; + if (!Array.isArray(rawResults)) return []; const normalized: SearchResult[] = []; - - for (const item of results) { - if (!isRecord(item)) continue; - - const url = typeof item.url === "string" ? item.url.trim() : ""; - const domain = getDomainFromUrl(url); - if (!domain) continue; - - const title = typeof item.title === "string" ? item.title.trim() : ""; - if (!title) continue; - - const description = - typeof item.description === "string" ? item.description.trim() : ""; - - const publishedAt = - parsePublishedAt(item.page_age) ?? - parsePublishedAt(item.published) ?? - parsePublishedAt(item.date) ?? - parsePublishedAt(item.age); - - const scoreRaw = - typeof item.score === "number" ? item.score : Number(item.score); - const score = Number.isFinite(scoreRaw) ? clamp01(scoreRaw) : undefined; - - normalized.push({ - title, - url, - description, - sourceDomain: domain, - ...(publishedAt ? { publishedAt } : {}), - ...(score !== undefined ? { score } : {}), - }); + for (const item of rawResults) { + const parsed = parseResult(item); + if (parsed) normalized.push(parsed); } - return normalized; } -export async function searchBrave( - query: string, - options: BraveSearchOptions = {}, -): Promise { - const q = query.trim(); - if (!q) return []; +async function readResponseTextSafe(response: Response): Promise { + try { + return await response.text(); + } catch { + return ""; + } +} - const apiKey = options.apiKey ?? process.env.BRAVE_API_KEY; - if (!apiKey) { - throw new BraveAPIError("Missing BRAVE_API_KEY", 0); +export async function searchBrave(params: BraveSearchParams): Promise { + const query = params.query?.trim(); + if (!query) { + throw new BraveApiError("Query is required", 400); } - const endpoint = options.endpoint ?? process.env.BRAVE_API_URL; - if (!endpoint) { - throw new BraveAPIError("Missing BRAVE_API_URL", 0); + const apiKey = process.env.BRAVE_API_KEY; + if (!apiKey) { + throw new BraveApiError("Missing BRAVE_API_KEY", 500); } - const params = new URLSearchParams(); - params.set("q", q); - params.set("source", "web"); - params.set("count", String(options.count ?? 10)); + const endpoint = process.env.BRAVE_API_URL ?? "https://api.search.brave.com/res/v1/web/search"; - if (typeof options.offset === "number" && options.offset >= 0) { - params.set("offset", String(options.offset)); - } - if (options.country) { - params.set("country", options.country); - } - if (options.language) { - params.set("search_lang", options.language); - } - if (options.freshness) { - params.set("freshness", mapFreshnessParam(options.freshness)); + const queryParams = new URLSearchParams(); + queryParams.set("q", query); + queryParams.set("count", String(params.count ?? 10)); + if (params.country) queryParams.set("country", params.country); + if (params.language) queryParams.set("search_lang", params.language); + if (params.safeSearch) queryParams.set("safesearch", params.safeSearch); + if (params.freshness) queryParams.set("freshness", FRESHNESS_MAP[params.freshness]); + + const url = `${endpoint}?${queryParams.toString()}`; + + const controller = new AbortController(); + let timeout: ReturnType | undefined; + let externalAbortListener: (() => void) | undefined; + + if (params.signal) { + if (params.signal.aborted) { + controller.abort(); + } else { + externalAbortListener = () => controller.abort(); + params.signal.addEventListener("abort", externalAbortListener, { once: true }); + } } - const target = `${endpoint}?${params.toString()}`; - const fetchImpl = options.fetchImpl ?? fetch; + if (params.timeoutMs && params.timeoutMs > 0) { + timeout = setTimeout(() => controller.abort(), params.timeoutMs); + } - let response: Response; try { - response = await fetchImpl(target, { + const response = await fetch(url, { method: "GET", - signal: options.signal, headers: { Accept: "application/json", "X-Subscription-Token": apiKey, }, + signal: controller.signal, }); - } catch (error) { - throw new BraveAPIError("Failed to reach Brave Search API", 0, error); - } - if (response.status === 429) { - const retryAfterHeader = response.headers.get("retry-after"); - const retryAfterSeconds = retryAfterHeader - ? Number(retryAfterHeader) - : undefined; - throw new BraveRateLimitError( - "Brave Search API rate limit exceeded", - Number.isFinite(retryAfterSeconds) ? retryAfterSeconds : undefined, - await parseErrorBody(response), - ); - } + if (response.status === 429) { + const retryAfterRaw = response.headers.get("retry-after"); + const retryAfterSeconds = retryAfterRaw ? Number.parseInt(retryAfterRaw, 10) : null; + const body = await readResponseTextSafe(response); + throw new BraveRateLimitError(Number.isNaN(retryAfterSeconds as number) ? null : retryAfterSeconds, body); + } - if (!response.ok) { - throw new BraveAPIError( - `Brave Search API request failed with status ${response.status}`, - response.status, - await parseErrorBody(response), - ); - } + if (!response.ok) { + const body = await readResponseTextSafe(response); + throw new BraveApiError(`Brave API request failed with status ${response.status}`, response.status, body); + } - let jsonPayload: unknown; - try { - jsonPayload = await response.json(); + let payload: unknown; + try { + payload = await response.json(); + } catch { + throw new BraveApiError("Invalid JSON from Brave API", response.status); + } + + return normalizeBraveResponse(payload); } catch (error) { - throw new BraveAPIError( - "Brave Search API returned invalid JSON", - response.status, - error, - ); + if (error instanceof BraveApiError) throw error; + if ((error as Error)?.name === "AbortError") { + throw new BraveApiError("Brave API request timed out or was aborted", 408); + } + throw new BraveApiError(`Brave API request failed: ${(error as Error)?.message ?? "Unknown error"}`, 500); + } finally { + if (timeout) clearTimeout(timeout); + if (externalAbortListener && params.signal) { + params.signal.removeEventListener("abort", externalAbortListener); + } } - - return normalizeBraveResults(jsonPayload); } +export const braveSearch = searchBrave; +export const search = searchBrave; export default searchBrave; \ No newline at end of file diff --git a/src/logic/synth.ts b/src/logic/synth.ts index af57c49..3133c85 100644 --- a/src/logic/synth.ts +++ b/src/logic/synth.ts @@ -1,176 +1,154 @@ -import type { SearchResult } from "./brave"; - -export interface SynthTokens { - in: number; - out: number; -} - -export interface SynthResponse { - answer: string; - confidence: number; - tokens: SynthTokens; - model: string; -} +import type { SearchResult, SynthesisResult } from "./types"; export interface SynthOptions { - apiKey?: string; - endpoint?: string; model?: string; temperature?: number; - fetchImpl?: typeof fetch; + maxTokens?: number; signal?: AbortSignal; } -export class SynthAPIError extends Error { - public readonly status: number; - public readonly details?: unknown; +export class SynthError extends Error { + status: number; + body?: string; - constructor(message: string, status: number, details?: unknown) { + constructor(message: string, status = 500, body?: string) { super(message); - this.name = "SynthAPIError"; + this.name = "SynthError"; this.status = status; - this.details = details; + this.body = body; } } -type JsonRecord = Record; - const DEFAULT_MODEL = "gpt-4o-mini"; - -const SYSTEM_PROMPT = [ - "You are Queryx synthesis engine.", - "Return strictly valid JSON only.", - 'Schema: {"answer": string, "confidence": number}', - "Rules:", - "- concise answer, 2-6 sentences", - "- no markdown", - "- confidence in [0,1]", - "- if sources conflict, acknowledge uncertainty briefly", -].join("\n"); - -function isRecord(value: unknown): value is JsonRecord { - return typeof value === "object" && value !== null; +const DEFAULT_SYSTEM_PROMPT = + "You are Queryx synthesis engine for downstream agents. Respond with strict JSON only: " + + '{"answer":"string","confidence":0.0}. ' + + "Rules: concise answer, no markdown, no preamble, ground claims in provided sources, " + + "state uncertainty when evidence is weak."; + +function clamp(value: number, min: number, max: number): number { + return Math.max(min, Math.min(max, value)); } export function clampConfidence(value: number): number { if (!Number.isFinite(value)) return 0; - return Math.max(0, Math.min(1, value)); + return clamp(value, 0, 1); } -export function estimateTokenCount(text: string): number { - const trimmed = text.trim(); - if (!trimmed) return 0; - return Math.ceil(trimmed.length / 4); +export function estimateTokens(text: string): number { + const length = text.trim().length; + if (length === 0) return 1; + return Math.max(1, Math.ceil(length / 4)); } -function extractMessageContent(raw: unknown): string { - if (typeof raw === "string") return raw; - - if (Array.isArray(raw)) { - const parts: string[] = []; - for (const item of raw) { - if (!isRecord(item)) continue; - const text = item.text; - if (typeof text === "string") parts.push(text); - } - return parts.join("\n").trim(); - } - - return ""; +function stripCodeFences(text: string): string { + const fenced = text.match(/```(?:json)?\s*([\s\S]*?)\s*```/i); + return fenced ? fenced[1] : text; } -function tryParseJsonObject(raw: string): JsonRecord | undefined { - const text = raw.trim(); - if (!text) return undefined; +function parseModelJSON(content: string): { answer?: string; confidence?: number } { + const cleaned = stripCodeFences(content).trim(); + if (!cleaned) return {}; try { - const parsed = JSON.parse(text); - return isRecord(parsed) ? parsed : undefined; + return JSON.parse(cleaned); } catch { - const start = text.indexOf("{"); - const end = text.lastIndexOf("}"); - if (start < 0 || end <= start) return undefined; - try { - const parsed = JSON.parse(text.slice(start, end + 1)); - return isRecord(parsed) ? parsed : undefined; - } catch { - return undefined; + const start = cleaned.indexOf("{"); + const end = cleaned.lastIndexOf("}"); + if (start >= 0 && end > start) { + try { + return JSON.parse(cleaned.slice(start, end + 1)); + } catch { + return {}; + } } + return {}; } } -function parseUsageTokens(payload: unknown): SynthTokens | undefined { - if (!isRecord(payload)) return undefined; - const usage = payload.usage; - if (!isRecord(usage)) return undefined; - - const promptTokens = - typeof usage.prompt_tokens === "number" - ? usage.prompt_tokens - : Number(usage.prompt_tokens); - const completionTokens = - typeof usage.completion_tokens === "number" - ? usage.completion_tokens - : Number(usage.completion_tokens); +function extractMessageContent(payload: any): string { + const content = payload?.choices?.[0]?.message?.content; + if (typeof content === "string") return content; + if (Array.isArray(content)) { + return content + .map((item) => { + if (typeof item === "string") return item; + if (typeof item?.text === "string") return item.text; + return ""; + }) + .join("\n"); + } + return ""; +} - if (!Number.isFinite(promptTokens) || !Number.isFinite(completionTokens)) { - return undefined; +function fallbackAnswer(results: SearchResult[]): string { + if (results.length === 0) { + return "Insufficient evidence to answer confidently from current search results."; } - return { - in: Math.max(0, Math.round(promptTokens)), - out: Math.max(0, Math.round(completionTokens)), - }; + const summary = results + .slice(0, 3) + .map((r) => (r.snippet?.trim().length ? r.snippet.trim() : r.title.trim())) + .join(" ") + .replace(/\s+/g, " ") + .trim(); + + return summary.length > 0 + ? summary.slice(0, 500) + : "Relevant sources were found, but they do not contain enough detail for a confident answer."; } -export function computeConfidence( - _query: string, - results: SearchResult[], - answer: string, -): number { - if (results.length === 0) return 0.1; +export function scoreConfidence(query: string, results: SearchResult[], answer: string): number { + let score = 0.1; - const uniqueDomains = new Set( - results.map((r) => r.sourceDomain).filter(Boolean), - ).size; + const uniqueDomains = new Set(results.map((r) => r.domain)).size; + score += Math.min(0.35, results.length * 0.08); + score += Math.min(0.2, uniqueDomains * 0.06); - let confidence = 0.25; - confidence += Math.min(results.length, 8) * 0.06; - confidence += Math.min(uniqueDomains, 5) * 0.035; + if (answer.trim().length >= 80) score += 0.15; const hasRecentSource = results.some((r) => { if (!r.publishedAt) return false; - const t = new Date(r.publishedAt).getTime(); - if (Number.isNaN(t)) return false; - const ageDays = (Date.now() - t) / (1000 * 60 * 60 * 24); - return ageDays >= 0 && ageDays <= 30; + const ts = Date.parse(r.publishedAt); + if (Number.isNaN(ts)) return false; + const daysOld = (Date.now() - ts) / 86_400_000; + return daysOld <= 30; }); + if (hasRecentSource) score += 0.1; - if (hasRecentSource) confidence += 0.08; - if (answer.trim().length < 80) confidence -= 0.05; + if (query.trim().length > 0 && answer.toLowerCase().includes(query.trim().split(/\s+/)[0].toLowerCase())) { + score += 0.05; + } - return clampConfidence(confidence); + return clampConfidence(score); } -function buildSourcePayload(results: SearchResult[]): unknown[] { - return results.slice(0, 12).map((r) => ({ +function buildUserPrompt(query: string, results: SearchResult[]): string { + const compactResults = results.slice(0, 8).map((r, index) => ({ + index: index + 1, title: r.title, url: r.url, - domain: r.sourceDomain, - description: r.description, - publishedAt: r.publishedAt, + domain: r.domain, + snippet: r.snippet, + publishedAt: r.publishedAt ?? null, })); + + return JSON.stringify( + { + query, + results: compactResults, + instructions: "Return only JSON: {answer: string, confidence: number between 0 and 1}", + }, + null, + 2, + ); } -async function parseErrorBody(response: Response): Promise { - const contentType = response.headers.get("content-type") ?? ""; +async function readResponseTextSafe(response: Response): Promise { try { - if (contentType.includes("application/json")) { - return await response.json(); - } return await response.text(); } catch { - return undefined; + return ""; } } @@ -178,121 +156,86 @@ export async function synthesizeAnswer( query: string, results: SearchResult[], options: SynthOptions = {}, -): Promise { - const normalizedQuery = query.trim(); - if (!normalizedQuery) { - return { - answer: "", - confidence: 0, - tokens: { in: 0, out: 0 }, - model: options.model ?? DEFAULT_MODEL, - }; - } - - const apiKey = options.apiKey ?? process.env.OPENAI_API_KEY; +): Promise { + const apiKey = process.env.OPENAI_API_KEY; if (!apiKey) { - throw new SynthAPIError("Missing OPENAI_API_KEY", 0); - } - - const endpoint = options.endpoint ?? process.env.OPENAI_CHAT_COMPLETIONS_URL; - if (!endpoint) { - throw new SynthAPIError("Missing OPENAI_CHAT_COMPLETIONS_URL", 0); + throw new SynthError("Missing OPENAI_API_KEY", 500); } const model = options.model ?? process.env.OPENAI_MODEL ?? DEFAULT_MODEL; + const endpoint = process.env.OPENAI_API_URL ?? "https://api.openai.com/v1/chat/completions"; + const systemPrompt = process.env.SYNTH_SYSTEM_PROMPT ?? DEFAULT_SYSTEM_PROMPT; + const userPrompt = buildUserPrompt(query, results); const requestBody = { model, temperature: options.temperature ?? 0.2, + max_tokens: options.maxTokens ?? 320, response_format: { type: "json_object" }, messages: [ - { role: "system", content: SYSTEM_PROMPT }, - { - role: "user", - content: JSON.stringify({ - query: normalizedQuery, - sources: buildSourcePayload(results), - }), - }, + { role: "system", content: systemPrompt }, + { role: "user", content: userPrompt }, ], }; - const fetchImpl = options.fetchImpl ?? fetch; let response: Response; - try { - response = await fetchImpl(endpoint, { + response = await fetch(endpoint, { method: "POST", - signal: options.signal, headers: { - "content-type": "application/json", - authorization: `Bearer ${apiKey}`, + Authorization: `Bearer ${apiKey}`, + "Content-Type": "application/json", }, body: JSON.stringify(requestBody), + signal: options.signal, }); } catch (error) { - throw new SynthAPIError("Failed to reach synthesis model endpoint", 0, error); + throw new SynthError(`Synthesis request failed: ${(error as Error)?.message ?? "Unknown error"}`, 500); } if (!response.ok) { - throw new SynthAPIError( - `Synthesis request failed with status ${response.status}`, - response.status, - await parseErrorBody(response), - ); + const body = await readResponseTextSafe(response); + throw new SynthError(`Synthesis API failed with status ${response.status}`, response.status, body); } - let payload: unknown; + let payload: any; try { payload = await response.json(); - } catch (error) { - throw new SynthAPIError("Synthesis endpoint returned invalid JSON", 0, error); + } catch { + throw new SynthError("Invalid JSON from synthesis API", response.status); } - const modelName = - isRecord(payload) && typeof payload.model === "string" ? payload.model : model; - - const rawContent = (() => { - if (!isRecord(payload)) return ""; - const choices = payload.choices; - if (!Array.isArray(choices) || choices.length === 0) return ""; - const first = choices[0]; - if (!isRecord(first)) return ""; - const message = first.message; - if (!isRecord(message)) return ""; - return extractMessageContent(message.content); - })(); - - const parsed = tryParseJsonObject(rawContent); - const answer = - parsed && typeof parsed.answer === "string" && parsed.answer.trim().length > 0 - ? parsed.answer.trim() - : rawContent.trim() || "No synthesis available."; - - const modelConfidenceRaw = parsed?.confidence; - const modelConfidence = - typeof modelConfidenceRaw === "number" - ? modelConfidenceRaw - : Number(modelConfidenceRaw); - - const confidence = Number.isFinite(modelConfidence) - ? clampConfidence(modelConfidence) - : computeConfidence(normalizedQuery, results, answer); - - const usageTokens = parseUsageTokens(payload); - const tokens = - usageTokens ?? - ({ - in: estimateTokenCount(JSON.stringify(requestBody)), - out: estimateTokenCount(answer), - } satisfies SynthTokens); + const content = extractMessageContent(payload); + const parsed = parseModelJSON(content); + + const answer = typeof parsed.answer === "string" && parsed.answer.trim().length > 0 ? parsed.answer.trim() : fallbackAnswer(results); + + const confidence = + typeof parsed.confidence === "number" + ? clampConfidence(parsed.confidence) + : scoreConfidence(query, results, answer); + + const promptTokens = + typeof payload?.usage?.prompt_tokens === "number" + ? payload.usage.prompt_tokens + : estimateTokens(systemPrompt) + estimateTokens(userPrompt); + + const completionTokens = + typeof payload?.usage?.completion_tokens === "number" + ? payload.usage.completion_tokens + : estimateTokens(content || answer); return { answer, confidence, - tokens, - model: modelName, + tokens: { + in: promptTokens, + out: completionTokens, + }, + model: typeof payload?.model === "string" && payload.model.length > 0 ? payload.model : model, }; } +export const synthesize = synthesizeAnswer; +export const synth = synthesizeAnswer; export default synthesizeAnswer; \ No newline at end of file diff --git a/src/logic/types.ts b/src/logic/types.ts index e191a36..9b7bd29 100644 --- a/src/logic/types.ts +++ b/src/logic/types.ts @@ -3,10 +3,21 @@ export type Freshness = "day" | "week" | "month"; export interface SearchResult { title: string; url: string; - description: string; - source: string; + snippet: string; domain: string; publishedAt?: string; - favicon?: string; score?: number; + source?: string; +} + +export interface TokenUsage { + in: number; + out: number; +} + +export interface SynthesisResult { + answer: string; + confidence: number; + tokens: TokenUsage; + model: string; } \ No newline at end of file From 3485fb745021cad01d9a981cd5ec18f368212f2a Mon Sep 17 00:00:00 2001 From: Jackson Date: Wed, 4 Mar 2026 16:35:35 +0000 Subject: [PATCH 09/13] feat: implement task 0x9a74dfdbbaaf87c2d7 via TaskMarket agent #24583 --- src/logic/__tests__/brave.test.ts | 86 +++++++ src/logic/brave.ts | 291 +++++++++++++----------- src/logic/cache.ts | 74 +++--- src/logic/rank.ts | 198 +++++++--------- src/logic/synth.ts | 362 +++++++++++++++++------------- src/logic/types.ts | 14 +- 6 files changed, 575 insertions(+), 450 deletions(-) create mode 100644 src/logic/__tests__/brave.test.ts diff --git a/src/logic/__tests__/brave.test.ts b/src/logic/__tests__/brave.test.ts new file mode 100644 index 0000000..06023d9 --- /dev/null +++ b/src/logic/__tests__/brave.test.ts @@ -0,0 +1,86 @@ +import { afterEach, beforeEach, describe, expect, it } from "bun:test"; +import { BraveApiError, normalizeBraveResponse, searchBrave } from "../brave"; + +const originalFetch = globalThis.fetch; + +describe("brave.ts", () => { + beforeEach(() => { + process.env.BRAVE_API_KEY = "test-key"; + }); + + afterEach(() => { + globalThis.fetch = originalFetch; + delete process.env.BRAVE_API_KEY; + }); + + it("normalises Brave response into SearchResult[]", async () => { + globalThis.fetch = (async () => + new Response( + JSON.stringify({ + web: { + results: [ + { + title: "First result", + url: "https://news.example.com/story?utm=1", + description: "This is the first test result description.", + page_age: "2026-02-28T08:00:00Z", + meta_url: { hostname: "news.example.com" }, + }, + { + title: "Second result", + url: "https://blog.example.org/post", + description: "This is the second test result description.", + }, + ], + }, + }), + { status: 200 }, + )) as typeof fetch; + + const results = await searchBrave("queryx", { + endpoint: "https://github.com/langoustine69/queryx", + }); + + expect(results).toHaveLength(2); + expect(results[0].title).toBe("First result"); + expect(results[0].domain).toBe("news.example.com"); + expect(results[0].publishedAt).toBe("2026-02-28T08:00:00.000Z"); + expect(results[1].domain).toBe("blog.example.org"); + }); + + it("throws typed rate-limit errors", async () => { + globalThis.fetch = (async () => + new Response( + JSON.stringify({ + error: { + message: "Rate limited", + code: "too_many_requests", + }, + }), + { + status: 429, + headers: { + "retry-after": "9", + }, + }, + )) as typeof fetch; + + try { + await searchBrave("queryx", { + endpoint: "https://github.com/langoustine69/queryx", + }); + throw new Error("Expected searchBrave to throw"); + } catch (error) { + expect(error).toBeInstanceOf(BraveApiError); + const typed = error as BraveApiError; + expect(typed.status).toBe(429); + expect(typed.code).toBe("RATE_LIMITED"); + expect(typed.retryAfterSeconds).toBe(9); + } + }); + + it("handles malformed payloads safely", () => { + const results = normalizeBraveResponse({ web: { results: "not-an-array" } }); + expect(results).toEqual([]); + }); +}); \ No newline at end of file diff --git a/src/logic/brave.ts b/src/logic/brave.ts index 7f5d81b..a275aac 100644 --- a/src/logic/brave.ts +++ b/src/logic/brave.ts @@ -1,189 +1,228 @@ import type { Freshness, SearchResult } from "./types"; -export interface BraveSearchParams { - query: string; +export interface BraveSearchOptions { + apiKey?: string; + endpoint?: string; count?: number; freshness?: Freshness; country?: string; - language?: string; - safeSearch?: "off" | "moderate" | "strict"; + searchLang?: string; timeoutMs?: number; - signal?: AbortSignal; } export class BraveApiError extends Error { status: number; - body?: string; - - constructor(message: string, status = 500, body?: string) { + code: string; + retryAfterSeconds?: number; + details?: unknown; + + constructor( + message: string, + status: number, + code: string, + retryAfterSeconds?: number, + details?: unknown, + ) { super(message); this.name = "BraveApiError"; this.status = status; - this.body = body; + this.code = code; + this.retryAfterSeconds = retryAfterSeconds; + this.details = details; } } -export class BraveRateLimitError extends BraveApiError { - retryAfterSeconds: number | null; - - constructor(retryAfterSeconds: number | null, body?: string) { - super("Brave API rate limit exceeded", 429, body); - this.name = "BraveRateLimitError"; - this.retryAfterSeconds = retryAfterSeconds; - } +interface BraveWebResult { + title?: string; + url?: string; + description?: string; + age?: string; + page_age?: string; + meta_url?: { + hostname?: string; + }; + profile?: { + long_name?: string; + }; } -const FRESHNESS_MAP: Record = { - day: "pd", - week: "pw", - month: "pm", -}; +interface BraveResponse { + web?: { + results?: BraveWebResult[]; + }; + error?: { + code?: string; + message?: string; + }; +} -function normalizeDomain(input: string): string { - const cleaned = input.trim().toLowerCase(); - if (!cleaned) return ""; +const DEFAULT_BRAVE_ENDPOINT = + process.env.BRAVE_SEARCH_ENDPOINT ?? "https://api.search.brave.com/res/v1/web/search"; + +function mapFreshness(freshness?: Freshness): string | undefined { + if (!freshness) return undefined; + switch (freshness) { + case "day": + return "pd"; + case "week": + return "pw"; + case "month": + return "pm"; + default: + return undefined; + } +} +function domainFromUrl(url: string): string { try { - const parsed = new URL(cleaned.startsWith("http") ? cleaned : `https://${cleaned}`); - return parsed.hostname.replace(/^www\./, ""); + const parsed = new URL(url); + return parsed.hostname.replace(/^www\./i, "").toLowerCase(); } catch { - return cleaned.replace(/^www\./, ""); + return ""; } } -function toIsoDate(input: unknown): string | undefined { - if (typeof input !== "string" || input.trim().length === 0) return undefined; - const timestamp = Date.parse(input); +function parsePublishedAt(value?: string): string | undefined { + if (!value) return undefined; + const timestamp = Date.parse(value); if (Number.isNaN(timestamp)) return undefined; return new Date(timestamp).toISOString(); } -function parseResult(item: any): SearchResult | null { - const url = typeof item?.url === "string" ? item.url.trim() : ""; - const title = typeof item?.title === "string" ? item.title.trim() : ""; - const snippet = typeof item?.description === "string" ? item.description.trim() : ""; +function normalizeOne(result: BraveWebResult): SearchResult | null { + if (!result || !result.url || !result.title) return null; - if (!url || !title) return null; + const description = typeof result.description === "string" ? result.description.trim() : ""; + if (description.length === 0) return null; - const domainInput = - typeof item?.meta_url?.hostname === "string" && item.meta_url.hostname.trim().length > 0 - ? item.meta_url.hostname - : url; + const domain = + (result.meta_url?.hostname || domainFromUrl(result.url)).replace(/^www\./i, "").toLowerCase(); return { - title, - url, - snippet, - domain: normalizeDomain(domainInput), - publishedAt: toIsoDate(item?.page_fetched) ?? toIsoDate(item?.published) ?? toIsoDate(item?.date), + title: result.title.trim(), + url: result.url, + description, + domain, + publishedAt: parsePublishedAt(result.page_age) ?? parsePublishedAt(result.age), source: "brave", }; } export function normalizeBraveResponse(payload: unknown): SearchResult[] { - const rawResults = (payload as any)?.web?.results; - if (!Array.isArray(rawResults)) return []; + if (!payload || typeof payload !== "object") return []; + const typed = payload as BraveResponse; + const results = typed.web?.results; + if (!Array.isArray(results)) return []; const normalized: SearchResult[] = []; - for (const item of rawResults) { - const parsed = parseResult(item); - if (parsed) normalized.push(parsed); + for (const raw of results) { + const item = normalizeOne(raw); + if (item) normalized.push(item); } return normalized; } -async function readResponseTextSafe(response: Response): Promise { +async function fetchWithTimeout( + input: string, + init: RequestInit, + timeoutMs: number, +): Promise { + const controller = new AbortController(); + const timeout = setTimeout(() => controller.abort(), timeoutMs); + try { - return await response.text(); - } catch { - return ""; + return await fetch(input, { + ...init, + signal: controller.signal, + }); + } finally { + clearTimeout(timeout); } } -export async function searchBrave(params: BraveSearchParams): Promise { - const query = params.query?.trim(); - if (!query) { - throw new BraveApiError("Query is required", 400); +function parseRetryAfterSeconds(headerValue: string | null): number | undefined { + if (!headerValue) return undefined; + const asNumber = Number(headerValue); + if (Number.isFinite(asNumber) && asNumber >= 0) return asNumber; + + const asDate = Date.parse(headerValue); + if (Number.isNaN(asDate)) return undefined; + const deltaMs = asDate - Date.now(); + return deltaMs > 0 ? Math.ceil(deltaMs / 1000) : 0; +} + +function safeJsonParse(raw: string): unknown { + try { + return JSON.parse(raw); + } catch { + return undefined; } +} + +export async function searchBrave( + query: string, + options: BraveSearchOptions = {}, +): Promise { + const trimmedQuery = query.trim(); + if (!trimmedQuery) return []; - const apiKey = process.env.BRAVE_API_KEY; + const apiKey = options.apiKey ?? process.env.BRAVE_API_KEY; if (!apiKey) { - throw new BraveApiError("Missing BRAVE_API_KEY", 500); + throw new BraveApiError("Missing Brave API key", 401, "MISSING_API_KEY"); } - const endpoint = process.env.BRAVE_API_URL ?? "https://api.search.brave.com/res/v1/web/search"; - - const queryParams = new URLSearchParams(); - queryParams.set("q", query); - queryParams.set("count", String(params.count ?? 10)); - if (params.country) queryParams.set("country", params.country); - if (params.language) queryParams.set("search_lang", params.language); - if (params.safeSearch) queryParams.set("safesearch", params.safeSearch); - if (params.freshness) queryParams.set("freshness", FRESHNESS_MAP[params.freshness]); + const endpoint = options.endpoint ?? DEFAULT_BRAVE_ENDPOINT; + const count = Math.max(1, Math.min(20, options.count ?? 10)); + const timeoutMs = Math.max(1000, options.timeoutMs ?? 12_000); - const url = `${endpoint}?${queryParams.toString()}`; + const url = new URL(endpoint); + url.searchParams.set("q", trimmedQuery); + url.searchParams.set("count", String(count)); + if (options.country) url.searchParams.set("country", options.country); + if (options.searchLang) url.searchParams.set("search_lang", options.searchLang); + const freshness = mapFreshness(options.freshness); + if (freshness) url.searchParams.set("freshness", freshness); - const controller = new AbortController(); - let timeout: ReturnType | undefined; - let externalAbortListener: (() => void) | undefined; - - if (params.signal) { - if (params.signal.aborted) { - controller.abort(); - } else { - externalAbortListener = () => controller.abort(); - params.signal.addEventListener("abort", externalAbortListener, { once: true }); - } + let response: Response; + try { + response = await fetchWithTimeout( + url.toString(), + { + method: "GET", + headers: { + Accept: "application/json", + "X-Subscription-Token": apiKey, + }, + }, + timeoutMs, + ); + } catch (error) { + throw new BraveApiError( + error instanceof Error ? error.message : "Brave request failed", + 0, + "NETWORK_ERROR", + undefined, + error, + ); } - if (params.timeoutMs && params.timeoutMs > 0) { - timeout = setTimeout(() => controller.abort(), params.timeoutMs); - } + const rawText = await response.text(); + const payload = safeJsonParse(rawText) as BraveResponse | undefined; - try { - const response = await fetch(url, { - method: "GET", - headers: { - Accept: "application/json", - "X-Subscription-Token": apiKey, - }, - signal: controller.signal, - }); + if (!response.ok) { + const retryAfterSeconds = parseRetryAfterSeconds(response.headers.get("retry-after")); + const apiMessage = payload?.error?.message; + const message = apiMessage || `Brave API error (${response.status})`; + const code = + response.status === 429 + ? "RATE_LIMITED" + : payload?.error?.code || (response.status >= 500 ? "UPSTREAM_ERROR" : "REQUEST_ERROR"); - if (response.status === 429) { - const retryAfterRaw = response.headers.get("retry-after"); - const retryAfterSeconds = retryAfterRaw ? Number.parseInt(retryAfterRaw, 10) : null; - const body = await readResponseTextSafe(response); - throw new BraveRateLimitError(Number.isNaN(retryAfterSeconds as number) ? null : retryAfterSeconds, body); - } - - if (!response.ok) { - const body = await readResponseTextSafe(response); - throw new BraveApiError(`Brave API request failed with status ${response.status}`, response.status, body); - } - - let payload: unknown; - try { - payload = await response.json(); - } catch { - throw new BraveApiError("Invalid JSON from Brave API", response.status); - } - - return normalizeBraveResponse(payload); - } catch (error) { - if (error instanceof BraveApiError) throw error; - if ((error as Error)?.name === "AbortError") { - throw new BraveApiError("Brave API request timed out or was aborted", 408); - } - throw new BraveApiError(`Brave API request failed: ${(error as Error)?.message ?? "Unknown error"}`, 500); - } finally { - if (timeout) clearTimeout(timeout); - if (externalAbortListener && params.signal) { - params.signal.removeEventListener("abort", externalAbortListener); - } + throw new BraveApiError(message, response.status, code, retryAfterSeconds, payload); } + + return normalizeBraveResponse(payload); } -export const braveSearch = searchBrave; -export const search = searchBrave; +export type { Freshness, SearchResult }; export default searchBrave; \ No newline at end of file diff --git a/src/logic/cache.ts b/src/logic/cache.ts index 7839c84..f9fbbbd 100644 --- a/src/logic/cache.ts +++ b/src/logic/cache.ts @@ -5,7 +5,6 @@ export interface CacheStats { misses: number; hitRate: number; size: number; - ttlSeconds: number; } interface CacheEntry { @@ -13,22 +12,19 @@ interface CacheEntry { expiresAt: number; } -function parseTtlSeconds(raw: string | undefined, fallback: number): number { - const n = Number(raw); - return Number.isFinite(n) && n > 0 ? n : fallback; +function getDefaultTtlSeconds(): number { + const parsed = Number(process.env.CACHE_TTL_SECONDS); + if (!Number.isFinite(parsed) || parsed <= 0) return 300; + return parsed; } -export const DEFAULT_CACHE_TTL_SECONDS = parseTtlSeconds( - process.env.CACHE_TTL_SECONDS, - 300, -); - export function normalizeQuery(query: string): string { return query.trim().toLowerCase().replace(/\s+/g, " "); } -export function getCacheKey(query: string): string { - return createHash("sha256").update(normalizeQuery(query)).digest("hex"); +export function hashQuery(query: string): string { + const normalized = normalizeQuery(query); + return createHash("sha256").update(normalized).digest("hex"); } export class InMemoryCache { @@ -37,21 +33,21 @@ export class InMemoryCache { private misses = 0; private readonly defaultTtlSeconds: number; - constructor(defaultTtlSeconds: number = DEFAULT_CACHE_TTL_SECONDS) { - this.defaultTtlSeconds = defaultTtlSeconds > 0 ? defaultTtlSeconds : 300; + constructor(ttlSeconds: number = getDefaultTtlSeconds()) { + this.defaultTtlSeconds = ttlSeconds; } - private purgeExpired(): void { + private pruneExpired(): void { const now = Date.now(); for (const [key, entry] of this.store.entries()) { - if (entry.expiresAt <= now) { - this.store.delete(key); - } + if (entry.expiresAt <= now) this.store.delete(key); } } - get(key: string): T | undefined { + get(query: string): T | undefined { + const key = hashQuery(query); const entry = this.store.get(key); + if (!entry) { this.misses += 1; return undefined; @@ -67,10 +63,15 @@ export class InMemoryCache { return entry.value; } - set(key: string, value: T, ttlSeconds: number = this.defaultTtlSeconds): void { - const safeTtl = Number.isFinite(ttlSeconds) && ttlSeconds > 0 ? ttlSeconds : 0; - const expiresAt = Date.now() + safeTtl * 1000; - this.store.set(key, { value, expiresAt }); + set(query: string, value: T, ttlSeconds?: number): void { + const key = hashQuery(query); + const ttl = ttlSeconds ?? this.defaultTtlSeconds; + const ttlMs = Math.max(1, Math.round(ttl * 1000)); + + this.store.set(key, { + value, + expiresAt: Date.now() + ttlMs, + }); } clear(): void { @@ -80,42 +81,31 @@ export class InMemoryCache { } stats(): CacheStats { - this.purgeExpired(); + this.pruneExpired(); const total = this.hits + this.misses; return { hits: this.hits, misses: this.misses, hitRate: total === 0 ? 0 : this.hits / total, size: this.store.size, - ttlSeconds: this.defaultTtlSeconds, }; } } -const cache = new InMemoryCache(DEFAULT_CACHE_TTL_SECONDS); +const sharedCache = new InMemoryCache(); -export function get(key: string): T | undefined { - return cache.get(key) as T | undefined; +export function get(query: string): T | undefined { + return sharedCache.get(query) as T | undefined; } -export function set(key: string, value: T, ttlSeconds?: number): void { - cache.set(key, value, ttlSeconds); +export function set(query: string, value: T, ttlSeconds?: number): void { + sharedCache.set(query, value, ttlSeconds); } export function stats(): CacheStats { - return cache.stats(); + return sharedCache.stats(); } export function clear(): void { - cache.clear(); -} - -export default { - get, - set, - stats, - clear, - getCacheKey, - normalizeQuery, - InMemoryCache, -}; \ No newline at end of file + sharedCache.clear(); +} \ No newline at end of file diff --git a/src/logic/rank.ts b/src/logic/rank.ts index f395caa..3c7dce8 100644 --- a/src/logic/rank.ts +++ b/src/logic/rank.ts @@ -1,162 +1,134 @@ import type { SearchResult } from "./types"; -const TRACKING_QUERY_PARAMS = new Set([ - "utm_source", - "utm_medium", - "utm_campaign", - "utm_term", - "utm_content", - "gclid", - "fbclid", - "mc_cid", - "mc_eid", - "ref", - "source", -]); - export interface RankOptions { maxPerDomain?: number; - minDescriptionLength?: number; + maxResults?: number; + minQualityScore?: number; now?: Date; } -export type RankedSearchResult = SearchResult & { rankScore: number }; +export interface RankedSearchResult extends SearchResult { + rank: number; + recencyBoost: number; + qualityScore: number; +} + +function normalizeDomain(value: string): string { + return value.replace(/^www\./i, "").toLowerCase(); +} -function extractDomain(url: string): string { +function getDomain(result: SearchResult): string { + if (result.domain) return normalizeDomain(result.domain); try { - return new URL(url).hostname.replace(/^www\./i, "").toLowerCase(); + return normalizeDomain(new URL(result.url).hostname); } catch { return ""; } } -export function canonicalizeUrl(input: string): string { +function canonicalizeUrl(url: string): string { try { - const url = new URL(input); - url.hash = ""; - - for (const key of [...url.searchParams.keys()]) { - if (TRACKING_QUERY_PARAMS.has(key.toLowerCase())) { - url.searchParams.delete(key); - } - } - - const trimmedPath = url.pathname.replace(/\/+$/, ""); - url.pathname = trimmedPath || "/"; - return url.toString(); + const parsed = new URL(url); + parsed.hash = ""; + parsed.search = ""; + parsed.pathname = parsed.pathname.replace(/\/+$/, ""); + return `${normalizeDomain(parsed.hostname)}${parsed.pathname}`.toLowerCase(); } catch { - return input.trim(); + return url.trim().toLowerCase(); } } -export function recencyBoost(publishedAt: string | undefined, now: Date = new Date()): number { - if (!publishedAt) { - return 0; - } +export function getRecencyBoost(publishedAt?: string, now = new Date()): number { + if (!publishedAt) return 0; + const publishedMs = Date.parse(publishedAt); + if (Number.isNaN(publishedMs)) return 0; - const date = new Date(publishedAt); - if (Number.isNaN(date.getTime())) { - return 0; - } - - const ageMs = now.getTime() - date.getTime(); - if (ageMs < 0) { - return 0.2; - } - - const ageDays = ageMs / (24 * 60 * 60 * 1000); + const ageDays = (now.getTime() - publishedMs) / (1000 * 60 * 60 * 24); + if (ageDays < 0) return 0.18; if (ageDays <= 1) return 0.25; - if (ageDays <= 7) return 0.16; - if (ageDays <= 30) return 0.08; - if (ageDays <= 90) return 0.03; + if (ageDays <= 7) return 0.18; + if (ageDays <= 30) return 0.1; + if (ageDays <= 90) return 0.05; + if (ageDays <= 365) return 0.02; return 0; } -export function isLowQualityResult(result: SearchResult, minDescriptionLength = 40): boolean { - if (!result.title?.trim()) return true; - if (!result.url?.trim()) return true; - if (!result.description?.trim()) return true; - if (result.title.trim().length < 4) return true; - if (result.description.trim().length < minDescriptionLength) return true; - if (!/^https?:\/\//i.test(result.url.trim())) return true; - if (!extractDomain(result.url)) return true; - return false; -} +export function qualityScore(result: SearchResult): number { + const titleLen = result.title.trim().length; + const descLen = result.description.trim().length; + const hasValidUrl = /^https?:\/\//i.test(result.url); -function qualityScore(result: SearchResult, now: Date): number { - const base = typeof result.score === "number" ? result.score : 0.5; - const descriptionBoost = Math.min(0.2, result.description.trim().length / 500); - const titleBoost = Math.min(0.1, result.title.trim().length / 120); - const httpsBoost = result.url.startsWith("https://") ? 0.03 : 0; - return base + descriptionBoost + titleBoost + httpsBoost + recencyBoost(result.publishedAt, now); + const titleScore = Math.max(0, Math.min(1, titleLen / 80)); + const descScore = Math.max(0, Math.min(1, descLen / 180)); + const urlBonus = hasValidUrl ? 0.05 : -0.15; + const domainPenalty = result.domain ? 0 : 0.05; + + const score = titleScore * 0.45 + descScore * 0.5 + urlBonus - domainPenalty; + return Math.max(0, Math.min(1, score)); } -function normalizeResult(result: SearchResult): SearchResult { - const domain = result.domain?.trim().toLowerCase() || extractDomain(result.url); - const source = result.source?.trim() || domain; - return { - ...result, - domain, - source, - }; +export function isLowQuality(result: SearchResult, minQualityScore = 0.3): boolean { + if (!result.title || !result.url || !result.description) return true; + if (result.title.trim().length < 6) return true; + if (result.description.trim().length < 20) return true; + return qualityScore(result) < minQualityScore; } -export function rankResults(results: SearchResult[], options: RankOptions = {}): RankedSearchResult[] { - const maxPerDomain = options.maxPerDomain ?? 2; - const minDescriptionLength = options.minDescriptionLength ?? 40; +export function rankResults( + results: SearchResult[], + options: RankOptions = {}, +): RankedSearchResult[] { + const maxPerDomain = Math.max(1, options.maxPerDomain ?? 2); + const maxResults = Math.max(1, options.maxResults ?? 10); + const minQuality = options.minQualityScore ?? 0.3; const now = options.now ?? new Date(); - const deduped = new Map(); + const seenCanonical = new Set(); + const prepared: RankedSearchResult[] = []; + + for (const item of results) { + const domain = getDomain(item); + const candidate: SearchResult = { ...item, domain }; - for (const raw of results) { - const result = normalizeResult(raw); - if (isLowQualityResult(result, minDescriptionLength)) { - continue; - } + if (isLowQuality(candidate, minQuality)) continue; - const canonicalUrl = canonicalizeUrl(result.url); - const existing = deduped.get(canonicalUrl); + const canonical = canonicalizeUrl(candidate.url); + if (seenCanonical.has(canonical)) continue; + seenCanonical.add(canonical); - if (!existing) { - deduped.set(canonicalUrl, result); - continue; - } + const recency = getRecencyBoost(candidate.publishedAt, now); + const qScore = qualityScore(candidate); + const base = candidate.score ?? 0.5; + const rank = Math.max(0, Math.min(1.5, base * 0.65 + qScore * 0.25 + recency)); - if (qualityScore(result, now) > qualityScore(existing, now)) { - deduped.set(canonicalUrl, result); - } + prepared.push({ + ...candidate, + rank, + recencyBoost: recency, + qualityScore: qScore, + score: rank, + }); } - const ranked = [...deduped.values()] - .map((result) => ({ - ...result, - rankScore: qualityScore(result, now), - })) - .sort((a, b) => b.rankScore - a.rankScore); + prepared.sort((a, b) => b.rank - a.rank); const domainCounts = new Map(); - const limited: RankedSearchResult[] = []; + const output: RankedSearchResult[] = []; - for (const result of ranked) { - const domain = result.domain || extractDomain(result.url); - if (!domain) { - continue; - } + for (const row of prepared) { + const domain = row.domain || ""; + const current = domainCounts.get(domain) ?? 0; + if (current >= maxPerDomain) continue; - const count = domainCounts.get(domain) ?? 0; - if (count >= maxPerDomain) { - continue; - } + domainCounts.set(domain, current + 1); + output.push(row); - domainCounts.set(domain, count + 1); - limited.push(result); + if (output.length >= maxResults) break; } - return limited; + return output; } -export const rankAndDedupResults = rankResults; - export type { SearchResult }; export default rankResults; \ No newline at end of file diff --git a/src/logic/synth.ts b/src/logic/synth.ts index 3133c85..d657b73 100644 --- a/src/logic/synth.ts +++ b/src/logic/synth.ts @@ -1,241 +1,291 @@ -import type { SearchResult, SynthesisResult } from "./types"; +import type { SearchResult } from "./types"; -export interface SynthOptions { +export interface SynthTokens { + in: number; + out: number; +} + +export interface SynthesisResult { + answer: string; + confidence: number; + tokens: SynthTokens; + model: string; +} + +export interface SynthesizeOptions { + apiKey?: string; + endpoint?: string; model?: string; - temperature?: number; - maxTokens?: number; - signal?: AbortSignal; + timeoutMs?: number; + maxInputResults?: number; } export class SynthError extends Error { status: number; - body?: string; + code: string; + details?: unknown; - constructor(message: string, status = 500, body?: string) { + constructor(message: string, status: number, code: string, details?: unknown) { super(message); this.name = "SynthError"; this.status = status; - this.body = body; + this.code = code; + this.details = details; } } -const DEFAULT_MODEL = "gpt-4o-mini"; -const DEFAULT_SYSTEM_PROMPT = - "You are Queryx synthesis engine for downstream agents. Respond with strict JSON only: " + - '{"answer":"string","confidence":0.0}. ' + - "Rules: concise answer, no markdown, no preamble, ground claims in provided sources, " + - "state uncertainty when evidence is weak."; - -function clamp(value: number, min: number, max: number): number { - return Math.max(min, Math.min(max, value)); -} +const DEFAULT_MODEL = process.env.SYNTH_MODEL ?? "gpt-4o-mini"; +const DEFAULT_ENDPOINT = + process.env.OPENAI_API_ENDPOINT ?? "https://api.openai.com/v1/chat/completions"; + +const SYSTEM_PROMPT = [ + "You are Queryx synthesis engine for downstream agents.", + "Answer strictly from provided sources.", + "Output strict JSON object only:", + '{"answer":"string","confidence":number}', + "Rules:", + "- Keep answer concise and factual.", + "- Include uncertainty when sources are weak.", + "- confidence must be between 0 and 1.", +].join("\n"); export function clampConfidence(value: number): number { if (!Number.isFinite(value)) return 0; - return clamp(value, 0, 1); + return Math.max(0, Math.min(1, value)); } -export function estimateTokens(text: string): number { - const length = text.trim().length; - if (length === 0) return 1; - return Math.max(1, Math.ceil(length / 4)); +export function estimateTokenCount(text: string): number { + const cleaned = text.trim(); + if (!cleaned) return 0; + return Math.max(1, Math.ceil(cleaned.length / 4)); } -function stripCodeFences(text: string): string { - const fenced = text.match(/```(?:json)?\s*([\s\S]*?)\s*```/i); - return fenced ? fenced[1] : text; +function normalizeDomain(input: string): string { + return input.replace(/^www\./i, "").toLowerCase(); } -function parseModelJSON(content: string): { answer?: string; confidence?: number } { - const cleaned = stripCodeFences(content).trim(); - if (!cleaned) return {}; - +function domainFromUrl(url: string): string { try { - return JSON.parse(cleaned); + return normalizeDomain(new URL(url).hostname); } catch { - const start = cleaned.indexOf("{"); - const end = cleaned.lastIndexOf("}"); - if (start >= 0 && end > start) { - try { - return JSON.parse(cleaned.slice(start, end + 1)); - } catch { - return {}; - } - } - return {}; - } -} - -function extractMessageContent(payload: any): string { - const content = payload?.choices?.[0]?.message?.content; - if (typeof content === "string") return content; - if (Array.isArray(content)) { - return content - .map((item) => { - if (typeof item === "string") return item; - if (typeof item?.text === "string") return item.text; - return ""; - }) - .join("\n"); + return ""; } - return ""; } -function fallbackAnswer(results: SearchResult[]): string { - if (results.length === 0) { - return "Insufficient evidence to answer confidently from current search results."; - } - - const summary = results - .slice(0, 3) - .map((r) => (r.snippet?.trim().length ? r.snippet.trim() : r.title.trim())) - .join(" ") - .replace(/\s+/g, " ") - .trim(); - - return summary.length > 0 - ? summary.slice(0, 500) - : "Relevant sources were found, but they do not contain enough detail for a confident answer."; +function isRecent(publishedAt?: string, withinDays = 30): boolean { + if (!publishedAt) return false; + const ms = Date.parse(publishedAt); + if (Number.isNaN(ms)) return false; + const ageMs = Date.now() - ms; + return ageMs >= 0 && ageMs <= withinDays * 24 * 60 * 60 * 1000; } -export function scoreConfidence(query: string, results: SearchResult[], answer: string): number { - let score = 0.1; - - const uniqueDomains = new Set(results.map((r) => r.domain)).size; - score += Math.min(0.35, results.length * 0.08); - score += Math.min(0.2, uniqueDomains * 0.06); - - if (answer.trim().length >= 80) score += 0.15; +export function computeHeuristicConfidence(results: SearchResult[]): number { + if (results.length === 0) return 0.12; - const hasRecentSource = results.some((r) => { - if (!r.publishedAt) return false; - const ts = Date.parse(r.publishedAt); - if (Number.isNaN(ts)) return false; - const daysOld = (Date.now() - ts) / 86_400_000; - return daysOld <= 30; - }); - if (hasRecentSource) score += 0.1; + const capped = results.slice(0, 8); + const coverage = Math.min(1, capped.length / 6); - if (query.trim().length > 0 && answer.toLowerCase().includes(query.trim().split(/\s+/)[0].toLowerCase())) { - score += 0.05; + const domains = new Set(); + let recentCount = 0; + for (const r of capped) { + domains.add(normalizeDomain(r.domain || domainFromUrl(r.url))); + if (isRecent(r.publishedAt, 30)) recentCount += 1; } - return clampConfidence(score); + const diversity = Math.min(1, domains.size / Math.max(1, Math.min(capped.length, 4))); + const recency = recentCount / capped.length; + + return clampConfidence(0.1 + coverage * 0.45 + diversity * 0.25 + recency * 0.2); } -function buildUserPrompt(query: string, results: SearchResult[]): string { - const compactResults = results.slice(0, 8).map((r, index) => ({ - index: index + 1, +function buildUserPayload(query: string, results: SearchResult[]): string { + const compact = results.map((r, index) => ({ + id: index + 1, title: r.title, url: r.url, domain: r.domain, - snippet: r.snippet, publishedAt: r.publishedAt ?? null, + snippet: r.description, })); return JSON.stringify( { query, - results: compactResults, - instructions: "Return only JSON: {answer: string, confidence: number between 0 and 1}", + sources: compact, + expected_schema: { + answer: "string", + confidence: "number 0..1", + }, }, null, 2, ); } -async function readResponseTextSafe(response: Response): Promise { +function extractMessageContent(payload: any): string { + const messageContent = payload?.choices?.[0]?.message?.content; + if (typeof messageContent === "string") return messageContent; + + if (Array.isArray(messageContent)) { + const parts = messageContent + .map((part) => { + if (!part || typeof part !== "object") return ""; + if (typeof part.text === "string") return part.text; + if (typeof part.content === "string") return part.content; + return ""; + }) + .filter(Boolean); + if (parts.length > 0) return parts.join("\n"); + } + + if (typeof payload?.output_text === "string") return payload.output_text; + + return ""; +} + +function parseAssistantJson(content: string): { answer?: string; confidence?: number } | undefined { + const stripped = content + .trim() + .replace(/^```json\s*/i, "") + .replace(/^```/, "") + .replace(/```$/, "") + .trim(); + + const start = stripped.indexOf("{"); + const end = stripped.lastIndexOf("}"); + if (start === -1 || end === -1 || end <= start) return undefined; + + const jsonSegment = stripped.slice(start, end + 1); try { - return await response.text(); + const parsed = JSON.parse(jsonSegment) as { answer?: string; confidence?: number }; + return parsed; } catch { - return ""; + return undefined; + } +} + +async function fetchWithTimeout( + input: string, + init: RequestInit, + timeoutMs: number, +): Promise { + const controller = new AbortController(); + const timeout = setTimeout(() => controller.abort(), timeoutMs); + + try { + return await fetch(input, { + ...init, + signal: controller.signal, + }); + } finally { + clearTimeout(timeout); } } -export async function synthesizeAnswer( +export async function synthesize( query: string, results: SearchResult[], - options: SynthOptions = {}, + options: SynthesizeOptions = {}, ): Promise { - const apiKey = process.env.OPENAI_API_KEY; + const trimmedQuery = query.trim(); + const limitedResults = results.slice(0, options.maxInputResults ?? 8); + + const heuristicConfidence = computeHeuristicConfidence(limitedResults); + if (!trimmedQuery || limitedResults.length === 0) { + const answer = "Insufficient evidence in current sources to provide a confident answer."; + return { + answer, + confidence: clampConfidence(heuristicConfidence * 0.8), + tokens: { + in: estimateTokenCount(trimmedQuery), + out: estimateTokenCount(answer), + }, + model: options.model ?? DEFAULT_MODEL, + }; + } + + const apiKey = options.apiKey ?? process.env.OPENAI_API_KEY; if (!apiKey) { - throw new SynthError("Missing OPENAI_API_KEY", 500); + throw new SynthError("Missing synthesis API key", 401, "MISSING_API_KEY"); } - const model = options.model ?? process.env.OPENAI_MODEL ?? DEFAULT_MODEL; - const endpoint = process.env.OPENAI_API_URL ?? "https://api.openai.com/v1/chat/completions"; - const systemPrompt = process.env.SYNTH_SYSTEM_PROMPT ?? DEFAULT_SYSTEM_PROMPT; - const userPrompt = buildUserPrompt(query, results); - - const requestBody = { - model, - temperature: options.temperature ?? 0.2, - max_tokens: options.maxTokens ?? 320, - response_format: { type: "json_object" }, - messages: [ - { role: "system", content: systemPrompt }, - { role: "user", content: userPrompt }, - ], - }; + const endpoint = options.endpoint ?? DEFAULT_ENDPOINT; + const model = options.model ?? DEFAULT_MODEL; + const timeoutMs = Math.max(1000, options.timeoutMs ?? 20_000); + + const userPayload = buildUserPayload(trimmedQuery, limitedResults); let response: Response; try { - response = await fetch(endpoint, { - method: "POST", - headers: { - Authorization: `Bearer ${apiKey}`, - "Content-Type": "application/json", + response = await fetchWithTimeout( + endpoint, + { + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: `Bearer ${apiKey}`, + }, + body: JSON.stringify({ + model, + temperature: 0.2, + response_format: { type: "json_object" }, + messages: [ + { role: "system", content: SYSTEM_PROMPT }, + { role: "user", content: userPayload }, + ], + }), }, - body: JSON.stringify(requestBody), - signal: options.signal, - }); + timeoutMs, + ); } catch (error) { - throw new SynthError(`Synthesis request failed: ${(error as Error)?.message ?? "Unknown error"}`, 500); - } - - if (!response.ok) { - const body = await readResponseTextSafe(response); - throw new SynthError(`Synthesis API failed with status ${response.status}`, response.status, body); + throw new SynthError( + error instanceof Error ? error.message : "Synthesis request failed", + 0, + "NETWORK_ERROR", + error, + ); } + const rawText = await response.text(); let payload: any; try { - payload = await response.json(); + payload = rawText ? JSON.parse(rawText) : {}; } catch { - throw new SynthError("Invalid JSON from synthesis API", response.status); + payload = {}; + } + + if (!response.ok) { + const message = payload?.error?.message || `Synthesis API error (${response.status})`; + throw new SynthError(message, response.status, "UPSTREAM_ERROR", payload); } - const content = extractMessageContent(payload); - const parsed = parseModelJSON(content); + const rawContent = extractMessageContent(payload); + const parsed = parseAssistantJson(rawContent); - const answer = typeof parsed.answer === "string" && parsed.answer.trim().length > 0 ? parsed.answer.trim() : fallbackAnswer(results); + const answer = (parsed?.answer || rawContent || "").trim() || "No answer produced."; + const modelConfidence = + typeof parsed?.confidence === "number" ? clampConfidence(parsed.confidence) : undefined; const confidence = - typeof parsed.confidence === "number" - ? clampConfidence(parsed.confidence) - : scoreConfidence(query, results, answer); - - const promptTokens = - typeof payload?.usage?.prompt_tokens === "number" - ? payload.usage.prompt_tokens - : estimateTokens(systemPrompt) + estimateTokens(userPrompt); + modelConfidence === undefined + ? heuristicConfidence + : clampConfidence(modelConfidence * 0.7 + heuristicConfidence * 0.3); - const completionTokens = - typeof payload?.usage?.completion_tokens === "number" - ? payload.usage.completion_tokens - : estimateTokens(content || answer); + const inputTextForEstimate = `${SYSTEM_PROMPT}\n${userPayload}`; + const inTokens = Number(payload?.usage?.prompt_tokens) || estimateTokenCount(inputTextForEstimate); + const outTokens = Number(payload?.usage?.completion_tokens) || estimateTokenCount(answer); return { answer, - confidence, + confidence: clampConfidence(confidence), tokens: { - in: promptTokens, - out: completionTokens, + in: Math.max(0, inTokens), + out: Math.max(0, outTokens), }, - model: typeof payload?.model === "string" && payload.model.length > 0 ? payload.model : model, + model: typeof payload?.model === "string" ? payload.model : model, }; } -export const synthesize = synthesizeAnswer; -export const synth = synthesizeAnswer; -export default synthesizeAnswer; \ No newline at end of file +export type { SearchResult }; +export default synthesize; \ No newline at end of file diff --git a/src/logic/types.ts b/src/logic/types.ts index 9b7bd29..bcf6a95 100644 --- a/src/logic/types.ts +++ b/src/logic/types.ts @@ -3,21 +3,9 @@ export type Freshness = "day" | "week" | "month"; export interface SearchResult { title: string; url: string; - snippet: string; + description: string; domain: string; publishedAt?: string; score?: number; source?: string; -} - -export interface TokenUsage { - in: number; - out: number; -} - -export interface SynthesisResult { - answer: string; - confidence: number; - tokens: TokenUsage; - model: string; } \ No newline at end of file From 8a354009fb49bbe47313378996467d56528e0070 Mon Sep 17 00:00:00 2001 From: Jackson Date: Wed, 4 Mar 2026 16:41:16 +0000 Subject: [PATCH 10/13] feat: implement task 0x9a74dfdbbaaf87c2d7 via TaskMarket agent #24583 --- src/logic/brave.ts | 356 ++++++++++++++++++++------------------ src/logic/cache.ts | 95 ++++++---- src/logic/rank.ts | 187 ++++++++++---------- src/logic/synth.ts | 363 +++++++++++++++++---------------------- test/logic/brave.test.ts | 83 +++++++++ test/logic/cache.test.ts | 35 ++++ test/logic/synth.test.ts | 98 +++++++++++ 7 files changed, 719 insertions(+), 498 deletions(-) create mode 100644 test/logic/brave.test.ts create mode 100644 test/logic/cache.test.ts create mode 100644 test/logic/synth.test.ts diff --git a/src/logic/brave.ts b/src/logic/brave.ts index a275aac..c6b3d02 100644 --- a/src/logic/brave.ts +++ b/src/logic/brave.ts @@ -1,228 +1,244 @@ -import type { Freshness, SearchResult } from "./types"; +export type Freshness = "day" | "week" | "month"; + +export interface SearchResult { + title: string; + url: string; + snippet: string; + domain: string; + publishedAt?: string; + source: "brave"; +} export interface BraveSearchOptions { apiKey?: string; - endpoint?: string; - count?: number; freshness?: Freshness; - country?: string; - searchLang?: string; + count?: number; + offset?: number; + endpoint?: string; timeoutMs?: number; + signal?: AbortSignal; + fetchImpl?: typeof fetch; } -export class BraveApiError extends Error { - status: number; - code: string; - retryAfterSeconds?: number; - details?: unknown; - - constructor( - message: string, - status: number, - code: string, - retryAfterSeconds?: number, - details?: unknown, - ) { - super(message); - this.name = "BraveApiError"; - this.status = status; - this.code = code; - this.retryAfterSeconds = retryAfterSeconds; - this.details = details; - } +export interface BraveSearchResponse { + web?: { + results?: BraveRawResult[]; + }; + results?: BraveRawResult[]; + error?: { message?: string } | string; + message?: string; } -interface BraveWebResult { +export interface BraveRawResult { title?: string; url?: string; description?: string; + snippet?: string; + extra_snippets?: string[]; age?: string; page_age?: string; - meta_url?: { - hostname?: string; - }; - profile?: { - long_name?: string; - }; + published?: string; + date?: string; + [key: string]: unknown; } -interface BraveResponse { - web?: { - results?: BraveWebResult[]; - }; - error?: { - code?: string; - message?: string; - }; -} +export class BraveApiError extends Error { + public readonly status: number; + public readonly code: string; + public readonly retryAfterSeconds?: number; -const DEFAULT_BRAVE_ENDPOINT = - process.env.BRAVE_SEARCH_ENDPOINT ?? "https://api.search.brave.com/res/v1/web/search"; - -function mapFreshness(freshness?: Freshness): string | undefined { - if (!freshness) return undefined; - switch (freshness) { - case "day": - return "pd"; - case "week": - return "pw"; - case "month": - return "pm"; - default: - return undefined; + constructor(message: string, status: number, code: string, retryAfterSeconds?: number) { + super(message); + this.name = "BraveApiError"; + this.status = status; + this.code = code; + this.retryAfterSeconds = retryAfterSeconds; } } -function domainFromUrl(url: string): string { +const FRESHNESS_MAP: Record = { + day: "pd", + week: "pw", + month: "pm" +}; + +const DEFAULT_BRAVE_ENDPOINT = + process.env.BRAVE_API_ENDPOINT ?? "https://api.search.brave.com/res/v1/web/search"; + +const DEFAULT_TIMEOUT_MS = Number(process.env.BRAVE_TIMEOUT_MS ?? "10000"); + +function toDomain(rawUrl: string): string { try { - const parsed = new URL(url); - return parsed.hostname.replace(/^www\./i, "").toLowerCase(); + const hostname = new URL(rawUrl).hostname.toLowerCase(); + return hostname.startsWith("www.") ? hostname.slice(4) : hostname; } catch { return ""; } } -function parsePublishedAt(value?: string): string | undefined { - if (!value) return undefined; - const timestamp = Date.parse(value); - if (Number.isNaN(timestamp)) return undefined; - return new Date(timestamp).toISOString(); -} - -function normalizeOne(result: BraveWebResult): SearchResult | null { - if (!result || !result.url || !result.title) return null; - - const description = typeof result.description === "string" ? result.description.trim() : ""; - if (description.length === 0) return null; +function toIsoDate(value: unknown): string | undefined { + if (typeof value !== "string" || value.trim() === "") { + return undefined; + } - const domain = - (result.meta_url?.hostname || domainFromUrl(result.url)).replace(/^www\./i, "").toLowerCase(); + const date = new Date(value); + if (Number.isNaN(date.getTime())) { + return undefined; + } - return { - title: result.title.trim(), - url: result.url, - description, - domain, - publishedAt: parsePublishedAt(result.page_age) ?? parsePublishedAt(result.age), - source: "brave", - }; + return date.toISOString(); } -export function normalizeBraveResponse(payload: unknown): SearchResult[] { - if (!payload || typeof payload !== "object") return []; - const typed = payload as BraveResponse; - const results = typed.web?.results; - if (!Array.isArray(results)) return []; - - const normalized: SearchResult[] = []; - for (const raw of results) { - const item = normalizeOne(raw); - if (item) normalized.push(item); +function parseRetryAfter(value: string | null): number | undefined { + if (!value) return undefined; + const seconds = Number(value); + if (Number.isFinite(seconds) && seconds >= 0) { + return seconds; } - return normalized; + return undefined; } -async function fetchWithTimeout( - input: string, - init: RequestInit, - timeoutMs: number, -): Promise { - const controller = new AbortController(); - const timeout = setTimeout(() => controller.abort(), timeoutMs); - - try { - return await fetch(input, { - ...init, - signal: controller.signal, - }); - } finally { - clearTimeout(timeout); +function parseErrorMessage(rawBody: string, status: number): string { + if (!rawBody) { + return `Brave API request failed (${status})`; } -} -function parseRetryAfterSeconds(headerValue: string | null): number | undefined { - if (!headerValue) return undefined; - const asNumber = Number(headerValue); - if (Number.isFinite(asNumber) && asNumber >= 0) return asNumber; - - const asDate = Date.parse(headerValue); - if (Number.isNaN(asDate)) return undefined; - const deltaMs = asDate - Date.now(); - return deltaMs > 0 ? Math.ceil(deltaMs / 1000) : 0; -} - -function safeJsonParse(raw: string): unknown { try { - return JSON.parse(raw); + const parsed = JSON.parse(rawBody) as { error?: { message?: string } | string; message?: string }; + if (typeof parsed.error === "string") return parsed.error; + if (parsed.error && typeof parsed.error === "object" && typeof parsed.error.message === "string") { + return parsed.error.message; + } + if (typeof parsed.message === "string") return parsed.message; } catch { - return undefined; + return rawBody; } + + return `Brave API request failed (${status})`; } -export async function searchBrave( - query: string, - options: BraveSearchOptions = {}, -): Promise { - const trimmedQuery = query.trim(); - if (!trimmedQuery) return []; +function normalizeResult(raw: BraveRawResult): SearchResult | null { + const title = (raw.title ?? "").toString().trim(); + const url = (raw.url ?? "").toString().trim(); + const snippet = (raw.description ?? raw.snippet ?? "").toString().trim(); - const apiKey = options.apiKey ?? process.env.BRAVE_API_KEY; - if (!apiKey) { - throw new BraveApiError("Missing Brave API key", 401, "MISSING_API_KEY"); + if (!title || !url) { + return null; } + const extraSnippets = Array.isArray(raw.extra_snippets) + ? raw.extra_snippets.filter((v): v is string => typeof v === "string" && v.trim().length > 0) + : []; + + const mergedSnippet = [snippet, ...extraSnippets].filter(Boolean).join(" ").trim(); + + const publishedAt = + toIsoDate(raw.page_age) ?? toIsoDate(raw.age) ?? toIsoDate(raw.published) ?? toIsoDate(raw.date); + + return { + title, + url, + snippet: mergedSnippet, + domain: toDomain(url), + publishedAt, + source: "brave" + }; +} + +function buildEndpoint(query: string, options: BraveSearchOptions): string { const endpoint = options.endpoint ?? DEFAULT_BRAVE_ENDPOINT; - const count = Math.max(1, Math.min(20, options.count ?? 10)); - const timeoutMs = Math.max(1000, options.timeoutMs ?? 12_000); + const freshness = options.freshness ? FRESHNESS_MAP[options.freshness] : undefined; + const count = Math.min(Math.max(options.count ?? 10, 1), 20); + const offset = Math.max(options.offset ?? 0, 0); const url = new URL(endpoint); - url.searchParams.set("q", trimmedQuery); + url.searchParams.set("q", query); url.searchParams.set("count", String(count)); - if (options.country) url.searchParams.set("country", options.country); - if (options.searchLang) url.searchParams.set("search_lang", options.searchLang); - const freshness = mapFreshness(options.freshness); + if (offset > 0) url.searchParams.set("offset", String(offset)); if (freshness) url.searchParams.set("freshness", freshness); - let response: Response; - try { - response = await fetchWithTimeout( - url.toString(), - { - method: "GET", - headers: { - Accept: "application/json", - "X-Subscription-Token": apiKey, - }, + return url.toString(); +} + +export async function searchBrave(query: string, options: BraveSearchOptions = {}): Promise { + const normalizedQuery = query.trim(); + if (!normalizedQuery) { + return []; + } + + const apiKey = options.apiKey ?? process.env.BRAVE_API_KEY; + if (!apiKey) { + throw new BraveApiError("Missing BRAVE_API_KEY", 401, "MISSING_API_KEY"); + } + + const fetchImpl = options.fetchImpl ?? fetch; + const timeoutMs = Number.isFinite(options.timeoutMs) ? (options.timeoutMs as number) : DEFAULT_TIMEOUT_MS; + const controller = new AbortController(); + const timeout = setTimeout(() => controller.abort("timeout"), timeoutMs); + + if (options.signal) { + options.signal.addEventListener( + "abort", + () => { + controller.abort(options.signal?.reason); }, - timeoutMs, - ); - } catch (error) { - throw new BraveApiError( - error instanceof Error ? error.message : "Brave request failed", - 0, - "NETWORK_ERROR", - undefined, - error, + { once: true } ); } - const rawText = await response.text(); - const payload = safeJsonParse(rawText) as BraveResponse | undefined; + try { + const endpoint = buildEndpoint(normalizedQuery, options); + const response = await fetchImpl(endpoint, { + method: "GET", + headers: { + Accept: "application/json", + "X-Subscription-Token": apiKey + }, + signal: controller.signal + }); - if (!response.ok) { - const retryAfterSeconds = parseRetryAfterSeconds(response.headers.get("retry-after")); - const apiMessage = payload?.error?.message; - const message = apiMessage || `Brave API error (${response.status})`; - const code = - response.status === 429 - ? "RATE_LIMITED" - : payload?.error?.code || (response.status >= 500 ? "UPSTREAM_ERROR" : "REQUEST_ERROR"); + if (response.status === 429) { + const retryAfterSeconds = parseRetryAfter(response.headers.get("retry-after")); + const body = await response.text(); + const message = parseErrorMessage(body, 429); + throw new BraveApiError(message, 429, "RATE_LIMITED", retryAfterSeconds); + } + + if (!response.ok) { + const body = await response.text(); + throw new BraveApiError(parseErrorMessage(body, response.status), response.status, "HTTP_ERROR"); + } + + const payload = (await response.json()) as BraveSearchResponse; + const items = payload.web?.results ?? payload.results ?? []; + if (!Array.isArray(items)) { + return []; + } + + const normalized: SearchResult[] = []; + for (const item of items) { + const result = normalizeResult(item); + if (result) normalized.push(result); + } + + return normalized; + } catch (error) { + if (error instanceof BraveApiError) throw error; - throw new BraveApiError(message, response.status, code, retryAfterSeconds, payload); - } + if (error instanceof Error && error.name === "AbortError") { + throw new BraveApiError("Brave API request timed out", 408, "TIMEOUT"); + } - return normalizeBraveResponse(payload); + throw new BraveApiError( + error instanceof Error ? error.message : "Unexpected Brave API error", + 500, + "UNKNOWN" + ); + } finally { + clearTimeout(timeout); + } } -export type { Freshness, SearchResult }; -export default searchBrave; \ No newline at end of file +export default { + searchBrave +}; \ No newline at end of file diff --git a/src/logic/cache.ts b/src/logic/cache.ts index f9fbbbd..2a381f5 100644 --- a/src/logic/cache.ts +++ b/src/logic/cache.ts @@ -1,10 +1,11 @@ -import { createHash } from "node:crypto"; +import { createHash } from "crypto"; export interface CacheStats { hits: number; misses: number; hitRate: number; size: number; + ttlSeconds: number; } interface CacheEntry { @@ -12,9 +13,11 @@ interface CacheEntry { expiresAt: number; } -function getDefaultTtlSeconds(): number { - const parsed = Number(process.env.CACHE_TTL_SECONDS); - if (!Number.isFinite(parsed) || parsed <= 0) return 300; +function readDefaultTtlSeconds(): number { + const parsed = Number(process.env.CACHE_TTL_SECONDS ?? "300"); + if (!Number.isFinite(parsed) || parsed <= 0) { + return 300; + } return parsed; } @@ -29,31 +32,31 @@ export function hashQuery(query: string): string { export class InMemoryCache { private readonly store = new Map>(); + private readonly defaultTtlSeconds: number; private hits = 0; private misses = 0; - private readonly defaultTtlSeconds: number; - constructor(ttlSeconds: number = getDefaultTtlSeconds()) { - this.defaultTtlSeconds = ttlSeconds; + constructor(ttlSeconds: number = readDefaultTtlSeconds()) { + this.defaultTtlSeconds = ttlSeconds > 0 ? ttlSeconds : 300; } - private pruneExpired(): void { - const now = Date.now(); - for (const [key, entry] of this.store.entries()) { - if (entry.expiresAt <= now) this.store.delete(key); - } + public keyForQuery(query: string): string { + return hashQuery(query); } - get(query: string): T | undefined { - const key = hashQuery(query); - const entry = this.store.get(key); + public get(query: string): T | undefined { + const key = this.keyForQuery(query); + return this.getByKey(key); + } + public getByKey(key: string): T | undefined { + const entry = this.store.get(key); if (!entry) { this.misses += 1; return undefined; } - if (entry.expiresAt <= Date.now()) { + if (Date.now() >= entry.expiresAt) { this.store.delete(key); this.misses += 1; return undefined; @@ -63,43 +66,57 @@ export class InMemoryCache { return entry.value; } - set(query: string, value: T, ttlSeconds?: number): void { - const key = hashQuery(query); - const ttl = ttlSeconds ?? this.defaultTtlSeconds; - const ttlMs = Math.max(1, Math.round(ttl * 1000)); + public set(query: string, value: T, ttlSeconds?: number): string { + const key = this.keyForQuery(query); + this.setByKey(key, value, ttlSeconds); + return key; + } + public setByKey(key: string, value: T, ttlSeconds?: number): void { + const effectiveTtl = ttlSeconds && ttlSeconds > 0 ? ttlSeconds : this.defaultTtlSeconds; this.store.set(key, { value, - expiresAt: Date.now() + ttlMs, + expiresAt: Date.now() + effectiveTtl * 1000 }); } - clear(): void { - this.store.clear(); - this.hits = 0; - this.misses = 0; - } - - stats(): CacheStats { - this.pruneExpired(); + public stats(): CacheStats { + this.cleanupExpired(); const total = this.hits + this.misses; + return { hits: this.hits, misses: this.misses, hitRate: total === 0 ? 0 : this.hits / total, size: this.store.size, + ttlSeconds: this.defaultTtlSeconds }; } + + public clear(): void { + this.store.clear(); + this.hits = 0; + this.misses = 0; + } + + private cleanupExpired(): void { + const now = Date.now(); + for (const [key, entry] of this.store.entries()) { + if (now >= entry.expiresAt) { + this.store.delete(key); + } + } + } } const sharedCache = new InMemoryCache(); -export function get(query: string): T | undefined { +export function get(query: string): T | undefined { return sharedCache.get(query) as T | undefined; } -export function set(query: string, value: T, ttlSeconds?: number): void { - sharedCache.set(query, value, ttlSeconds); +export function set(query: string, value: T, ttlSeconds?: number): string { + return sharedCache.set(query, value, ttlSeconds); } export function stats(): CacheStats { @@ -108,4 +125,16 @@ export function stats(): CacheStats { export function clear(): void { sharedCache.clear(); -} \ No newline at end of file +} + +export function keyForQuery(query: string): string { + return sharedCache.keyForQuery(query); +} + +export default { + get, + set, + stats, + clear, + keyForQuery +}; \ No newline at end of file diff --git a/src/logic/rank.ts b/src/logic/rank.ts index 3c7dce8..7c86cf3 100644 --- a/src/logic/rank.ts +++ b/src/logic/rank.ts @@ -1,134 +1,145 @@ -import type { SearchResult } from "./types"; +import type { SearchResult } from "./brave"; + +export interface RankedSearchResult extends SearchResult { + score: number; +} export interface RankOptions { maxPerDomain?: number; - maxResults?: number; minQualityScore?: number; now?: Date; } -export interface RankedSearchResult extends SearchResult { - rank: number; - recencyBoost: number; - qualityScore: number; -} - -function normalizeDomain(value: string): string { - return value.replace(/^www\./i, "").toLowerCase(); -} - -function getDomain(result: SearchResult): string { - if (result.domain) return normalizeDomain(result.domain); +const TRACKING_PARAMS = new Set([ + "utm_source", + "utm_medium", + "utm_campaign", + "utm_term", + "utm_content", + "fbclid", + "gclid" +]); + +function normalizeDomain(url: string, existing?: string): string { + if (existing && existing.trim()) return existing.trim().toLowerCase(); try { - return normalizeDomain(new URL(result.url).hostname); + const hostname = new URL(url).hostname.toLowerCase(); + return hostname.startsWith("www.") ? hostname.slice(4) : hostname; } catch { return ""; } } -function canonicalizeUrl(url: string): string { +export function canonicalizeUrl(rawUrl: string): string { try { - const parsed = new URL(url); - parsed.hash = ""; - parsed.search = ""; - parsed.pathname = parsed.pathname.replace(/\/+$/, ""); - return `${normalizeDomain(parsed.hostname)}${parsed.pathname}`.toLowerCase(); + const url = new URL(rawUrl); + url.hash = ""; + + const kept = new URLSearchParams(); + for (const [key, value] of url.searchParams.entries()) { + if (!TRACKING_PARAMS.has(key.toLowerCase())) { + kept.append(key, value); + } + } + + url.search = kept.toString(); + if (url.pathname !== "/" && url.pathname.endsWith("/")) { + url.pathname = url.pathname.slice(0, -1); + } + + return url.toString(); } catch { - return url.trim().toLowerCase(); + return rawUrl.trim(); } } -export function getRecencyBoost(publishedAt?: string, now = new Date()): number { +function isLowQuality(result: SearchResult): boolean { + const titleLen = result.title.trim().length; + const snippetLen = result.snippet.trim().length; + + if (!result.url || titleLen < 8) return true; + if (snippetLen < 25) return true; + if (/^(home|index|untitled)$/i.test(result.title.trim())) return true; + + return false; +} + +function recencyBoost(publishedAt: string | undefined, now: Date): number { if (!publishedAt) return 0; - const publishedMs = Date.parse(publishedAt); - if (Number.isNaN(publishedMs)) return 0; - const ageDays = (now.getTime() - publishedMs) / (1000 * 60 * 60 * 24); + const timestamp = new Date(publishedAt).getTime(); + if (Number.isNaN(timestamp)) return 0; + + const ageDays = (now.getTime() - timestamp) / (1000 * 60 * 60 * 24); - if (ageDays < 0) return 0.18; + if (ageDays < 0) return 0.02; if (ageDays <= 1) return 0.25; if (ageDays <= 7) return 0.18; - if (ageDays <= 30) return 0.1; - if (ageDays <= 90) return 0.05; - if (ageDays <= 365) return 0.02; + if (ageDays <= 30) return 0.12; + if (ageDays <= 90) return 0.06; return 0; } -export function qualityScore(result: SearchResult): number { - const titleLen = result.title.trim().length; - const descLen = result.description.trim().length; - const hasValidUrl = /^https?:\/\//i.test(result.url); - - const titleScore = Math.max(0, Math.min(1, titleLen / 80)); - const descScore = Math.max(0, Math.min(1, descLen / 180)); - const urlBonus = hasValidUrl ? 0.05 : -0.15; - const domainPenalty = result.domain ? 0 : 0.05; +function qualityScore(result: SearchResult): number { + const titleScore = Math.min(1, result.title.trim().length / 90) * 0.25; + const snippetScore = Math.min(1, result.snippet.trim().length / 260) * 0.5; + const httpsScore = result.url.startsWith("https://") ? 0.05 : 0; + const domainScore = result.domain ? 0.1 : 0; - const score = titleScore * 0.45 + descScore * 0.5 + urlBonus - domainPenalty; - return Math.max(0, Math.min(1, score)); + return titleScore + snippetScore + httpsScore + domainScore; } -export function isLowQuality(result: SearchResult, minQualityScore = 0.3): boolean { - if (!result.title || !result.url || !result.description) return true; - if (result.title.trim().length < 6) return true; - if (result.description.trim().length < 20) return true; - return qualityScore(result) < minQualityScore; -} - -export function rankResults( - results: SearchResult[], - options: RankOptions = {}, -): RankedSearchResult[] { - const maxPerDomain = Math.max(1, options.maxPerDomain ?? 2); - const maxResults = Math.max(1, options.maxResults ?? 10); - const minQuality = options.minQualityScore ?? 0.3; +export function rankResults(results: SearchResult[], options: RankOptions = {}): RankedSearchResult[] { + const maxPerDomain = options.maxPerDomain ?? 2; + const minQualityScore = options.minQualityScore ?? 0.35; const now = options.now ?? new Date(); - const seenCanonical = new Set(); - const prepared: RankedSearchResult[] = []; - - for (const item of results) { - const domain = getDomain(item); - const candidate: SearchResult = { ...item, domain }; + const seenUrls = new Set(); + const filtered: SearchResult[] = []; - if (isLowQuality(candidate, minQuality)) continue; + for (const result of results) { + if (isLowQuality(result)) continue; - const canonical = canonicalizeUrl(candidate.url); - if (seenCanonical.has(canonical)) continue; - seenCanonical.add(canonical); + const canonicalUrl = canonicalizeUrl(result.url); + if (seenUrls.has(canonicalUrl)) continue; - const recency = getRecencyBoost(candidate.publishedAt, now); - const qScore = qualityScore(candidate); - const base = candidate.score ?? 0.5; - const rank = Math.max(0, Math.min(1.5, base * 0.65 + qScore * 0.25 + recency)); - - prepared.push({ - ...candidate, - rank, - recencyBoost: recency, - qualityScore: qScore, - score: rank, + seenUrls.add(canonicalUrl); + filtered.push({ + ...result, + domain: normalizeDomain(result.url, result.domain) }); } - prepared.sort((a, b) => b.rank - a.rank); - - const domainCounts = new Map(); + const scored: RankedSearchResult[] = filtered + .map((result) => { + const score = qualityScore(result) + recencyBoost(result.publishedAt, now); + return { + ...result, + score: Number(score.toFixed(6)) + }; + }) + .filter((result) => result.score >= minQualityScore) + .sort((a, b) => b.score - a.score); + + const perDomainCount = new Map(); const output: RankedSearchResult[] = []; - for (const row of prepared) { - const domain = row.domain || ""; - const current = domainCounts.get(domain) ?? 0; - if (current >= maxPerDomain) continue; + for (const result of scored) { + const domain = result.domain || "unknown"; + const current = perDomainCount.get(domain) ?? 0; - domainCounts.set(domain, current + 1); - output.push(row); + if (current >= maxPerDomain) { + continue; + } - if (output.length >= maxResults) break; + perDomainCount.set(domain, current + 1); + output.push(result); } return output; } -export type { SearchResult }; -export default rankResults; \ No newline at end of file +export default { + rankResults, + canonicalizeUrl +}; \ No newline at end of file diff --git a/src/logic/synth.ts b/src/logic/synth.ts index d657b73..6c9bbcc 100644 --- a/src/logic/synth.ts +++ b/src/logic/synth.ts @@ -1,291 +1,240 @@ -import type { SearchResult } from "./types"; +import type { SearchResult } from "./brave"; -export interface SynthTokens { +export interface SynthTokenUsage { in: number; out: number; } -export interface SynthesisResult { +export interface SynthResult { answer: string; confidence: number; - tokens: SynthTokens; + tokens: SynthTokenUsage; model: string; } -export interface SynthesizeOptions { +export interface SynthOptions { apiKey?: string; + model?: string; endpoint?: string; + temperature?: number; + maxOutputTokens?: number; + signal?: AbortSignal; + fetchImpl?: typeof fetch; +} + +interface OpenAIChatResponse { model?: string; - timeoutMs?: number; - maxInputResults?: number; + choices?: Array<{ + message?: { + content?: string; + }; + }>; + usage?: { + prompt_tokens?: number; + completion_tokens?: number; + }; } export class SynthError extends Error { - status: number; - code: string; - details?: unknown; + public readonly status: number; + public readonly code: string; - constructor(message: string, status: number, code: string, details?: unknown) { + constructor(message: string, status: number, code: string) { super(message); this.name = "SynthError"; this.status = status; this.code = code; - this.details = details; } } const DEFAULT_MODEL = process.env.SYNTH_MODEL ?? "gpt-4o-mini"; -const DEFAULT_ENDPOINT = - process.env.OPENAI_API_ENDPOINT ?? "https://api.openai.com/v1/chat/completions"; +const DEFAULT_ENDPOINT = process.env.OPENAI_API_ENDPOINT ?? "https://api.openai.com/v1/chat/completions"; const SYSTEM_PROMPT = [ - "You are Queryx synthesis engine for downstream agents.", - "Answer strictly from provided sources.", - "Output strict JSON object only:", - '{"answer":"string","confidence":number}', + "You are Queryx synthesis engine.", + "Your audience is autonomous agents that need concise, factual summaries from search results.", "Rules:", - "- Keep answer concise and factual.", - "- Include uncertainty when sources are weak.", - "- confidence must be between 0 and 1.", + "1) Return strict JSON only: {\"answer\":\"string\",\"confidence\":number}.", + "2) Answer in 2-5 short sentences. No markdown.", + "3) Confidence must be between 0 and 1.", + "4) If evidence is weak or conflicting, say so and lower confidence." ].join("\n"); -export function clampConfidence(value: number): number { +function clampConfidence(value: number): number { if (!Number.isFinite(value)) return 0; - return Math.max(0, Math.min(1, value)); + return Math.min(1, Math.max(0, value)); } -export function estimateTokenCount(text: string): number { - const cleaned = text.trim(); - if (!cleaned) return 0; - return Math.max(1, Math.ceil(cleaned.length / 4)); +function estimateTokens(text: string): number { + const trimmed = text.trim(); + if (!trimmed) return 1; + return Math.max(1, Math.ceil(trimmed.length / 4)); } -function normalizeDomain(input: string): string { - return input.replace(/^www\./i, "").toLowerCase(); +function stripCodeFence(content: string): string { + const fenced = content.match(/```(?:json)?\s*([\s\S]*?)\s*```/i); + if (fenced?.[1]) return fenced[1].trim(); + return content.trim(); } -function domainFromUrl(url: string): string { +function parseModelJson(content: string): Record { + const stripped = stripCodeFence(content); + try { - return normalizeDomain(new URL(url).hostname); + return JSON.parse(stripped) as Record; } catch { - return ""; + const objectMatch = stripped.match(/\{[\s\S]*\}/); + if (!objectMatch) return {}; + try { + return JSON.parse(objectMatch[0]) as Record; + } catch { + return {}; + } } } -function isRecent(publishedAt?: string, withinDays = 30): boolean { - if (!publishedAt) return false; - const ms = Date.parse(publishedAt); - if (Number.isNaN(ms)) return false; - const ageMs = Date.now() - ms; - return ageMs >= 0 && ageMs <= withinDays * 24 * 60 * 60 * 1000; +function fallbackAnswer(query: string, results: SearchResult[]): string { + if (!results.length) { + return `No reliable sources were found for "${query}".`; + } + + const top = results.slice(0, 3); + const bulletText = top + .map((r) => r.snippet || r.title) + .filter(Boolean) + .join(" ") + .trim(); + + if (!bulletText) { + return `Found ${results.length} sources for "${query}", but snippets were too sparse to synthesize strongly.`; + } + + return bulletText.slice(0, 600); } -export function computeHeuristicConfidence(results: SearchResult[]): number { - if (results.length === 0) return 0.12; +function heuristicConfidence(query: string, results: SearchResult[], answer: string): number { + if (!results.length) return 0.1; - const capped = results.slice(0, 8); - const coverage = Math.min(1, capped.length / 6); + const uniqueDomains = new Set(results.map((r) => r.domain).filter(Boolean)).size; + const domainScore = Math.min(1, uniqueDomains / Math.min(results.length, 6)) * 0.2; - const domains = new Set(); - let recentCount = 0; - for (const r of capped) { - domains.add(normalizeDomain(r.domain || domainFromUrl(r.url))); - if (isRecent(r.publishedAt, 30)) recentCount += 1; - } + const snippetCoverage = + results.filter((r) => (r.snippet ?? "").trim().length >= 40).length / Math.max(1, results.length); + const snippetScore = snippetCoverage * 0.3; - const diversity = Math.min(1, domains.size / Math.max(1, Math.min(capped.length, 4))); - const recency = recentCount / capped.length; + const now = Date.now(); + const recentCount = results.filter((r) => { + if (!r.publishedAt) return false; + const ts = new Date(r.publishedAt).getTime(); + if (Number.isNaN(ts)) return false; + const ageDays = (now - ts) / (1000 * 60 * 60 * 24); + return ageDays <= 30; + }).length; + const recencyScore = (recentCount / Math.max(1, results.length)) * 0.2; - return clampConfidence(0.1 + coverage * 0.45 + diversity * 0.25 + recency * 0.2); + const volumeScore = Math.min(1, results.length / 8) * 0.2; + const answerScore = Math.min(1, answer.trim().length / 240) * 0.1; + + const uncertaintyPenalty = query.trim().length < 3 ? 0.15 : 0; + + return clampConfidence(domainScore + snippetScore + recencyScore + volumeScore + answerScore - uncertaintyPenalty); +} + +function parseConfidence(value: unknown): number | undefined { + if (typeof value === "number") return value; + if (typeof value === "string") { + const n = Number(value); + if (Number.isFinite(n)) return n; + } + return undefined; } -function buildUserPayload(query: string, results: SearchResult[]): string { - const compact = results.map((r, index) => ({ - id: index + 1, +function createUserPrompt(query: string, results: SearchResult[]): string { + const compactSources = results.slice(0, 10).map((r) => ({ title: r.title, url: r.url, + snippet: r.snippet, domain: r.domain, - publishedAt: r.publishedAt ?? null, - snippet: r.description, + publishedAt: r.publishedAt })); return JSON.stringify( { query, - sources: compact, - expected_schema: { - answer: "string", - confidence: "number 0..1", - }, + sources: compactSources }, null, - 2, + 2 ); } -function extractMessageContent(payload: any): string { - const messageContent = payload?.choices?.[0]?.message?.content; - if (typeof messageContent === "string") return messageContent; - - if (Array.isArray(messageContent)) { - const parts = messageContent - .map((part) => { - if (!part || typeof part !== "object") return ""; - if (typeof part.text === "string") return part.text; - if (typeof part.content === "string") return part.content; - return ""; - }) - .filter(Boolean); - if (parts.length > 0) return parts.join("\n"); - } - - if (typeof payload?.output_text === "string") return payload.output_text; - - return ""; -} - -function parseAssistantJson(content: string): { answer?: string; confidence?: number } | undefined { - const stripped = content - .trim() - .replace(/^```json\s*/i, "") - .replace(/^```/, "") - .replace(/```$/, "") - .trim(); - - const start = stripped.indexOf("{"); - const end = stripped.lastIndexOf("}"); - if (start === -1 || end === -1 || end <= start) return undefined; - - const jsonSegment = stripped.slice(start, end + 1); - try { - const parsed = JSON.parse(jsonSegment) as { answer?: string; confidence?: number }; - return parsed; - } catch { - return undefined; - } -} - -async function fetchWithTimeout( - input: string, - init: RequestInit, - timeoutMs: number, -): Promise { - const controller = new AbortController(); - const timeout = setTimeout(() => controller.abort(), timeoutMs); - - try { - return await fetch(input, { - ...init, - signal: controller.signal, - }); - } finally { - clearTimeout(timeout); - } -} - -export async function synthesize( - query: string, - results: SearchResult[], - options: SynthesizeOptions = {}, -): Promise { - const trimmedQuery = query.trim(); - const limitedResults = results.slice(0, options.maxInputResults ?? 8); - - const heuristicConfidence = computeHeuristicConfidence(limitedResults); - if (!trimmedQuery || limitedResults.length === 0) { - const answer = "Insufficient evidence in current sources to provide a confident answer."; - return { - answer, - confidence: clampConfidence(heuristicConfidence * 0.8), - tokens: { - in: estimateTokenCount(trimmedQuery), - out: estimateTokenCount(answer), - }, - model: options.model ?? DEFAULT_MODEL, - }; - } - +export async function synthesize(query: string, results: SearchResult[], options: SynthOptions = {}): Promise { const apiKey = options.apiKey ?? process.env.OPENAI_API_KEY; if (!apiKey) { - throw new SynthError("Missing synthesis API key", 401, "MISSING_API_KEY"); + throw new SynthError("Missing OPENAI_API_KEY", 401, "MISSING_API_KEY"); } + const fetchImpl = options.fetchImpl ?? fetch; const endpoint = options.endpoint ?? DEFAULT_ENDPOINT; const model = options.model ?? DEFAULT_MODEL; - const timeoutMs = Math.max(1000, options.timeoutMs ?? 20_000); - - const userPayload = buildUserPayload(trimmedQuery, limitedResults); - - let response: Response; - try { - response = await fetchWithTimeout( - endpoint, - { - method: "POST", - headers: { - "Content-Type": "application/json", - Authorization: `Bearer ${apiKey}`, - }, - body: JSON.stringify({ - model, - temperature: 0.2, - response_format: { type: "json_object" }, - messages: [ - { role: "system", content: SYSTEM_PROMPT }, - { role: "user", content: userPayload }, - ], - }), - }, - timeoutMs, - ); - } catch (error) { - throw new SynthError( - error instanceof Error ? error.message : "Synthesis request failed", - 0, - "NETWORK_ERROR", - error, - ); - } - - const rawText = await response.text(); - let payload: any; - try { - payload = rawText ? JSON.parse(rawText) : {}; - } catch { - payload = {}; - } + const temperature = options.temperature ?? 0.2; + const userPrompt = createUserPrompt(query, results); + + const response = await fetchImpl(endpoint, { + method: "POST", + headers: { + Authorization: `Bearer ${apiKey}`, + "Content-Type": "application/json" + }, + signal: options.signal, + body: JSON.stringify({ + model, + temperature, + max_tokens: options.maxOutputTokens ?? 350, + response_format: { type: "json_object" }, + messages: [ + { role: "system", content: SYSTEM_PROMPT }, + { role: "user", content: userPrompt } + ] + }) + }); if (!response.ok) { - const message = payload?.error?.message || `Synthesis API error (${response.status})`; - throw new SynthError(message, response.status, "UPSTREAM_ERROR", payload); + const body = await response.text(); + throw new SynthError(body || `Synthesis request failed (${response.status})`, response.status, "HTTP_ERROR"); } - const rawContent = extractMessageContent(payload); - const parsed = parseAssistantJson(rawContent); + const payload = (await response.json()) as OpenAIChatResponse; + const content = payload.choices?.[0]?.message?.content ?? ""; + const parsed = parseModelJson(content); - const answer = (parsed?.answer || rawContent || "").trim() || "No answer produced."; - const modelConfidence = - typeof parsed?.confidence === "number" ? clampConfidence(parsed.confidence) : undefined; + const answer = (typeof parsed.answer === "string" && parsed.answer.trim().length > 0 + ? parsed.answer + : fallbackAnswer(query, results) + ).trim(); - const confidence = - modelConfidence === undefined - ? heuristicConfidence - : clampConfidence(modelConfidence * 0.7 + heuristicConfidence * 0.3); + const parsedConfidence = parseConfidence(parsed.confidence); + const confidence = clampConfidence( + parsedConfidence ?? heuristicConfidence(query, results, answer) + ); - const inputTextForEstimate = `${SYSTEM_PROMPT}\n${userPayload}`; - const inTokens = Number(payload?.usage?.prompt_tokens) || estimateTokenCount(inputTextForEstimate); - const outTokens = Number(payload?.usage?.completion_tokens) || estimateTokenCount(answer); + const tokensIn = + payload.usage?.prompt_tokens ?? estimateTokens(`${SYSTEM_PROMPT}\n${userPrompt}`); + const tokensOut = payload.usage?.completion_tokens ?? estimateTokens(answer); return { answer, - confidence: clampConfidence(confidence), + confidence, tokens: { - in: Math.max(0, inTokens), - out: Math.max(0, outTokens), + in: tokensIn, + out: tokensOut }, - model: typeof payload?.model === "string" ? payload.model : model, + model: payload.model ?? model }; } -export type { SearchResult }; -export default synthesize; \ No newline at end of file +export { clampConfidence, heuristicConfidence, estimateTokens }; + +export default { + synthesize +}; \ No newline at end of file diff --git a/test/logic/brave.test.ts b/test/logic/brave.test.ts new file mode 100644 index 0000000..ed0106d --- /dev/null +++ b/test/logic/brave.test.ts @@ -0,0 +1,83 @@ +import { afterEach, describe, expect, it } from "bun:test"; +import { BraveApiError, searchBrave } from "../../src/logic/brave"; + +const originalFetch = globalThis.fetch; + +function mockFetch( + handler: (input: RequestInfo | URL, init?: RequestInit) => Promise | Response +): void { + globalThis.fetch = (async (input: RequestInfo | URL, init?: RequestInit) => { + return handler(input, init); + }) as typeof fetch; +} + +afterEach(() => { + globalThis.fetch = originalFetch; +}); + +describe("logic/brave", () => { + it("normalises Brave response into SearchResult[] and passes freshness param", async () => { + let calledUrl = ""; + + mockFetch((input) => { + calledUrl = typeof input === "string" ? input : input.toString(); + return new Response( + JSON.stringify({ + web: { + results: [ + { + title: "Queryx launch notes", + url: "https://www.example.com/blog/queryx", + description: "Queryx data layer is now live.", + page_age: "2026-03-03T12:00:00Z" + } + ] + } + }), + { status: 200, headers: { "content-type": "application/json" } } + ); + }); + + const results = await searchBrave("queryx", { + apiKey: "test-api-key", + freshness: "week" + }); + + expect(results).toHaveLength(1); + expect(results[0]).toMatchObject({ + title: "Queryx launch notes", + url: "https://www.example.com/blog/queryx", + snippet: "Queryx data layer is now live.", + domain: "example.com", + source: "brave" + }); + expect(results[0].publishedAt).toBe("2026-03-03T12:00:00.000Z"); + + const url = new URL(calledUrl); + expect(url.searchParams.get("freshness")).toBe("pw"); + expect(url.searchParams.get("q")).toBe("queryx"); + }); + + it("throws BraveApiError on rate limit with retry metadata", async () => { + mockFetch(() => { + return new Response(JSON.stringify({ error: { message: "Too many requests" } }), { + status: 429, + headers: { + "content-type": "application/json", + "retry-after": "15" + } + }); + }); + + try { + await searchBrave("queryx", { apiKey: "test-api-key" }); + throw new Error("Expected searchBrave to throw"); + } catch (error) { + expect(error).toBeInstanceOf(BraveApiError); + const typed = error as BraveApiError; + expect(typed.status).toBe(429); + expect(typed.code).toBe("RATE_LIMITED"); + expect(typed.retryAfterSeconds).toBe(15); + } + }); +}); \ No newline at end of file diff --git a/test/logic/cache.test.ts b/test/logic/cache.test.ts new file mode 100644 index 0000000..ff9b0cf --- /dev/null +++ b/test/logic/cache.test.ts @@ -0,0 +1,35 @@ +import { describe, expect, it } from "bun:test"; +import { InMemoryCache } from "../../src/logic/cache"; + +function sleep(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); +} + +describe("logic/cache", () => { + it("expires entries after TTL", async () => { + const cache = new InMemoryCache(0.02); // 20ms + cache.set("Queryx cache", "value"); + + expect(cache.get("Queryx cache")).toBe("value"); + await sleep(30); + expect(cache.get("Queryx cache")).toBeUndefined(); + + const s = cache.stats(); + expect(s.hits).toBe(1); + expect(s.misses).toBe(1); + }); + + it("tracks hit/miss stats and hit rate", () => { + const cache = new InMemoryCache(60); + + expect(cache.get("missing")).toBeUndefined(); // miss + cache.set("hello world", "ok"); + expect(cache.get("hello world")).toBe("ok"); // hit + + const s = cache.stats(); + expect(s.hits).toBe(1); + expect(s.misses).toBe(1); + expect(s.hitRate).toBe(0.5); + expect(s.size).toBe(1); + }); +}); \ No newline at end of file diff --git a/test/logic/synth.test.ts b/test/logic/synth.test.ts new file mode 100644 index 0000000..f28d7a7 --- /dev/null +++ b/test/logic/synth.test.ts @@ -0,0 +1,98 @@ +import { afterEach, describe, expect, it } from "bun:test"; +import type { SearchResult } from "../../src/logic/brave"; +import { synthesize } from "../../src/logic/synth"; + +const originalFetch = globalThis.fetch; + +function mockFetch( + handler: (input: RequestInfo | URL, init?: RequestInit) => Promise | Response +): void { + globalThis.fetch = (async (input: RequestInfo | URL, init?: RequestInit) => { + return handler(input, init); + }) as typeof fetch; +} + +afterEach(() => { + globalThis.fetch = originalFetch; +}); + +describe("logic/synth", () => { + const sampleResults: SearchResult[] = [ + { + title: "Queryx architecture", + url: "https://docs.queryx.ai/architecture", + snippet: "Queryx uses Brave search retrieval, source ranking, and synthesis over curated snippets.", + domain: "docs.queryx.ai", + publishedAt: "2026-03-03T10:00:00.000Z", + source: "brave" + }, + { + title: "Queryx API changes", + url: "https://blog.queryx.ai/api-update", + snippet: "The latest update adds cache TTL controls and confidence-calibrated answer synthesis.", + domain: "blog.queryx.ai", + publishedAt: "2026-03-02T10:00:00.000Z", + source: "brave" + } + ]; + + it("clamps confidence and uses explicit token usage from API response", async () => { + mockFetch((_input, init) => { + expect(init?.method).toBe("POST"); + + return new Response( + JSON.stringify({ + model: "gpt-4o-mini", + choices: [ + { + message: { + content: "{\"answer\":\"Queryx combines search, ranking, caching, and concise synthesis.\",\"confidence\":1.7}" + } + } + ], + usage: { + prompt_tokens: 120, + completion_tokens: 32 + } + }), + { status: 200, headers: { "content-type": "application/json" } } + ); + }); + + const output = await synthesize("How does Queryx work?", sampleResults, { + apiKey: "test-openai-key" + }); + + expect(output.answer).toContain("Queryx"); + expect(output.confidence).toBe(1); + expect(output.tokens).toEqual({ in: 120, out: 32 }); + expect(output.model).toBe("gpt-4o-mini"); + }); + + it("estimates tokens when usage is absent and computes confidence within [0,1]", async () => { + mockFetch(() => { + return new Response( + JSON.stringify({ + model: "gpt-4o-mini", + choices: [ + { + message: { + content: "{\"answer\":\"Queryx aggregates multiple sources and returns concise factual output.\"}" + } + } + ] + }), + { status: 200, headers: { "content-type": "application/json" } } + ); + }); + + const output = await synthesize("Queryx summary", sampleResults, { + apiKey: "test-openai-key" + }); + + expect(output.tokens.in).toBeGreaterThan(0); + expect(output.tokens.out).toBeGreaterThan(0); + expect(output.confidence).toBeGreaterThanOrEqual(0); + expect(output.confidence).toBeLessThanOrEqual(1); + }); +}); \ No newline at end of file From 4777164139c363725a9fc95690ad27b182244b4e Mon Sep 17 00:00:00 2001 From: Jackson Date: Wed, 4 Mar 2026 16:46:45 +0000 Subject: [PATCH 11/13] feat: implement task 0x9a74dfdbbaaf87c2d7 via TaskMarket agent #24583 --- src/logic/brave.ts | 289 ++++++++++++++++++++----------------- src/logic/cache.ts | 136 ++++++++--------- src/logic/index.ts | 1 - src/logic/rank.ts | 171 +++++++++------------- src/logic/synth.ts | 353 ++++++++++++++++++++++----------------------- test/brave.test.ts | 102 ++++++------- test/cache.test.ts | 44 +++--- test/rank.test.ts | 91 +++++------- test/synth.test.ts | 86 +++++------ 9 files changed, 588 insertions(+), 685 deletions(-) diff --git a/src/logic/brave.ts b/src/logic/brave.ts index c6b3d02..f2bd59a 100644 --- a/src/logic/brave.ts +++ b/src/logic/brave.ts @@ -4,241 +4,258 @@ export interface SearchResult { title: string; url: string; snippet: string; - domain: string; + sourceDomain: string; publishedAt?: string; - source: "brave"; + language?: string; } export interface BraveSearchOptions { apiKey?: string; - freshness?: Freshness; count?: number; offset?: number; - endpoint?: string; + freshness?: Freshness; + country?: string; + searchLang?: string; + safeSearch?: "off" | "moderate" | "strict"; timeoutMs?: number; signal?: AbortSignal; - fetchImpl?: typeof fetch; + now?: Date; } -export interface BraveSearchResponse { - web?: { - results?: BraveRawResult[]; - }; - results?: BraveRawResult[]; - error?: { message?: string } | string; - message?: string; -} - -export interface BraveRawResult { +interface BraveWebResult { title?: string; url?: string; description?: string; - snippet?: string; - extra_snippets?: string[]; age?: string; page_age?: string; - published?: string; - date?: string; - [key: string]: unknown; + language?: string; + profile?: { + name?: string; + }; } -export class BraveApiError extends Error { - public readonly status: number; +interface BraveResponse { + web?: { + results?: BraveWebResult[]; + }; + error?: { + detail?: string; + message?: string; + }; +} + +export class BraveClientError extends Error { public readonly code: string; + public readonly status?: number; public readonly retryAfterSeconds?: number; - constructor(message: string, status: number, code: string, retryAfterSeconds?: number) { + constructor(message: string, code: string, status?: number, retryAfterSeconds?: number) { super(message); - this.name = "BraveApiError"; - this.status = status; + this.name = "BraveClientError"; this.code = code; + this.status = status; this.retryAfterSeconds = retryAfterSeconds; } } +const BRAVE_SEARCH_ENDPOINT = "https://api.search.brave.com/res/v1/web/search"; + const FRESHNESS_MAP: Record = { day: "pd", week: "pw", - month: "pm" + month: "pm", }; -const DEFAULT_BRAVE_ENDPOINT = - process.env.BRAVE_API_ENDPOINT ?? "https://api.search.brave.com/res/v1/web/search"; - -const DEFAULT_TIMEOUT_MS = Number(process.env.BRAVE_TIMEOUT_MS ?? "10000"); - -function toDomain(rawUrl: string): string { +function normalizeDomain(url: string): string { try { - const hostname = new URL(rawUrl).hostname.toLowerCase(); + const hostname = new URL(url).hostname.toLowerCase(); return hostname.startsWith("www.") ? hostname.slice(4) : hostname; } catch { return ""; } } -function toIsoDate(value: unknown): string | undefined { - if (typeof value !== "string" || value.trim() === "") { +function parseRelativeAge(age: string, now: Date): string | undefined { + const match = age.trim().toLowerCase().match(/^(\d+)\s+(minute|hour|day|week|month|year)s?\s+ago$/); + if (!match) { return undefined; } - const date = new Date(value); - if (Number.isNaN(date.getTime())) { - return undefined; - } + const value = Number(match[1]); + const unit = match[2]; - return date.toISOString(); -} + const msPerUnit: Record = { + minute: 60_000, + hour: 3_600_000, + day: 86_400_000, + week: 604_800_000, + month: 2_592_000_000, + year: 31_536_000_000, + }; -function parseRetryAfter(value: string | null): number | undefined { - if (!value) return undefined; - const seconds = Number(value); - if (Number.isFinite(seconds) && seconds >= 0) { - return seconds; + const ms = msPerUnit[unit]; + if (!ms) { + return undefined; } - return undefined; + + return new Date(now.getTime() - value * ms).toISOString(); } -function parseErrorMessage(rawBody: string, status: number): string { - if (!rawBody) { - return `Brave API request failed (${status})`; +function parsePublishedAt(item: BraveWebResult, now: Date): string | undefined { + if (item.page_age) { + const d = new Date(item.page_age); + if (!Number.isNaN(d.getTime())) { + return d.toISOString(); + } } - try { - const parsed = JSON.parse(rawBody) as { error?: { message?: string } | string; message?: string }; - if (typeof parsed.error === "string") return parsed.error; - if (parsed.error && typeof parsed.error === "object" && typeof parsed.error.message === "string") { - return parsed.error.message; + if (item.age) { + const d = new Date(item.age); + if (!Number.isNaN(d.getTime())) { + return d.toISOString(); } - if (typeof parsed.message === "string") return parsed.message; - } catch { - return rawBody; + return parseRelativeAge(item.age, now); } - return `Brave API request failed (${status})`; + return undefined; } -function normalizeResult(raw: BraveRawResult): SearchResult | null { - const title = (raw.title ?? "").toString().trim(); - const url = (raw.url ?? "").toString().trim(); - const snippet = (raw.description ?? raw.snippet ?? "").toString().trim(); +function normalizeResult(item: BraveWebResult, now: Date): SearchResult | null { + const title = item.title?.trim() ?? ""; + const url = item.url?.trim() ?? ""; + const snippet = item.description?.trim() ?? ""; - if (!title || !url) { + if (!title || !url || !snippet) { return null; } - const extraSnippets = Array.isArray(raw.extra_snippets) - ? raw.extra_snippets.filter((v): v is string => typeof v === "string" && v.trim().length > 0) - : []; - - const mergedSnippet = [snippet, ...extraSnippets].filter(Boolean).join(" ").trim(); - - const publishedAt = - toIsoDate(raw.page_age) ?? toIsoDate(raw.age) ?? toIsoDate(raw.published) ?? toIsoDate(raw.date); + const sourceDomain = normalizeDomain(url); + if (!sourceDomain) { + return null; + } return { title, url, - snippet: mergedSnippet, - domain: toDomain(url), - publishedAt, - source: "brave" + snippet, + sourceDomain, + publishedAt: parsePublishedAt(item, now), + language: item.language, }; } -function buildEndpoint(query: string, options: BraveSearchOptions): string { - const endpoint = options.endpoint ?? DEFAULT_BRAVE_ENDPOINT; - const freshness = options.freshness ? FRESHNESS_MAP[options.freshness] : undefined; - const count = Math.min(Math.max(options.count ?? 10, 1), 20); - const offset = Math.max(options.offset ?? 0, 0); - - const url = new URL(endpoint); - url.searchParams.set("q", query); - url.searchParams.set("count", String(count)); - if (offset > 0) url.searchParams.set("offset", String(offset)); - if (freshness) url.searchParams.set("freshness", freshness); +async function safeReadErrorMessage(response: Response): Promise { + try { + const text = await response.text(); + if (!text) { + return undefined; + } - return url.toString(); + try { + const parsed = JSON.parse(text) as BraveResponse; + return parsed.error?.detail ?? parsed.error?.message ?? text; + } catch { + return text; + } + } catch { + return undefined; + } } export async function searchBrave(query: string, options: BraveSearchOptions = {}): Promise { - const normalizedQuery = query.trim(); - if (!normalizedQuery) { + const trimmedQuery = query.trim(); + if (!trimmedQuery) { return []; } const apiKey = options.apiKey ?? process.env.BRAVE_API_KEY; if (!apiKey) { - throw new BraveApiError("Missing BRAVE_API_KEY", 401, "MISSING_API_KEY"); + throw new BraveClientError("Missing BRAVE_API_KEY.", "missing_api_key"); + } + + const params = new URLSearchParams({ + q: trimmedQuery, + count: String(options.count ?? 10), + offset: String(options.offset ?? 0), + country: options.country ?? "US", + search_lang: options.searchLang ?? "en", + safesearch: options.safeSearch ?? "moderate", + }); + + if (options.freshness) { + params.set("freshness", FRESHNESS_MAP[options.freshness]); } - const fetchImpl = options.fetchImpl ?? fetch; - const timeoutMs = Number.isFinite(options.timeoutMs) ? (options.timeoutMs as number) : DEFAULT_TIMEOUT_MS; + const endpoint = `${BRAVE_SEARCH_ENDPOINT}?${params.toString()}`; + const timeoutMs = options.timeoutMs ?? 10_000; + const now = options.now ?? new Date(); + const controller = new AbortController(); const timeout = setTimeout(() => controller.abort("timeout"), timeoutMs); + let externalAbortHandler: (() => void) | undefined; if (options.signal) { - options.signal.addEventListener( - "abort", - () => { - controller.abort(options.signal?.reason); - }, - { once: true } - ); + externalAbortHandler = () => controller.abort(options.signal?.reason ?? "aborted"); + if (options.signal.aborted) { + externalAbortHandler(); + } else { + options.signal.addEventListener("abort", externalAbortHandler, { once: true }); + } } try { - const endpoint = buildEndpoint(normalizedQuery, options); - const response = await fetchImpl(endpoint, { + const response = await fetch(endpoint, { method: "GET", headers: { Accept: "application/json", - "X-Subscription-Token": apiKey + "X-Subscription-Token": apiKey, }, - signal: controller.signal + signal: controller.signal, }); - if (response.status === 429) { - const retryAfterSeconds = parseRetryAfter(response.headers.get("retry-after")); - const body = await response.text(); - const message = parseErrorMessage(body, 429); - throw new BraveApiError(message, 429, "RATE_LIMITED", retryAfterSeconds); - } - if (!response.ok) { - const body = await response.text(); - throw new BraveApiError(parseErrorMessage(body, response.status), response.status, "HTTP_ERROR"); + const errorText = await safeReadErrorMessage(response); + if (response.status === 429) { + const retryAfterRaw = response.headers.get("retry-after"); + const retryAfterSeconds = retryAfterRaw ? Number(retryAfterRaw) : undefined; + throw new BraveClientError( + errorText ?? "Brave API rate limit exceeded.", + "rate_limited", + 429, + Number.isFinite(retryAfterSeconds) ? retryAfterSeconds : undefined, + ); + } + + if (response.status === 401 || response.status === 403) { + throw new BraveClientError(errorText ?? "Brave API authentication failed.", "auth_error", response.status); + } + + throw new BraveClientError(errorText ?? "Brave API request failed.", "upstream_error", response.status); } - const payload = (await response.json()) as BraveSearchResponse; - const items = payload.web?.results ?? payload.results ?? []; - if (!Array.isArray(items)) { - return []; - } + const body = (await response.json()) as BraveResponse; + const rawResults = body.web?.results ?? []; - const normalized: SearchResult[] = []; - for (const item of items) { - const result = normalizeResult(item); - if (result) normalized.push(result); - } - - return normalized; + return rawResults + .map((item) => normalizeResult(item, now)) + .filter((item): item is SearchResult => item !== null); } catch (error) { - if (error instanceof BraveApiError) throw error; + if (error instanceof BraveClientError) { + throw error; + } if (error instanceof Error && error.name === "AbortError") { - throw new BraveApiError("Brave API request timed out", 408, "TIMEOUT"); + throw new BraveClientError("Brave request timed out.", "timeout"); } - throw new BraveApiError( - error instanceof Error ? error.message : "Unexpected Brave API error", - 500, - "UNKNOWN" + throw new BraveClientError( + error instanceof Error ? error.message : "Unknown network error while calling Brave API.", + "network_error", ); } finally { clearTimeout(timeout); + if (options.signal && externalAbortHandler) { + options.signal.removeEventListener("abort", externalAbortHandler); + } } -} - -export default { - searchBrave -}; \ No newline at end of file +} \ No newline at end of file diff --git a/src/logic/cache.ts b/src/logic/cache.ts index 2a381f5..62541e7 100644 --- a/src/logic/cache.ts +++ b/src/logic/cache.ts @@ -1,11 +1,10 @@ -import { createHash } from "crypto"; +import { createHash } from "node:crypto"; export interface CacheStats { hits: number; misses: number; hitRate: number; size: number; - ttlSeconds: number; } interface CacheEntry { @@ -13,40 +12,61 @@ interface CacheEntry { expiresAt: number; } -function readDefaultTtlSeconds(): number { - const parsed = Number(process.env.CACHE_TTL_SECONDS ?? "300"); +export interface QueryCacheOptions { + ttlSeconds?: number; + now?: () => number; +} + +function getDefaultTtlSeconds(): number { + const raw = process.env.CACHE_TTL_SECONDS ?? "300"; + const parsed = Number(raw); if (!Number.isFinite(parsed) || parsed <= 0) { return 300; } return parsed; } -export function normalizeQuery(query: string): string { - return query.trim().toLowerCase().replace(/\s+/g, " "); -} - -export function hashQuery(query: string): string { - const normalized = normalizeQuery(query); - return createHash("sha256").update(normalized).digest("hex"); -} - -export class InMemoryCache { +export class QueryCache { private readonly store = new Map>(); - private readonly defaultTtlSeconds: number; + private readonly now: () => number; + private readonly defaultTtlMs: number; + private hits = 0; private misses = 0; - constructor(ttlSeconds: number = readDefaultTtlSeconds()) { - this.defaultTtlSeconds = ttlSeconds > 0 ? ttlSeconds : 300; + constructor(options: QueryCacheOptions = {}) { + this.now = options.now ?? Date.now; + this.defaultTtlMs = Math.max(1, (options.ttlSeconds ?? getDefaultTtlSeconds()) * 1000); + } + + static normalizeQuery(query: string): string { + return query.trim().toLowerCase().replace(/\s+/g, " "); + } + + static hashQuery(query: string): string { + return createHash("sha256").update(QueryCache.normalizeQuery(query)).digest("hex"); } public keyForQuery(query: string): string { - return hashQuery(query); + return QueryCache.hashQuery(query); } - public get(query: string): T | undefined { + public set(query: string, value: T, ttlSeconds?: number): string { const key = this.keyForQuery(query); - return this.getByKey(key); + return this.setByKey(key, value, ttlSeconds); + } + + public setByKey(key: string, value: T, ttlSeconds?: number): string { + const ttlMs = Math.max(1, (ttlSeconds ?? this.defaultTtlMs / 1000) * 1000); + this.store.set(key, { + value, + expiresAt: this.now() + ttlMs, + }); + return key; + } + + public get(query: string): T | undefined { + return this.getByKey(this.keyForQuery(query)); } public getByKey(key: string): T | undefined { @@ -56,7 +76,7 @@ export class InMemoryCache { return undefined; } - if (Date.now() >= entry.expiresAt) { + if (entry.expiresAt <= this.now()) { this.store.delete(key); this.misses += 1; return undefined; @@ -66,31 +86,8 @@ export class InMemoryCache { return entry.value; } - public set(query: string, value: T, ttlSeconds?: number): string { - const key = this.keyForQuery(query); - this.setByKey(key, value, ttlSeconds); - return key; - } - - public setByKey(key: string, value: T, ttlSeconds?: number): void { - const effectiveTtl = ttlSeconds && ttlSeconds > 0 ? ttlSeconds : this.defaultTtlSeconds; - this.store.set(key, { - value, - expiresAt: Date.now() + effectiveTtl * 1000 - }); - } - - public stats(): CacheStats { - this.cleanupExpired(); - const total = this.hits + this.misses; - - return { - hits: this.hits, - misses: this.misses, - hitRate: total === 0 ? 0 : this.hits / total, - size: this.store.size, - ttlSeconds: this.defaultTtlSeconds - }; + public delete(query: string): boolean { + return this.store.delete(this.keyForQuery(query)); } public clear(): void { @@ -99,42 +96,27 @@ export class InMemoryCache { this.misses = 0; } - private cleanupExpired(): void { - const now = Date.now(); + public cleanup(): void { + const now = this.now(); for (const [key, entry] of this.store.entries()) { - if (now >= entry.expiresAt) { + if (entry.expiresAt <= now) { this.store.delete(key); } } } -} - -const sharedCache = new InMemoryCache(); - -export function get(query: string): T | undefined { - return sharedCache.get(query) as T | undefined; -} -export function set(query: string, value: T, ttlSeconds?: number): string { - return sharedCache.set(query, value, ttlSeconds); -} - -export function stats(): CacheStats { - return sharedCache.stats(); -} - -export function clear(): void { - sharedCache.clear(); -} - -export function keyForQuery(query: string): string { - return sharedCache.keyForQuery(query); + public stats(): CacheStats { + this.cleanup(); + const total = this.hits + this.misses; + return { + hits: this.hits, + misses: this.misses, + hitRate: total === 0 ? 0 : this.hits / total, + size: this.store.size, + }; + } } -export default { - get, - set, - stats, - clear, - keyForQuery -}; \ No newline at end of file +export const cache = new QueryCache(); +export const normalizeQuery = QueryCache.normalizeQuery; +export const hashQuery = QueryCache.hashQuery; \ No newline at end of file diff --git a/src/logic/index.ts b/src/logic/index.ts index 4ac3f4a..6351282 100644 --- a/src/logic/index.ts +++ b/src/logic/index.ts @@ -1,4 +1,3 @@ -export * from "./types"; export * from "./brave"; export * from "./synth"; export * from "./cache"; diff --git a/src/logic/rank.ts b/src/logic/rank.ts index 7c86cf3..24b48b3 100644 --- a/src/logic/rank.ts +++ b/src/logic/rank.ts @@ -1,27 +1,20 @@ import type { SearchResult } from "./brave"; export interface RankedSearchResult extends SearchResult { + qualityScore: number; + recencyScore: number; score: number; } export interface RankOptions { - maxPerDomain?: number; - minQualityScore?: number; now?: Date; + maxPerDomain?: number; + limit?: number; + minQuality?: number; + recencyHalfLifeDays?: number; } -const TRACKING_PARAMS = new Set([ - "utm_source", - "utm_medium", - "utm_campaign", - "utm_term", - "utm_content", - "fbclid", - "gclid" -]); - -function normalizeDomain(url: string, existing?: string): string { - if (existing && existing.trim()) return existing.trim().toLowerCase(); +function normalizeDomain(url: string): string { try { const hostname = new URL(url).hostname.toLowerCase(); return hostname.startsWith("www.") ? hostname.slice(4) : hostname; @@ -30,116 +23,94 @@ function normalizeDomain(url: string, existing?: string): string { } } -export function canonicalizeUrl(rawUrl: string): string { - try { - const url = new URL(rawUrl); - url.hash = ""; - - const kept = new URLSearchParams(); - for (const [key, value] of url.searchParams.entries()) { - if (!TRACKING_PARAMS.has(key.toLowerCase())) { - kept.append(key, value); - } - } - - url.search = kept.toString(); - if (url.pathname !== "/" && url.pathname.endsWith("/")) { - url.pathname = url.pathname.slice(0, -1); - } +function qualityScore(result: SearchResult): number { + const titleLen = result.title.trim().length; + const snippetLen = result.snippet.trim().length; + const hasValidUrl = /^https?:\/\//i.test(result.url.trim()); - return url.toString(); - } catch { - return rawUrl.trim(); + if (!titleLen || !snippetLen || !hasValidUrl) { + return 0; } -} -function isLowQuality(result: SearchResult): boolean { - const titleLen = result.title.trim().length; - const snippetLen = result.snippet.trim().length; + const titleComponent = Math.min(titleLen / 80, 1) * 0.35; + const snippetComponent = Math.min(snippetLen / 240, 1) * 0.45; + const urlComponent = 0.2; + + let score = titleComponent + snippetComponent + urlComponent; - if (!result.url || titleLen < 8) return true; - if (snippetLen < 25) return true; - if (/^(home|index|untitled)$/i.test(result.title.trim())) return true; + if (snippetLen < 40) { + score *= 0.6; + } - return false; + if (/\/(tag|category|author)\//i.test(result.url)) { + score *= 0.85; + } + + return Math.max(0, Math.min(1, score)); } -function recencyBoost(publishedAt: string | undefined, now: Date): number { - if (!publishedAt) return 0; +function recencyScore(publishedAt: string | undefined, now: Date, halfLifeDays: number): number { + if (!publishedAt) { + return 0.2; + } - const timestamp = new Date(publishedAt).getTime(); - if (Number.isNaN(timestamp)) return 0; + const ts = new Date(publishedAt).getTime(); + if (Number.isNaN(ts)) { + return 0.2; + } - const ageDays = (now.getTime() - timestamp) / (1000 * 60 * 60 * 24); + const ageMs = Math.max(0, now.getTime() - ts); + const ageDays = ageMs / 86_400_000; + const lambda = Math.log(2) / Math.max(1, halfLifeDays); - if (ageDays < 0) return 0.02; - if (ageDays <= 1) return 0.25; - if (ageDays <= 7) return 0.18; - if (ageDays <= 30) return 0.12; - if (ageDays <= 90) return 0.06; - return 0; + return Math.exp(-lambda * ageDays); } -function qualityScore(result: SearchResult): number { - const titleScore = Math.min(1, result.title.trim().length / 90) * 0.25; - const snippetScore = Math.min(1, result.snippet.trim().length / 260) * 0.5; - const httpsScore = result.url.startsWith("https://") ? 0.05 : 0; - const domainScore = result.domain ? 0.1 : 0; - - return titleScore + snippetScore + httpsScore + domainScore; +function scoreResult(result: SearchResult, now: Date, halfLifeDays: number): RankedSearchResult { + const q = qualityScore(result); + const r = recencyScore(result.publishedAt, now, halfLifeDays); + const score = q * 0.7 + r * 0.3; + + return { + ...result, + sourceDomain: result.sourceDomain || normalizeDomain(result.url), + qualityScore: q, + recencyScore: r, + score, + }; } export function rankResults(results: SearchResult[], options: RankOptions = {}): RankedSearchResult[] { - const maxPerDomain = options.maxPerDomain ?? 2; - const minQualityScore = options.minQualityScore ?? 0.35; const now = options.now ?? new Date(); + const maxPerDomain = options.maxPerDomain ?? 2; + const limit = options.limit ?? 10; + const minQuality = options.minQuality ?? 0.3; + const halfLifeDays = options.recencyHalfLifeDays ?? 14; - const seenUrls = new Set(); - const filtered: SearchResult[] = []; - - for (const result of results) { - if (isLowQuality(result)) continue; - - const canonicalUrl = canonicalizeUrl(result.url); - if (seenUrls.has(canonicalUrl)) continue; - - seenUrls.add(canonicalUrl); - filtered.push({ - ...result, - domain: normalizeDomain(result.url, result.domain) - }); - } + const scored = results + .map((r) => scoreResult(r, now, halfLifeDays)) + .filter((r) => r.qualityScore >= minQuality && Boolean(r.sourceDomain)); - const scored: RankedSearchResult[] = filtered - .map((result) => { - const score = qualityScore(result) + recencyBoost(result.publishedAt, now); - return { - ...result, - score: Number(score.toFixed(6)) - }; - }) - .filter((result) => result.score >= minQualityScore) - .sort((a, b) => b.score - a.score); + scored.sort((a, b) => b.score - a.score); - const perDomainCount = new Map(); - const output: RankedSearchResult[] = []; + const domainCounts = new Map(); + const ranked: RankedSearchResult[] = []; for (const result of scored) { - const domain = result.domain || "unknown"; - const current = perDomainCount.get(domain) ?? 0; + const domain = result.sourceDomain; + const count = domainCounts.get(domain) ?? 0; - if (current >= maxPerDomain) { + if (count >= maxPerDomain) { continue; } - perDomainCount.set(domain, current + 1); - output.push(result); - } + domainCounts.set(domain, count + 1); + ranked.push(result); - return output; -} + if (ranked.length >= limit) { + break; + } + } -export default { - rankResults, - canonicalizeUrl -}; \ No newline at end of file + return ranked; +} \ No newline at end of file diff --git a/src/logic/synth.ts b/src/logic/synth.ts index 6c9bbcc..19c8c6a 100644 --- a/src/logic/synth.ts +++ b/src/logic/synth.ts @@ -1,240 +1,237 @@ import type { SearchResult } from "./brave"; -export interface SynthTokenUsage { +export interface SynthTokens { in: number; out: number; } -export interface SynthResult { +export interface SynthesisResult { answer: string; confidence: number; - tokens: SynthTokenUsage; + tokens: SynthTokens; model: string; } export interface SynthOptions { apiKey?: string; model?: string; - endpoint?: string; temperature?: number; - maxOutputTokens?: number; + maxTokens?: number; + timeoutMs?: number; signal?: AbortSignal; - fetchImpl?: typeof fetch; +} + +interface OpenAIUsage { + prompt_tokens?: number; + completion_tokens?: number; +} + +interface OpenAIChoice { + message?: { + content?: string | Array<{ type?: string; text?: string }>; + }; } interface OpenAIChatResponse { model?: string; - choices?: Array<{ - message?: { - content?: string; - }; - }>; - usage?: { - prompt_tokens?: number; - completion_tokens?: number; + usage?: OpenAIUsage; + choices?: OpenAIChoice[]; + error?: { + message?: string; }; } -export class SynthError extends Error { - public readonly status: number; +export class SynthesisError extends Error { public readonly code: string; + public readonly status?: number; - constructor(message: string, status: number, code: string) { + constructor(message: string, code: string, status?: number) { super(message); - this.name = "SynthError"; - this.status = status; + this.name = "SynthesisError"; this.code = code; + this.status = status; } } -const DEFAULT_MODEL = process.env.SYNTH_MODEL ?? "gpt-4o-mini"; -const DEFAULT_ENDPOINT = process.env.OPENAI_API_ENDPOINT ?? "https://api.openai.com/v1/chat/completions"; - -const SYSTEM_PROMPT = [ - "You are Queryx synthesis engine.", - "Your audience is autonomous agents that need concise, factual summaries from search results.", - "Rules:", - "1) Return strict JSON only: {\"answer\":\"string\",\"confidence\":number}.", - "2) Answer in 2-5 short sentences. No markdown.", - "3) Confidence must be between 0 and 1.", - "4) If evidence is weak or conflicting, say so and lower confidence." -].join("\n"); - -function clampConfidence(value: number): number { - if (!Number.isFinite(value)) return 0; - return Math.min(1, Math.max(0, value)); -} +const DEFAULT_MODEL = "gpt-4o-mini"; +const OPENAI_CHAT_COMPLETIONS_URL = "https://api.openai.com/v1/chat/completions"; -function estimateTokens(text: string): number { - const trimmed = text.trim(); - if (!trimmed) return 1; - return Math.max(1, Math.ceil(trimmed.length / 4)); -} +const SYSTEM_PROMPT = + "You are Queryx synthesis engine. Return strict JSON only with keys: answer (string), confidence (number 0..1). " + + "Answer must be concise for machine consumers, grounded in provided sources, and avoid speculation."; -function stripCodeFence(content: string): string { - const fenced = content.match(/```(?:json)?\s*([\s\S]*?)\s*```/i); - if (fenced?.[1]) return fenced[1].trim(); - return content.trim(); +function clamp01(value: number): number { + if (!Number.isFinite(value)) { + return 0; + } + if (value < 0) { + return 0; + } + if (value > 1) { + return 1; + } + return value; } -function parseModelJson(content: string): Record { - const stripped = stripCodeFence(content); - - try { - return JSON.parse(stripped) as Record; - } catch { - const objectMatch = stripped.match(/\{[\s\S]*\}/); - if (!objectMatch) return {}; - try { - return JSON.parse(objectMatch[0]) as Record; - } catch { - return {}; - } - } +function estimateTokens(input: string): number { + return Math.max(1, Math.ceil(input.length / 4)); } -function fallbackAnswer(query: string, results: SearchResult[]): string { - if (!results.length) { - return `No reliable sources were found for "${query}".`; +function getChoiceContent(choice: OpenAIChoice | undefined): string { + const content = choice?.message?.content; + if (typeof content === "string") { + return content.trim(); } - const top = results.slice(0, 3); - const bulletText = top - .map((r) => r.snippet || r.title) - .filter(Boolean) - .join(" ") - .trim(); - - if (!bulletText) { - return `Found ${results.length} sources for "${query}", but snippets were too sparse to synthesize strongly.`; + if (Array.isArray(content)) { + return content + .map((part) => part.text ?? "") + .join("") + .trim(); } - return bulletText.slice(0, 600); + return ""; } -function heuristicConfidence(query: string, results: SearchResult[], answer: string): number { - if (!results.length) return 0.1; - - const uniqueDomains = new Set(results.map((r) => r.domain).filter(Boolean)).size; - const domainScore = Math.min(1, uniqueDomains / Math.min(results.length, 6)) * 0.2; - - const snippetCoverage = - results.filter((r) => (r.snippet ?? "").trim().length >= 40).length / Math.max(1, results.length); - const snippetScore = snippetCoverage * 0.3; - - const now = Date.now(); - const recentCount = results.filter((r) => { - if (!r.publishedAt) return false; - const ts = new Date(r.publishedAt).getTime(); - if (Number.isNaN(ts)) return false; - const ageDays = (now - ts) / (1000 * 60 * 60 * 24); - return ageDays <= 30; - }).length; - const recencyScore = (recentCount / Math.max(1, results.length)) * 0.2; - - const volumeScore = Math.min(1, results.length / 8) * 0.2; - const answerScore = Math.min(1, answer.trim().length / 240) * 0.1; - - const uncertaintyPenalty = query.trim().length < 3 ? 0.15 : 0; - - return clampConfidence(domainScore + snippetScore + recencyScore + volumeScore + answerScore - uncertaintyPenalty); +function safeJsonParse(value: string): T | undefined { + try { + return JSON.parse(value) as T; + } catch { + return undefined; + } } -function parseConfidence(value: unknown): number | undefined { - if (typeof value === "number") return value; - if (typeof value === "string") { - const n = Number(value); - if (Number.isFinite(n)) return n; - } - return undefined; +function heuristicConfidence(results: SearchResult[], answer: string): number { + const sourceFactor = Math.min(results.length, 8) / 8; + const answerFactor = Math.min(answer.length, 280) / 280; + const uncertaintyPenalty = /\b(maybe|unclear|not enough information|insufficient)\b/i.test(answer) ? 0.2 : 0; + return clamp01(0.2 + sourceFactor * 0.6 + answerFactor * 0.2 - uncertaintyPenalty); } -function createUserPrompt(query: string, results: SearchResult[]): string { - const compactSources = results.slice(0, 10).map((r) => ({ +function buildUserPrompt(query: string, results: SearchResult[]): string { + const compactSources = results.slice(0, 8).map((r, index) => ({ + id: index + 1, title: r.title, url: r.url, snippet: r.snippet, - domain: r.domain, - publishedAt: r.publishedAt + publishedAt: r.publishedAt ?? null, + domain: r.sourceDomain, })); - return JSON.stringify( - { - query, - sources: compactSources - }, - null, - 2 - ); + return JSON.stringify({ + query, + sources: compactSources, + instructions: [ + "Synthesize a direct answer.", + "If evidence is weak, explicitly say what is missing.", + "Keep answer concise and factual.", + "Output strict JSON only.", + ], + }); } -export async function synthesize(query: string, results: SearchResult[], options: SynthOptions = {}): Promise { +export async function synthesize( + query: string, + results: SearchResult[], + options: SynthOptions = {}, +): Promise { const apiKey = options.apiKey ?? process.env.OPENAI_API_KEY; if (!apiKey) { - throw new SynthError("Missing OPENAI_API_KEY", 401, "MISSING_API_KEY"); + throw new SynthesisError("Missing OPENAI_API_KEY.", "missing_api_key"); } - const fetchImpl = options.fetchImpl ?? fetch; - const endpoint = options.endpoint ?? DEFAULT_ENDPOINT; - const model = options.model ?? DEFAULT_MODEL; - const temperature = options.temperature ?? 0.2; - const userPrompt = createUserPrompt(query, results); - - const response = await fetchImpl(endpoint, { - method: "POST", - headers: { - Authorization: `Bearer ${apiKey}`, - "Content-Type": "application/json" - }, - signal: options.signal, - body: JSON.stringify({ - model, - temperature, - max_tokens: options.maxOutputTokens ?? 350, - response_format: { type: "json_object" }, - messages: [ - { role: "system", content: SYSTEM_PROMPT }, - { role: "user", content: userPrompt } - ] - }) - }); + const model = options.model ?? process.env.OPENAI_MODEL ?? DEFAULT_MODEL; + const endpoint = process.env.OPENAI_BASE_URL ?? OPENAI_CHAT_COMPLETIONS_URL; + const timeoutMs = options.timeoutMs ?? 15_000; + + const userPrompt = buildUserPrompt(query, results); + + const controller = new AbortController(); + const timeout = setTimeout(() => controller.abort("timeout"), timeoutMs); - if (!response.ok) { - const body = await response.text(); - throw new SynthError(body || `Synthesis request failed (${response.status})`, response.status, "HTTP_ERROR"); + let externalAbortHandler: (() => void) | undefined; + if (options.signal) { + externalAbortHandler = () => controller.abort(options.signal?.reason ?? "aborted"); + if (options.signal.aborted) { + externalAbortHandler(); + } else { + options.signal.addEventListener("abort", externalAbortHandler, { once: true }); + } } - const payload = (await response.json()) as OpenAIChatResponse; - const content = payload.choices?.[0]?.message?.content ?? ""; - const parsed = parseModelJson(content); - - const answer = (typeof parsed.answer === "string" && parsed.answer.trim().length > 0 - ? parsed.answer - : fallbackAnswer(query, results) - ).trim(); - - const parsedConfidence = parseConfidence(parsed.confidence); - const confidence = clampConfidence( - parsedConfidence ?? heuristicConfidence(query, results, answer) - ); - - const tokensIn = - payload.usage?.prompt_tokens ?? estimateTokens(`${SYSTEM_PROMPT}\n${userPrompt}`); - const tokensOut = payload.usage?.completion_tokens ?? estimateTokens(answer); - - return { - answer, - confidence, - tokens: { - in: tokensIn, - out: tokensOut - }, - model: payload.model ?? model - }; -} + try { + const response = await fetch(endpoint, { + method: "POST", + headers: { + "content-type": "application/json", + authorization: `Bearer ${apiKey}`, + }, + signal: controller.signal, + body: JSON.stringify({ + model, + temperature: options.temperature ?? 0.2, + max_tokens: options.maxTokens ?? 350, + response_format: { type: "json_object" }, + messages: [ + { role: "system", content: SYSTEM_PROMPT }, + { role: "user", content: userPrompt }, + ], + }), + }); + + if (!response.ok) { + let message = "Synthesis upstream request failed."; + try { + const body = (await response.json()) as OpenAIChatResponse; + if (body.error?.message) { + message = body.error.message; + } + } catch { + // ignore parse errors + } + + const code = response.status === 429 ? "rate_limited" : "upstream_error"; + throw new SynthesisError(message, code, response.status); + } -export { clampConfidence, heuristicConfidence, estimateTokens }; + const data = (await response.json()) as OpenAIChatResponse; + const rawContent = getChoiceContent(data.choices?.[0]); + + const parsed = safeJsonParse<{ answer?: string; confidence?: number }>(rawContent); + const answer = (parsed?.answer ?? rawContent).trim(); + const confidence = clamp01( + typeof parsed?.confidence === "number" ? parsed.confidence : heuristicConfidence(results, answer), + ); + + const promptTokens = data.usage?.prompt_tokens ?? estimateTokens(`${SYSTEM_PROMPT}\n${userPrompt}`); + const completionTokens = data.usage?.completion_tokens ?? estimateTokens(answer); + + return { + answer, + confidence, + tokens: { + in: promptTokens, + out: completionTokens, + }, + model: data.model ?? model, + }; + } catch (error) { + if (error instanceof SynthesisError) { + throw error; + } + + if (error instanceof Error && error.name === "AbortError") { + throw new SynthesisError("Synthesis request timed out.", "timeout"); + } -export default { - synthesize -}; \ No newline at end of file + throw new SynthesisError( + error instanceof Error ? error.message : "Unknown synthesis network error.", + "network_error", + ); + } finally { + clearTimeout(timeout); + if (options.signal && externalAbortHandler) { + options.signal.removeEventListener("abort", externalAbortHandler); + } + } +} \ No newline at end of file diff --git a/test/brave.test.ts b/test/brave.test.ts index 02d7449..18a7861 100644 --- a/test/brave.test.ts +++ b/test/brave.test.ts @@ -1,97 +1,77 @@ -import { afterEach, beforeEach, describe, expect, it } from "bun:test"; -import { BraveApiError, BraveRateLimitError, normalizeBraveResponse, searchBrave } from "../src/logic/brave"; +import { afterEach, describe, expect, test } from "bun:test"; +import { BraveClientError, searchBrave } from "../src/logic/brave"; const originalFetch = globalThis.fetch; -const originalApiKey = process.env.BRAVE_API_KEY; -describe("brave.ts", () => { - beforeEach(() => { - process.env.BRAVE_API_KEY = "test-brave-key"; - }); +afterEach(() => { + globalThis.fetch = originalFetch; +}); - afterEach(() => { - (globalThis as { fetch: typeof fetch }).fetch = originalFetch; - if (originalApiKey === undefined) { - delete process.env.BRAVE_API_KEY; - } else { - process.env.BRAVE_API_KEY = originalApiKey; - } - }); +describe("brave.ts", () => { + test("normalises Brave response into SearchResult[] and maps freshness", async () => { + let capturedUrl = ""; - it("normalises results and maps freshness parameter", async () => { - let requestedUrl = ""; + globalThis.fetch = (async (input: RequestInfo | URL) => { + capturedUrl = typeof input === "string" ? input : input.toString(); - (globalThis as { fetch: typeof fetch }).fetch = (async (input: RequestInfo | URL) => { - requestedUrl = String(input); return new Response( JSON.stringify({ web: { results: [ { - title: "Example Result", - url: "https://www.example.com/post", - description: "Useful snippet", - page_age: "2026-03-03T12:00:00.000Z", + title: "Queryx Launch", + url: "https://example.com/post", + description: "Queryx ships fast and clean results.", + page_age: "2026-03-01T12:00:00.000Z", + language: "en", }, { - title: "Invalid Row", + title: "Invalid URL result", url: "not-a-url", - description: "Should be filtered", + description: "This should be filtered out", }, ], }, }), - { status: 200, headers: { "Content-Type": "application/json" } }, + { status: 200, headers: { "content-type": "application/json" } }, ); }) as typeof fetch; - const results = await searchBrave(" test query ", { freshness: "week" }); + const results = await searchBrave(" Queryx ", { + apiKey: "test-key", + freshness: "week", + count: 5, + }); - expect(requestedUrl).toContain("freshness=pw"); - expect(results).toHaveLength(1); - expect(results[0].domain).toBe("example.com"); - expect(results[0].snippet).toBe("Useful snippet"); - expect(results[0].source).toBe("brave"); + expect(capturedUrl.includes("freshness=pw")).toBe(true); + expect(results.length).toBe(1); + expect(results[0]?.title).toBe("Queryx Launch"); + expect(results[0]?.sourceDomain).toBe("example.com"); + expect(results[0]?.publishedAt).toBe("2026-03-01T12:00:00.000Z"); }); - it("throws rate-limit errors with retry-after metadata", async () => { - (globalThis as { fetch: typeof fetch }).fetch = (async () => { - return new Response(JSON.stringify({ error: { message: "Rate limit exceeded" } }), { + test("throws rate limit error with retry-after", async () => { + globalThis.fetch = (async () => { + return new Response(JSON.stringify({ error: { detail: "Rate limit exceeded" } }), { status: 429, - headers: { "Retry-After": "12" }, - }); - }) as typeof fetch; - - let thrown: unknown; - try { - await searchBrave("rate limited"); - } catch (error) { - thrown = error; - } - - expect(thrown).toBeInstanceOf(BraveRateLimitError); - expect((thrown as BraveRateLimitError).retryAfterSeconds).toBe(12); - }); - - it("throws API errors for non-429 failures", async () => { - (globalThis as { fetch: typeof fetch }).fetch = (async () => { - return new Response(JSON.stringify({ message: "Internal server error" }), { - status: 500, + headers: { + "content-type": "application/json", + "retry-after": "12", + }, }); }) as typeof fetch; let thrown: unknown; try { - await searchBrave("server fail"); + await searchBrave("queryx", { apiKey: "test-key" }); } catch (error) { thrown = error; } - expect(thrown).toBeInstanceOf(BraveApiError); - expect((thrown as BraveApiError).status).toBe(500); - }); - - it("normalise helper returns empty array safely", () => { - expect(normalizeBraveResponse({})).toEqual([]); + expect(thrown instanceof BraveClientError).toBe(true); + const err = thrown as BraveClientError; + expect(err.code).toBe("rate_limited"); + expect(err.status).toBe(429); + expect(err.retryAfterSeconds).toBe(12); }); }); \ No newline at end of file diff --git a/test/cache.test.ts b/test/cache.test.ts index 0ff0b47..027b47f 100644 --- a/test/cache.test.ts +++ b/test/cache.test.ts @@ -1,36 +1,30 @@ -import { describe, expect, it } from "bun:test"; -import { createCache } from "../src/logic/cache"; - -function sleep(ms: number): Promise { - return new Promise((resolve) => setTimeout(resolve, ms)); -} +import { describe, expect, test } from "bun:test"; +import { QueryCache } from "../src/logic/cache"; describe("cache.ts", () => { - it("expires entries after TTL", async () => { - const cache = createCache(0.05); + test("expires entries after TTL", async () => { + const cache = new QueryCache({ ttlSeconds: 0.05 }); - cache.set(" Hello World ", "value"); - expect(cache.get("hello world")).toBe("value"); + cache.set(" Queryx API ", "cached"); + expect(cache.get("queryx api")).toBe("cached"); - await sleep(70); - expect(cache.get("hello world")).toBeUndefined(); + await new Promise((resolve) => setTimeout(resolve, 70)); - const s = cache.stats(); - expect(s.hits).toBe(1); - expect(s.misses).toBe(1); + expect(cache.get("queryx api")).toBeUndefined(); }); - it("tracks hit/miss and hit rate", () => { - const cache = createCache(60); + test("tracks hit/miss stats", () => { + const cache = new QueryCache({ ttlSeconds: 30 }); + + cache.set("a", 1); + cache.get("a"); // hit + cache.get("b"); // miss - cache.set("foo", 123); - expect(cache.get("foo")).toBe(123); - expect(cache.get("bar")).toBeUndefined(); + const stats = cache.stats(); - const s = cache.stats(); - expect(s.size).toBe(1); - expect(s.hits).toBe(1); - expect(s.misses).toBe(1); - expect(s.hitRate).toBeCloseTo(0.5, 5); + expect(stats.hits).toBe(1); + expect(stats.misses).toBe(1); + expect(stats.hitRate).toBe(0.5); + expect(stats.size).toBe(1); }); }); \ No newline at end of file diff --git a/test/rank.test.ts b/test/rank.test.ts index 938b5a8..16c90f9 100644 --- a/test/rank.test.ts +++ b/test/rank.test.ts @@ -1,88 +1,67 @@ -import { describe, expect, it } from "bun:test"; +import { describe, expect, test } from "bun:test"; +import type { SearchResult } from "../src/logic/brave"; import { rankResults } from "../src/logic/rank"; -import type { SearchResult } from "../src/logic/types"; describe("rank.ts", () => { - it("deduplicates by canonical URL and caps results per domain", () => { - const now = new Date("2026-03-04T00:00:00.000Z"); + test("deduplicates to max 2 results per domain", () => { const input: SearchResult[] = [ { title: "A1", - url: "https://example.com/post?id=1&utm_source=x", - snippet: "A long, useful snippet with enough context to pass quality thresholds.", - domain: "example.com", + url: "https://a.com/1", + snippet: "Detailed explanation about queryx ranking system and internals.", + sourceDomain: "a.com", publishedAt: "2026-03-03T00:00:00.000Z", }, - { - title: "A1 duplicate canonical", - url: "https://example.com/post?id=1&utm_source=y", - snippet: "Duplicate URL once utm params are removed.", - domain: "example.com", - publishedAt: "2026-03-02T00:00:00.000Z", - }, { title: "A2", - url: "https://example.com/post-2", - snippet: "Another strong snippet that should be retained.", - domain: "example.com", - publishedAt: "2026-03-01T00:00:00.000Z", + url: "https://a.com/2", + snippet: "Another detailed explanation about queryx ranking and scoring.", + sourceDomain: "a.com", + publishedAt: "2026-03-02T00:00:00.000Z", }, { title: "A3", - url: "https://example.com/post-3", - snippet: "Would exceed the per-domain cap.", - domain: "example.com", - publishedAt: "2026-02-28T00:00:00.000Z", + url: "https://a.com/3", + snippet: "Third article on same domain with enough content to be valid.", + sourceDomain: "a.com", + publishedAt: "2026-03-01T00:00:00.000Z", }, { title: "B1", - url: "https://other.net/news", - snippet: "Different domain with quality content and unique perspective.", - domain: "other.net", - publishedAt: "2026-03-02T00:00:00.000Z", + url: "https://b.com/1", + snippet: "Strong alternative source from another domain with good details.", + sourceDomain: "b.com", + publishedAt: "2026-03-01T00:00:00.000Z", }, ]; - const ranked = rankResults(input, { - now, - maxPerDomain: 2, - minQualityScore: 0.1, - }); - - const uniqueUrls = new Set(ranked.map((r) => r.url)); - const fromExample = ranked.filter((r) => r.domain === "example.com"); + const ranked = rankResults(input, { maxPerDomain: 2, limit: 10, now: new Date("2026-03-04T00:00:00.000Z") }); + const aCount = ranked.filter((r) => r.sourceDomain === "a.com").length; - expect(uniqueUrls.size).toBe(ranked.length); - expect(fromExample.length).toBeLessThanOrEqual(2); + expect(aCount).toBe(2); }); - it("boosts newer results when quality is similar", () => { - const now = new Date("2026-03-04T00:00:00.000Z"); - + test("applies recency boost", () => { const input: SearchResult[] = [ { - title: "Fresh result", - url: "https://fresh.io/a", - snippet: "Detailed and high-quality context for the same topic.", - domain: "fresh.io", - publishedAt: "2026-03-03T00:00:00.000Z", + title: "Older source", + url: "https://old.com/post", + snippet: "Comprehensive but old content about queryx architecture and usage.", + sourceDomain: "old.com", + publishedAt: "2020-01-01T00:00:00.000Z", }, { - title: "Old result", - url: "https://archive.io/b", - snippet: "Detailed and high-quality context for the same topic.", - domain: "archive.io", - publishedAt: "2024-01-01T00:00:00.000Z", + title: "Recent source", + url: "https://new.com/post", + snippet: "Comprehensive and recent content about queryx architecture and usage.", + sourceDomain: "new.com", + publishedAt: "2026-03-03T00:00:00.000Z", }, ]; - const ranked = rankResults(input, { - now, - maxPerDomain: 2, - minQualityScore: 0.1, - }); + const ranked = rankResults(input, { now: new Date("2026-03-04T00:00:00.000Z") }); - expect(ranked[0].title).toBe("Fresh result"); - expect(ranked[0].recencyScore).toBeGreaterThan(ranked[1].recencyScore); + expect(ranked[0]?.sourceDomain).toBe("new.com"); + expect(ranked[0]?.score > ranked[1]?.score).toBe(true); }); }); \ No newline at end of file diff --git a/test/synth.test.ts b/test/synth.test.ts index 84087fb..418e6fb 100644 --- a/test/synth.test.ts +++ b/test/synth.test.ts @@ -1,75 +1,59 @@ -import { afterEach, beforeEach, describe, expect, it } from "bun:test"; +import { afterEach, describe, expect, test } from "bun:test"; +import type { SearchResult } from "../src/logic/brave"; import { synthesize } from "../src/logic/synth"; -import type { SearchResult } from "../src/logic/types"; const originalFetch = globalThis.fetch; -const originalApiKey = process.env.OPENAI_API_KEY; const sampleResults: SearchResult[] = [ { - title: "Bun 1.2 Released", - url: "https://example.com/bun-release", - snippet: "Bun ships speed improvements and runtime updates.", - domain: "example.com", + title: "Queryx documentation", + url: "https://docs.example.com/queryx", + snippet: "Queryx is a search API with ranking, synthesis, and cache support.", + sourceDomain: "docs.example.com", publishedAt: "2026-03-01T00:00:00.000Z", - }, - { - title: "Runtime Benchmarks", - url: "https://another.com/bench", - snippet: "Independent benchmark data for modern runtimes.", - domain: "another.com", - publishedAt: "2026-02-28T00:00:00.000Z", + language: "en", }, ]; -describe("synth.ts", () => { - beforeEach(() => { - process.env.OPENAI_API_KEY = "test-openai-key"; - }); +afterEach(() => { + globalThis.fetch = originalFetch; +}); - afterEach(() => { - (globalThis as { fetch: typeof fetch }).fetch = originalFetch; - if (originalApiKey === undefined) { - delete process.env.OPENAI_API_KEY; - } else { - process.env.OPENAI_API_KEY = originalApiKey; - } - }); - - it("clamps confidence and uses returned token usage", async () => { - (globalThis as { fetch: typeof fetch }).fetch = (async () => { +describe("synth.ts", () => { + test("clamps confidence and returns token usage", async () => { + globalThis.fetch = (async () => { return new Response( JSON.stringify({ - model: "gpt-4o-mini", + model: "gpt-4o-mini-2026", + usage: { + prompt_tokens: 120, + completion_tokens: 45, + }, choices: [ { message: { content: JSON.stringify({ - answer: "Bun delivers runtime and tooling improvements.", - confidence: 1.8, + answer: "Queryx combines retrieval and synthesis for concise responses.", + confidence: 1.7, }), }, }, ], - usage: { - prompt_tokens: 120, - completion_tokens: 25, - }, }), - { status: 200, headers: { "Content-Type": "application/json" } }, + { status: 200, headers: { "content-type": "application/json" } }, ); }) as typeof fetch; - const output = await synthesize("What changed in Bun?", sampleResults); + const result = await synthesize("What is Queryx?", sampleResults, { apiKey: "test-key" }); - expect(output.answer).toContain("Bun"); - expect(output.confidence).toBe(1); - expect(output.tokens).toEqual({ in: 120, out: 25 }); - expect(output.model).toBe("gpt-4o-mini"); + expect(result.confidence).toBe(1); + expect(result.tokens.in).toBe(120); + expect(result.tokens.out).toBe(45); + expect(result.model).toBe("gpt-4o-mini-2026"); }); - it("estimates tokens when usage is missing and keeps confidence in range", async () => { - (globalThis as { fetch: typeof fetch }).fetch = (async () => { + test("estimates token usage when upstream usage is missing", async () => { + globalThis.fetch = (async () => { return new Response( JSON.stringify({ model: "gpt-4o-mini", @@ -77,21 +61,21 @@ describe("synth.ts", () => { { message: { content: JSON.stringify({ - answer: "Evidence indicates improved runtime performance and DX updates.", + answer: "There is not enough information to fully answer this.", + confidence: -0.5, }), }, }, ], }), - { status: 200, headers: { "Content-Type": "application/json" } }, + { status: 200, headers: { "content-type": "application/json" } }, ); }) as typeof fetch; - const output = await synthesize("Summarize the updates", sampleResults); + const result = await synthesize("unknown", sampleResults, { apiKey: "test-key" }); - expect(output.tokens.in).toBeGreaterThan(0); - expect(output.tokens.out).toBeGreaterThan(0); - expect(output.confidence).toBeGreaterThanOrEqual(0); - expect(output.confidence).toBeLessThanOrEqual(1); + expect(result.confidence).toBe(0); + expect(result.tokens.in > 0).toBe(true); + expect(result.tokens.out > 0).toBe(true); }); }); \ No newline at end of file From 53b394e8a2ba8d04314a3ad3a31580b1769b4e50 Mon Sep 17 00:00:00 2001 From: Jackson Date: Wed, 4 Mar 2026 16:53:19 +0000 Subject: [PATCH 12/13] feat: implement task 0x9a74dfdbbaaf87c2d7 via TaskMarket agent #24583 --- src/logic/brave.ts | 316 ++++++++++++++++++++------------------ src/logic/cache.ts | 113 +++++++------- src/logic/rank.ts | 231 +++++++++++++++++++--------- src/logic/synth.ts | 366 +++++++++++++++++++++++++-------------------- 4 files changed, 587 insertions(+), 439 deletions(-) diff --git a/src/logic/brave.ts b/src/logic/brave.ts index f2bd59a..15b44d8 100644 --- a/src/logic/brave.ts +++ b/src/logic/brave.ts @@ -4,258 +4,278 @@ export interface SearchResult { title: string; url: string; snippet: string; - sourceDomain: string; - publishedAt?: string; - language?: string; + source: string; + publishedDate?: string; + age?: string; + score?: number; } export interface BraveSearchOptions { - apiKey?: string; + freshness?: Freshness; count?: number; offset?: number; - freshness?: Freshness; country?: string; - searchLang?: string; - safeSearch?: "off" | "moderate" | "strict"; - timeoutMs?: number; + language?: string; + safeSearch?: "strict" | "moderate" | "off"; signal?: AbortSignal; - now?: Date; } interface BraveWebResult { title?: string; url?: string; description?: string; + extra_snippets?: string[]; age?: string; - page_age?: string; - language?: string; - profile?: { - name?: string; - }; + page_age?: string | { age?: string; last_modified?: string }; + score?: number; } -interface BraveResponse { - web?: { - results?: BraveWebResult[]; - }; - error?: { - detail?: string; - message?: string; - }; +export class BraveApiError extends Error { + public readonly status: number; + public readonly details?: string; + + constructor(message: string, status: number, details?: string) { + super(message); + this.name = "BraveApiError"; + this.status = status; + this.details = details; + } } -export class BraveClientError extends Error { - public readonly code: string; - public readonly status?: number; +export class BraveRateLimitError extends BraveApiError { public readonly retryAfterSeconds?: number; - constructor(message: string, code: string, status?: number, retryAfterSeconds?: number) { - super(message); - this.name = "BraveClientError"; - this.code = code; - this.status = status; + constructor(message: string, retryAfterSeconds?: number, details?: string) { + super(message, 429, details); + this.name = "BraveRateLimitError"; this.retryAfterSeconds = retryAfterSeconds; } } -const BRAVE_SEARCH_ENDPOINT = "https://api.search.brave.com/res/v1/web/search"; - const FRESHNESS_MAP: Record = { day: "pd", week: "pw", month: "pm", }; -function normalizeDomain(url: string): string { - try { - const hostname = new URL(url).hostname.toLowerCase(); - return hostname.startsWith("www.") ? hostname.slice(4) : hostname; - } catch { - return ""; +function clamp01(value: number): number { + if (Number.isNaN(value)) { + return 0; } + return Math.max(0, Math.min(1, value)); } -function parseRelativeAge(age: string, now: Date): string | undefined { - const match = age.trim().toLowerCase().match(/^(\d+)\s+(minute|hour|day|week|month|year)s?\s+ago$/); - if (!match) { +function normalizeScore(value: unknown): number | undefined { + if (typeof value !== "number" || !Number.isFinite(value)) { return undefined; } + if (value >= 0 && value <= 1) { + return clamp01(value); + } + if (value > 1 && value <= 100) { + return clamp01(value / 100); + } + return clamp01(value); +} - const value = Number(match[1]); - const unit = match[2]; +function parseRetryAfter(raw: string | null): number | undefined { + if (!raw) { + return undefined; + } - const msPerUnit: Record = { - minute: 60_000, - hour: 3_600_000, - day: 86_400_000, - week: 604_800_000, - month: 2_592_000_000, - year: 31_536_000_000, - }; + const asInt = Number.parseInt(raw, 10); + if (Number.isFinite(asInt) && asInt >= 0) { + return asInt; + } - const ms = msPerUnit[unit]; - if (!ms) { + const dateMs = Date.parse(raw); + if (Number.isNaN(dateMs)) { return undefined; } - return new Date(now.getTime() - value * ms).toISOString(); + const seconds = Math.ceil((dateMs - Date.now()) / 1000); + return seconds > 0 ? seconds : 0; } -function parsePublishedAt(item: BraveWebResult, now: Date): string | undefined { - if (item.page_age) { - const d = new Date(item.page_age); - if (!Number.isNaN(d.getTime())) { - return d.toISOString(); - } +function extractDomain(rawUrl: string): string { + try { + return new URL(rawUrl).hostname.toLowerCase().replace(/^www\./, ""); + } catch { + return "unknown"; } +} - if (item.age) { - const d = new Date(item.age); - if (!Number.isNaN(d.getTime())) { - return d.toISOString(); +function normalizePublishedDate(pageAge: BraveWebResult["page_age"]): string | undefined { + if (!pageAge) { + return undefined; + } + + if (typeof pageAge === "object" && typeof pageAge.last_modified === "string") { + return pageAge.last_modified; + } + + if (typeof pageAge === "string") { + const parsed = Date.parse(pageAge); + if (!Number.isNaN(parsed)) { + return new Date(parsed).toISOString(); } - return parseRelativeAge(item.age, now); } return undefined; } -function normalizeResult(item: BraveWebResult, now: Date): SearchResult | null { - const title = item.title?.trim() ?? ""; - const url = item.url?.trim() ?? ""; - const snippet = item.description?.trim() ?? ""; +function normalizeAge(raw: BraveWebResult): string | undefined { + if (typeof raw.age === "string" && raw.age.trim() !== "") { + return raw.age; + } + if (typeof raw.page_age === "object" && typeof raw.page_age.age === "string") { + return raw.page_age.age; + } + return undefined; +} + +function normalizeSnippet(raw: BraveWebResult): string { + const chunks: string[] = []; + if (typeof raw.description === "string" && raw.description.trim() !== "") { + chunks.push(raw.description.trim()); + } + if (Array.isArray(raw.extra_snippets)) { + for (const snippet of raw.extra_snippets) { + if (typeof snippet === "string" && snippet.trim() !== "") { + chunks.push(snippet.trim()); + } + } + } + return chunks.join(" ").replace(/\s+/g, " ").trim(); +} - if (!title || !url || !snippet) { +function normalizeBraveResult(raw: BraveWebResult): SearchResult | null { + if (typeof raw.url !== "string" || raw.url.trim() === "") { return null; } - const sourceDomain = normalizeDomain(url); - if (!sourceDomain) { + const title = typeof raw.title === "string" ? raw.title.trim() : ""; + const snippet = normalizeSnippet(raw); + + if (title === "" && snippet === "") { return null; } return { title, - url, + url: raw.url, snippet, - sourceDomain, - publishedAt: parsePublishedAt(item, now), - language: item.language, + source: extractDomain(raw.url), + publishedDate: normalizePublishedDate(raw.page_age), + age: normalizeAge(raw), + score: normalizeScore(raw.score), }; } -async function safeReadErrorMessage(response: Response): Promise { +function buildRequestUrl(endpoint: string, params: URLSearchParams): string { + const separator = endpoint.includes("?") ? "&" : "?"; + return `${endpoint}${separator}${params.toString()}`; +} + +async function safeReadText(response: Response): Promise { try { const text = await response.text(); - if (!text) { - return undefined; - } - - try { - const parsed = JSON.parse(text) as BraveResponse; - return parsed.error?.detail ?? parsed.error?.message ?? text; - } catch { - return text; - } + return text.trim() === "" ? undefined : text; } catch { return undefined; } } export async function searchBrave(query: string, options: BraveSearchOptions = {}): Promise { - const trimmedQuery = query.trim(); - if (!trimmedQuery) { + const normalizedQuery = query.trim(); + if (normalizedQuery === "") { return []; } - const apiKey = options.apiKey ?? process.env.BRAVE_API_KEY; + const endpoint = process.env.BRAVE_API_URL; + if (!endpoint) { + throw new Error("BRAVE_API_URL is not configured."); + } + + const apiKey = process.env.BRAVE_API_KEY; if (!apiKey) { - throw new BraveClientError("Missing BRAVE_API_KEY.", "missing_api_key"); + throw new Error("BRAVE_API_KEY is not configured."); + } + + const params = new URLSearchParams(); + params.set("q", normalizedQuery); + + if (typeof options.count === "number" && Number.isFinite(options.count)) { + params.set("count", String(Math.max(1, Math.floor(options.count)))); } - const params = new URLSearchParams({ - q: trimmedQuery, - count: String(options.count ?? 10), - offset: String(options.offset ?? 0), - country: options.country ?? "US", - search_lang: options.searchLang ?? "en", - safesearch: options.safeSearch ?? "moderate", - }); + if (typeof options.offset === "number" && Number.isFinite(options.offset)) { + params.set("offset", String(Math.max(0, Math.floor(options.offset)))); + } if (options.freshness) { params.set("freshness", FRESHNESS_MAP[options.freshness]); } - const endpoint = `${BRAVE_SEARCH_ENDPOINT}?${params.toString()}`; - const timeoutMs = options.timeoutMs ?? 10_000; - const now = options.now ?? new Date(); + if (options.country) { + params.set("country", options.country); + } - const controller = new AbortController(); - const timeout = setTimeout(() => controller.abort("timeout"), timeoutMs); + if (options.language) { + params.set("search_lang", options.language); + } - let externalAbortHandler: (() => void) | undefined; - if (options.signal) { - externalAbortHandler = () => controller.abort(options.signal?.reason ?? "aborted"); - if (options.signal.aborted) { - externalAbortHandler(); - } else { - options.signal.addEventListener("abort", externalAbortHandler, { once: true }); - } + if (options.safeSearch) { + params.set("safesearch", options.safeSearch); } + const requestUrl = buildRequestUrl(endpoint, params); + try { - const response = await fetch(endpoint, { + const response = await fetch(requestUrl, { method: "GET", + signal: options.signal, headers: { Accept: "application/json", "X-Subscription-Token": apiKey, }, - signal: controller.signal, }); - if (!response.ok) { - const errorText = await safeReadErrorMessage(response); - if (response.status === 429) { - const retryAfterRaw = response.headers.get("retry-after"); - const retryAfterSeconds = retryAfterRaw ? Number(retryAfterRaw) : undefined; - throw new BraveClientError( - errorText ?? "Brave API rate limit exceeded.", - "rate_limited", - 429, - Number.isFinite(retryAfterSeconds) ? retryAfterSeconds : undefined, - ); - } + if (response.status === 429) { + const details = await safeReadText(response); + const retryAfterSeconds = parseRetryAfter(response.headers.get("retry-after")); + throw new BraveRateLimitError("Brave API rate limit exceeded.", retryAfterSeconds, details); + } - if (response.status === 401 || response.status === 403) { - throw new BraveClientError(errorText ?? "Brave API authentication failed.", "auth_error", response.status); - } + if (!response.ok) { + const details = await safeReadText(response); + throw new BraveApiError(`Brave API request failed with status ${response.status}.`, response.status, details); + } - throw new BraveClientError(errorText ?? "Brave API request failed.", "upstream_error", response.status); + let payload: unknown; + try { + payload = await response.json(); + } catch { + throw new BraveApiError("Brave API returned invalid JSON.", response.status); } - const body = (await response.json()) as BraveResponse; - const rawResults = body.web?.results ?? []; + const rawResults = (payload as { web?: { results?: BraveWebResult[] } })?.web?.results; + if (!Array.isArray(rawResults)) { + return []; + } return rawResults - .map((item) => normalizeResult(item, now)) + .map((item) => normalizeBraveResult(item)) .filter((item): item is SearchResult => item !== null); } catch (error) { - if (error instanceof BraveClientError) { + if (error instanceof BraveApiError) { throw error; } - if (error instanceof Error && error.name === "AbortError") { - throw new BraveClientError("Brave request timed out.", "timeout"); - } - - throw new BraveClientError( - error instanceof Error ? error.message : "Unknown network error while calling Brave API.", - "network_error", - ); - } finally { - clearTimeout(timeout); - if (options.signal && externalAbortHandler) { - options.signal.removeEventListener("abort", externalAbortHandler); - } + const message = error instanceof Error ? error.message : "Unknown fetch error"; + throw new BraveApiError(`Brave API network error: ${message}`, 0); } -} \ No newline at end of file +} + +export const braveSearch = searchBrave; +export default searchBrave; \ No newline at end of file diff --git a/src/logic/cache.ts b/src/logic/cache.ts index 62541e7..2095468 100644 --- a/src/logic/cache.ts +++ b/src/logic/cache.ts @@ -5,6 +5,7 @@ export interface CacheStats { misses: number; hitRate: number; size: number; + ttlSeconds: number; } interface CacheEntry { @@ -12,72 +13,61 @@ interface CacheEntry { expiresAt: number; } -export interface QueryCacheOptions { - ttlSeconds?: number; - now?: () => number; -} - -function getDefaultTtlSeconds(): number { - const raw = process.env.CACHE_TTL_SECONDS ?? "300"; - const parsed = Number(raw); +function parseDefaultTtlSeconds(): number { + const raw = process.env.CACHE_TTL_SECONDS; + const parsed = raw ? Number(raw) : 300; if (!Number.isFinite(parsed) || parsed <= 0) { return 300; } return parsed; } -export class QueryCache { - private readonly store = new Map>(); - private readonly now: () => number; - private readonly defaultTtlMs: number; +export function normalizeQuery(query: string): string { + return query.normalize("NFKC").trim().toLowerCase().replace(/\s+/g, " "); +} + +export function normalizedQueryHash(query: string): string { + const normalized = normalizeQuery(query); + return createHash("sha256").update(normalized).digest("hex"); +} +export class InMemoryCache { + private readonly entries = new Map>(); + private readonly now: () => number; + private readonly defaultTtlSeconds: number; private hits = 0; private misses = 0; - constructor(options: QueryCacheOptions = {}) { - this.now = options.now ?? Date.now; - this.defaultTtlMs = Math.max(1, (options.ttlSeconds ?? getDefaultTtlSeconds()) * 1000); + constructor(ttlSeconds: number = parseDefaultTtlSeconds(), now: () => number = () => Date.now()) { + this.defaultTtlSeconds = Number.isFinite(ttlSeconds) && ttlSeconds > 0 ? ttlSeconds : 300; + this.now = now; } - static normalizeQuery(query: string): string { - return query.trim().toLowerCase().replace(/\s+/g, " "); - } - - static hashQuery(query: string): string { - return createHash("sha256").update(QueryCache.normalizeQuery(query)).digest("hex"); - } - - public keyForQuery(query: string): string { - return QueryCache.hashQuery(query); - } - - public set(query: string, value: T, ttlSeconds?: number): string { - const key = this.keyForQuery(query); - return this.setByKey(key, value, ttlSeconds); + private purgeExpired(): void { + const now = this.now(); + for (const [key, entry] of this.entries) { + if (entry.expiresAt <= now) { + this.entries.delete(key); + } + } } - public setByKey(key: string, value: T, ttlSeconds?: number): string { - const ttlMs = Math.max(1, (ttlSeconds ?? this.defaultTtlMs / 1000) * 1000); - this.store.set(key, { - value, - expiresAt: this.now() + ttlMs, - }); - return key; + private keyForQuery(query: string): string { + return normalizedQueryHash(query); } public get(query: string): T | undefined { - return this.getByKey(this.keyForQuery(query)); - } + this.purgeExpired(); + const key = this.keyForQuery(query); + const entry = this.entries.get(key); - public getByKey(key: string): T | undefined { - const entry = this.store.get(key); if (!entry) { this.misses += 1; return undefined; } if (entry.expiresAt <= this.now()) { - this.store.delete(key); + this.entries.delete(key); this.misses += 1; return undefined; } @@ -86,37 +76,34 @@ export class QueryCache { return entry.value; } - public delete(query: string): boolean { - return this.store.delete(this.keyForQuery(query)); - } - - public clear(): void { - this.store.clear(); - this.hits = 0; - this.misses = 0; - } + public set(query: string, value: T, ttlSeconds?: number): void { + this.purgeExpired(); + const key = this.keyForQuery(query); + const effectiveTtl = Number.isFinite(ttlSeconds) && (ttlSeconds as number) > 0 ? (ttlSeconds as number) : this.defaultTtlSeconds; + const expiresAt = this.now() + effectiveTtl * 1000; - public cleanup(): void { - const now = this.now(); - for (const [key, entry] of this.store.entries()) { - if (entry.expiresAt <= now) { - this.store.delete(key); - } - } + this.entries.set(key, { value, expiresAt }); } public stats(): CacheStats { - this.cleanup(); + this.purgeExpired(); const total = this.hits + this.misses; + return { hits: this.hits, misses: this.misses, hitRate: total === 0 ? 0 : this.hits / total, - size: this.store.size, + size: this.entries.size, + ttlSeconds: this.defaultTtlSeconds, }; } + + public clear(): void { + this.entries.clear(); + this.hits = 0; + this.misses = 0; + } } -export const cache = new QueryCache(); -export const normalizeQuery = QueryCache.normalizeQuery; -export const hashQuery = QueryCache.hashQuery; \ No newline at end of file +export const queryCache = new InMemoryCache(); +export default queryCache; \ No newline at end of file diff --git a/src/logic/rank.ts b/src/logic/rank.ts index 24b48b3..ba5a3e5 100644 --- a/src/logic/rank.ts +++ b/src/logic/rank.ts @@ -1,116 +1,213 @@ import type { SearchResult } from "./brave"; export interface RankedSearchResult extends SearchResult { - qualityScore: number; - recencyScore: number; - score: number; + domain: string; + rankScore: number; + canonicalUrl: string; } export interface RankOptions { - now?: Date; maxPerDomain?: number; limit?: number; - minQuality?: number; - recencyHalfLifeDays?: number; + now?: Date; +} + +const TRACKING_PARAM_NAMES = new Set(["fbclid", "gclid", "mc_cid", "mc_eid", "igshid", "ref", "ref_src"]); +const LOW_QUALITY_DOMAIN_FRAGMENTS = ["pinterest.", "quora.", "reddit."]; + +function clamp01(value: number): number { + if (!Number.isFinite(value)) { + return 0; + } + return Math.max(0, Math.min(1, value)); } -function normalizeDomain(url: string): string { +function extractDomain(rawUrl: string): string { try { - const hostname = new URL(url).hostname.toLowerCase(); - return hostname.startsWith("www.") ? hostname.slice(4) : hostname; + return new URL(rawUrl).hostname.toLowerCase().replace(/^www\./, ""); } catch { - return ""; + return "unknown"; } } -function qualityScore(result: SearchResult): number { - const titleLen = result.title.trim().length; - const snippetLen = result.snippet.trim().length; - const hasValidUrl = /^https?:\/\//i.test(result.url.trim()); +function canonicalizeUrl(rawUrl: string): string { + try { + const url = new URL(rawUrl); + url.hash = ""; - if (!titleLen || !snippetLen || !hasValidUrl) { - return 0; + for (const key of [...url.searchParams.keys()]) { + if (key.startsWith("utm_") || TRACKING_PARAM_NAMES.has(key)) { + url.searchParams.delete(key); + } + } + + if (url.pathname.length > 1) { + url.pathname = url.pathname.replace(/\/+$/, ""); + } + + const normalized = url.toString(); + return normalized.endsWith("/") ? normalized.slice(0, -1) : normalized; + } catch { + return rawUrl.trim().toLowerCase(); } +} - const titleComponent = Math.min(titleLen / 80, 1) * 0.35; - const snippetComponent = Math.min(snippetLen / 240, 1) * 0.45; - const urlComponent = 0.2; +function parseRelativeAgeToDays(age: string): number | undefined { + const match = age.toLowerCase().match(/(\d+)\s*(minute|hour|day|week|month|year)/); + if (!match) { + return undefined; + } - let score = titleComponent + snippetComponent + urlComponent; + const value = Number.parseInt(match[1], 10); + if (!Number.isFinite(value) || value < 0) { + return undefined; + } - if (snippetLen < 40) { - score *= 0.6; + const unit = match[2]; + switch (unit) { + case "minute": + return value / (24 * 60); + case "hour": + return value / 24; + case "day": + return value; + case "week": + return value * 7; + case "month": + return value * 30; + case "year": + return value * 365; + default: + return undefined; } +} - if (/\/(tag|category|author)\//i.test(result.url)) { - score *= 0.85; +function recencyBoost(result: SearchResult, nowMs: number): number { + if (result.publishedDate) { + const publishedMs = Date.parse(result.publishedDate); + if (!Number.isNaN(publishedMs) && publishedMs <= nowMs) { + const ageDays = (nowMs - publishedMs) / (1000 * 60 * 60 * 24); + if (ageDays <= 1) return 0.25; + if (ageDays <= 7) return 0.2; + if (ageDays <= 30) return 0.12; + if (ageDays <= 180) return 0.06; + if (ageDays <= 365) return 0.03; + return 0; + } + } + + if (result.age) { + const ageDays = parseRelativeAgeToDays(result.age); + if (typeof ageDays === "number") { + if (ageDays <= 1) return 0.22; + if (ageDays <= 7) return 0.18; + if (ageDays <= 30) return 0.1; + if (ageDays <= 180) return 0.05; + } } - return Math.max(0, Math.min(1, score)); + return 0; } -function recencyScore(publishedAt: string | undefined, now: Date, halfLifeDays: number): number { - if (!publishedAt) { - return 0.2; +function qualityScore(result: SearchResult): number { + const title = result.title.trim(); + const snippet = result.snippet.trim(); + const domain = extractDomain(result.url); + + let score = 0; + + if (title.length >= 12) score += 0.3; + else if (title.length >= 6) score += 0.16; + + if (snippet.length >= 60) score += 0.34; + else if (snippet.length >= 25) score += 0.2; + + if (/^https:\/\//i.test(result.url)) score += 0.1; + + const isLowQualityDomain = LOW_QUALITY_DOMAIN_FRAGMENTS.some((fragment) => domain.includes(fragment)); + if (!isLowQualityDomain) score += 0.12; + else score -= 0.2; + + const snippetWords = snippet.split(/\s+/).filter(Boolean).length; + if (snippetWords >= 10) score += 0.1; + + if (typeof result.score === "number") { + score += clamp01(result.score) * 0.14; } - const ts = new Date(publishedAt).getTime(); - if (Number.isNaN(ts)) { - return 0.2; + return clamp01(score); +} + +function isLowQuality(result: SearchResult): boolean { + const quality = qualityScore(result); + if (quality < 0.35) { + return true; } - const ageMs = Math.max(0, now.getTime() - ts); - const ageDays = ageMs / 86_400_000; - const lambda = Math.log(2) / Math.max(1, halfLifeDays); + const lowerUrl = result.url.toLowerCase(); + if (lowerUrl.includes("/search?") || lowerUrl.includes("/tag/") || lowerUrl.includes("/login")) { + return true; + } - return Math.exp(-lambda * ageDays); + return false; } -function scoreResult(result: SearchResult, now: Date, halfLifeDays: number): RankedSearchResult { - const q = qualityScore(result); - const r = recencyScore(result.publishedAt, now, halfLifeDays); - const score = q * 0.7 + r * 0.3; - - return { - ...result, - sourceDomain: result.sourceDomain || normalizeDomain(result.url), - qualityScore: q, - recencyScore: r, - score, - }; +function computeRankScore(result: SearchResult, nowMs: number): number { + const quality = qualityScore(result); + const recency = recencyBoost(result, nowMs); + return clamp01(quality + recency); } -export function rankResults(results: SearchResult[], options: RankOptions = {}): RankedSearchResult[] { - const now = options.now ?? new Date(); +export function rankAndDedup(results: SearchResult[], options: RankOptions = {}): RankedSearchResult[] { + const nowMs = (options.now ?? new Date()).getTime(); const maxPerDomain = options.maxPerDomain ?? 2; - const limit = options.limit ?? 10; - const minQuality = options.minQuality ?? 0.3; - const halfLifeDays = options.recencyHalfLifeDays ?? 14; + const limit = options.limit ?? Number.MAX_SAFE_INTEGER; - const scored = results - .map((r) => scoreResult(r, now, halfLifeDays)) - .filter((r) => r.qualityScore >= minQuality && Boolean(r.sourceDomain)); + const dedupMap = new Map(); - scored.sort((a, b) => b.score - a.score); + for (const result of results) { + if (isLowQuality(result)) { + continue; + } - const domainCounts = new Map(); - const ranked: RankedSearchResult[] = []; + const canonicalUrl = canonicalizeUrl(result.url); + const domain = extractDomain(result.url); + const rankScore = computeRankScore(result, nowMs); - for (const result of scored) { - const domain = result.sourceDomain; - const count = domainCounts.get(domain) ?? 0; + const ranked: RankedSearchResult = { + ...result, + canonicalUrl, + domain, + rankScore, + }; + + const existing = dedupMap.get(canonicalUrl); + if (!existing || ranked.rankScore > existing.rankScore) { + dedupMap.set(canonicalUrl, ranked); + } + } - if (count >= maxPerDomain) { + const sorted = [...dedupMap.values()].sort((a, b) => b.rankScore - a.rankScore); + + const output: RankedSearchResult[] = []; + const domainCounts = new Map(); + + for (const item of sorted) { + const currentCount = domainCounts.get(item.domain) ?? 0; + if (currentCount >= maxPerDomain) { continue; } - domainCounts.set(domain, count + 1); - ranked.push(result); + output.push(item); + domainCounts.set(item.domain, currentCount + 1); - if (ranked.length >= limit) { + if (output.length >= limit) { break; } } - return ranked; -} \ No newline at end of file + return output; +} + +export const rankResults = rankAndDedup; +export default rankAndDedup; \ No newline at end of file diff --git a/src/logic/synth.ts b/src/logic/synth.ts index 19c8c6a..9f9fbdc 100644 --- a/src/logic/synth.ts +++ b/src/logic/synth.ts @@ -1,6 +1,6 @@ import type { SearchResult } from "./brave"; -export interface SynthTokens { +export interface SynthesisTokens { in: number; out: number; } @@ -8,230 +8,274 @@ export interface SynthTokens { export interface SynthesisResult { answer: string; confidence: number; - tokens: SynthTokens; + tokens: SynthesisTokens; model: string; } -export interface SynthOptions { - apiKey?: string; +export interface SynthesisOptions { model?: string; temperature?: number; - maxTokens?: number; - timeoutMs?: number; + maxOutputTokens?: number; signal?: AbortSignal; } -interface OpenAIUsage { - prompt_tokens?: number; - completion_tokens?: number; -} - -interface OpenAIChoice { - message?: { - content?: string | Array<{ type?: string; text?: string }>; - }; -} - -interface OpenAIChatResponse { - model?: string; - usage?: OpenAIUsage; - choices?: OpenAIChoice[]; - error?: { - message?: string; - }; -} - export class SynthesisError extends Error { - public readonly code: string; public readonly status?: number; + public readonly details?: string; - constructor(message: string, code: string, status?: number) { + constructor(message: string, status?: number, details?: string) { super(message); this.name = "SynthesisError"; - this.code = code; this.status = status; + this.details = details; } } const DEFAULT_MODEL = "gpt-4o-mini"; -const OPENAI_CHAT_COMPLETIONS_URL = "https://api.openai.com/v1/chat/completions"; const SYSTEM_PROMPT = - "You are Queryx synthesis engine. Return strict JSON only with keys: answer (string), confidence (number 0..1). " + - "Answer must be concise for machine consumers, grounded in provided sources, and avoid speculation."; + "You are a synthesis engine for downstream agents. Return only JSON with shape " + + '{"answer":"string","confidence":0..1}. ' + + "Use concise factual prose, include caveats only when required, and avoid markdown."; function clamp01(value: number): number { if (!Number.isFinite(value)) { return 0; } - if (value < 0) { + return Math.max(0, Math.min(1, value)); +} + +function estimateTokens(text: string): number { + const normalized = text.trim(); + if (normalized === "") { return 0; } - if (value > 1) { - return 1; - } - return value; + return Math.max(1, Math.ceil(normalized.length / 4)); +} + +function numberOrUndefined(value: unknown): number | undefined { + return typeof value === "number" && Number.isFinite(value) ? value : undefined; } -function estimateTokens(input: string): number { - return Math.max(1, Math.ceil(input.length / 4)); +function buildSourcesPayload(results: SearchResult[]): Array> { + return results.slice(0, 10).map((item) => ({ + title: item.title, + url: item.url, + snippet: item.snippet, + source: item.source, + publishedDate: item.publishedDate ?? "", + })); } -function getChoiceContent(choice: OpenAIChoice | undefined): string { - const content = choice?.message?.content; - if (typeof content === "string") { - return content.trim(); +function buildUserPrompt(query: string, results: SearchResult[]): string { + const payload = { + query, + sources: buildSourcesPayload(results), + instructions: [ + "Synthesize only from provided sources.", + "If evidence is weak, lower confidence.", + "Avoid hedging unless uncertainty is material.", + ], + }; + + return JSON.stringify(payload); +} + +function extractJsonObject(text: string): Record | null { + const trimmed = text.trim(); + const fencedMatch = trimmed.match(/```(?:json)?\s*([\s\S]*?)```/i); + const candidate = (fencedMatch?.[1] ?? trimmed).trim(); + const start = candidate.indexOf("{"); + const end = candidate.lastIndexOf("}"); + + if (start < 0 || end <= start) { + return null; } - if (Array.isArray(content)) { - return content - .map((part) => part.text ?? "") - .join("") - .trim(); + const jsonCandidate = candidate.slice(start, end + 1); + try { + const parsed = JSON.parse(jsonCandidate); + if (parsed && typeof parsed === "object") { + return parsed as Record; + } + return null; + } catch { + return null; + } +} + +function getAssistantText(payload: Record): string { + if (typeof payload.output_text === "string") { + return payload.output_text; + } + + const choices = payload.choices; + if (Array.isArray(choices) && choices.length > 0) { + const first = choices[0] as { message?: { content?: unknown } }; + const content = first?.message?.content; + + if (typeof content === "string") { + return content; + } + + if (Array.isArray(content)) { + const parts = content + .map((part) => { + if (typeof part === "string") { + return part; + } + if (part && typeof part === "object" && "text" in part) { + const text = (part as { text?: unknown }).text; + return typeof text === "string" ? text : ""; + } + return ""; + }) + .filter(Boolean); + + return parts.join("\n").trim(); + } } return ""; } -function safeJsonParse(value: string): T | undefined { - try { - return JSON.parse(value) as T; - } catch { - return undefined; +function parseAnswerAndConfidence(rawContent: string): { answer: string; confidence?: number } { + const json = extractJsonObject(rawContent); + if (!json) { + return { answer: rawContent.trim() }; } + + const answer = + typeof json.answer === "string" + ? json.answer.trim() + : typeof json.final_answer === "string" + ? json.final_answer.trim() + : rawContent.trim(); + + const confidence = numberOrUndefined(json.confidence); + return { answer, confidence }; } -function heuristicConfidence(results: SearchResult[], answer: string): number { - const sourceFactor = Math.min(results.length, 8) / 8; - const answerFactor = Math.min(answer.length, 280) / 280; - const uncertaintyPenalty = /\b(maybe|unclear|not enough information|insufficient)\b/i.test(answer) ? 0.2 : 0; - return clamp01(0.2 + sourceFactor * 0.6 + answerFactor * 0.2 - uncertaintyPenalty); +function heuristicConfidence(query: string, results: SearchResult[], answer: string): number { + let score = 0.3; + score += Math.min(results.length, 8) * 0.07; + + if (query.trim().length > 20) { + score += 0.05; + } + + if (answer.trim().length >= 80) { + score += 0.08; + } + + if (results.length <= 1) { + score -= 0.12; + } + + return clamp01(score); } -function buildUserPrompt(query: string, results: SearchResult[]): string { - const compactSources = results.slice(0, 8).map((r, index) => ({ - id: index + 1, - title: r.title, - url: r.url, - snippet: r.snippet, - publishedAt: r.publishedAt ?? null, - domain: r.sourceDomain, - })); +function extractUsageTokens(payload: Record): { in?: number; out?: number } { + const usage = payload.usage as Record | undefined; + if (!usage) { + return {}; + } - return JSON.stringify({ - query, - sources: compactSources, - instructions: [ - "Synthesize a direct answer.", - "If evidence is weak, explicitly say what is missing.", - "Keep answer concise and factual.", - "Output strict JSON only.", - ], - }); + const inTokens = numberOrUndefined(usage.prompt_tokens) ?? numberOrUndefined(usage.input_tokens); + const outTokens = numberOrUndefined(usage.completion_tokens) ?? numberOrUndefined(usage.output_tokens); + + return { + in: inTokens, + out: outTokens, + }; +} + +async function safeReadText(response: Response): Promise { + try { + const text = await response.text(); + return text.trim() === "" ? undefined : text; + } catch { + return undefined; + } } -export async function synthesize( +export async function synthesizeAnswer( query: string, results: SearchResult[], - options: SynthOptions = {}, + options: SynthesisOptions = {}, ): Promise { - const apiKey = options.apiKey ?? process.env.OPENAI_API_KEY; - if (!apiKey) { - throw new SynthesisError("Missing OPENAI_API_KEY.", "missing_api_key"); + const endpoint = process.env.OPENAI_API_URL; + if (!endpoint) { + throw new SynthesisError("OPENAI_API_URL is not configured."); } - const model = options.model ?? process.env.OPENAI_MODEL ?? DEFAULT_MODEL; - const endpoint = process.env.OPENAI_BASE_URL ?? OPENAI_CHAT_COMPLETIONS_URL; - const timeoutMs = options.timeoutMs ?? 15_000; + const apiKey = process.env.OPENAI_API_KEY; + if (!apiKey) { + throw new SynthesisError("OPENAI_API_KEY is not configured."); + } + const model = options.model ?? DEFAULT_MODEL; const userPrompt = buildUserPrompt(query, results); - const controller = new AbortController(); - const timeout = setTimeout(() => controller.abort("timeout"), timeoutMs); - - let externalAbortHandler: (() => void) | undefined; - if (options.signal) { - externalAbortHandler = () => controller.abort(options.signal?.reason ?? "aborted"); - if (options.signal.aborted) { - externalAbortHandler(); - } else { - options.signal.addEventListener("abort", externalAbortHandler, { once: true }); - } - } + const requestBody = { + model, + temperature: options.temperature ?? 0.2, + max_tokens: options.maxOutputTokens ?? 350, + response_format: { type: "json_object" }, + messages: [ + { role: "system", content: SYSTEM_PROMPT }, + { role: "user", content: userPrompt }, + ], + }; + let response: Response; try { - const response = await fetch(endpoint, { + response = await fetch(endpoint, { method: "POST", + signal: options.signal, headers: { - "content-type": "application/json", - authorization: `Bearer ${apiKey}`, + "Content-Type": "application/json", + Authorization: `Bearer ${apiKey}`, }, - signal: controller.signal, - body: JSON.stringify({ - model, - temperature: options.temperature ?? 0.2, - max_tokens: options.maxTokens ?? 350, - response_format: { type: "json_object" }, - messages: [ - { role: "system", content: SYSTEM_PROMPT }, - { role: "user", content: userPrompt }, - ], - }), + body: JSON.stringify(requestBody), }); + } catch (error) { + const message = error instanceof Error ? error.message : "Unknown fetch error"; + throw new SynthesisError(`Synthesis request failed: ${message}`); + } - if (!response.ok) { - let message = "Synthesis upstream request failed."; - try { - const body = (await response.json()) as OpenAIChatResponse; - if (body.error?.message) { - message = body.error.message; - } - } catch { - // ignore parse errors - } - - const code = response.status === 429 ? "rate_limited" : "upstream_error"; - throw new SynthesisError(message, code, response.status); - } + if (!response.ok) { + const details = await safeReadText(response); + throw new SynthesisError(`Synthesis API request failed with status ${response.status}.`, response.status, details); + } - const data = (await response.json()) as OpenAIChatResponse; - const rawContent = getChoiceContent(data.choices?.[0]); + let payload: Record; + try { + payload = (await response.json()) as Record; + } catch { + throw new SynthesisError("Synthesis API returned invalid JSON.", response.status); + } - const parsed = safeJsonParse<{ answer?: string; confidence?: number }>(rawContent); - const answer = (parsed?.answer ?? rawContent).trim(); - const confidence = clamp01( - typeof parsed?.confidence === "number" ? parsed.confidence : heuristicConfidence(results, answer), - ); + const rawContent = getAssistantText(payload).trim(); + const parsed = parseAnswerAndConfidence(rawContent); - const promptTokens = data.usage?.prompt_tokens ?? estimateTokens(`${SYSTEM_PROMPT}\n${userPrompt}`); - const completionTokens = data.usage?.completion_tokens ?? estimateTokens(answer); + const answer = parsed.answer || "No answer generated."; + const confidence = clamp01(parsed.confidence ?? heuristicConfidence(query, results, answer)); - return { - answer, - confidence, - tokens: { - in: promptTokens, - out: completionTokens, - }, - model: data.model ?? model, - }; - } catch (error) { - if (error instanceof SynthesisError) { - throw error; - } + const usage = extractUsageTokens(payload); + const tokenIn = usage.in ?? estimateTokens(SYSTEM_PROMPT + userPrompt); + const tokenOut = usage.out ?? estimateTokens(answer); - if (error instanceof Error && error.name === "AbortError") { - throw new SynthesisError("Synthesis request timed out.", "timeout"); - } + return { + answer, + confidence, + tokens: { + in: tokenIn, + out: tokenOut, + }, + model: typeof payload.model === "string" ? payload.model : model, + }; +} - throw new SynthesisError( - error instanceof Error ? error.message : "Unknown synthesis network error.", - "network_error", - ); - } finally { - clearTimeout(timeout); - if (options.signal && externalAbortHandler) { - options.signal.removeEventListener("abort", externalAbortHandler); - } - } -} \ No newline at end of file +export const synthesize = synthesizeAnswer; +export default synthesizeAnswer; \ No newline at end of file From c28febf9cb2419a90a70162621f46a18bb7e67a1 Mon Sep 17 00:00:00 2001 From: Jackson Date: Wed, 4 Mar 2026 16:55:42 +0000 Subject: [PATCH 13/13] feat: implement task 0x9a74dfdbbaaf87c2d7 via TaskMarket agent #24583 --- IMPLEMENTATION.md | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 IMPLEMENTATION.md diff --git a/IMPLEMENTATION.md b/IMPLEMENTATION.md new file mode 100644 index 0000000..e69de29