diff --git a/IMPLEMENTATION.md b/IMPLEMENTATION.md new file mode 100644 index 0000000..e69de29 diff --git a/src/logic/__tests__/brave.test.ts b/src/logic/__tests__/brave.test.ts new file mode 100644 index 0000000..06023d9 --- /dev/null +++ b/src/logic/__tests__/brave.test.ts @@ -0,0 +1,86 @@ +import { afterEach, beforeEach, describe, expect, it } from "bun:test"; +import { BraveApiError, normalizeBraveResponse, searchBrave } from "../brave"; + +const originalFetch = globalThis.fetch; + +describe("brave.ts", () => { + beforeEach(() => { + process.env.BRAVE_API_KEY = "test-key"; + }); + + afterEach(() => { + globalThis.fetch = originalFetch; + delete process.env.BRAVE_API_KEY; + }); + + it("normalises Brave response into SearchResult[]", async () => { + globalThis.fetch = (async () => + new Response( + JSON.stringify({ + web: { + results: [ + { + title: "First result", + url: "https://news.example.com/story?utm=1", + description: "This is the first test result description.", + page_age: "2026-02-28T08:00:00Z", + meta_url: { hostname: "news.example.com" }, + }, + { + title: "Second result", + url: "https://blog.example.org/post", + description: "This is the second test result description.", + }, + ], + }, + }), + { status: 200 }, + )) as typeof fetch; + + const results = await searchBrave("queryx", { + endpoint: "https://github.com/langoustine69/queryx", + }); + + expect(results).toHaveLength(2); + expect(results[0].title).toBe("First result"); + expect(results[0].domain).toBe("news.example.com"); + expect(results[0].publishedAt).toBe("2026-02-28T08:00:00.000Z"); + expect(results[1].domain).toBe("blog.example.org"); + }); + + it("throws typed rate-limit errors", async () => { + globalThis.fetch = (async () => + new Response( + JSON.stringify({ + error: { + message: "Rate limited", + code: "too_many_requests", + }, + }), + { + status: 429, + headers: { + "retry-after": "9", + }, + }, + )) as typeof fetch; + + try { + await searchBrave("queryx", { + endpoint: "https://github.com/langoustine69/queryx", + }); + throw new Error("Expected searchBrave to throw"); + } catch (error) { + expect(error).toBeInstanceOf(BraveApiError); + const typed = error as BraveApiError; + expect(typed.status).toBe(429); + expect(typed.code).toBe("RATE_LIMITED"); + expect(typed.retryAfterSeconds).toBe(9); + } + }); + + it("handles malformed payloads safely", () => { + const results = normalizeBraveResponse({ web: { results: "not-an-array" } }); + expect(results).toEqual([]); + }); +}); \ No newline at end of file diff --git a/src/logic/brave.test.ts b/src/logic/brave.test.ts new file mode 100644 index 0000000..46e545a --- /dev/null +++ b/src/logic/brave.test.ts @@ -0,0 +1,74 @@ +import { afterEach, describe, expect, it } from "bun:test"; +import { + BraveApiError, + BraveRateLimitError, + normalizeBraveResponse, + searchBrave, +} from "./brave"; + +const originalFetch = globalThis.fetch; + +afterEach(() => { + globalThis.fetch = originalFetch; +}); + +describe("brave.ts", () => { + it("normalizes Brave response into SearchResult[]", () => { + const results = normalizeBraveResponse({ + web: { + results: [ + { + title: "Example Title", + url: "https://www.example.com/path", + description: "Example description", + profile: { name: "Example Source" }, + published_at: "2026-03-01T12:00:00Z", + }, + { + title: "Second Result", + url: "https://docs.example.org/page", + snippet: "Second snippet", + }, + ], + }, + }); + + expect(results.length).toBe(2); + expect(results[0]).toEqual({ + title: "Example Title", + url: "https://www.example.com/path", + snippet: "Example description", + domain: "example.com", + source: "Example Source", + publishedAt: "2026-03-01T12:00:00.000Z", + }); + expect(results[1].domain).toBe("docs.example.org"); + }); + + it("throws BraveRateLimitError on 429", async () => { + globalThis.fetch = (async () => + new Response(JSON.stringify({ error: "too many requests" }), { + status: 429, + headers: { + "content-type": "application/json", + "retry-after": "7", + }, + })) as typeof fetch; + + await expect( + searchBrave("test query", { apiKey: "brave_test_key" }) + ).rejects.toBeInstanceOf(BraveRateLimitError); + }); + + it("throws BraveApiError on non-429 error", async () => { + globalThis.fetch = (async () => + new Response(JSON.stringify({ message: "server error" }), { + status: 500, + headers: { "content-type": "application/json" }, + })) as typeof fetch; + + await expect( + searchBrave("test query", { apiKey: "brave_test_key" }) + ).rejects.toBeInstanceOf(BraveApiError); + }); +}); \ No newline at end of file diff --git a/src/logic/brave.ts b/src/logic/brave.ts index 69b4c70..15b44d8 100644 --- a/src/logic/brave.ts +++ b/src/logic/brave.ts @@ -1,127 +1,281 @@ -/** - * Brave Search API client. - * Wraps the Brave Web Search API and normalizes results. - */ +export type Freshness = "day" | "week" | "month"; export interface SearchResult { title: string; url: string; snippet: string; - published?: string; + source: string; + publishedDate?: string; + age?: string; score?: number; } export interface BraveSearchOptions { - freshness?: "day" | "week" | "month"; + freshness?: Freshness; count?: number; - type?: "web" | "news"; + offset?: number; + country?: string; + language?: string; + safeSearch?: "strict" | "moderate" | "off"; + signal?: AbortSignal; +} + +interface BraveWebResult { + title?: string; + url?: string; + description?: string; + extra_snippets?: string[]; + age?: string; + page_age?: string | { age?: string; last_modified?: string }; + score?: number; } export class BraveApiError extends Error { - constructor( - public statusCode: number, - message: string, - ) { + public readonly status: number; + public readonly details?: string; + + constructor(message: string, status: number, details?: string) { super(message); this.name = "BraveApiError"; + this.status = status; + this.details = details; } } export class BraveRateLimitError extends BraveApiError { - constructor() { - super(429, "Brave API rate limit exceeded"); + public readonly retryAfterSeconds?: number; + + constructor(message: string, retryAfterSeconds?: number, details?: string) { + super(message, 429, details); this.name = "BraveRateLimitError"; + this.retryAfterSeconds = retryAfterSeconds; + } +} + +const FRESHNESS_MAP: Record = { + day: "pd", + week: "pw", + month: "pm", +}; + +function clamp01(value: number): number { + if (Number.isNaN(value)) { + return 0; } + return Math.max(0, Math.min(1, value)); } -export class BraveAuthError extends BraveApiError { - constructor() { - super(401, "Invalid Brave API key"); - this.name = "BraveAuthError"; +function normalizeScore(value: unknown): number | undefined { + if (typeof value !== "number" || !Number.isFinite(value)) { + return undefined; + } + if (value >= 0 && value <= 1) { + return clamp01(value); } + if (value > 1 && value <= 100) { + return clamp01(value / 100); + } + return clamp01(value); } -const BRAVE_API_BASE = "https://api.search.brave.com/res/v1"; +function parseRetryAfter(raw: string | null): number | undefined { + if (!raw) { + return undefined; + } -export async function braveSearch( - query: string, - opts?: BraveSearchOptions, -): Promise { - const apiKey = process.env.BRAVE_API_KEY; - if (!apiKey) { - throw new BraveAuthError(); + const asInt = Number.parseInt(raw, 10); + if (Number.isFinite(asInt) && asInt >= 0) { + return asInt; } - const count = opts?.count ?? 10; - const searchType = opts?.type ?? "web"; - const endpoint = searchType === "news" ? "news/search" : "web/search"; + const dateMs = Date.parse(raw); + if (Number.isNaN(dateMs)) { + return undefined; + } - const params = new URLSearchParams({ - q: query, - count: String(count), - }); + const seconds = Math.ceil((dateMs - Date.now()) / 1000); + return seconds > 0 ? seconds : 0; +} - if (opts?.freshness) { - params.set("freshness", opts.freshness); +function extractDomain(rawUrl: string): string { + try { + return new URL(rawUrl).hostname.toLowerCase().replace(/^www\./, ""); + } catch { + return "unknown"; } +} - const url = `${BRAVE_API_BASE}/${endpoint}?${params}`; +function normalizePublishedDate(pageAge: BraveWebResult["page_age"]): string | undefined { + if (!pageAge) { + return undefined; + } - const response = await fetch(url, { - headers: { - Accept: "application/json", - "Accept-Encoding": "gzip", - "X-Subscription-Token": apiKey, - }, - }); + if (typeof pageAge === "object" && typeof pageAge.last_modified === "string") { + return pageAge.last_modified; + } - if (response.status === 429) throw new BraveRateLimitError(); - if (response.status === 401) throw new BraveAuthError(); - if (!response.ok) { - throw new BraveApiError( - response.status, - `Brave API error: ${response.status} ${response.statusText}`, - ); + if (typeof pageAge === "string") { + const parsed = Date.parse(pageAge); + if (!Number.isNaN(parsed)) { + return new Date(parsed).toISOString(); + } } - const body = await response.json(); + return undefined; +} - if (searchType === "news") { - return normalizeNewsResults(body); +function normalizeAge(raw: BraveWebResult): string | undefined { + if (typeof raw.age === "string" && raw.age.trim() !== "") { + return raw.age; } - return normalizeWebResults(body); + if (typeof raw.page_age === "object" && typeof raw.page_age.age === "string") { + return raw.page_age.age; + } + return undefined; } -function normalizeWebResults(body: any): SearchResult[] { - const results: SearchResult[] = []; - const webResults = body?.web?.results ?? []; +function normalizeSnippet(raw: BraveWebResult): string { + const chunks: string[] = []; + if (typeof raw.description === "string" && raw.description.trim() !== "") { + chunks.push(raw.description.trim()); + } + if (Array.isArray(raw.extra_snippets)) { + for (const snippet of raw.extra_snippets) { + if (typeof snippet === "string" && snippet.trim() !== "") { + chunks.push(snippet.trim()); + } + } + } + return chunks.join(" ").replace(/\s+/g, " ").trim(); +} - for (const r of webResults) { - results.push({ - title: r.title ?? "", - url: r.url ?? "", - snippet: r.description ?? "", - published: r.page_age ?? undefined, - score: r.relevance_score ?? undefined, - }); +function normalizeBraveResult(raw: BraveWebResult): SearchResult | null { + if (typeof raw.url !== "string" || raw.url.trim() === "") { + return null; + } + + const title = typeof raw.title === "string" ? raw.title.trim() : ""; + const snippet = normalizeSnippet(raw); + + if (title === "" && snippet === "") { + return null; } - return results; + return { + title, + url: raw.url, + snippet, + source: extractDomain(raw.url), + publishedDate: normalizePublishedDate(raw.page_age), + age: normalizeAge(raw), + score: normalizeScore(raw.score), + }; } -function normalizeNewsResults(body: any): SearchResult[] { - const results: SearchResult[] = []; - const newsResults = body?.results ?? []; +function buildRequestUrl(endpoint: string, params: URLSearchParams): string { + const separator = endpoint.includes("?") ? "&" : "?"; + return `${endpoint}${separator}${params.toString()}`; +} - for (const r of newsResults) { - results.push({ - title: r.title ?? "", - url: r.url ?? "", - snippet: r.description ?? "", - published: r.age ?? undefined, - score: r.relevance_score ?? undefined, - }); +async function safeReadText(response: Response): Promise { + try { + const text = await response.text(); + return text.trim() === "" ? undefined : text; + } catch { + return undefined; + } +} + +export async function searchBrave(query: string, options: BraveSearchOptions = {}): Promise { + const normalizedQuery = query.trim(); + if (normalizedQuery === "") { + return []; + } + + const endpoint = process.env.BRAVE_API_URL; + if (!endpoint) { + throw new Error("BRAVE_API_URL is not configured."); + } + + const apiKey = process.env.BRAVE_API_KEY; + if (!apiKey) { + throw new Error("BRAVE_API_KEY is not configured."); } - return results; + const params = new URLSearchParams(); + params.set("q", normalizedQuery); + + if (typeof options.count === "number" && Number.isFinite(options.count)) { + params.set("count", String(Math.max(1, Math.floor(options.count)))); + } + + if (typeof options.offset === "number" && Number.isFinite(options.offset)) { + params.set("offset", String(Math.max(0, Math.floor(options.offset)))); + } + + if (options.freshness) { + params.set("freshness", FRESHNESS_MAP[options.freshness]); + } + + if (options.country) { + params.set("country", options.country); + } + + if (options.language) { + params.set("search_lang", options.language); + } + + if (options.safeSearch) { + params.set("safesearch", options.safeSearch); + } + + const requestUrl = buildRequestUrl(endpoint, params); + + try { + const response = await fetch(requestUrl, { + method: "GET", + signal: options.signal, + headers: { + Accept: "application/json", + "X-Subscription-Token": apiKey, + }, + }); + + if (response.status === 429) { + const details = await safeReadText(response); + const retryAfterSeconds = parseRetryAfter(response.headers.get("retry-after")); + throw new BraveRateLimitError("Brave API rate limit exceeded.", retryAfterSeconds, details); + } + + if (!response.ok) { + const details = await safeReadText(response); + throw new BraveApiError(`Brave API request failed with status ${response.status}.`, response.status, details); + } + + let payload: unknown; + try { + payload = await response.json(); + } catch { + throw new BraveApiError("Brave API returned invalid JSON.", response.status); + } + + const rawResults = (payload as { web?: { results?: BraveWebResult[] } })?.web?.results; + if (!Array.isArray(rawResults)) { + return []; + } + + return rawResults + .map((item) => normalizeBraveResult(item)) + .filter((item): item is SearchResult => item !== null); + } catch (error) { + if (error instanceof BraveApiError) { + throw error; + } + + const message = error instanceof Error ? error.message : "Unknown fetch error"; + throw new BraveApiError(`Brave API network error: ${message}`, 0); + } } + +export const braveSearch = searchBrave; +export default searchBrave; \ No newline at end of file diff --git a/src/logic/cache.test.ts b/src/logic/cache.test.ts new file mode 100644 index 0000000..37e59b0 --- /dev/null +++ b/src/logic/cache.test.ts @@ -0,0 +1,40 @@ +import { describe, expect, it } from "bun:test"; +import { QueryCache } from "./cache"; + +describe("cache.ts", () => { + it("expires entries based on TTL", () => { + let now = 0; + const cache = new QueryCache({ + ttlSeconds: 1, + now: () => now, + }); + + cache.set("Hello World", "value"); + expect(cache.get("hello world")).toBe("value"); + + now = 1001; + expect(cache.get("hello world")).toBeUndefined(); + }); + + it("tracks hit/miss stats and normalizes query keys", () => { + let now = 0; + const cache = new QueryCache({ + ttlSeconds: 60, + now: () => now, + }); + + expect(cache.get("missing key")).toBeUndefined(); // miss + cache.set(" Query X ", "cached"); + expect(cache.get("query x")).toBe("cached"); // hit + expect(cache.get("QUERY X")).toBe("cached"); // hit + + now = 61_000; + expect(cache.get("query x")).toBeUndefined(); // expired miss + + const stats = cache.stats(); + expect(stats.hits).toBe(2); + expect(stats.misses).toBe(2); + expect(stats.hitRate).toBe(0.5); + expect(stats.size).toBe(0); + }); +}); \ No newline at end of file diff --git a/src/logic/cache.ts b/src/logic/cache.ts index cee17f5..2095468 100644 --- a/src/logic/cache.ts +++ b/src/logic/cache.ts @@ -1,89 +1,109 @@ -/** - * In-memory cache with TTL. - * Reduces Brave API calls on repeated queries. - */ +import { createHash } from "node:crypto"; + +export interface CacheStats { + hits: number; + misses: number; + hitRate: number; + size: number; + ttlSeconds: number; +} interface CacheEntry { value: T; expiresAt: number; } -export interface CacheStats { - hits: number; - misses: number; - size: number; - hitRate: number; +function parseDefaultTtlSeconds(): number { + const raw = process.env.CACHE_TTL_SECONDS; + const parsed = raw ? Number(raw) : 300; + if (!Number.isFinite(parsed) || parsed <= 0) { + return 300; + } + return parsed; +} + +export function normalizeQuery(query: string): string { + return query.normalize("NFKC").trim().toLowerCase().replace(/\s+/g, " "); +} + +export function normalizedQueryHash(query: string): string { + const normalized = normalizeQuery(query); + return createHash("sha256").update(normalized).digest("hex"); } -export class Cache { - private store = new Map>(); +export class InMemoryCache { + private readonly entries = new Map>(); + private readonly now: () => number; + private readonly defaultTtlSeconds: number; private hits = 0; private misses = 0; - private ttlMs: number; - constructor(ttlSeconds?: number) { - this.ttlMs = (ttlSeconds ?? Number(process.env.CACHE_TTL_SECONDS) ?? 300) * 1000; + constructor(ttlSeconds: number = parseDefaultTtlSeconds(), now: () => number = () => Date.now()) { + this.defaultTtlSeconds = Number.isFinite(ttlSeconds) && ttlSeconds > 0 ? ttlSeconds : 300; + this.now = now; + } + + private purgeExpired(): void { + const now = this.now(); + for (const [key, entry] of this.entries) { + if (entry.expiresAt <= now) { + this.entries.delete(key); + } + } } - /** - * Normalize a query string into a stable cache key. - */ - static normalizeKey(query: string, params?: Record): string { - const normalized = query.toLowerCase().trim().replace(/\s+/g, " "); - if (!params || Object.keys(params).length === 0) return normalized; - const sorted = Object.entries(params) - .sort(([a], [b]) => a.localeCompare(b)) - .map(([k, v]) => `${k}=${v}`) - .join("&"); - return `${normalized}|${sorted}`; + private keyForQuery(query: string): string { + return normalizedQueryHash(query); } - get(key: string): { value: T; stale: boolean } | null { - const entry = this.store.get(key); + public get(query: string): T | undefined { + this.purgeExpired(); + const key = this.keyForQuery(query); + const entry = this.entries.get(key); + if (!entry) { - this.misses++; - return null; + this.misses += 1; + return undefined; } - const now = Date.now(); - if (now > entry.expiresAt) { - this.store.delete(key); - this.misses++; - return null; + if (entry.expiresAt <= this.now()) { + this.entries.delete(key); + this.misses += 1; + return undefined; } - this.hits++; - return { value: entry.value, stale: false }; + this.hits += 1; + return entry.value; } - set(key: string, value: T): void { - this.store.set(key, { - value, - expiresAt: Date.now() + this.ttlMs, - }); - } + public set(query: string, value: T, ttlSeconds?: number): void { + this.purgeExpired(); + const key = this.keyForQuery(query); + const effectiveTtl = Number.isFinite(ttlSeconds) && (ttlSeconds as number) > 0 ? (ttlSeconds as number) : this.defaultTtlSeconds; + const expiresAt = this.now() + effectiveTtl * 1000; - stats(): CacheStats { - // Clean expired entries - const now = Date.now(); - for (const [key, entry] of this.store) { - if (now > entry.expiresAt) { - this.store.delete(key); - } - } + this.entries.set(key, { value, expiresAt }); + } + public stats(): CacheStats { + this.purgeExpired(); const total = this.hits + this.misses; + return { hits: this.hits, misses: this.misses, - size: this.store.size, - hitRate: total > 0 ? this.hits / total : 0, + hitRate: total === 0 ? 0 : this.hits / total, + size: this.entries.size, + ttlSeconds: this.defaultTtlSeconds, }; } - clear(): void { - this.store.clear(); + public clear(): void { + this.entries.clear(); this.hits = 0; this.misses = 0; } } + +export const queryCache = new InMemoryCache(); +export default queryCache; \ No newline at end of file diff --git a/src/logic/index.ts b/src/logic/index.ts new file mode 100644 index 0000000..6351282 --- /dev/null +++ b/src/logic/index.ts @@ -0,0 +1,4 @@ +export * from "./brave"; +export * from "./synth"; +export * from "./cache"; +export * from "./rank"; \ No newline at end of file diff --git a/src/logic/rank.test.ts b/src/logic/rank.test.ts new file mode 100644 index 0000000..08042b2 --- /dev/null +++ b/src/logic/rank.test.ts @@ -0,0 +1,106 @@ +import { describe, expect, it } from "bun:test"; +import { rankSearchResults } from "./rank"; +import type { SearchResult } from "./brave"; + +describe("rank.ts", () => { + it("deduplicates and enforces max 2 results per domain", () => { + const input: SearchResult[] = [ + { + title: "A complete guide to Queryx", + url: "https://example.com/guide?utm_source=test", + snippet: "Long and useful article about using Queryx effectively in production.", + domain: "example.com", + publishedAt: "2026-03-01T00:00:00Z", + }, + { + title: "A complete guide to Queryx", + url: "https://example.com/guide", + snippet: "Duplicate of the same article with tracking params removed.", + domain: "example.com", + publishedAt: "2026-03-01T00:00:00Z", + }, + { + title: "Queryx architecture deep dive", + url: "https://example.com/architecture", + snippet: "Detailed architecture decisions and implementation details.", + domain: "example.com", + publishedAt: "2026-02-20T00:00:00Z", + }, + { + title: "Queryx API quickstart", + url: "https://example.com/quickstart", + snippet: "Quickstart tutorial for integrating Queryx APIs.", + domain: "example.com", + publishedAt: "2026-02-15T00:00:00Z", + }, + { + title: "Independent review of Queryx", + url: "https://another.com/review", + snippet: "Third-party review with practical examples and benchmark notes.", + domain: "another.com", + publishedAt: "2026-02-28T00:00:00Z", + }, + ]; + + const ranked = rankSearchResults(input, { + maxPerDomain: 2, + maxResults: 10, + now: new Date("2026-03-04T00:00:00Z"), + }); + + const exampleCount = ranked.filter((r) => r.domain === "example.com").length; + expect(exampleCount).toBe(2); + + const urls = ranked.map((r) => r.url); + const uniqueUrls = new Set(urls); + expect(uniqueUrls.size).toBe(urls.length); + }); + + it("boosts newer content over older content", () => { + const input: SearchResult[] = [ + { + title: "Queryx release today", + url: "https://fresh.dev/queryx-release", + snippet: "Today's update includes major improvements and migration notes.", + domain: "fresh.dev", + publishedAt: "2026-03-03T12:00:00Z", + }, + { + title: "Queryx release notes archive", + url: "https://old.dev/queryx-archive", + snippet: "Historical notes from previous years and legacy behavior.", + domain: "old.dev", + publishedAt: "2022-01-01T00:00:00Z", + }, + ]; + + const ranked = rankSearchResults(input, { + maxResults: 10, + now: new Date("2026-03-04T00:00:00Z"), + }); + + expect(ranked[0]?.url).toBe("https://fresh.dev/queryx-release"); + }); + + it("filters low-quality results", () => { + const input: SearchResult[] = [ + { + title: "ok", + url: "https://spam.dev/a", + snippet: "tiny", + domain: "spam.dev", + }, + { + title: "Useful Queryx guide for developers", + url: "https://good.dev/guide", + snippet: + "This guide explains setup, ranking behavior, and synthesis output patterns in depth.", + domain: "good.dev", + }, + ]; + + const ranked = rankSearchResults(input, { maxResults: 10 }); + expect(ranked.length).toBe(1); + expect(ranked[0].domain).toBe("good.dev"); + }); +}); \ No newline at end of file diff --git a/src/logic/rank.ts b/src/logic/rank.ts index 371b20c..ba5a3e5 100644 --- a/src/logic/rank.ts +++ b/src/logic/rank.ts @@ -1,107 +1,213 @@ -/** - * Source ranking and deduplication. - * Deduplicates by domain, boosts recency, filters low quality. - */ - import type { SearchResult } from "./brave"; -const SUSPICIOUS_DOMAINS = new Set([ - "pinterest.com", - "quora.com", - "slideshare.net", - "scribd.com", -]); +export interface RankedSearchResult extends SearchResult { + domain: string; + rankScore: number; + canonicalUrl: string; +} + +export interface RankOptions { + maxPerDomain?: number; + limit?: number; + now?: Date; +} + +const TRACKING_PARAM_NAMES = new Set(["fbclid", "gclid", "mc_cid", "mc_eid", "igshid", "ref", "ref_src"]); +const LOW_QUALITY_DOMAIN_FRAGMENTS = ["pinterest.", "quora.", "reddit."]; -const MAX_PER_DOMAIN = 2; +function clamp01(value: number): number { + if (!Number.isFinite(value)) { + return 0; + } + return Math.max(0, Math.min(1, value)); +} -function extractDomain(url: string): string { +function extractDomain(rawUrl: string): string { try { - const hostname = new URL(url).hostname; - // Strip www. - return hostname.replace(/^www\./, ""); + return new URL(rawUrl).hostname.toLowerCase().replace(/^www\./, ""); } catch { - return url; + return "unknown"; } } -function recencyScore(published?: string): number { - if (!published) return 0; - +function canonicalizeUrl(rawUrl: string): string { try { - const pubDate = new Date(published); - const now = new Date(); - const ageMs = now.getTime() - pubDate.getTime(); - const ageDays = ageMs / (1000 * 60 * 60 * 24); + const url = new URL(rawUrl); + url.hash = ""; - // More recent = higher score (max 1.0 for today, decays over 90 days) - return Math.max(0, 1 - ageDays / 90); + for (const key of [...url.searchParams.keys()]) { + if (key.startsWith("utm_") || TRACKING_PARAM_NAMES.has(key)) { + url.searchParams.delete(key); + } + } + + if (url.pathname.length > 1) { + url.pathname = url.pathname.replace(/\/+$/, ""); + } + + const normalized = url.toString(); + return normalized.endsWith("/") ? normalized.slice(0, -1) : normalized; } catch { - return 0; + return rawUrl.trim().toLowerCase(); } } -function qualityScore(result: SearchResult): number { - let score = 0; +function parseRelativeAgeToDays(age: string): number | undefined { + const match = age.toLowerCase().match(/(\d+)\s*(minute|hour|day|week|month|year)/); + if (!match) { + return undefined; + } - // Has snippet (essential) - if (result.snippet && result.snippet.length > 20) { - score += 0.4; - } else { - return 0; // No snippet = filtered out + const value = Number.parseInt(match[1], 10); + if (!Number.isFinite(value) || value < 0) { + return undefined; } - // Snippet length bonus - score += Math.min(result.snippet.length / 500, 0.2); + const unit = match[2]; + switch (unit) { + case "minute": + return value / (24 * 60); + case "hour": + return value / 24; + case "day": + return value; + case "week": + return value * 7; + case "month": + return value * 30; + case "year": + return value * 365; + default: + return undefined; + } +} - // Has title - if (result.title && result.title.length > 5) { - score += 0.1; +function recencyBoost(result: SearchResult, nowMs: number): number { + if (result.publishedDate) { + const publishedMs = Date.parse(result.publishedDate); + if (!Number.isNaN(publishedMs) && publishedMs <= nowMs) { + const ageDays = (nowMs - publishedMs) / (1000 * 60 * 60 * 24); + if (ageDays <= 1) return 0.25; + if (ageDays <= 7) return 0.2; + if (ageDays <= 30) return 0.12; + if (ageDays <= 180) return 0.06; + if (ageDays <= 365) return 0.03; + return 0; + } } - // Recency - score += recencyScore(result.published) * 0.2; + if (result.age) { + const ageDays = parseRelativeAgeToDays(result.age); + if (typeof ageDays === "number") { + if (ageDays <= 1) return 0.22; + if (ageDays <= 7) return 0.18; + if (ageDays <= 30) return 0.1; + if (ageDays <= 180) return 0.05; + } + } - // Existing relevance score from Brave - if (result.score) { - score += Math.min(result.score, 0.1); + return 0; +} + +function qualityScore(result: SearchResult): number { + const title = result.title.trim(); + const snippet = result.snippet.trim(); + const domain = extractDomain(result.url); + + let score = 0; + + if (title.length >= 12) score += 0.3; + else if (title.length >= 6) score += 0.16; + + if (snippet.length >= 60) score += 0.34; + else if (snippet.length >= 25) score += 0.2; + + if (/^https:\/\//i.test(result.url)) score += 0.1; + + const isLowQualityDomain = LOW_QUALITY_DOMAIN_FRAGMENTS.some((fragment) => domain.includes(fragment)); + if (!isLowQualityDomain) score += 0.12; + else score -= 0.2; + + const snippetWords = snippet.split(/\s+/).filter(Boolean).length; + if (snippetWords >= 10) score += 0.1; + + if (typeof result.score === "number") { + score += clamp01(result.score) * 0.14; } - return score; + return clamp01(score); } -export function rank( - results: SearchResult[], - topN?: number, -): SearchResult[] { - const n = topN ?? 10; - - // Filter suspicious domains and low quality - const filtered = results.filter((r) => { - const domain = extractDomain(r.url); - if (SUSPICIOUS_DOMAINS.has(domain)) return false; - if (!r.snippet || r.snippet.length < 20) return false; +function isLowQuality(result: SearchResult): boolean { + const quality = qualityScore(result); + if (quality < 0.35) { return true; - }); + } + + const lowerUrl = result.url.toLowerCase(); + if (lowerUrl.includes("/search?") || lowerUrl.includes("/tag/") || lowerUrl.includes("/login")) { + return true; + } + + return false; +} + +function computeRankScore(result: SearchResult, nowMs: number): number { + const quality = qualityScore(result); + const recency = recencyBoost(result, nowMs); + return clamp01(quality + recency); +} - // Score and sort - const scored = filtered.map((r) => ({ - result: r, - score: qualityScore(r), - })); - scored.sort((a, b) => b.score - a.score); +export function rankAndDedup(results: SearchResult[], options: RankOptions = {}): RankedSearchResult[] { + const nowMs = (options.now ?? new Date()).getTime(); + const maxPerDomain = options.maxPerDomain ?? 2; + const limit = options.limit ?? Number.MAX_SAFE_INTEGER; - // Deduplicate by domain (max 2 per domain) - const domainCount = new Map(); - const deduped: SearchResult[] = []; + const dedupMap = new Map(); - for (const { result } of scored) { + for (const result of results) { + if (isLowQuality(result)) { + continue; + } + + const canonicalUrl = canonicalizeUrl(result.url); const domain = extractDomain(result.url); - const count = domainCount.get(domain) ?? 0; - if (count >= MAX_PER_DOMAIN) continue; - domainCount.set(domain, count + 1); - deduped.push(result); - if (deduped.length >= n) break; + const rankScore = computeRankScore(result, nowMs); + + const ranked: RankedSearchResult = { + ...result, + canonicalUrl, + domain, + rankScore, + }; + + const existing = dedupMap.get(canonicalUrl); + if (!existing || ranked.rankScore > existing.rankScore) { + dedupMap.set(canonicalUrl, ranked); + } } - return deduped; + const sorted = [...dedupMap.values()].sort((a, b) => b.rankScore - a.rankScore); + + const output: RankedSearchResult[] = []; + const domainCounts = new Map(); + + for (const item of sorted) { + const currentCount = domainCounts.get(item.domain) ?? 0; + if (currentCount >= maxPerDomain) { + continue; + } + + output.push(item); + domainCounts.set(item.domain, currentCount + 1); + + if (output.length >= limit) { + break; + } + } + + return output; } + +export const rankResults = rankAndDedup; +export default rankAndDedup; \ No newline at end of file diff --git a/src/logic/synth.test.ts b/src/logic/synth.test.ts new file mode 100644 index 0000000..105cb92 --- /dev/null +++ b/src/logic/synth.test.ts @@ -0,0 +1,81 @@ +import { afterEach, describe, expect, it } from "bun:test"; +import { synthesizeAnswer } from "./synth"; +import type { SearchResult } from "./brave"; + +const originalFetch = globalThis.fetch; + +afterEach(() => { + globalThis.fetch = originalFetch; +}); + +const sampleResults: SearchResult[] = [ + { + title: "Queryx release notes", + url: "https://example.com/release", + snippet: "Queryx ships improvements to ranking and synthesis.", + domain: "example.com", + publishedAt: "2026-03-02T00:00:00Z", + }, +]; + +describe("synth.ts", () => { + it("clamps confidence to 0-1 and uses API token counts", async () => { + globalThis.fetch = (async () => + new Response( + JSON.stringify({ + model: "gpt-4o-mini", + choices: [ + { + message: { + content: JSON.stringify({ + answer: "Queryx added ranking and caching updates.", + confidence: 1.7, + }), + }, + }, + ], + usage: { + prompt_tokens: 42, + completion_tokens: 13, + }, + }), + { status: 200, headers: { "content-type": "application/json" } } + )) as typeof fetch; + + const output = await synthesizeAnswer("What changed in Queryx?", sampleResults, { + apiKey: "openai_test_key", + }); + + expect(output.confidence).toBe(1); + expect(output.tokens.in).toBe(42); + expect(output.tokens.out).toBe(13); + expect(output.model).toBe("gpt-4o-mini"); + }); + + it("falls back to estimated tokens when usage is absent", async () => { + globalThis.fetch = (async () => + new Response( + JSON.stringify({ + choices: [ + { + message: { + content: JSON.stringify({ + answer: "No definitive update found.", + confidence: -2, + }), + }, + }, + ], + }), + { status: 200, headers: { "content-type": "application/json" } } + )) as typeof fetch; + + const output = await synthesizeAnswer("Unknown query", sampleResults, { + apiKey: "openai_test_key", + }); + + expect(output.confidence).toBe(0); + expect(output.tokens.in).toBeGreaterThan(0); + expect(output.tokens.out).toBeGreaterThan(0); + }); +}); \ No newline at end of file diff --git a/src/logic/synth.ts b/src/logic/synth.ts index 63e006f..9f9fbdc 100644 --- a/src/logic/synth.ts +++ b/src/logic/synth.ts @@ -1,118 +1,281 @@ -/** - * GPT-4o-mini synthesis layer. - * Takes query + search results, produces a concise answer with confidence scoring. - */ - import type { SearchResult } from "./brave"; -export interface SynthResult { +export interface SynthesisTokens { + in: number; + out: number; +} + +export interface SynthesisResult { answer: string; confidence: number; - tokens: { in: number; out: number }; + tokens: SynthesisTokens; model: string; } -const SYSTEM_PROMPT = `You are a search synthesis engine for AI agents. Given a query and search results, produce a concise, factual answer. +export interface SynthesisOptions { + model?: string; + temperature?: number; + maxOutputTokens?: number; + signal?: AbortSignal; +} + +export class SynthesisError extends Error { + public readonly status?: number; + public readonly details?: string; + + constructor(message: string, status?: number, details?: string) { + super(message); + this.name = "SynthesisError"; + this.status = status; + this.details = details; + } +} + +const DEFAULT_MODEL = "gpt-4o-mini"; + +const SYSTEM_PROMPT = + "You are a synthesis engine for downstream agents. Return only JSON with shape " + + '{"answer":"string","confidence":0..1}. ' + + "Use concise factual prose, include caveats only when required, and avoid markdown."; + +function clamp01(value: number): number { + if (!Number.isFinite(value)) { + return 0; + } + return Math.max(0, Math.min(1, value)); +} + +function estimateTokens(text: string): number { + const normalized = text.trim(); + if (normalized === "") { + return 0; + } + return Math.max(1, Math.ceil(normalized.length / 4)); +} + +function numberOrUndefined(value: unknown): number | undefined { + return typeof value === "number" && Number.isFinite(value) ? value : undefined; +} + +function buildSourcesPayload(results: SearchResult[]): Array> { + return results.slice(0, 10).map((item) => ({ + title: item.title, + url: item.url, + snippet: item.snippet, + source: item.source, + publishedDate: item.publishedDate ?? "", + })); +} + +function buildUserPrompt(query: string, results: SearchResult[]): string { + const payload = { + query, + sources: buildSourcesPayload(results), + instructions: [ + "Synthesize only from provided sources.", + "If evidence is weak, lower confidence.", + "Avoid hedging unless uncertainty is material.", + ], + }; + + return JSON.stringify(payload); +} -Rules: -- Be direct and factual. No filler, no hedging. -- Cite information from the provided sources. -- If sources disagree, note the disagreement. -- If sources are insufficient, say so clearly. -- Keep answers under 300 words unless the query demands more. -- Use ISO 8601 dates when referencing time.`; +function extractJsonObject(text: string): Record | null { + const trimmed = text.trim(); + const fencedMatch = trimmed.match(/```(?:json)?\s*([\s\S]*?)```/i); + const candidate = (fencedMatch?.[1] ?? trimmed).trim(); + const start = candidate.indexOf("{"); + const end = candidate.lastIndexOf("}"); -function buildUserPrompt(query: string, sources: SearchResult[]): string { - const sourceBlock = sources - .map( - (s, i) => - `[${i + 1}] ${s.title}\n${s.url}\n${s.snippet}${s.published ? `\nPublished: ${s.published}` : ""}`, - ) - .join("\n\n"); + if (start < 0 || end <= start) { + return null; + } - return `Query: ${query}\n\nSources:\n${sourceBlock}\n\nSynthesize an answer from these sources.`; + const jsonCandidate = candidate.slice(start, end + 1); + try { + const parsed = JSON.parse(jsonCandidate); + if (parsed && typeof parsed === "object") { + return parsed as Record; + } + return null; + } catch { + return null; + } } -export function scoreConfidence(sources: SearchResult[]): number { - if (sources.length === 0) return 0; +function getAssistantText(payload: Record): string { + if (typeof payload.output_text === "string") { + return payload.output_text; + } + + const choices = payload.choices; + if (Array.isArray(choices) && choices.length > 0) { + const first = choices[0] as { message?: { content?: unknown } }; + const content = first?.message?.content; - let score = 0; + if (typeof content === "string") { + return content; + } - // Base score from number of sources (max 0.4) - score += Math.min(sources.length / 10, 0.4); + if (Array.isArray(content)) { + const parts = content + .map((part) => { + if (typeof part === "string") { + return part; + } + if (part && typeof part === "object" && "text" in part) { + const text = (part as { text?: unknown }).text; + return typeof text === "string" ? text : ""; + } + return ""; + }) + .filter(Boolean); - // Snippet quality (max 0.3) - const avgSnippetLen = - sources.reduce((sum, s) => sum + (s.snippet?.length ?? 0), 0) / - sources.length; - score += Math.min(avgSnippetLen / 500, 0.3); + return parts.join("\n").trim(); + } + } + + return ""; +} - // Source agreement - check overlap in snippets (max 0.2) - if (sources.length >= 2) { - const words0 = new Set( - (sources[0].snippet ?? "").toLowerCase().split(/\s+/), - ); - const words1 = new Set( - (sources[1].snippet ?? "").toLowerCase().split(/\s+/), - ); - const overlap = [...words0].filter((w) => words1.has(w)).length; - const overlapRatio = overlap / Math.max(words0.size, 1); - score += Math.min(overlapRatio, 0.2); +function parseAnswerAndConfidence(rawContent: string): { answer: string; confidence?: number } { + const json = extractJsonObject(rawContent); + if (!json) { + return { answer: rawContent.trim() }; } - // Recency bonus (max 0.1) - const hasPublished = sources.some((s) => s.published); - if (hasPublished) score += 0.1; + const answer = + typeof json.answer === "string" + ? json.answer.trim() + : typeof json.final_answer === "string" + ? json.final_answer.trim() + : rawContent.trim(); - return Math.min(Math.max(score, 0), 1); + const confidence = numberOrUndefined(json.confidence); + return { answer, confidence }; } -export async function synthesise( +function heuristicConfidence(query: string, results: SearchResult[], answer: string): number { + let score = 0.3; + score += Math.min(results.length, 8) * 0.07; + + if (query.trim().length > 20) { + score += 0.05; + } + + if (answer.trim().length >= 80) { + score += 0.08; + } + + if (results.length <= 1) { + score -= 0.12; + } + + return clamp01(score); +} + +function extractUsageTokens(payload: Record): { in?: number; out?: number } { + const usage = payload.usage as Record | undefined; + if (!usage) { + return {}; + } + + const inTokens = numberOrUndefined(usage.prompt_tokens) ?? numberOrUndefined(usage.input_tokens); + const outTokens = numberOrUndefined(usage.completion_tokens) ?? numberOrUndefined(usage.output_tokens); + + return { + in: inTokens, + out: outTokens, + }; +} + +async function safeReadText(response: Response): Promise { + try { + const text = await response.text(); + return text.trim() === "" ? undefined : text; + } catch { + return undefined; + } +} + +export async function synthesizeAnswer( query: string, - sources: SearchResult[], -): Promise { + results: SearchResult[], + options: SynthesisOptions = {}, +): Promise { + const endpoint = process.env.OPENAI_API_URL; + if (!endpoint) { + throw new SynthesisError("OPENAI_API_URL is not configured."); + } + const apiKey = process.env.OPENAI_API_KEY; if (!apiKey) { - throw new Error("OPENAI_API_KEY is required"); + throw new SynthesisError("OPENAI_API_KEY is not configured."); } - const model = "gpt-4o-mini"; - const userPrompt = buildUserPrompt(query, sources); + const model = options.model ?? DEFAULT_MODEL; + const userPrompt = buildUserPrompt(query, results); - const response = await fetch("https://api.openai.com/v1/chat/completions", { - method: "POST", - headers: { - "Content-Type": "application/json", - Authorization: `Bearer ${apiKey}`, - }, - body: JSON.stringify({ - model, - messages: [ - { role: "system", content: SYSTEM_PROMPT }, - { role: "user", content: userPrompt }, - ], - temperature: 0.3, - max_tokens: 800, - }), - }); + const requestBody = { + model, + temperature: options.temperature ?? 0.2, + max_tokens: options.maxOutputTokens ?? 350, + response_format: { type: "json_object" }, + messages: [ + { role: "system", content: SYSTEM_PROMPT }, + { role: "user", content: userPrompt }, + ], + }; + + let response: Response; + try { + response = await fetch(endpoint, { + method: "POST", + signal: options.signal, + headers: { + "Content-Type": "application/json", + Authorization: `Bearer ${apiKey}`, + }, + body: JSON.stringify(requestBody), + }); + } catch (error) { + const message = error instanceof Error ? error.message : "Unknown fetch error"; + throw new SynthesisError(`Synthesis request failed: ${message}`); + } if (!response.ok) { - throw new Error( - `OpenAI API error: ${response.status} ${response.statusText}`, - ); + const details = await safeReadText(response); + throw new SynthesisError(`Synthesis API request failed with status ${response.status}.`, response.status, details); + } + + let payload: Record; + try { + payload = (await response.json()) as Record; + } catch { + throw new SynthesisError("Synthesis API returned invalid JSON.", response.status); } - const body = await response.json(); - const choice = body.choices?.[0]; - const usage = body.usage ?? {}; + const rawContent = getAssistantText(payload).trim(); + const parsed = parseAnswerAndConfidence(rawContent); + + const answer = parsed.answer || "No answer generated."; + const confidence = clamp01(parsed.confidence ?? heuristicConfidence(query, results, answer)); + + const usage = extractUsageTokens(payload); + const tokenIn = usage.in ?? estimateTokens(SYSTEM_PROMPT + userPrompt); + const tokenOut = usage.out ?? estimateTokens(answer); return { - answer: choice?.message?.content ?? "No answer generated.", - confidence: scoreConfidence(sources), + answer, + confidence, tokens: { - in: usage.prompt_tokens ?? 0, - out: usage.completion_tokens ?? 0, + in: tokenIn, + out: tokenOut, }, - model, + model: typeof payload.model === "string" ? payload.model : model, }; } + +export const synthesize = synthesizeAnswer; +export default synthesizeAnswer; \ No newline at end of file diff --git a/src/logic/types.ts b/src/logic/types.ts new file mode 100644 index 0000000..bcf6a95 --- /dev/null +++ b/src/logic/types.ts @@ -0,0 +1,11 @@ +export type Freshness = "day" | "week" | "month"; + +export interface SearchResult { + title: string; + url: string; + description: string; + domain: string; + publishedAt?: string; + score?: number; + source?: string; +} \ No newline at end of file diff --git a/test/brave.test.ts b/test/brave.test.ts new file mode 100644 index 0000000..18a7861 --- /dev/null +++ b/test/brave.test.ts @@ -0,0 +1,77 @@ +import { afterEach, describe, expect, test } from "bun:test"; +import { BraveClientError, searchBrave } from "../src/logic/brave"; + +const originalFetch = globalThis.fetch; + +afterEach(() => { + globalThis.fetch = originalFetch; +}); + +describe("brave.ts", () => { + test("normalises Brave response into SearchResult[] and maps freshness", async () => { + let capturedUrl = ""; + + globalThis.fetch = (async (input: RequestInfo | URL) => { + capturedUrl = typeof input === "string" ? input : input.toString(); + + return new Response( + JSON.stringify({ + web: { + results: [ + { + title: "Queryx Launch", + url: "https://example.com/post", + description: "Queryx ships fast and clean results.", + page_age: "2026-03-01T12:00:00.000Z", + language: "en", + }, + { + title: "Invalid URL result", + url: "not-a-url", + description: "This should be filtered out", + }, + ], + }, + }), + { status: 200, headers: { "content-type": "application/json" } }, + ); + }) as typeof fetch; + + const results = await searchBrave(" Queryx ", { + apiKey: "test-key", + freshness: "week", + count: 5, + }); + + expect(capturedUrl.includes("freshness=pw")).toBe(true); + expect(results.length).toBe(1); + expect(results[0]?.title).toBe("Queryx Launch"); + expect(results[0]?.sourceDomain).toBe("example.com"); + expect(results[0]?.publishedAt).toBe("2026-03-01T12:00:00.000Z"); + }); + + test("throws rate limit error with retry-after", async () => { + globalThis.fetch = (async () => { + return new Response(JSON.stringify({ error: { detail: "Rate limit exceeded" } }), { + status: 429, + headers: { + "content-type": "application/json", + "retry-after": "12", + }, + }); + }) as typeof fetch; + + let thrown: unknown; + try { + await searchBrave("queryx", { apiKey: "test-key" }); + } catch (error) { + thrown = error; + } + + expect(thrown instanceof BraveClientError).toBe(true); + const err = thrown as BraveClientError; + expect(err.code).toBe("rate_limited"); + expect(err.status).toBe(429); + expect(err.retryAfterSeconds).toBe(12); + }); +}); \ No newline at end of file diff --git a/test/cache.test.ts b/test/cache.test.ts new file mode 100644 index 0000000..027b47f --- /dev/null +++ b/test/cache.test.ts @@ -0,0 +1,30 @@ +import { describe, expect, test } from "bun:test"; +import { QueryCache } from "../src/logic/cache"; + +describe("cache.ts", () => { + test("expires entries after TTL", async () => { + const cache = new QueryCache({ ttlSeconds: 0.05 }); + + cache.set(" Queryx API ", "cached"); + expect(cache.get("queryx api")).toBe("cached"); + + await new Promise((resolve) => setTimeout(resolve, 70)); + + expect(cache.get("queryx api")).toBeUndefined(); + }); + + test("tracks hit/miss stats", () => { + const cache = new QueryCache({ ttlSeconds: 30 }); + + cache.set("a", 1); + cache.get("a"); // hit + cache.get("b"); // miss + + const stats = cache.stats(); + + expect(stats.hits).toBe(1); + expect(stats.misses).toBe(1); + expect(stats.hitRate).toBe(0.5); + expect(stats.size).toBe(1); + }); +}); \ No newline at end of file diff --git a/test/logic/brave.test.ts b/test/logic/brave.test.ts new file mode 100644 index 0000000..ed0106d --- /dev/null +++ b/test/logic/brave.test.ts @@ -0,0 +1,83 @@ +import { afterEach, describe, expect, it } from "bun:test"; +import { BraveApiError, searchBrave } from "../../src/logic/brave"; + +const originalFetch = globalThis.fetch; + +function mockFetch( + handler: (input: RequestInfo | URL, init?: RequestInit) => Promise | Response +): void { + globalThis.fetch = (async (input: RequestInfo | URL, init?: RequestInit) => { + return handler(input, init); + }) as typeof fetch; +} + +afterEach(() => { + globalThis.fetch = originalFetch; +}); + +describe("logic/brave", () => { + it("normalises Brave response into SearchResult[] and passes freshness param", async () => { + let calledUrl = ""; + + mockFetch((input) => { + calledUrl = typeof input === "string" ? input : input.toString(); + return new Response( + JSON.stringify({ + web: { + results: [ + { + title: "Queryx launch notes", + url: "https://www.example.com/blog/queryx", + description: "Queryx data layer is now live.", + page_age: "2026-03-03T12:00:00Z" + } + ] + } + }), + { status: 200, headers: { "content-type": "application/json" } } + ); + }); + + const results = await searchBrave("queryx", { + apiKey: "test-api-key", + freshness: "week" + }); + + expect(results).toHaveLength(1); + expect(results[0]).toMatchObject({ + title: "Queryx launch notes", + url: "https://www.example.com/blog/queryx", + snippet: "Queryx data layer is now live.", + domain: "example.com", + source: "brave" + }); + expect(results[0].publishedAt).toBe("2026-03-03T12:00:00.000Z"); + + const url = new URL(calledUrl); + expect(url.searchParams.get("freshness")).toBe("pw"); + expect(url.searchParams.get("q")).toBe("queryx"); + }); + + it("throws BraveApiError on rate limit with retry metadata", async () => { + mockFetch(() => { + return new Response(JSON.stringify({ error: { message: "Too many requests" } }), { + status: 429, + headers: { + "content-type": "application/json", + "retry-after": "15" + } + }); + }); + + try { + await searchBrave("queryx", { apiKey: "test-api-key" }); + throw new Error("Expected searchBrave to throw"); + } catch (error) { + expect(error).toBeInstanceOf(BraveApiError); + const typed = error as BraveApiError; + expect(typed.status).toBe(429); + expect(typed.code).toBe("RATE_LIMITED"); + expect(typed.retryAfterSeconds).toBe(15); + } + }); +}); \ No newline at end of file diff --git a/test/logic/cache.test.ts b/test/logic/cache.test.ts new file mode 100644 index 0000000..ff9b0cf --- /dev/null +++ b/test/logic/cache.test.ts @@ -0,0 +1,35 @@ +import { describe, expect, it } from "bun:test"; +import { InMemoryCache } from "../../src/logic/cache"; + +function sleep(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); +} + +describe("logic/cache", () => { + it("expires entries after TTL", async () => { + const cache = new InMemoryCache(0.02); // 20ms + cache.set("Queryx cache", "value"); + + expect(cache.get("Queryx cache")).toBe("value"); + await sleep(30); + expect(cache.get("Queryx cache")).toBeUndefined(); + + const s = cache.stats(); + expect(s.hits).toBe(1); + expect(s.misses).toBe(1); + }); + + it("tracks hit/miss stats and hit rate", () => { + const cache = new InMemoryCache(60); + + expect(cache.get("missing")).toBeUndefined(); // miss + cache.set("hello world", "ok"); + expect(cache.get("hello world")).toBe("ok"); // hit + + const s = cache.stats(); + expect(s.hits).toBe(1); + expect(s.misses).toBe(1); + expect(s.hitRate).toBe(0.5); + expect(s.size).toBe(1); + }); +}); \ No newline at end of file diff --git a/test/logic/synth.test.ts b/test/logic/synth.test.ts new file mode 100644 index 0000000..f28d7a7 --- /dev/null +++ b/test/logic/synth.test.ts @@ -0,0 +1,98 @@ +import { afterEach, describe, expect, it } from "bun:test"; +import type { SearchResult } from "../../src/logic/brave"; +import { synthesize } from "../../src/logic/synth"; + +const originalFetch = globalThis.fetch; + +function mockFetch( + handler: (input: RequestInfo | URL, init?: RequestInit) => Promise | Response +): void { + globalThis.fetch = (async (input: RequestInfo | URL, init?: RequestInit) => { + return handler(input, init); + }) as typeof fetch; +} + +afterEach(() => { + globalThis.fetch = originalFetch; +}); + +describe("logic/synth", () => { + const sampleResults: SearchResult[] = [ + { + title: "Queryx architecture", + url: "https://docs.queryx.ai/architecture", + snippet: "Queryx uses Brave search retrieval, source ranking, and synthesis over curated snippets.", + domain: "docs.queryx.ai", + publishedAt: "2026-03-03T10:00:00.000Z", + source: "brave" + }, + { + title: "Queryx API changes", + url: "https://blog.queryx.ai/api-update", + snippet: "The latest update adds cache TTL controls and confidence-calibrated answer synthesis.", + domain: "blog.queryx.ai", + publishedAt: "2026-03-02T10:00:00.000Z", + source: "brave" + } + ]; + + it("clamps confidence and uses explicit token usage from API response", async () => { + mockFetch((_input, init) => { + expect(init?.method).toBe("POST"); + + return new Response( + JSON.stringify({ + model: "gpt-4o-mini", + choices: [ + { + message: { + content: "{\"answer\":\"Queryx combines search, ranking, caching, and concise synthesis.\",\"confidence\":1.7}" + } + } + ], + usage: { + prompt_tokens: 120, + completion_tokens: 32 + } + }), + { status: 200, headers: { "content-type": "application/json" } } + ); + }); + + const output = await synthesize("How does Queryx work?", sampleResults, { + apiKey: "test-openai-key" + }); + + expect(output.answer).toContain("Queryx"); + expect(output.confidence).toBe(1); + expect(output.tokens).toEqual({ in: 120, out: 32 }); + expect(output.model).toBe("gpt-4o-mini"); + }); + + it("estimates tokens when usage is absent and computes confidence within [0,1]", async () => { + mockFetch(() => { + return new Response( + JSON.stringify({ + model: "gpt-4o-mini", + choices: [ + { + message: { + content: "{\"answer\":\"Queryx aggregates multiple sources and returns concise factual output.\"}" + } + } + ] + }), + { status: 200, headers: { "content-type": "application/json" } } + ); + }); + + const output = await synthesize("Queryx summary", sampleResults, { + apiKey: "test-openai-key" + }); + + expect(output.tokens.in).toBeGreaterThan(0); + expect(output.tokens.out).toBeGreaterThan(0); + expect(output.confidence).toBeGreaterThanOrEqual(0); + expect(output.confidence).toBeLessThanOrEqual(1); + }); +}); \ No newline at end of file diff --git a/test/rank.test.ts b/test/rank.test.ts new file mode 100644 index 0000000..16c90f9 --- /dev/null +++ b/test/rank.test.ts @@ -0,0 +1,67 @@ +import { describe, expect, test } from "bun:test"; +import type { SearchResult } from "../src/logic/brave"; +import { rankResults } from "../src/logic/rank"; + +describe("rank.ts", () => { + test("deduplicates to max 2 results per domain", () => { + const input: SearchResult[] = [ + { + title: "A1", + url: "https://a.com/1", + snippet: "Detailed explanation about queryx ranking system and internals.", + sourceDomain: "a.com", + publishedAt: "2026-03-03T00:00:00.000Z", + }, + { + title: "A2", + url: "https://a.com/2", + snippet: "Another detailed explanation about queryx ranking and scoring.", + sourceDomain: "a.com", + publishedAt: "2026-03-02T00:00:00.000Z", + }, + { + title: "A3", + url: "https://a.com/3", + snippet: "Third article on same domain with enough content to be valid.", + sourceDomain: "a.com", + publishedAt: "2026-03-01T00:00:00.000Z", + }, + { + title: "B1", + url: "https://b.com/1", + snippet: "Strong alternative source from another domain with good details.", + sourceDomain: "b.com", + publishedAt: "2026-03-01T00:00:00.000Z", + }, + ]; + + const ranked = rankResults(input, { maxPerDomain: 2, limit: 10, now: new Date("2026-03-04T00:00:00.000Z") }); + const aCount = ranked.filter((r) => r.sourceDomain === "a.com").length; + + expect(aCount).toBe(2); + }); + + test("applies recency boost", () => { + const input: SearchResult[] = [ + { + title: "Older source", + url: "https://old.com/post", + snippet: "Comprehensive but old content about queryx architecture and usage.", + sourceDomain: "old.com", + publishedAt: "2020-01-01T00:00:00.000Z", + }, + { + title: "Recent source", + url: "https://new.com/post", + snippet: "Comprehensive and recent content about queryx architecture and usage.", + sourceDomain: "new.com", + publishedAt: "2026-03-03T00:00:00.000Z", + }, + ]; + + const ranked = rankResults(input, { now: new Date("2026-03-04T00:00:00.000Z") }); + + expect(ranked[0]?.sourceDomain).toBe("new.com"); + expect(ranked[0]?.score > ranked[1]?.score).toBe(true); + }); +}); \ No newline at end of file diff --git a/test/synth.test.ts b/test/synth.test.ts new file mode 100644 index 0000000..418e6fb --- /dev/null +++ b/test/synth.test.ts @@ -0,0 +1,81 @@ +import { afterEach, describe, expect, test } from "bun:test"; +import type { SearchResult } from "../src/logic/brave"; +import { synthesize } from "../src/logic/synth"; + +const originalFetch = globalThis.fetch; + +const sampleResults: SearchResult[] = [ + { + title: "Queryx documentation", + url: "https://docs.example.com/queryx", + snippet: "Queryx is a search API with ranking, synthesis, and cache support.", + sourceDomain: "docs.example.com", + publishedAt: "2026-03-01T00:00:00.000Z", + language: "en", + }, +]; + +afterEach(() => { + globalThis.fetch = originalFetch; +}); + +describe("synth.ts", () => { + test("clamps confidence and returns token usage", async () => { + globalThis.fetch = (async () => { + return new Response( + JSON.stringify({ + model: "gpt-4o-mini-2026", + usage: { + prompt_tokens: 120, + completion_tokens: 45, + }, + choices: [ + { + message: { + content: JSON.stringify({ + answer: "Queryx combines retrieval and synthesis for concise responses.", + confidence: 1.7, + }), + }, + }, + ], + }), + { status: 200, headers: { "content-type": "application/json" } }, + ); + }) as typeof fetch; + + const result = await synthesize("What is Queryx?", sampleResults, { apiKey: "test-key" }); + + expect(result.confidence).toBe(1); + expect(result.tokens.in).toBe(120); + expect(result.tokens.out).toBe(45); + expect(result.model).toBe("gpt-4o-mini-2026"); + }); + + test("estimates token usage when upstream usage is missing", async () => { + globalThis.fetch = (async () => { + return new Response( + JSON.stringify({ + model: "gpt-4o-mini", + choices: [ + { + message: { + content: JSON.stringify({ + answer: "There is not enough information to fully answer this.", + confidence: -0.5, + }), + }, + }, + ], + }), + { status: 200, headers: { "content-type": "application/json" } }, + ); + }) as typeof fetch; + + const result = await synthesize("unknown", sampleResults, { apiKey: "test-key" }); + + expect(result.confidence).toBe(0); + expect(result.tokens.in > 0).toBe(true); + expect(result.tokens.out > 0).toBe(true); + }); +}); \ No newline at end of file