diff --git a/STRUCTURE.md b/STRUCTURE.md index 66be292905..b6fdc7154d 100644 --- a/STRUCTURE.md +++ b/STRUCTURE.md @@ -227,6 +227,7 @@ cloud/ │ │ └── globals.css │ ├── router.tsx │ └── routeTree.gen.ts # Auto-generated route tree +├── server-entry.ts # Worker entry (fetch + queue handlers) ├── api/ # Backend API handlers │ ├── api.ts │ ├── docs.handlers.ts # Documentation handlers diff --git a/cloud/server-entry.ts b/cloud/server-entry.ts new file mode 100644 index 0000000000..fe237fe8df --- /dev/null +++ b/cloud/server-entry.ts @@ -0,0 +1,13 @@ +import { + createStartHandler, + defaultStreamHandler, +} from "@tanstack/react-start/server"; +import clickhouseQueueConsumer from "@/workers/clickhouseQueueConsumer"; + +const fetch = createStartHandler(defaultStreamHandler); /* fetch handler produced by createStartHandler from defaultStreamHandler */ +const queue = clickhouseQueueConsumer.queue.bind(clickhouseQueueConsumer); /* bound so 'this' stays the consumer object after the method is detached and re-exported */ + +export default { /* single entry object exposing both the fetch and the queue handler */ + fetch, + queue, +}; diff --git a/cloud/workers/clickhouseQueueConsumer.ts b/cloud/workers/clickhouseQueueConsumer.ts new file mode 100644 index 0000000000..de282cdfc2 --- /dev/null +++ b/cloud/workers/clickhouseQueueConsumer.ts @@ -0,0 +1,228 @@ +/** + * @fileoverview Cloudflare Queue Consumer for ClickHouse sync. + * + * Receives outbox events from Cloudflare Queue and syncs spans to ClickHouse. + * Uses outbox table lock/status management to prevent duplicate processing. 
+ * + * ## Architecture + * + * ``` + * Cloudflare Queue (spans-outbox) + * └── Queue Consumer (this file) + * ├── DrizzleORM (PostgreSQL via Hyperdrive) + * └── ClickHouseLive (HTTP API) + * ``` + * + * ## Message Format + * + * ```json + * { "spanId": "uuid", "operation": "INSERT" } + * ``` + * + * ## Deduplication + * + * - Same spanId:operation may arrive multiple times in a batch + * - We deduplicate locally before processing + * - All duplicate messages are acked/retried together + */ + +import { Effect, Layer } from "effect"; +import { DrizzleORM } from "@/db/client"; +import { ClickHouseLive } from "@/clickhouse/client"; +import { + processOutboxMessages, + type OutboxMessage, +} from "@/workers/outboxProcessor"; +import { + SettingsService, + getSettingsFromEnvironment, + type CloudflareEnvironment, +} from "@/settings"; + +// ============================================================================= +// Types +// ============================================================================= + +/** + * Queue message body format. + * Kept minimal to avoid hitting Cloudflare Queue's 128KB limit. + */ +type QueueMessageBody = { + spanId: string; + operation: "INSERT" | "UPDATE" | "DELETE"; +}; + +/** + * Cloudflare Queue Message type (subset of Cloudflare Workers types), generic over the body type (defaults to unknown). + */ +interface QueueMessage<Body = unknown>
{ + readonly id: string; + readonly body: Body; + ack(): void; + retry(): void; +} + +/** + * Cloudflare Queue MessageBatch type, generic over the body type of its messages (defaults to unknown). + */ +interface MessageBatch<Body = unknown> { + readonly queue: string; + readonly messages: readonly QueueMessage<Body>[]; +} + +/** + * Extended Cloudflare environment bindings for Queue Consumer. + * + * Includes Hyperdrive for PostgreSQL connection from Workers. + */ +export interface QueueConsumerEnvironment extends CloudflareEnvironment { + /** Hyperdrive binding for PostgreSQL connection pooling */ + readonly HYPERDRIVE?: { + readonly connectionString: string; + }; + /** Direct database URL (fallback when Hyperdrive is not configured) */ + readonly DATABASE_URL?: string; +} + +// ============================================================================= +// Worker ID Generation +// ============================================================================= + +/** + * Generate a unique worker ID for lock identification. + * + * Format: workers-{queue}-{uuid8} + * - workers: Environment identifier (Cloudflare Workers) + * - queue: Queue name for context + * - uuid8: First 8 characters of a UUID for uniqueness + * + * Note: In the Workers environment, os.hostname() and process.pid are not available. + * This format is consistent with clickhouseSyncLocal.ts but adapted for Workers. + */ +const generateWorkerId = (queueName: string): string => { + const uuid = crypto.randomUUID().slice(0, 8); + return `workers-${queueName}-${uuid}`; +}; + +// ============================================================================= +// Queue Consumer Handler +// ============================================================================= + +/** + * Cloudflare Queue Consumer export. + * + * Processes outbox messages in batches, syncing spans to ClickHouse. + * Uses Workers-compatible implementations for both PostgreSQL and ClickHouse. + */ +export default { + /** + * Queue handler entry point. 
+ * + * @param batch - Batch of messages from Cloudflare Queue + * @param env - Cloudflare Workers environment bindings + */ + async queue( + batch: MessageBatch