Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ node_modules/
.DS_Store
coverage/
dist/
.tmp/
# Web SPA build output (built by build-web.ts)
packages/canonry/assets/web/
# Build-time copies of skills from repo root (copied by copy-agent-assets.ts)
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "canonry",
"private": true,
"version": "4.10.1",
"version": "4.11.0",
"type": "module",
"packageManager": "pnpm@10.28.2",
"scripts": {
Expand Down
2 changes: 1 addition & 1 deletion packages/canonry/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@ainyc/canonry",
"version": "4.10.1",
"version": "4.11.0",
"type": "module",
"description": "Agent-first open-source AEO operating platform - track how answer engines cite your domain",
"license": "FSL-1.1-ALv2",
Expand Down
1 change: 1 addition & 0 deletions packages/contracts/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,4 @@ export * from './citations.js'
export * from './report.js'
export * from './report-dedup.js'
export * from './skills.js'
export * from './traffic.js'
70 changes: 70 additions & 0 deletions packages/contracts/src/traffic.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import { z } from 'zod'

/**
 * Closed set of traffic source identifiers. Parsing any string outside this
 * list fails validation.
 */
export const trafficSourceTypeSchema = z.enum([
'cloud-run',
'wordpress',
'cloudflare',
'vercel',
'generic-log',
])
/** Union of the string literals accepted by `trafficSourceTypeSchema`. */
export type TrafficSourceType = z.infer<typeof trafficSourceTypeSchema>
/** Keyed lookup of the enum members, e.g. `TrafficSourceTypes['cloud-run']`. */
export const TrafficSourceTypes = trafficSourceTypeSchema.enum

/**
 * Closed set of capability names for traffic adapters.
 *
 * NOTE(review): presumably each adapter advertises the subset it supports
 * (raw events vs. aggregates, which request fields it can supply, whether it
 * can be pulled with a cursor) — no consumer is visible here; confirm against
 * the adapter packages.
 */
export const trafficAdapterCapabilitySchema = z.enum([
'raw-request-events',
'aggregate-request-metrics',
'request-url',
'status-code',
'user-agent',
'remote-ip',
'referer',
'cursor-pull',
])
/** Union of the string literals accepted by `trafficAdapterCapabilitySchema`. */
export type TrafficAdapterCapability = z.infer<typeof trafficAdapterCapabilitySchema>
/** Keyed lookup of the capability names, e.g. `TrafficAdapterCapabilities['cursor-pull']`. */
export const TrafficAdapterCapabilities = trafficAdapterCapabilitySchema.enum

/**
 * Kind of evidence a traffic record represents: a single raw request or an
 * aggregate bucket. `normalizedTrafficRequestSchema` in this file only accepts
 * `'raw-request'`.
 */
export const trafficEvidenceKindSchema = z.enum(['raw-request', 'aggregate-bucket'])
/** Union of the string literals accepted by `trafficEvidenceKindSchema`. */
export type TrafficEvidenceKind = z.infer<typeof trafficEvidenceKindSchema>
/** Keyed lookup of the evidence kinds, e.g. `TrafficEvidenceKinds['raw-request']`. */
export const TrafficEvidenceKinds = trafficEvidenceKindSchema.enum

/**
 * Confidence label attached to a traffic record. `normalizedTrafficRequestSchema`
 * in this file only accepts `'observed'`.
 */
export const trafficEventConfidenceSchema = z.enum(['observed', 'provider-aggregated', 'inferred'])
/** Union of the string literals accepted by `trafficEventConfidenceSchema`. */
export type TrafficEventConfidence = z.infer<typeof trafficEventConfidenceSchema>
/** Keyed lookup of the confidence labels, e.g. `TrafficEventConfidences.observed`. */
export const TrafficEventConfidences = trafficEventConfidenceSchema.enum

/**
 * Provider-side resource descriptor carried on each normalized request:
 * a nullable resource type string plus a string-to-string label map
 * (labels is required; pass `{}` when there are none).
 */
export const trafficProviderResourceSchema = z.object({
type: z.string().nullable(),
labels: z.record(z.string(), z.string()),
})
/** Object type inferred from `trafficProviderResourceSchema`. */
export type TrafficProviderResource = z.infer<typeof trafficProviderResourceSchema>

/**
 * Provider-neutral shape of one raw HTTP request event.
 *
 * Every key is required. Fields a source may be unable to supply are
 * `nullable()` rather than optional, so producers must write an explicit
 * `null`. `eventId`, `observedAt` and `path` must be non-empty strings.
 */
export const normalizedTrafficRequestSchema = z.object({
sourceType: trafficSourceTypeSchema,
// Pinned to single literals: this schema describes directly observed raw requests only.
evidenceKind: z.literal(TrafficEvidenceKinds['raw-request']),
confidence: z.literal(TrafficEventConfidences.observed),
eventId: z.string().min(1),
// Only checked for non-emptiness; the timestamp format is not validated here.
observedAt: z.string().min(1),
method: z.string().nullable(),
requestUrl: z.string().nullable(),
host: z.string().nullable(),
path: z.string().min(1),
queryString: z.string().nullable(),
status: z.number().int().nullable(),
userAgent: z.string().nullable(),
remoteIp: z.string().nullable(),
referer: z.string().nullable(),
// Unlike the byte counts below, latency is not restricted to integers.
latencyMs: z.number().nullable(),
requestSizeBytes: z.number().int().nullable(),
responseSizeBytes: z.number().int().nullable(),
providerResource: trafficProviderResourceSchema,
providerLabels: z.record(z.string(), z.string()),
})
/** Object type inferred from `normalizedTrafficRequestSchema`. */
export type NormalizedTrafficRequest = z.infer<typeof normalizedTrafficRequestSchema>

/**
 * Result of one pull from a traffic source: the normalized events, two
 * non-negative integer counters, an optional continuation token and the
 * filter string.
 *
 * NOTE(review): presumably `rawEntryCount` is the number of provider entries
 * read and `skippedEntryCount` the number that could not be normalized —
 * confirm against the adapters that produce this shape.
 */
export const normalizedTrafficPullPageSchema = z.object({
events: z.array(normalizedTrafficRequestSchema),
rawEntryCount: z.number().int().nonnegative(),
skippedEntryCount: z.number().int().nonnegative(),
// May be omitted entirely (optional, not nullable).
nextPageToken: z.string().optional(),
filter: z.string(),
})
/** Object type inferred from `normalizedTrafficPullPageSchema`. */
export type NormalizedTrafficPullPage = z.infer<typeof normalizedTrafficPullPageSchema>
45 changes: 45 additions & 0 deletions packages/contracts/test/traffic.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import { describe, expect, it } from 'vitest'
import {
TrafficEventConfidences,
TrafficEvidenceKinds,
TrafficSourceTypes,
normalizedTrafficRequestSchema,
} from '../src/traffic.js'

describe('traffic contracts', () => {
  it('accepts a raw request event from any server-side adapter', () => {
    // Fully populated fixture: every nullable field carries a real value so
    // the schema is exercised on the "everything present" path.
    const cloudRunEvent = {
      sourceType: TrafficSourceTypes['cloud-run'],
      evidenceKind: TrafficEvidenceKinds['raw-request'],
      confidence: TrafficEventConfidences.observed,
      eventId: 'cloud-run:2026-04-30T12:00:00.000Z:abc123',
      observedAt: '2026-04-30T12:00:00.000Z',
      method: 'GET',
      requestUrl: 'https://example.com/blog/post?utm_source=chatgpt.com',
      host: 'example.com',
      path: '/blog/post',
      queryString: 'utm_source=chatgpt.com',
      status: 200,
      userAgent: 'GPTBot/1.2',
      remoteIp: '203.0.113.10',
      referer: 'https://chatgpt.com/',
      latencyMs: 123.4,
      requestSizeBytes: 456,
      responseSizeBytes: 789,
      providerResource: {
        type: 'cloud_run_revision',
        labels: {
          project_id: 'sample-project',
          service_name: 'web',
          location: 'us-central1',
        },
      },
      providerLabels: {},
    }

    const result = normalizedTrafficRequestSchema.parse(cloudRunEvent)

    // Spot-check the discriminating fields plus one plain field.
    expect(result.sourceType).toBe(TrafficSourceTypes['cloud-run'])
    expect(result.evidenceKind).toBe(TrafficEvidenceKinds['raw-request'])
    expect(result.confidence).toBe(TrafficEventConfidences.observed)
    expect(result.path).toBe('/blog/post')
  })
})
36 changes: 36 additions & 0 deletions packages/integration-cloud-run/AGENTS.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# integration-cloud-run

## Purpose

Cloud Run / Cloud Logging integration — pulls request logs for `cloud_run_revision` resources via the Cloud Logging `entries.list` API and normalizes them into provider-neutral `NormalizedTrafficRequest` events for the traffic ingestion pipeline.

## Key Files

| File | Role |
|------|------|
| `src/client.ts` | `listCloudRunTrafficEvents` — paginated `entries.list` pull, page-token cursoring, `CloudRunLoggingApiError` |
| `src/filter.ts` | `buildCloudRunLogFilter` — composes the Cloud Logging query string from service/location/timestamp/url/UA narrowing options |
| `src/normalize.ts` | `normalizeCloudRunLogEntry` — converts a Cloud Logging `LogEntry.httpRequest` into a `NormalizedTrafficRequest` |
| `src/types.ts` | Adapter option/response shapes (`ListCloudRunTrafficEventsOptions`, `CloudRunTrafficEventsPage`, raw `LogEntry` types) |
| `src/index.ts` | Re-exports public API |

## Patterns

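- **Bearer-token auth.** The caller supplies an OAuth access token that is allowed to call `entries.list` (IAM permission `logging.logEntries.list`; a read-only scope such as `https://www.googleapis.com/auth/logging.read` is sufficient). This package does not own the token — credentials live in `~/.canonry/config.yaml` and are exchanged by the consumer (CLI/script/server).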
- **Pull-only, cursor-paginated.** `listCloudRunTrafficEvents` accepts `pageToken` / `pageSize` / `maxPages` so callers can do incremental syncs. No push, no SaaS relay.
- **Provider-neutral output.** Every adapter in the traffic stack normalizes to the same `NormalizedTrafficRequest` shape from `@ainyc/canonry-contracts`. Do not leak Cloud Logging types past the package boundary.
- **Narrow filters when possible.** `buildCloudRunLogFilter` composes filters incrementally (service, location, time window, request URL substring, user-agent substrings). Narrower filters lower Cloud Logging cost; the `--narrow-bots` mode in the probe script makes that trade deliberately, giving up human AI-referral coverage to pull crawler-only traffic.

## Common Mistakes

- **Calling `entries.list` without `resourceNames`.** Cloud Logging requires it; the client always passes `projects/<id>`.
- **Storing access tokens in this package.** Tokens are short-lived and supplied per call.
- **Using this client for non-`cloud_run_revision` resources.** The filter and normalizer are scoped to Cloud Run request logs. Other resource types need a separate adapter.

## See Also

- `packages/contracts/src/traffic.ts` — the `NormalizedTrafficRequest` contract this package emits
- `packages/integration-traffic/` — provider-neutral classifier + rollup over normalized events
- `plans/cloud-run-traffic-source-model-review.md` — design rationale for the raw-event vs aggregate-bucket split
- `plans/server-side-ai-traffic-ingestion.md` — overall traffic ingestion plan
- `scripts/test-cloud-run-traffic-pull.ts` — local probe that exercises pull → normalize → analyze
1 change: 1 addition & 0 deletions packages/integration-cloud-run/CLAUDE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
@AGENTS.md
22 changes: 22 additions & 0 deletions packages/integration-cloud-run/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"name": "@ainyc/canonry-integration-cloud-run",
"version": "0.0.0",
"private": true,
"type": "module",
"license": "FSL-1.1-ALv2",
"exports": {
".": {
"types": "./src/index.ts",
"default": "./src/index.ts"
}
},
"types": "./src/index.ts",
"scripts": {
"typecheck": "tsc --noEmit -p tsconfig.json",
"test": "vitest run",
"lint": "eslint src/ test/"
},
"dependencies": {
"@ainyc/canonry-contracts": "workspace:*"
}
}
130 changes: 130 additions & 0 deletions packages/integration-cloud-run/src/client.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
import { buildCloudRunLogFilter } from './filter.js'
import { normalizeCloudRunLogEntry } from './normalize.js'
import type {
CloudRunListLogEntriesResponse,
CloudRunTrafficEventsPage,
ListCloudRunTrafficEventsOptions,
} from './types.js'

const CLOUD_LOGGING_ENTRIES_LIST_URL = 'https://logging.googleapis.com/v2/entries:list'
const DEFAULT_PAGE_SIZE = 1000
const DEFAULT_MAX_PAGES = 1
const DEFAULT_TIMEOUT_MS = 30_000

/**
 * Error type thrown by this client, both for rejected arguments (raised with
 * status 400 by the validators in this file) and for non-2xx responses from
 * Cloud Logging (raised with the HTTP status of the response).
 */
export class CloudRunLoggingApiError extends Error {
  /** HTTP-style status code describing the failure. */
  public readonly status: number
  /** Response body text, when one was captured. */
  public readonly body?: string

  /**
   * @param message Human-readable description of the failure.
   * @param status HTTP-style status code.
   * @param body Optional response body text.
   */
  constructor(message: string, status: number, body?: string) {
    super(message)
    this.status = status
    this.body = body
    this.name = 'CloudRunLoggingApiError'
  }
}

/**
 * Rejects an access token that is empty or whitespace-only.
 *
 * @param accessToken Bearer token supplied by the caller.
 * @throws CloudRunLoggingApiError with status 400 when nothing is left after trimming.
 */
function validateAccessToken(accessToken: string): void {
  const hasContent = accessToken.trim().length > 0
  if (hasContent) return
  throw new CloudRunLoggingApiError('Cloud Logging access token is required', 400)
}

/**
 * Rejects a GCP project ID that is empty or whitespace-only.
 *
 * @param gcpProjectId Project ID supplied by the caller.
 * @throws CloudRunLoggingApiError with status 400 when nothing is left after trimming.
 */
function validateProjectId(gcpProjectId: string): void {
  const trimmed = gcpProjectId.trim()
  if (trimmed === '') {
    throw new CloudRunLoggingApiError('GCP project ID is required', 400)
  }
}

/**
 * Resolves the page size to send to Cloud Logging.
 *
 * @param pageSize Caller-supplied page size, or `undefined` to use the default.
 * @returns `DEFAULT_PAGE_SIZE` when omitted, otherwise the value unchanged.
 * @throws CloudRunLoggingApiError with status 400 unless the value is an integer >= 1.
 */
function normalizePageSize(pageSize: number | undefined): number {
  if (pageSize === undefined) return DEFAULT_PAGE_SIZE

  const isPositiveInteger = Number.isInteger(pageSize) && pageSize >= 1
  if (isPositiveInteger) return pageSize

  throw new CloudRunLoggingApiError('pageSize must be a positive integer', 400)
}

/**
 * Resolves how many pages a single call is allowed to fetch.
 *
 * @param maxPages Caller-supplied page limit, or `undefined` to use the default.
 * @returns `DEFAULT_MAX_PAGES` when omitted, otherwise the value unchanged.
 * @throws CloudRunLoggingApiError with status 400 unless the value is an integer >= 1.
 */
function normalizeMaxPages(maxPages: number | undefined): number {
  if (maxPages === undefined) {
    return DEFAULT_MAX_PAGES
  }

  const valid = Number.isInteger(maxPages) && maxPages > 0
  if (!valid) {
    throw new CloudRunLoggingApiError('maxPages must be a positive integer', 400)
  }
  return maxPages
}

/**
 * Reads a failed response's body for inclusion in an error, capped at 500
 * UTF-16 code units.
 *
 * @param response The non-OK response whose body should be captured.
 * @returns The body text, truncated with a `... [truncated]` suffix when it is
 *   longer than the cap, or `undefined` when the body is empty or unreadable.
 */
async function readErrorBody(response: Response): Promise<string | undefined> {
  const maxChars = 500
  // Best effort: a body that cannot be read is treated the same as an empty one.
  const text = await response.text().catch(() => '')
  if (!text) return undefined
  if (text.length <= maxChars) return text

  // Never cut between the two halves of a surrogate pair: if the last kept
  // code unit is a high surrogate, drop it so the result stays well-formed.
  const lastKept = text.charCodeAt(maxChars - 1)
  const endsOnHighSurrogate = lastKept >= 0xd800 && lastKept <= 0xdbff
  const end = endsOnHighSurrogate ? maxChars - 1 : maxChars
  return `${text.slice(0, end)}... [truncated]`
}

/**
 * Pulls Cloud Run request log entries through the Cloud Logging `entries:list`
 * endpoint and returns them as normalized traffic events.
 *
 * Fetches at most `options.maxPages` pages, starting from `options.pageToken`
 * when given, and stops early once the API returns no continuation token.
 * Entries that `normalizeCloudRunLogEntry` returns nothing for are counted as
 * skipped rather than returned.
 *
 * @param accessToken Bearer token sent in the `Authorization` header.
 * @param options Project, filter and paging options for the pull.
 * @returns The collected events, the raw and skipped entry counts, the token
 *   for the next page (if any) and the filter string that was used.
 * @throws CloudRunLoggingApiError with status 400 for an empty token or project
 *   ID or an invalid `pageSize` / `maxPages`, and with the response status
 *   when the API answers with a non-OK status.
 */
export async function listCloudRunTrafficEvents(
  accessToken: string,
  options: ListCloudRunTrafficEventsOptions,
): Promise<CloudRunTrafficEventsPage> {
  validateAccessToken(accessToken)
  validateProjectId(options.gcpProjectId)

  const filter = buildCloudRunLogFilter(options)
  const pageSize = normalizePageSize(options.pageSize)
  const pageLimit = normalizeMaxPages(options.maxPages)
  const timeoutMs = options.timeoutMs ?? DEFAULT_TIMEOUT_MS

  // The same headers are sent with every page request.
  const requestHeaders = {
    Authorization: `Bearer ${accessToken}`,
    'Content-Type': 'application/json',
  }

  const collected: CloudRunTrafficEventsPage['events'] = []
  let cursor = options.pageToken
  let totalEntries = 0
  let skippedEntries = 0
  let pagesFetched = 0

  while (pagesFetched < pageLimit) {
    pagesFetched += 1

    const payload: Record<string, unknown> = {
      resourceNames: [`projects/${options.gcpProjectId}`],
      filter,
      orderBy: options.orderBy ?? 'timestamp asc',
      pageSize,
      // Only send a page token when continuing from a previous page.
      ...(cursor ? { pageToken: cursor } : {}),
    }

    const response = await fetch(CLOUD_LOGGING_ENTRIES_LIST_URL, {
      method: 'POST',
      headers: requestHeaders,
      body: JSON.stringify(payload),
      // A fresh timeout signal per request, so each page gets the full budget.
      signal: AbortSignal.timeout(timeoutMs),
    })

    if (!response.ok) {
      const errorText = await readErrorBody(response)
      throw new CloudRunLoggingApiError(
        `Cloud Logging entries.list failed with HTTP ${response.status}`,
        response.status,
        errorText,
      )
    }

    const parsed = (await response.json()) as CloudRunListLogEntriesResponse
    const pageEntries = parsed.entries ?? []
    totalEntries += pageEntries.length

    for (const entry of pageEntries) {
      const normalized = normalizeCloudRunLogEntry(entry)
      if (!normalized) {
        skippedEntries += 1
        continue
      }
      collected.push(normalized)
    }

    cursor = parsed.nextPageToken
    if (!cursor) break
  }

  return {
    events: collected,
    rawEntryCount: totalEntries,
    skippedEntryCount: skippedEntries,
    // Still set when the page limit was hit first, so callers can resume.
    nextPageToken: cursor,
    filter,
  }
}
Loading
Loading