Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "canonry",
"private": true,
"version": "4.11.0",
"version": "4.11.1",
"type": "module",
"packageManager": "pnpm@10.28.2",
"scripts": {
Expand Down
43 changes: 39 additions & 4 deletions packages/api-routes/src/ga.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,26 @@ function formatSharePct(numerator: number, total: number): string {
return `${rounded}%`
}

/**
 * Collapses rows that share a tuple key down to a single winner: the row
 * with the largest `sessions` value (a `null` count is treated as 0).
 *
 * GA4 reports one row per attribution dimension (session, first_user,
 * manual_utm). Those dimensions are overlapping views of the same visit, so
 * adding them together would double-count; keeping only the strongest row
 * per key avoids that.
 *
 * When two rows with the same key have equal session counts, the row that
 * appears first in `rows` is kept. The input array is not modified.
 *
 * @param rows - Candidate rows; several rows may map to the same key.
 * @param tupleKey - Derives the grouping key for a row.
 * @returns One row per distinct key, ordered by sessions descending.
 */
function pickWinningDimension<T extends { sessions: number | null }>(
  rows: T[],
  tupleKey: (row: T) => string,
): T[] {
  const sessionsOf = (row: T): number => row.sessions ?? 0

  const bestByKey = rows.reduce((best, candidate) => {
    const key = tupleKey(candidate)
    const incumbent = best.get(key)
    // Strict comparison: an equal count never displaces the incumbent.
    if (incumbent === undefined || sessionsOf(candidate) > sessionsOf(incumbent)) {
      best.set(key, candidate)
    }
    return best
  }, new Map<string, T>())

  const ranked = Array.from(bestByKey.values())
  ranked.sort((left, right) => sessionsOf(right) - sessionsOf(left))
  return ranked
}

export interface Ga4CredentialRecord {
projectName: string
propertyId: string
Expand Down Expand Up @@ -707,7 +727,7 @@ export async function ga4Routes(app: FastifyInstance, opts: GA4RoutesOptions) {
.limit(limit)
.all()

const aiReferrals = app.db
const aiReferralRows = app.db
.select({
source: gaAiReferrals.source,
medium: gaAiReferrals.medium,
Expand All @@ -718,10 +738,9 @@ export async function ga4Routes(app: FastifyInstance, opts: GA4RoutesOptions) {
.from(gaAiReferrals)
.where(and(...aiConditions))
.groupBy(gaAiReferrals.source, gaAiReferrals.medium, gaAiReferrals.sourceDimension)
.orderBy(sql`SUM(${gaAiReferrals.sessions}) DESC`)
.all()

const aiReferralLandingPages = app.db
const aiReferralLandingPageRows = app.db
.select({
source: gaAiReferrals.source,
medium: gaAiReferrals.medium,
Expand All @@ -738,9 +757,25 @@ export async function ga4Routes(app: FastifyInstance, opts: GA4RoutesOptions) {
gaAiReferrals.sourceDimension,
sql`COALESCE(${gaAiReferrals.landingPageNormalized}, ${gaAiReferrals.landingPage})`,
)
.orderBy(sql`SUM(${gaAiReferrals.sessions}) DESC`)
.all()

// Dedupe across attribution dimensions: 'session', 'first_user', and
// 'manual_utm' are overlapping lenses on the same visit, not disjoint
// events. Returning all three would inflate the row count (e.g. 1 source
// showing as 6 rows). Keep the winning dimension — the one with the
// highest session count — per (source, medium) for `aiReferrals` and per
// (source, medium, landingPage) for `aiReferralLandingPages`. The cross-
// cutting / session-only totals (`aiSessionsDeduped`, `aiSessionsBySession`)
// are computed independently below and are unaffected.
// Composite keys are JSON-encoded arrays rather than bare concatenations so
// field boundaries stay unambiguous. With plain concatenation the tuples
// ('ab', 'c') and ('a', 'bc') produce the same key, and one tuple's row
// would be silently discarded as a "losing dimension" of the other.
const aiReferrals = pickWinningDimension(
  aiReferralRows,
  (r) => JSON.stringify([r.source, r.medium]),
)
const aiReferralLandingPages = pickWinningDimension(
  aiReferralLandingPageRows,
  (r) => JSON.stringify([r.source, r.medium, r.landingPage]),
)

// Deduplicated AI totals: sessionSource, firstUserSource, and manualSource are
// overlapping attribution lenses, not disjoint visits. To avoid double-counting,
// first sum landing pages within each dimension, then take MAX(sessions) per
Expand Down
152 changes: 152 additions & 0 deletions packages/api-routes/test/ga.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1210,6 +1210,158 @@ describe('GA4 routes', () => {
}
})

it('GET /ga/traffic dedupes aiReferrals + aiReferralLandingPages to the winning attribution dimension', async () => {
// Regression for the inflated-row-count bug: GA4 emits one row per
// attribution dimension (session, first_user, manual_utm) and our SQL
// groups by all three, so the pre-fix response had three rows for what
// the user perceives as a single source. The fix collapses to the
// winning dimension per (source, medium) and per (source, medium,
// landingPage) so the table reflects reality.
const now = new Date().toISOString()
// Register a credential record for 'test-project', the project addressed
// by the request URL below. Removed again in the `finally` block.
credentials.set('test-project', {
projectName: 'test-project',
propertyId: '999888',
clientEmail: 'sa@test.iam.gserviceaccount.com',
privateKey: 'fake-key',
createdAt: now,
updatedAt: now,
})

// Seed three dimensions for the same (source, medium, landingPage)
// tuple. session=4 wins for chatgpt.com (>2 first_user, 1 manual_utm).
const idChatgptSession = crypto.randomUUID()
const idChatgptFirst = crypto.randomUUID()
const idChatgptUtm = crypto.randomUUID()
// Plus a separate (source, medium) where first_user beats session.
const idClaudeSession = crypto.randomUUID()
const idClaudeFirst = crypto.randomUUID()
// Collected so the `finally` block can delete exactly the rows seeded here.
const seededIds = [idChatgptSession, idChatgptFirst, idChatgptUtm, idClaudeSession, idClaudeFirst]
// NOTE(review): fixed calendar date. The request below passes no date
// parameters — confirm /ga/traffic applies no default look-back window,
// otherwise this test becomes time-dependent.
const dedupDate = '2025-11-15'
// Every seeded source carries a 'dedup-' prefix so the assertions below
// can filter the response down to this test's rows.
db.insert(gaAiReferrals).values([
{
id: idChatgptSession,
projectId,
date: dedupDate,
source: 'dedup-chatgpt.com',
medium: 'referral',
sourceDimension: 'session',
landingPage: '/dedup-page',
sessions: 4,
users: 3,
syncedAt: now,
},
{
id: idChatgptFirst,
projectId,
date: dedupDate,
source: 'dedup-chatgpt.com',
medium: 'referral',
sourceDimension: 'first_user',
landingPage: '/dedup-page',
sessions: 2,
users: 2,
syncedAt: now,
},
{
id: idChatgptUtm,
projectId,
date: dedupDate,
source: 'dedup-chatgpt.com',
medium: 'referral',
sourceDimension: 'manual_utm',
landingPage: '/dedup-page',
sessions: 1,
users: 1,
syncedAt: now,
},
{
id: idClaudeSession,
projectId,
date: dedupDate,
source: 'dedup-claude.ai',
medium: 'referral',
sourceDimension: 'session',
landingPage: '/dedup-other',
sessions: 3,
users: 2,
syncedAt: now,
},
{
id: idClaudeFirst,
projectId,
date: dedupDate,
source: 'dedup-claude.ai',
medium: 'referral',
sourceDimension: 'first_user',
landingPage: '/dedup-other',
sessions: 9,
users: 7,
syncedAt: now,
},
]).run()

try {
const res = await app.inject({
method: 'GET',
url: '/api/v1/projects/test-project/ga/traffic',
})
expect(res.statusCode).toBe(200)
const body = JSON.parse(res.payload)

// Three rows seeded for chatgpt.com → exactly one row in the API
// response, surfacing the winning dimension.
const chatgptRows = body.aiReferrals.filter((r: { source: string }) => r.source === 'dedup-chatgpt.com')
expect(chatgptRows).toHaveLength(1)
expect(chatgptRows[0]).toMatchObject({
source: 'dedup-chatgpt.com',
medium: 'referral',
sourceDimension: 'session',
sessions: 4,
})

// Two rows seeded for claude.ai → one winning-dimension row.
const claudeRows = body.aiReferrals.filter((r: { source: string }) => r.source === 'dedup-claude.ai')
expect(claudeRows).toHaveLength(1)
expect(claudeRows[0]).toMatchObject({
source: 'dedup-claude.ai',
medium: 'referral',
sourceDimension: 'first_user',
sessions: 9,
})

// Landing-page table dedupes per (source, medium, landingPage).
const chatgptLanding = body.aiReferralLandingPages.filter(
(r: { source: string; landingPage: string }) =>
r.source === 'dedup-chatgpt.com' && r.landingPage === '/dedup-page',
)
expect(chatgptLanding).toHaveLength(1)
expect(chatgptLanding[0]).toMatchObject({
source: 'dedup-chatgpt.com',
sourceDimension: 'session',
sessions: 4,
})
const claudeLanding = body.aiReferralLandingPages.filter(
(r: { source: string; landingPage: string }) =>
r.source === 'dedup-claude.ai' && r.landingPage === '/dedup-other',
)
expect(claudeLanding).toHaveLength(1)
expect(claudeLanding[0]).toMatchObject({
source: 'dedup-claude.ai',
sourceDimension: 'first_user',
sessions: 9,
})

// Output is sorted by sessions descending — the claude.ai winner (9)
// ranks above the chatgpt.com winner (4).
const dedupRows = body.aiReferrals.filter((r: { source: string }) => r.source.startsWith('dedup-'))
expect(dedupRows[0].source).toBe('dedup-claude.ai')
expect(dedupRows[1].source).toBe('dedup-chatgpt.com')
} finally {
// Runs whether or not an assertion failed: delete the seeded rows by id
// and remove the credential record registered at the top of the test.
db.delete(gaAiReferrals).where(inArray(gaAiReferrals.id, seededIds)).run()
credentials.delete('test-project')
}
})

it('GET /ga/attribution-trend ai channel uses sessionSource only (matches breakdown cell)', async () => {
const now = new Date().toISOString()
const daysAgo = (n: number): string => {
Expand Down
2 changes: 1 addition & 1 deletion packages/canonry/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@ainyc/canonry",
"version": "4.11.0",
"version": "4.11.1",
"type": "module",
"description": "Agent-first open-source AEO operating platform - track how answer engines cite your domain",
"license": "FSL-1.1-ALv2",
Expand Down
13 changes: 13 additions & 0 deletions packages/contracts/src/ga.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,24 @@ export const ga4AiReferralDtoSchema = z.object({
medium: z.string(),
sessions: z.number(),
users: z.number(),
/**
* The winning attribution dimension for this (source, medium) tuple — the
* one with the highest session count. GA4 emits one row per dimension
* (session, first_user, manual_utm), but they're overlapping lenses on the
* same visit; only the dominant dimension is surfaced here so the table is
* not inflated.
*/
sourceDimension: ga4SourceDimensionSchema,
})
export type GA4AiReferralDto = z.infer<typeof ga4AiReferralDtoSchema>

export const ga4AiReferralLandingPageDtoSchema = z.object({
source: z.string(),
medium: z.string(),
/**
* The winning attribution dimension for this (source, medium, landingPage)
* tuple — the one with the highest session count.
*/
sourceDimension: ga4SourceDimensionSchema,
landingPage: z.string(),
sessions: z.number(),
Expand Down Expand Up @@ -179,7 +190,9 @@ export interface GaTrafficResponse {
totalDirectSessions: number
totalUsers: number
topPages: Array<{ landingPage: string; sessions: number; organicSessions: number; directSessions: number; users: number }>
/** Deduped to the winning attribution dimension (highest sessions) per (source, medium). */
aiReferrals: Array<{ source: string; medium: string; sessions: number; users: number; sourceDimension: GA4SourceDimension }>
/** Deduped to the winning attribution dimension (highest sessions) per (source, medium, landingPage). */
aiReferralLandingPages: Array<{ source: string; medium: string; sourceDimension: GA4SourceDimension; landingPage: string; sessions: number; users: number }>
/** Deduped AI session total: MAX(sessions) per date+source+medium across attribution dimensions, then summed. Cross-cutting: can overlap with Direct/Organic/Social via firstUserSource. */
aiSessionsDeduped: number
Expand Down
Loading