Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
106 changes: 106 additions & 0 deletions action/src/capture/iteration-capture.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
/**
* Tests for captureIteration
*
* Verifies that re-running captureIteration on the same head_sha does not
* produce duplicate phantom iterations (issue #486).
*/

import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
import * as fs from 'fs';
import * as path from 'path';
import * as os from 'os';
import { IterationDatabase } from '../db/database';
import { captureIteration } from './iteration-capture';

// Mock logger
vi.mock('../utils/logger', () => ({
logger: {
warn: vi.fn(),
info: vi.fn(),
debug: vi.fn(),
error: vi.fn(),
},
}));

// Mock @actions/github so github.context.actor is defined
vi.mock('@actions/github', () => ({
context: {
actor: 'test-actor',
repo: { owner: 'octo', repo: 'repo' },
payload: {},
},
getOctokit: vi.fn(),
}));

// Mock file-fetcher to avoid network
vi.mock('./file-fetcher', () => ({
fetchFileContent: vi.fn(async () => 'file contents'),
}));

function makeOctokit() {
const listFiles = vi.fn();
// paginate just calls the method with the params and returns a fixed list
const paginate = vi.fn(async () => [
{
filename: 'foo.txt',
status: 'modified',
sha: 'file-sha-1',
},
]);
return {
paginate,
rest: {
pulls: { listFiles },
},
} as unknown as Parameters<typeof captureIteration>[1]['octokit'];
}

describe('captureIteration dedup on same head_sha (issue #486)', () => {
let db: IterationDatabase;
let dbPath: string;
Comment on lines +58 to +60
Copy link

Copilot AI Apr 17, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The test file comment/describe mentions issue #486, which doesn’t match the PR metadata (Closes #136486). Please make the issue reference consistent so the test history links back to the correct tracker item.

Copilot uses AI. Check for mistakes.

beforeEach(() => {
const tempDir = os.tmpdir();
dbPath = path.join(tempDir, `codjiflo-dedup-test-${Date.now()}-${Math.random()}.db`);
db = new IterationDatabase(dbPath);
});

afterEach(() => {
db.close();
if (fs.existsSync(dbPath)) fs.unlinkSync(dbPath);
const walPath = dbPath + '-wal';
const shmPath = dbPath + '-shm';
if (fs.existsSync(walPath)) fs.unlinkSync(walPath);
if (fs.existsSync(shmPath)) fs.unlinkSync(shmPath);
});

it('does not create duplicate iteration rows when run twice with same head_sha', async () => {
const ctx = {
octokit: makeOctokit(),
owner: 'octo',
repo: 'repo',
prNumber: 1,
headSha: 'deadbeef',
baseSha: 'basefeed',
beforeSha: null,
};

const firstId = await captureIteration(db, ctx);
const firstSnapshotCount = (db as unknown as {
db: { prepare: (sql: string) => { get: () => { count: number } } };
}).db.prepare('SELECT COUNT(*) as count FROM artifact_snapshots').get().count;

const secondId = await captureIteration(db, ctx);

// Only one iteration row should exist
expect(db.getIterationCount()).toBe(1);
// Second call should short-circuit and return the same iteration id
expect(secondId).toBe(firstId);

// No new snapshots should have been written by the second call
const secondSnapshotCount = (db as unknown as {
db: { prepare: (sql: string) => { get: () => { count: number } } };
}).db.prepare('SELECT COUNT(*) as count FROM artifact_snapshots').get().count;
expect(secondSnapshotCount).toBe(firstSnapshotCount);
Comment on lines +89 to +104
Copy link

Copilot AI Apr 17, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test reaches into IterationDatabase's private db instance via as unknown as { db: ... } to count artifact_snapshots rows. This is brittle (breaks if the implementation changes) and bypasses type-safety. Prefer adding a small, typed helper on IterationDatabase (e.g., getArtifactSnapshotRowCount() / getTableRowCount('artifact_snapshots')) or asserting behavior via existing public APIs.

Suggested change
const firstSnapshotCount = (db as unknown as {
db: { prepare: (sql: string) => { get: () => { count: number } } };
}).db.prepare('SELECT COUNT(*) as count FROM artifact_snapshots').get().count;
const secondId = await captureIteration(db, ctx);
// Only one iteration row should exist
expect(db.getIterationCount()).toBe(1);
// Second call should short-circuit and return the same iteration id
expect(secondId).toBe(firstId);
// No new snapshots should have been written by the second call
const secondSnapshotCount = (db as unknown as {
db: { prepare: (sql: string) => { get: () => { count: number } } };
}).db.prepare('SELECT COUNT(*) as count FROM artifact_snapshots').get().count;
expect(secondSnapshotCount).toBe(firstSnapshotCount);
const secondId = await captureIteration(db, ctx);
// Only one iteration row should exist
expect(db.getIterationCount()).toBe(1);
// Second call should short-circuit and return the same iteration id
expect(secondId).toBe(firstId);

Copilot uses AI. Check for mistakes.
});
});
16 changes: 16 additions & 0 deletions action/src/capture/iteration-capture.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import * as github from '@actions/github';
import type { IterationDatabase } from '../db/database';
import { fetchFileContent } from './file-fetcher';
import { logger } from '../utils/logger';

// ============================================================================
// Types
Expand Down Expand Up @@ -40,6 +41,21 @@ export async function captureIteration(
db: IterationDatabase,
ctx: CaptureContext
): Promise<number> {
// Dedup: if an iteration with this head_sha already exists, short-circuit.
// This prevents phantom duplicate iterations when the workflow is re-run
// on the same commit (e.g. "Re-run all jobs" or workflow_dispatch with the
// same PR_NUMBER). See issue #486.
const existing = db.getIterationByHeadSha(ctx.headSha);
Comment on lines +44 to +48
Copy link

Copilot AI Apr 17, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The inline references to the linked issue number look inconsistent with the PR metadata (Closes #136486 in the PR description, but the code comment says See issue #486). Please align the issue reference(s) to avoid confusing future readers/searches.

Copilot uses AI. Check for mistakes.
if (existing) {
logger.info({
event: 'iteration_already_captured',
'iteration.id': existing.id,
'iteration.revision': existing.revision,
'iteration.head_sha': existing.head_sha,
}, 'Iteration for this head_sha already exists; skipping capture');
return existing.id;
Comment on lines +44 to +56
Copy link

Copilot AI Apr 17, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new early-return changes captureIteration semantics: it may return an existing iteration id that is not the latest revision in the DB. The main action currently uses db.getLatestIteration() after calling captureIteration (see action/src/index.ts:118), which means on a workflow re-run of an older commit (or if duplicates already exist) span tracker computation/outputs can target the wrong iteration. Consider returning the IterationRow (or { iteration, didCreate }) from captureIteration and have callers use that specific iteration for snapshot indices and outputs; also consider skipping SpanTracker recomputation entirely when didCreate is false.

Suggested change
// Dedup: if an iteration with this head_sha already exists, short-circuit.
// This prevents phantom duplicate iterations when the workflow is re-run
// on the same commit (e.g. "Re-run all jobs" or workflow_dispatch with the
// same PR_NUMBER). See issue #486.
const existing = db.getIterationByHeadSha(ctx.headSha);
if (existing) {
logger.info({
event: 'iteration_already_captured',
'iteration.id': existing.id,
'iteration.revision': existing.revision,
'iteration.head_sha': existing.head_sha,
}, 'Iteration for this head_sha already exists; skipping capture');
return existing.id;
// Dedup only when the matching head_sha is already the latest iteration.
// Re-runs of older commits must still capture a new latest revision so
// downstream code that reads "latest iteration" stays aligned with this run.
const existing = db.getIterationByHeadSha(ctx.headSha);
if (existing) {
const latest = db.getLatestIteration();
if (latest && latest.id === existing.id) {
logger.info({
event: 'iteration_already_captured',
'iteration.id': existing.id,
'iteration.revision': existing.revision,
'iteration.head_sha': existing.head_sha,
}, 'Iteration for this head_sha already exists as the latest revision; skipping capture');
return existing.id;
}
logger.info({
event: 'iteration_recapture_required',
'iteration.id': existing.id,
'iteration.revision': existing.revision,
'iteration.head_sha': existing.head_sha,
'latest_iteration.id': latest?.id,
'latest_iteration.revision': latest?.revision,
'latest_iteration.head_sha': latest?.head_sha,
}, 'Iteration for this head_sha already exists but is not the latest revision; capturing a new iteration');

Copilot uses AI. Check for mistakes.
}

const iterationCount = db.getIterationCount();
const revision = iterationCount + 1;

Expand Down
6 changes: 6 additions & 0 deletions action/src/db/database.ts
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,12 @@ export class IterationDatabase {
`).get();
}

getIterationByHeadSha(headSha: string): IterationRow | undefined {
return this.db.prepare<[string], IterationRow>(`
SELECT * FROM iterations WHERE head_sha = ? LIMIT 1
Copy link

Copilot AI Apr 17, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

getIterationByHeadSha uses LIMIT 1 without an ORDER BY, so if the DB already contains multiple rows for the same head_sha (the exact scenario this PR is addressing), the row returned is nondeterministic. This can cause re-runs to return a different iteration id/revision than expected. Consider ordering explicitly (e.g., by revision ASC to prefer the original capture, or revision DESC to prefer the latest) to make behavior deterministic.

Suggested change
SELECT * FROM iterations WHERE head_sha = ? LIMIT 1
SELECT * FROM iterations
WHERE head_sha = ?
ORDER BY revision ASC
LIMIT 1

Copilot uses AI. Check for mistakes.
`).get(headSha);
}

getIterationCount(): number {
const row = this.db.prepare<[], { count: number }>(`
SELECT COUNT(*) as count FROM iterations
Expand Down
Loading