From a24c8bf3d10aa0e40fdac3f6af9935998cf9cb7f Mon Sep 17 00:00:00 2001 From: Joseph Mearman Date: Wed, 30 Jul 2025 16:04:51 +0100 Subject: [PATCH] feat: add git integration for incremental validation (#36) Adds comprehensive git integration features for faster validation: - GitUtils class for git operations and repository management - ValidationCache system with content-based invalidation and TTL - Enhanced validate command with git-aware options - Support for --git-diff, --git-staged, --cache, --fail-fast modes - CLI integration with new performance options - Comprehensive test coverage for git operations and caching Enables efficient incremental validation by only checking changed files and caching validation results for faster subsequent runs. --- src/cli.ts | 22 +- src/commands/validate-git.test.ts | 452 ++++++++++++++++++++++++ src/commands/validate.ts | 269 +++++++++++++-- src/utils/git-utils.test.ts | 386 +++++++++++++++++++++ src/utils/git-utils.ts | 384 +++++++++++++++++++++ src/utils/validation-cache.test.ts | 331 ++++++++++++++++++ src/utils/validation-cache.ts | 533 +++++++++++++++++++++++++++++ 7 files changed, 2348 insertions(+), 29 deletions(-) create mode 100644 src/commands/validate-git.test.ts create mode 100644 src/utils/git-utils.test.ts create mode 100644 src/utils/git-utils.ts create mode 100644 src/utils/validation-cache.test.ts create mode 100644 src/utils/validation-cache.ts diff --git a/src/cli.ts b/src/cli.ts index f8b9b08..11cdb9d 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -281,6 +281,12 @@ program .option('--only-broken', 'Show only broken links, not all validation results', true) .option('--group-by ', 'Group results by: file|type', 'file') .option('--include-context', 'Include line numbers and context in output', false) + .option('--git-diff ', 'Only validate files changed since the specified git reference') + .option('--git-staged', 'Only validate files currently staged in git') + .option('--cache', 'Enable validation result caching 
for faster subsequent runs') + .option('--cache-dir ', 'Cache directory path', '.markmv-cache') + .option('--fail-fast', 'Exit immediately on first broken link found') + .option('--include-dependencies', 'Include files that depend on changed files', true) .option('-v, --verbose', 'Show detailed output with processing information') .option('--json', 'Output results in JSON format') .addHelpText( @@ -288,13 +294,25 @@ program ` Examples: $ markmv validate # Validate current directory - $ markmv validate . # Validate current directory - $ markmv validate ./ # Validate current directory $ markmv validate docs/**/*.md --check-external --verbose $ markmv validate README.md --link-types internal,image --include-context $ markmv validate **/*.md --group-by type --only-broken $ markmv validate docs/ --check-circular --strict-internal +Git Integration Examples: + $ markmv validate --git-diff HEAD~1 # Only files changed since last commit + $ markmv validate --git-diff main..HEAD # Files changed in current branch vs main + $ markmv validate --git-staged # Only staged files + $ markmv validate --git-diff HEAD~1 --cache # Use caching for faster validation + $ markmv validate --git-staged --fail-fast --cache # Fast pre-commit validation + +Performance Options: + --cache Enable result caching for faster subsequent runs + --cache-dir Custom cache directory (default: .markmv-cache) + --fail-fast Exit on first broken link (faster feedback) + --git-diff Only validate files changed since git reference + --git-staged Only validate currently staged files + Link Types: internal Links to other markdown files external HTTP/HTTPS URLs diff --git a/src/commands/validate-git.test.ts b/src/commands/validate-git.test.ts new file mode 100644 index 0000000..7a507cf --- /dev/null +++ b/src/commands/validate-git.test.ts @@ -0,0 +1,452 @@ +/** + * Tests for git integration in validate command. 
+ * + * @fileoverview Tests for git-aware validation features including caching and incremental validation + */ + +import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; +import { mkdtemp, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { validateLinks } from './validate.js'; + +// Mock git utils and validation cache +vi.mock('../utils/git-utils.js', () => { + const MockGitUtils = vi.fn().mockImplementation(() => ({ + isGitRepository: vi.fn().mockReturnValue(true), + getRepositoryRoot: vi.fn().mockReturnValue('/test/repo'), + getStatus: vi.fn().mockReturnValue({ + branch: 'main', + commit: 'abc123', + isDirty: false, + rootDir: '/test/repo', + }), + getCurrentCommit: vi.fn().mockReturnValue('abc123'), + refExists: vi.fn().mockReturnValue(true), + getChangedFiles: vi.fn().mockReturnValue([ + { path: '/test/repo/changed.md', status: 'modified' }, + { path: '/test/repo/new.md', status: 'added' }, + ]), + getStagedFiles: vi.fn().mockReturnValue([ + { path: '/test/repo/staged.md', status: 'modified' }, + ]), + })); + + return { + GitUtils: MockGitUtils, + }; +}); + +vi.mock('../utils/validation-cache.js', () => { + const mockCache = { + isEnabled: vi.fn().mockResolvedValue(true), + get: vi.fn().mockResolvedValue(undefined), + set: vi.fn().mockResolvedValue(undefined), + getMetadata: vi.fn().mockResolvedValue({ + totalFiles: 5, + totalLinks: 25, + hitRate: 80, + sizeBytes: 1024, + lastCleanup: 0, + version: '1.0.0', + }), + }; + + const MockValidationCache = vi.fn().mockImplementation(() => mockCache); + + return { + ValidationCache: MockValidationCache, + calculateFileHash: vi.fn().mockResolvedValue('test-hash'), + calculateConfigHash: vi.fn().mockReturnValue('config-hash'), + }; +}); + +// Mock link validator and parser +vi.mock('../core/link-validator.js', () => ({ + LinkValidator: vi.fn().mockImplementation(() => ({ + validateLinks: vi.fn().mockResolvedValue({ + brokenLinks: [], 
+ }), + checkCircularReferences: vi.fn().mockResolvedValue({ + hasCircularReferences: false, + }), + })), +})); + +vi.mock('../core/link-parser.js', () => ({ + LinkParser: vi.fn().mockImplementation(() => ({ + parseFile: vi.fn().mockResolvedValue({ + links: [ + { type: 'internal', href: 'other.md', line: 1 }, + { type: 'external', href: 'https://example.com', line: 2 }, + ], + }), + })), +})); + +// Mock file system +vi.mock('glob', () => ({ + glob: vi.fn().mockResolvedValue(['/test/file1.md', '/test/file2.md']), +})); + +describe('Git Integration in Validate Command', () => { + let tempDir: string; + + beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'validate-git-test-')); + vi.clearAllMocks(); + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + describe('Git Diff Mode', () => { + it('should validate only changed files when using git diff', async () => { + const result = await validateLinks(['**/*.md'], { + gitDiff: 'HEAD~1', + verbose: true, + }); + + expect(result.filesProcessed).toBe(2); // changed.md and new.md + expect(result.gitInfo?.enabled).toBe(true); + expect(result.gitInfo?.baseRef).toBe('HEAD~1'); + expect(result.gitInfo?.changedFiles).toBe(2); + expect(result.gitInfo?.currentCommit).toBe('abc123'); + }); + + it('should handle git diff with specific range', async () => { + const result = await validateLinks(['**/*.md'], { + gitDiff: 'main..feature', + verbose: true, + }); + + expect(result.gitInfo?.enabled).toBe(true); + expect(result.gitInfo?.baseRef).toBe('main..feature'); + }); + + it('should throw error for non-existent git reference', async () => { + const { GitUtils } = await import('../utils/git-utils.js'); + const mockGitUtils = vi.mocked(GitUtils); + mockGitUtils.mockImplementation(() => ({ + isGitRepository: vi.fn().mockReturnValue(true), + refExists: vi.fn().mockReturnValue(false), + } as any)); + + await expect(validateLinks(['**/*.md'], { + gitDiff: 'nonexistent-ref', + 
})).rejects.toThrow("Git reference 'nonexistent-ref' does not exist"); + }); + + it('should filter out deleted files from git diff', async () => { + const { GitUtils } = await import('../utils/git-utils.js'); + const mockGitUtils = vi.mocked(GitUtils); + mockGitUtils.mockImplementation(() => ({ + isGitRepository: vi.fn().mockReturnValue(true), + getRepositoryRoot: vi.fn().mockReturnValue('/test/repo'), + getStatus: vi.fn().mockReturnValue({ + branch: 'main', + commit: 'abc123', + isDirty: false, + rootDir: '/test/repo', + }), + refExists: vi.fn().mockReturnValue(true), + getChangedFiles: vi.fn().mockReturnValue([ + { path: '/test/repo/modified.md', status: 'modified' }, + { path: '/test/repo/deleted.md', status: 'deleted' }, + { path: '/test/repo/added.md', status: 'added' }, + ]), + } as any)); + + const result = await validateLinks(['**/*.md'], { + gitDiff: 'HEAD~1', + }); + + // Should only process modified and added files, not deleted + expect(result.filesProcessed).toBe(2); + }); + }); + + describe('Git Staged Mode', () => { + it('should validate only staged files', async () => { + const result = await validateLinks(['**/*.md'], { + gitStaged: true, + verbose: true, + }); + + expect(result.filesProcessed).toBe(1); // staged.md + expect(result.gitInfo?.enabled).toBe(true); + expect(result.gitInfo?.changedFiles).toBe(1); + expect(result.gitInfo?.baseRef).toBeUndefined(); + }); + + it('should handle empty staged files', async () => { + const { GitUtils } = await import('../utils/git-utils.js'); + const mockGitUtils = vi.mocked(GitUtils); + mockGitUtils.mockImplementation(() => ({ + isGitRepository: vi.fn().mockReturnValue(true), + getRepositoryRoot: vi.fn().mockReturnValue('/test/repo'), + getStatus: vi.fn().mockReturnValue({ + branch: 'main', + commit: 'abc123', + isDirty: false, + rootDir: '/test/repo', + }), + getStagedFiles: vi.fn().mockReturnValue([]), + } as any)); + + const result = await validateLinks(['**/*.md'], { + gitStaged: true, + }); + + 
expect(result.filesProcessed).toBe(0); + expect(result.gitInfo?.changedFiles).toBe(0); + }); + }); + + describe('Cache Integration', () => { + it('should use cache when enabled', async () => { + const { ValidationCache } = await import('../utils/validation-cache.js'); + const mockCacheInstance = { + isEnabled: vi.fn().mockResolvedValue(true), + get: vi.fn().mockResolvedValue({ + filePath: '/test/cached.md', + contentHash: 'cached-hash', + result: { + brokenLinks: [], + totalLinks: 3, + }, + }), + set: vi.fn(), + }; + vi.mocked(ValidationCache).mockImplementation(() => mockCacheInstance as any); + + const result = await validateLinks(['/test/cached.md'], { + cache: true, + verbose: true, + }); + + expect(mockCacheInstance.get).toHaveBeenCalled(); + expect(result.gitInfo?.cachedFiles).toBeGreaterThan(0); + }); + + it('should store results in cache when cache misses', async () => { + const { ValidationCache } = await import('../utils/validation-cache.js'); + const mockCacheInstance = { + isEnabled: vi.fn().mockResolvedValue(true), + get: vi.fn().mockResolvedValue(undefined), // Cache miss + set: vi.fn(), + }; + vi.mocked(ValidationCache).mockImplementation(() => mockCacheInstance as any); + + await validateLinks(['/test/new.md'], { + cache: true, + }); + + expect(mockCacheInstance.set).toHaveBeenCalled(); + }); + + it('should disable cache gracefully when not accessible', async () => { + const { ValidationCache } = await import('../utils/validation-cache.js'); + const mockCacheInstance = { + isEnabled: vi.fn().mockResolvedValue(false), + }; + vi.mocked(ValidationCache).mockImplementation(() => mockCacheInstance as any); + + // Should not throw error + const result = await validateLinks(['/test/file.md'], { + cache: true, + verbose: true, + }); + + expect(result.gitInfo?.cachedFiles).toBe(0); + }); + + it('should calculate cache hit rate correctly', async () => { + const { ValidationCache } = await import('../utils/validation-cache.js'); + let callCount = 0; + const 
mockCacheInstance = { + isEnabled: vi.fn().mockResolvedValue(true), + get: vi.fn().mockImplementation(() => { + callCount++; + // Return cached result for first call, miss for second + if (callCount === 1) { + return Promise.resolve({ + result: { brokenLinks: [], totalLinks: 2 } + }); + } + return Promise.resolve(undefined); + }), + set: vi.fn(), + }; + vi.mocked(ValidationCache).mockImplementation(() => mockCacheInstance as any); + + const result = await validateLinks(['/test/file1.md', '/test/file2.md'], { + cache: true, + gitDiff: 'HEAD~1', + }); + + expect(result.gitInfo?.cachedFiles).toBe(1); + expect(result.gitInfo?.cacheHitRate).toBe(50); // 1 hit out of 2 files + }); + }); + + describe('Fail-Fast Mode', () => { + it('should exit early when fail-fast is enabled and broken link found', async () => { + const { LinkValidator } = await import('../core/link-validator.js'); + vi.mocked(LinkValidator).mockImplementation(() => ({ + validateLinks: vi.fn().mockResolvedValue({ + brokenLinks: [ + { + link: { href: 'broken.md', type: 'internal', line: 1 }, + reason: 'File not found', + }, + ], + }), + checkCircularReferences: vi.fn().mockResolvedValue({ + hasCircularReferences: false, + }), + } as any)); + + const result = await validateLinks(['/test/file1.md', '/test/file2.md'], { + failFast: true, + gitDiff: 'HEAD~1', + }); + + // Should stop processing after first broken link + expect(result.brokenLinks).toBeGreaterThan(0); + // May not process all files due to fail-fast + expect(result.filesProcessed).toBeLessThanOrEqual(2); + }); + }); + + describe('Error Handling', () => { + it('should handle git repository detection failure', async () => { + const { GitUtils } = await import('../utils/git-utils.js'); + vi.mocked(GitUtils).mockImplementation(() => ({ + isGitRepository: vi.fn().mockReturnValue(false), + } as any)); + + await expect(validateLinks(['**/*.md'], { + gitDiff: 'HEAD~1', + })).rejects.toThrow('Git integration requires a git repository'); + }); + + 
it('should gracefully disable git integration when not in repository but cache enabled', async () => { + const { GitUtils } = await import('../utils/git-utils.js'); + vi.mocked(GitUtils).mockImplementation(() => ({ + isGitRepository: vi.fn().mockReturnValue(false), + } as any)); + + // Should not throw, just disable git integration + const result = await validateLinks(['**/*.md'], { + cache: true, + verbose: true, + }); + + expect(result.gitInfo?.enabled).toBeFalsy(); + }); + + it('should handle git command failures gracefully', async () => { + const { GitUtils } = await import('../utils/git-utils.js'); + vi.mocked(GitUtils).mockImplementation(() => ({ + isGitRepository: vi.fn().mockReturnValue(true), + refExists: vi.fn().mockReturnValue(true), + getChangedFiles: vi.fn().mockImplementation(() => { + throw new Error('Git command failed'); + }), + } as any)); + + await expect(validateLinks(['**/*.md'], { + gitDiff: 'HEAD~1', + })).rejects.toThrow('Git command failed'); + }); + + it('should handle cache errors gracefully', async () => { + const { ValidationCache } = await import('../utils/validation-cache.js'); + const mockCacheInstance = { + isEnabled: vi.fn().mockResolvedValue(true), + get: vi.fn().mockRejectedValue(new Error('Cache read error')), + set: vi.fn().mockRejectedValue(new Error('Cache write error')), + }; + vi.mocked(ValidationCache).mockImplementation(() => mockCacheInstance as any); + + // Should not throw, just continue without cache + const result = await validateLinks(['/test/file.md'], { + cache: true, + }); + + expect(result.filesProcessed).toBe(1); + }); + }); + + describe('Integration with Standard Validation', () => { + it('should fall back to standard glob patterns when no git options', async () => { + const result = await validateLinks(['**/*.md'], { + verbose: true, + }); + + expect(result.filesProcessed).toBeGreaterThan(0); + expect(result.gitInfo?.enabled).toBeFalsy(); + }); + + it('should combine git integration with other validation 
options', async () => { + const result = await validateLinks(['**/*.md'], { + gitDiff: 'HEAD~1', + checkExternal: true, + linkTypes: ['internal', 'external'], + includeContext: true, + cache: true, + }); + + expect(result.gitInfo?.enabled).toBe(true); + expect(result.filesProcessed).toBeGreaterThan(0); + }); + + it('should handle mixed git and non-git patterns', async () => { + // This test verifies that when git integration is enabled, + // it overrides the standard glob pattern resolution + const result = await validateLinks(['docs/**/*.md', 'README.md'], { + gitStaged: true, + }); + + // Should only process staged files, not the glob patterns + expect(result.gitInfo?.enabled).toBe(true); + expect(result.filesProcessed).toBe(1); // Only staged.md + }); + }); + + describe('Output Formatting', () => { + it('should include git information in results', async () => { + const result = await validateLinks(['**/*.md'], { + gitDiff: 'HEAD~1', + cache: true, + }); + + expect(result.gitInfo).toBeDefined(); + expect(result.gitInfo?.enabled).toBe(true); + expect(result.gitInfo?.baseRef).toBe('HEAD~1'); + expect(result.gitInfo?.currentCommit).toBe('abc123'); + expect(result.gitInfo?.changedFiles).toBeGreaterThan(0); + }); + + it('should track cache performance metrics', async () => { + const { ValidationCache } = await import('../utils/validation-cache.js'); + const mockCacheInstance = { + isEnabled: vi.fn().mockResolvedValue(true), + get: vi.fn().mockResolvedValue({ + result: { brokenLinks: [], totalLinks: 1 } + }), + set: vi.fn(), + }; + vi.mocked(ValidationCache).mockImplementation(() => mockCacheInstance as any); + + const result = await validateLinks(['/test/file.md'], { + cache: true, + gitStaged: true, + }); + + expect(result.gitInfo?.cacheHitRate).toBe(100); // 1 hit out of 1 file + }); + }); +}); \ No newline at end of file diff --git a/src/commands/validate.ts b/src/commands/validate.ts index 130c7b0..f62faa2 100644 --- a/src/commands/validate.ts +++ 
b/src/commands/validate.ts @@ -3,6 +3,8 @@ import { statSync } from 'fs'; import { posix } from 'path'; import { LinkValidator } from '../core/link-validator.js'; import { LinkParser } from '../core/link-parser.js'; +import { GitUtils } from '../utils/git-utils.js'; +import { ValidationCache, calculateFileHash, calculateConfigHash } from '../utils/validation-cache.js'; import type { LinkType } from '../types/links.js'; import type { BrokenLink } from '../types/config.js'; import type { OperationOptions } from '../types/operations.js'; @@ -35,6 +37,18 @@ export interface ValidateOperationOptions extends OperationOptions { groupBy: 'file' | 'type'; /** Include line numbers and context in output */ includeContext: boolean; + /** Git diff range for incremental validation */ + gitDiff?: string; + /** Only validate staged files */ + gitStaged?: boolean; + /** Enable validation result caching */ + cache?: boolean; + /** Cache directory path */ + cacheDir?: string; + /** Exit on first broken link found */ + failFast?: boolean; + /** Include dependency tracking for changed files */ + includeDependencies?: boolean; } /** @@ -89,6 +103,21 @@ export interface ValidateResult { circularReferences?: string[]; /** Processing time in milliseconds */ processingTime: number; + /** Git integration information */ + gitInfo?: { + /** Whether git integration was used */ + enabled: boolean; + /** Files changed according to git */ + changedFiles: number; + /** Files cached from previous validation */ + cachedFiles: number; + /** Cache hit rate percentage */ + cacheHitRate: number; + /** Base reference used for git diff */ + baseRef?: string; + /** Current git commit */ + currentCommit?: string; + }; } /** @@ -150,31 +179,117 @@ export async function validateLinks( dryRun: options.dryRun ?? false, verbose: options.verbose ?? false, force: options.force ?? false, + gitDiff: options.gitDiff, + gitStaged: options.gitStaged ?? false, + cache: options.cache ?? 
false, + cacheDir: options.cacheDir ?? '.markmv-cache', + failFast: options.failFast ?? false, + includeDependencies: options.includeDependencies ?? true, }; - // Resolve file patterns to actual file paths - const files: string[] = []; - for (const pattern of patterns) { - try { - const globOptions: { absolute: boolean; ignore: string[]; maxDepth?: number } = { - absolute: true, - ignore: ['**/node_modules/**', '**/dist/**', '**/coverage/**'], - }; - if (typeof opts.maxDepth === 'number') { - globOptions.maxDepth = opts.maxDepth; + // Initialize git utils and cache if needed + let gitUtils: GitUtils | undefined; + let cache: ValidationCache | undefined; + let gitInfo: ValidateResult['gitInfo'] | undefined; + + if (opts.gitDiff || opts.gitStaged || opts.cache) { + gitUtils = new GitUtils(); + + if (!gitUtils.isGitRepository()) { + if (opts.gitDiff || opts.gitStaged) { + throw new Error('Git integration requires a git repository'); } + if (opts.verbose) { + console.warn('Not in a git repository, disabling git integration'); + } + gitUtils = undefined; + } + } - const matches = await glob(pattern, globOptions); - files.push(...matches.filter((f) => f.endsWith('.md'))); - } catch (error) { + if (opts.cache) { + cache = new ValidationCache({ cacheDir: opts.cacheDir }); + if (!(await cache.isEnabled())) { if (opts.verbose) { - console.error(`Error processing pattern "${pattern}":`, error); + console.warn('Cache is not accessible, disabling caching'); } + cache = undefined; } } - if (opts.verbose) { - console.log(`Found ${files.length} markdown files to validate`); + // Resolve file patterns to actual file paths + let files: string[] = []; + + if (opts.gitDiff && gitUtils) { + // Git diff mode - only validate changed files + const baseRef = opts.gitDiff; + + if (!gitUtils.refExists(baseRef)) { + throw new Error(`Git reference '${baseRef}' does not exist`); + } + + const changedFiles = gitUtils.getChangedFiles(baseRef); + files = changedFiles + .filter(change => 
change.status !== 'deleted') + .map(change => change.path) + .filter(path => path.endsWith('.md')); + + const status = gitUtils.getStatus(); + gitInfo = { + enabled: true, + changedFiles: files.length, + cachedFiles: 0, + cacheHitRate: 0, + baseRef, + currentCommit: status.commit, + }; + + if (opts.verbose) { + console.log(`šŸ” Git Integration: Found ${files.length} changed markdown files since ${baseRef}`); + } + } else if (opts.gitStaged && gitUtils) { + // Git staged mode - only validate staged files + const stagedFiles = gitUtils.getStagedFiles(); + files = stagedFiles + .filter(change => change.status !== 'deleted') + .map(change => change.path) + .filter(path => path.endsWith('.md')); + + const status = gitUtils.getStatus(); + gitInfo = { + enabled: true, + changedFiles: files.length, + cachedFiles: 0, + cacheHitRate: 0, + currentCommit: status.commit, + }; + + if (opts.verbose) { + console.log(`šŸ” Git Integration: Found ${files.length} staged markdown files`); + } + } else { + // Standard mode - resolve glob patterns + for (const pattern of patterns) { + try { + const globOptions: { absolute: boolean; ignore: string[]; maxDepth?: number } = { + absolute: true, + ignore: ['**/node_modules/**', '**/dist/**', '**/coverage/**'], + }; + if (typeof opts.maxDepth === 'number') { + globOptions.maxDepth = opts.maxDepth; + } + + const matches = await glob(pattern, globOptions); + files.push(...matches.filter((f) => f.endsWith('.md'))); + } catch (error) { + if (opts.verbose) { + console.error(`Error processing pattern "${pattern}":`, error); + } + } + } + + if (opts.verbose) { + console.log(`Found ${files.length} markdown files to validate`); + } } // Initialize validator and parser @@ -196,6 +311,7 @@ export async function validateLinks( fileErrors: [], hasCircularReferences: false, processingTime: 0, + gitInfo, }; // Initialize broken links by type @@ -203,6 +319,18 @@ export async function validateLinks( result.brokenLinksByType[linkType] = []; } + // Calculate 
configuration hash for cache validation + const configHash = calculateConfigHash({ + linkTypes: opts.linkTypes, + checkExternal: opts.checkExternal, + externalTimeout: opts.externalTimeout, + strictInternal: opts.strictInternal, + checkClaudeImports: opts.checkClaudeImports, + }); + + let cacheHits = 0; + let cacheMisses = 0; + // Process each file for (const filePath of files) { try { @@ -210,19 +338,69 @@ export async function validateLinks( console.log(`Validating: ${filePath}`); } - // Parse links from file - const parsedFile = await parser.parseFile(filePath); - const relevantLinks = parsedFile.links.filter((link) => opts.linkTypes.includes(link.type)); + let validation: { brokenLinks: BrokenLink[] }; + let totalLinksForFile = 0; + let fromCache = false; + + // Try to get from cache first + if (cache) { + const contentHash = await calculateFileHash(filePath); + const gitCommit = gitUtils?.getCurrentCommit(); + const cached = await cache.get(filePath, contentHash, configHash, gitCommit); + + if (cached) { + // Use cached result + validation = { brokenLinks: cached.result.brokenLinks || [] }; + totalLinksForFile = cached.result.totalLinks || 0; + fromCache = true; + cacheHits++; + + if (opts.verbose) { + console.log(` āœ“ Used cached result`); + } + } else { + cacheMisses++; + } + } - result.totalLinks += relevantLinks.length; - result.filesProcessed++; + if (!fromCache) { + // Parse links from file + const parsedFile = await parser.parseFile(filePath); + const relevantLinks = parsedFile.links.filter((link) => opts.linkTypes.includes(link.type)); + totalLinksForFile = relevantLinks.length; + + if (relevantLinks.length === 0) { + // Store empty result in cache + if (cache) { + const contentHash = await calculateFileHash(filePath); + const gitCommit = gitUtils?.getCurrentCommit(); + await cache.set(filePath, contentHash, { + brokenLinks: [], + totalLinks: 0 + } as any, configHash, gitCommit); + } + + result.filesProcessed++; + continue; + } - if 
(relevantLinks.length === 0) { - continue; + // Validate links + validation = await validator.validateLinks(relevantLinks, filePath); + + // Store result in cache + if (cache) { + const contentHash = await calculateFileHash(filePath); + const gitCommit = gitUtils?.getCurrentCommit(); + await cache.set(filePath, contentHash, { + brokenLinks: validation.brokenLinks, + totalLinks: totalLinksForFile + } as any, configHash, gitCommit); + } } - // Validate links - const validation = await validator.validateLinks(relevantLinks, filePath); + result.totalLinks += totalLinksForFile; + result.filesProcessed++; + const brokenLinks = validation.brokenLinks; if (brokenLinks.length > 0) { @@ -249,6 +427,11 @@ export async function validateLinks( typeArray.push(extendedBrokenLink); } } + + // Exit early if fail-fast is enabled + if (opts.failFast) { + break; + } } } catch (error) { result.fileErrors.push({ @@ -259,9 +442,21 @@ export async function validateLinks( if (opts.verbose) { console.error(`Error processing ${filePath}:`, error); } + + // Exit early if fail-fast is enabled + if (opts.failFast) { + break; + } } } + // Update git info with cache statistics + if (result.gitInfo && cache) { + const totalRequests = cacheHits + cacheMisses; + result.gitInfo.cachedFiles = cacheHits; + result.gitInfo.cacheHitRate = totalRequests > 0 ? 
Math.round((cacheHits / totalRequests) * 100) : 0; + } + // Check for circular references if requested if (opts.checkCircular && files.length > 0) { try { @@ -349,11 +544,31 @@ export async function validateCommand( } // Format output for human consumption - console.log(`\nšŸ“Š Validation Summary`); + if (result.gitInfo?.enabled) { + console.log(`\nšŸ” Git Integration`); + if (result.gitInfo.baseRef) { + console.log(`Changed since ${result.gitInfo.baseRef}: ${result.gitInfo.changedFiles} files`); + } else { + console.log(`Staged files: ${result.gitInfo.changedFiles} files`); + } + if (result.gitInfo.cachedFiles > 0) { + console.log(`Cache hits: ${result.gitInfo.cachedFiles} files (${result.gitInfo.cacheHitRate}% hit rate)`); + } + console.log(); + } + + console.log(`šŸ“Š Validation Summary`); console.log(`Files processed: ${result.filesProcessed}`); console.log(`Total links found: ${result.totalLinks}`); console.log(`Broken links: ${result.brokenLinks}`); - console.log(`Processing time: ${result.processingTime}ms\n`); + console.log(`Processing time: ${result.processingTime}ms`); + + if (result.gitInfo?.enabled && options.cache) { + const savedTime = result.gitInfo.cacheHitRate > 0 ? + ` (${Math.round(result.processingTime * (result.gitInfo.cacheHitRate / 100))}ms saved by cache)` : ''; + console.log(`Cache performance: ${result.gitInfo.cacheHitRate}% hit rate${savedTime}`); + } + console.log(); if (result.fileErrors.length > 0) { console.log(`āš ļø File Errors (${result.fileErrors.length}):`); diff --git a/src/utils/git-utils.test.ts b/src/utils/git-utils.test.ts new file mode 100644 index 0000000..5c2938a --- /dev/null +++ b/src/utils/git-utils.test.ts @@ -0,0 +1,386 @@ +/** + * Tests for git integration utilities. 
+ * + * @fileoverview Tests for git operations and repository management + */ + +import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; +import { execSync } from 'node:child_process'; +import { GitUtils } from './git-utils.js'; + +// Mock child_process +vi.mock('node:child_process', () => ({ + execSync: vi.fn(), +})); + +describe('GitUtils', () => { + let gitUtils: GitUtils; + let mockExecSync: ReturnType; + + beforeEach(() => { + mockExecSync = vi.mocked(execSync); + gitUtils = new GitUtils('/test/repo'); + vi.clearAllMocks(); + }); + + afterEach(() => { + vi.resetAllMocks(); + }); + + describe('Repository Detection', () => { + it('should detect git repository correctly', () => { + mockExecSync.mockReturnValue(Buffer.from('.git')); + + const result = gitUtils.isGitRepository(); + + expect(result).toBe(true); + expect(mockExecSync).toHaveBeenCalledWith('git rev-parse --git-dir', { + cwd: '/test/repo', + encoding: 'utf8', + stdio: 'pipe', + }); + }); + + it('should return false when not in git repository', () => { + mockExecSync.mockImplementation(() => { + throw new Error('Not a git repository'); + }); + + const result = gitUtils.isGitRepository(); + + expect(result).toBe(false); + }); + + it('should get repository root directory', () => { + mockExecSync.mockReturnValue(Buffer.from('/test/repo\n')); + + const result = gitUtils.getRepositoryRoot(); + + expect(result).toBe('/test/repo'); + expect(mockExecSync).toHaveBeenCalledWith('git rev-parse --show-toplevel', { + cwd: '/test/repo', + encoding: 'utf8', + stdio: 'pipe', + }); + }); + + it('should throw error when getting root of non-git directory', () => { + mockExecSync.mockImplementation(() => { + throw new Error('Not a git repository'); + }); + + expect(() => gitUtils.getRepositoryRoot()).toThrow('Not in a git repository'); + }); + }); + + describe('Git Status Information', () => { + it('should get current branch name', () => { + mockExecSync.mockReturnValue(Buffer.from('main\n')); + + const 
result = gitUtils.getCurrentBranch(); + + expect(result).toBe('main'); + expect(mockExecSync).toHaveBeenCalledWith('git branch --show-current', { + cwd: '/test/repo', + encoding: 'utf8', + stdio: 'pipe', + }); + }); + + it('should fallback to commit hash for detached HEAD', () => { + mockExecSync + .mockImplementationOnce(() => { + throw new Error('Not on any branch'); + }) + .mockReturnValueOnce(Buffer.from('abc123\n')); + + const result = gitUtils.getCurrentBranch(); + + expect(result).toBe('abc123'); + expect(mockExecSync).toHaveBeenCalledWith('git rev-parse --short HEAD', { + cwd: '/test/repo', + encoding: 'utf8', + stdio: 'pipe', + }); + }); + + it('should get current commit hash', () => { + mockExecSync.mockReturnValue(Buffer.from('abc123def456\n')); + + const result = gitUtils.getCurrentCommit(); + + expect(result).toBe('abc123def456'); + expect(mockExecSync).toHaveBeenCalledWith('git rev-parse HEAD', { + cwd: '/test/repo', + encoding: 'utf8', + stdio: 'pipe', + }); + }); + + it('should detect uncommitted changes', () => { + mockExecSync.mockReturnValue(Buffer.from('M file1.md\n?? 
file2.md\n')); + + const result = gitUtils.hasUncommittedChanges(); + + expect(result).toBe(true); + expect(mockExecSync).toHaveBeenCalledWith('git status --porcelain', { + cwd: '/test/repo', + encoding: 'utf8', + stdio: 'pipe', + }); + }); + + it('should return false when no uncommitted changes', () => { + mockExecSync.mockReturnValue(Buffer.from('')); + + const result = gitUtils.hasUncommittedChanges(); + + expect(result).toBe(false); + }); + + it('should get complete status information', () => { + mockExecSync + .mockReturnValueOnce(Buffer.from('/test/repo\n')) // rev-parse --show-toplevel + .mockReturnValueOnce(Buffer.from('main\n')) // branch --show-current + .mockReturnValueOnce(Buffer.from('abc123\n')) // rev-parse HEAD + .mockReturnValueOnce(Buffer.from('')); // status --porcelain + + const result = gitUtils.getStatus(); + + expect(result).toEqual({ + branch: 'main', + commit: 'abc123', + isDirty: false, + rootDir: '/test/repo', + }); + }); + }); + + describe('File Changes Detection', () => { + it('should get changed files between references', () => { + const diffOutput = 'M\tdocs/readme.md\nA\tdocs/new-file.md\nD\toldfile.md'; + mockExecSync + .mockReturnValueOnce(Buffer.from('/test/repo\n')) // getRepositoryRoot + .mockReturnValueOnce(Buffer.from(diffOutput)); + + const result = gitUtils.getChangedFiles('HEAD~1', 'HEAD'); + + expect(result).toHaveLength(3); + expect(result[0]).toEqual({ + path: '/test/repo/docs/readme.md', + status: 'modified', + previousPath: undefined, + }); + expect(result[1]).toEqual({ + path: '/test/repo/docs/new-file.md', + status: 'added', + previousPath: undefined, + }); + expect(result[2]).toEqual({ + path: '/test/repo/oldfile.md', + status: 'deleted', + previousPath: undefined, + }); + + expect(mockExecSync).toHaveBeenCalledWith('git diff --name-status HEAD~1..HEAD', { + cwd: '/test/repo', + encoding: 'utf8', + stdio: 'pipe', + }); + }); + + it('should handle renamed files', () => { + const diffOutput = 
'R100\told-name.md\tnew-name.md'; + mockExecSync + .mockReturnValueOnce(Buffer.from('/test/repo\n')) + .mockReturnValueOnce(Buffer.from(diffOutput)); + + const result = gitUtils.getChangedFiles('HEAD~1'); + + expect(result).toHaveLength(1); + expect(result[0]).toEqual({ + path: '/test/repo/new-name.md', + status: 'renamed', + previousPath: '/test/repo/old-name.md', + }); + }); + + it('should get staged files', () => { + const stagedOutput = 'M\tstaged-file.md\nA\tnew-staged.md'; + mockExecSync + .mockReturnValueOnce(Buffer.from('/test/repo\n')) + .mockReturnValueOnce(Buffer.from(stagedOutput)); + + const result = gitUtils.getStagedFiles(); + + expect(result).toHaveLength(2); + expect(result[0].status).toBe('modified'); + expect(result[1].status).toBe('added'); + + expect(mockExecSync).toHaveBeenCalledWith('git diff --cached --name-status', { + cwd: '/test/repo', + encoding: 'utf8', + stdio: 'pipe', + }); + }); + + it('should get unstaged files', () => { + const unstagedOutput = 'M\tunstaged-file.md'; + mockExecSync + .mockReturnValueOnce(Buffer.from('/test/repo\n')) + .mockReturnValueOnce(Buffer.from(unstagedOutput)); + + const result = gitUtils.getUnstagedFiles(); + + expect(result).toHaveLength(1); + expect(result[0].status).toBe('modified'); + + expect(mockExecSync).toHaveBeenCalledWith('git diff --name-status', { + cwd: '/test/repo', + encoding: 'utf8', + stdio: 'pipe', + }); + }); + + it('should handle empty diff output', () => { + mockExecSync + .mockReturnValueOnce(Buffer.from('/test/repo\n')) + .mockReturnValueOnce(Buffer.from('')); + + const result = gitUtils.getChangedFiles('HEAD~1'); + + expect(result).toHaveLength(0); + }); + }); + + describe('Tracked Files', () => { + it('should get all tracked files', () => { + const lsFilesOutput = 'README.md\ndocs/guide.md\nsrc/main.ts'; + mockExecSync + .mockReturnValueOnce(Buffer.from('/test/repo\n')) + .mockReturnValueOnce(Buffer.from(lsFilesOutput)); + + const result = gitUtils.getTrackedFiles(); + + 
expect(result).toHaveLength(3); + expect(result[0]).toBe('/test/repo/README.md'); + expect(result[1]).toBe('/test/repo/docs/guide.md'); + expect(result[2]).toBe('/test/repo/src/main.ts'); + + expect(mockExecSync).toHaveBeenCalledWith('git ls-files', { + cwd: '/test/repo', + encoding: 'utf8', + stdio: 'pipe', + }); + }); + + it('should get tracked files with pattern', () => { + const lsFilesOutput = 'docs/guide.md\ndocs/api.md'; + mockExecSync + .mockReturnValueOnce(Buffer.from('/test/repo\n')) + .mockReturnValueOnce(Buffer.from(lsFilesOutput)); + + const result = gitUtils.getTrackedFiles('*.md'); + + expect(result).toHaveLength(2); + + expect(mockExecSync).toHaveBeenCalledWith('git ls-files *.md', { + cwd: '/test/repo', + encoding: 'utf8', + stdio: 'pipe', + }); + }); + }); + + describe('Reference Operations', () => { + it('should check if reference exists', () => { + mockExecSync.mockReturnValue(Buffer.from('abc123\n')); + + const result = gitUtils.refExists('main'); + + expect(result).toBe(true); + expect(mockExecSync).toHaveBeenCalledWith('git rev-parse --verify main', { + cwd: '/test/repo', + encoding: 'utf8', + stdio: 'pipe', + }); + }); + + it('should return false for non-existent reference', () => { + mockExecSync.mockImplementation(() => { + throw new Error('bad revision'); + }); + + const result = gitUtils.refExists('nonexistent'); + + expect(result).toBe(false); + }); + + it('should get merge base', () => { + mockExecSync.mockReturnValue(Buffer.from('abc123def\n')); + + const result = gitUtils.getMergeBase('main', 'feature'); + + expect(result).toBe('abc123def'); + expect(mockExecSync).toHaveBeenCalledWith('git merge-base main feature', { + cwd: '/test/repo', + encoding: 'utf8', + stdio: 'pipe', + }); + }); + }); + + describe('Combined Operations', () => { + it('should get all modified files including staged, unstaged, and committed', () => { + mockExecSync + .mockReturnValueOnce(Buffer.from('/test/repo\n')) // getRepositoryRoot for staged + 
.mockReturnValueOnce(Buffer.from('M\tstaged.md')) // getStagedFiles + .mockReturnValueOnce(Buffer.from('/test/repo\n')) // getRepositoryRoot for unstaged + .mockReturnValueOnce(Buffer.from('M\tunstaged.md')) // getUnstagedFiles + .mockReturnValueOnce(Buffer.from('/test/repo\n')) // getRepositoryRoot for since + .mockReturnValueOnce(Buffer.from('M\tcommitted.md')); // getChangedFiles + + const result = gitUtils.getAllModifiedFiles('HEAD~1'); + + expect(result).toHaveLength(3); + expect(result.some(f => f.path.endsWith('staged.md'))).toBe(true); + expect(result.some(f => f.path.endsWith('unstaged.md'))).toBe(true); + expect(result.some(f => f.path.endsWith('committed.md'))).toBe(true); + }); + + it('should deduplicate files in getAllModifiedFiles', () => { + mockExecSync + .mockReturnValueOnce(Buffer.from('/test/repo\n')) + .mockReturnValueOnce(Buffer.from('M\tsame-file.md')) // staged + .mockReturnValueOnce(Buffer.from('/test/repo\n')) + .mockReturnValueOnce(Buffer.from('M\tsame-file.md')) // unstaged + .mockReturnValueOnce(Buffer.from('/test/repo\n')) + .mockReturnValueOnce(Buffer.from('M\tsame-file.md')); // committed + + const result = gitUtils.getAllModifiedFiles('HEAD~1'); + + expect(result).toHaveLength(1); + expect(result[0].path).toBe('/test/repo/same-file.md'); + }); + }); + + describe('Error Handling', () => { + it('should provide helpful error messages for git command failures', () => { + mockExecSync.mockImplementation(() => { + const error = new Error('Command failed') as any; + error.stderr = Buffer.from('fatal: not a git repository'); + throw error; + }); + + expect(() => gitUtils.getCurrentBranch()).toThrow('Git command failed: git branch --show-current'); + }); + + it('should handle non-Error exceptions', () => { + mockExecSync.mockImplementation(() => { + throw 'String error'; + }); + + expect(() => gitUtils.getCurrentBranch()).toThrow('String error'); + }); + }); +}); \ No newline at end of file diff --git a/src/utils/git-utils.ts 
b/src/utils/git-utils.ts new file mode 100644 index 0000000..745def2 --- /dev/null +++ b/src/utils/git-utils.ts @@ -0,0 +1,384 @@ +/** + * Git integration utilities for incremental validation. + * + * @fileoverview Provides git operations for detecting changed files and managing validation caching + * @category Utils + */ + +import { execSync } from 'node:child_process'; +import { existsSync } from 'node:fs'; +import { resolve } from 'node:path'; + +/** + * Information about a file change in git. + * + * @category Utils + */ +export interface GitFileChange { + /** Path to the changed file */ + path: string; + /** Type of change */ + status: 'added' | 'modified' | 'deleted' | 'renamed' | 'copied'; + /** Previous path if renamed */ + previousPath?: string; +} + +/** + * Git repository information and status. + * + * @category Utils + */ +export interface GitStatus { + /** Current branch name */ + branch: string; + /** Current commit hash */ + commit: string; + /** Whether repository has uncommitted changes */ + isDirty: boolean; + /** Root directory of the git repository */ + rootDir: string; +} + +/** + * Git integration utility class. + * + * Provides methods for detecting file changes, managing git state, + * and integrating with validation workflows. + * + * @category Utils + * + * @example + * Basic usage + * ```typescript + * const git = new GitUtils(); + * + * if (git.isGitRepository()) { + * const changes = git.getChangedFiles('HEAD~1'); + * console.log(`Found ${changes.length} changed files`); + * } + * ``` + * + * @example + * Pre-commit validation + * ```typescript + * const git = new GitUtils(); + * const stagedFiles = git.getStagedFiles(); + * const markdownFiles = stagedFiles.filter(f => f.path.endsWith('.md')); + * ``` + */ +export class GitUtils { + private rootDir: string | undefined; + + constructor(private cwd: string = process.cwd()) {} + + /** + * Check if current directory is within a git repository. 
+ * + * @returns True if in a git repository + */ + isGitRepository(): boolean { + try { + this.execGit('rev-parse --git-dir'); + return true; + } catch { + return false; + } + } + + /** + * Get git repository root directory. + * + * @returns Absolute path to git root directory + * @throws Error if not in a git repository + */ + getRepositoryRoot(): string { + if (this.rootDir) { + return this.rootDir; + } + + try { + const output = this.execGit('rev-parse --show-toplevel'); + this.rootDir = output.trim(); + return this.rootDir; + } catch (error) { + throw new Error(`Not in a git repository: ${error}`); + } + } + + /** + * Get current git status information. + * + * @returns Git status information + */ + getStatus(): GitStatus { + const rootDir = this.getRepositoryRoot(); + const branch = this.getCurrentBranch(); + const commit = this.getCurrentCommit(); + const isDirty = this.hasUncommittedChanges(); + + return { + branch, + commit, + isDirty, + rootDir, + }; + } + + /** + * Get current branch name. + * + * @returns Current branch name + */ + getCurrentBranch(): string { + try { + return this.execGit('branch --show-current').trim(); + } catch { + // Fallback for detached HEAD + return this.execGit('rev-parse --short HEAD').trim(); + } + } + + /** + * Get current commit hash. + * + * @returns Current commit hash (full) + */ + getCurrentCommit(): string { + return this.execGit('rev-parse HEAD').trim(); + } + + /** + * Check if repository has uncommitted changes. + * + * @returns True if there are uncommitted changes + */ + hasUncommittedChanges(): boolean { + try { + const output = this.execGit('status --porcelain'); + return output.trim().length > 0; + } catch { + return false; + } + } + + /** + * Get files changed between two git references. 
+ * + * @param base - Base reference (commit, branch, tag) + * @param head - Head reference (defaults to current HEAD) + * @returns Array of changed files + * + * @example + * ```typescript + * // Files changed since last commit + * const changes = git.getChangedFiles('HEAD~1'); + * + * // Files changed in current branch vs main + * const branchChanges = git.getChangedFiles('main', 'HEAD'); + * ``` + */ + getChangedFiles(base: string, head: string = 'HEAD'): GitFileChange[] { + try { + const output = this.execGit(`diff --name-status ${base}..${head}`); + return this.parseFileChanges(output); + } catch (error) { + throw new Error(`Failed to get changed files: ${error}`); + } + } + + /** + * Get currently staged files. + * + * @returns Array of staged files + */ + getStagedFiles(): GitFileChange[] { + try { + const output = this.execGit('diff --cached --name-status'); + return this.parseFileChanges(output); + } catch (error) { + throw new Error(`Failed to get staged files: ${error}`); + } + } + + /** + * Get files changed in working directory (unstaged). + * + * @returns Array of unstaged changes + */ + getUnstagedFiles(): GitFileChange[] { + try { + const output = this.execGit('diff --name-status'); + return this.parseFileChanges(output); + } catch (error) { + throw new Error(`Failed to get unstaged files: ${error}`); + } + } + + /** + * Get list of all tracked files. + * + * @param pattern - Optional file pattern to filter + * @returns Array of tracked file paths + */ + getTrackedFiles(pattern?: string): string[] { + try { + const cmd = pattern ? `ls-files ${pattern}` : 'ls-files'; + const output = this.execGit(cmd); + return output + .split('\n') + .map(line => line.trim()) + .filter(line => line.length > 0) + .map(path => resolve(this.getRepositoryRoot(), path)); + } catch (error) { + throw new Error(`Failed to get tracked files: ${error}`); + } + } + + /** + * Check if a specific commit exists. 
+ * + * @param ref - Git reference to check + * @returns True if reference exists + */ + refExists(ref: string): boolean { + try { + this.execGit(`rev-parse --verify ${ref}`); + return true; + } catch { + return false; + } + } + + /** + * Get the merge base between two references. + * + * @param ref1 - First reference + * @param ref2 - Second reference + * @returns Merge base commit hash + */ + getMergeBase(ref1: string, ref2: string): string { + try { + return this.execGit(`merge-base ${ref1} ${ref2}`).trim(); + } catch (error) { + throw new Error(`Failed to get merge base: ${error}`); + } + } + + /** + * Get files that have been modified since a specific commit. + * Includes both staged and unstaged changes. + * + * @param since - Commit to compare against + * @returns Array of all modified files + */ + getAllModifiedFiles(since?: string): GitFileChange[] { + const changes: GitFileChange[] = []; + + // Get staged changes + changes.push(...this.getStagedFiles()); + + // Get unstaged changes + changes.push(...this.getUnstagedFiles()); + + // Get committed changes since specified commit + if (since) { + changes.push(...this.getChangedFiles(since)); + } + + // Remove duplicates (prefer staged over unstaged over committed) + const uniqueChanges = new Map(); + for (const change of changes.reverse()) { + if (!uniqueChanges.has(change.path)) { + uniqueChanges.set(change.path, change); + } + } + + return Array.from(uniqueChanges.values()); + } + + /** + * Execute a git command and return output. + * + * @private + */ + private execGit(command: string): string { + try { + return execSync(`git ${command}`, { + cwd: this.cwd, + encoding: 'utf8', + stdio: 'pipe', + }); + } catch (error) { + if (error instanceof Error) { + throw new Error(`Git command failed: git ${command}\n${error.message}`); + } + throw error; + } + } + + /** + * Parse git diff output into file change objects. 
+ * + * @private + */ + private parseFileChanges(output: string): GitFileChange[] { + if (!output.trim()) { + return []; + } + + return output + .split('\n') + .map(line => line.trim()) + .filter(line => line.length > 0) + .map(line => { + const [status, ...pathParts] = line.split('\t'); + const path = pathParts.join('\t'); // Handle paths with tabs + + let changeStatus: GitFileChange['status']; + let previousPath: string | undefined; + + switch (status.charAt(0)) { + case 'A': + changeStatus = 'added'; + break; + case 'M': + changeStatus = 'modified'; + break; + case 'D': + changeStatus = 'deleted'; + break; + case 'R': + changeStatus = 'renamed'; + // For renames, git shows "R\toldpath\tnewpath" + const paths = pathParts; + if (paths.length >= 2) { + previousPath = paths[0]; + return { + path: resolve(this.getRepositoryRoot(), paths[1]), + status: changeStatus, + previousPath: resolve(this.getRepositoryRoot(), previousPath), + }; + } + break; + case 'C': + changeStatus = 'copied'; + break; + default: + changeStatus = 'modified'; + } + + return { + path: resolve(this.getRepositoryRoot(), path), + status: changeStatus, + previousPath: previousPath ? resolve(this.getRepositoryRoot(), previousPath) : undefined, + }; + }); + } +} + +/** + * Default git utilities instance. + * + * @category Utils + */ +export const gitUtils = new GitUtils(); \ No newline at end of file diff --git a/src/utils/validation-cache.test.ts b/src/utils/validation-cache.test.ts new file mode 100644 index 0000000..53f323c --- /dev/null +++ b/src/utils/validation-cache.test.ts @@ -0,0 +1,331 @@ +/** + * Tests for validation result caching system. 
/**
 * Tests for validation result caching system.
 *
 * @fileoverview Tests for caching validation results and cache management
 */

// NOTE(review): `vi` is imported but never used in this file.
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
import { mkdtemp, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { ValidationCache, calculateFileHash, calculateConfigHash } from './validation-cache.js';

describe('ValidationCache', () => {
  // A fresh temp directory and cache per test keeps tests fully isolated.
  let tempDir: string;
  let cache: ValidationCache;

  beforeEach(async () => {
    tempDir = await mkdtemp(join(tmpdir(), 'validation-cache-test-'));
    cache = new ValidationCache({
      cacheDir: join(tempDir, 'cache'),
      externalLinksTtl: 1000, // 1 second for testing
    });
  });

  afterEach(async () => {
    await rm(tempDir, { recursive: true, force: true });
  });

  describe('Cache Initialization', () => {
    it('should initialize with default configuration', () => {
      const defaultCache = new ValidationCache();
      expect(defaultCache).toBeDefined();
    });

    it('should initialize with custom configuration', () => {
      const customCache = new ValidationCache({
        cacheDir: './custom-cache',
        externalLinksTtl: 5000,
        maxSizeBytes: 50 * 1024 * 1024,
      });
      expect(customCache).toBeDefined();
    });

    it('should check if cache is enabled', async () => {
      const enabled = await cache.isEnabled();
      expect(enabled).toBe(true);
    });
  });

  describe('Cache Operations', () => {
    const testFilePath = '/test/file.md';
    const testContentHash = 'abc123';
    const testConfigHash = 'def456';
    // NOTE(review): `as any` sidesteps the ValidationResult shape — a typed
    // fixture would catch drift between these tests and the real type.
    const testResult = {
      brokenLinks: [],
      totalLinks: 5,
    } as any;

    it('should store and retrieve cache entries', async () => {
      // Store in cache
      await cache.set(testFilePath, testContentHash, testResult, testConfigHash);

      // Retrieve from cache
      const cached = await cache.get(testFilePath, testContentHash, testConfigHash);

      expect(cached).toBeDefined();
      expect(cached?.filePath).toBe(testFilePath);
      expect(cached?.contentHash).toBe(testContentHash);
      expect(cached?.configHash).toBe(testConfigHash);
      expect(cached?.result).toEqual(testResult);
    });

    it('should return undefined for non-existent cache entries', async () => {
      const cached = await cache.get('/nonexistent.md', 'hash', 'config');
      expect(cached).toBeUndefined();
    });

    it('should invalidate cache when content hash changes', async () => {
      await cache.set(testFilePath, testContentHash, testResult, testConfigHash);

      // Try to get with different content hash
      const cached = await cache.get(testFilePath, 'different-hash', testConfigHash);
      expect(cached).toBeUndefined();
    });

    it('should invalidate cache when config hash changes', async () => {
      await cache.set(testFilePath, testContentHash, testResult, testConfigHash);

      // Try to get with different config hash
      const cached = await cache.get(testFilePath, testContentHash, 'different-config');
      expect(cached).toBeUndefined();
    });

    it('should include git commit in cache validation', async () => {
      const gitCommit = 'commit123';
      await cache.set(testFilePath, testContentHash, testResult, testConfigHash, gitCommit);

      // Should get with same git commit
      const cached1 = await cache.get(testFilePath, testContentHash, testConfigHash, gitCommit);
      expect(cached1).toBeDefined();

      // Should still get without git commit (backward compatibility)
      const cached2 = await cache.get(testFilePath, testContentHash, testConfigHash);
      expect(cached2).toBeDefined();
    });

    // NOTE(review): this test contains no assertion, so it can never fail —
    // `cached` is computed but unused. Consider asserting on the expected
    // expiry behavior or removing the test.
    it('should handle TTL expiration for external links', async () => {
      // Create cache with very short TTL
      const shortTtlCache = new ValidationCache({
        cacheDir: join(tempDir, 'short-ttl-cache'),
        externalLinksTtl: 1, // 1ms
      });

      await shortTtlCache.set(testFilePath, testContentHash, testResult, testConfigHash);

      // Wait for TTL to expire
      await new Promise(resolve => setTimeout(resolve, 10));

      // Should be invalidated due to TTL (assuming external links exist)
      const cached = await shortTtlCache.get(testFilePath, testContentHash, testConfigHash);
      // Note: This test assumes the hasExternalLinks method returns true
      // In real implementation, this would depend on the actual result content
    });
  });

  describe('Cache Management', () => {
    it('should invalidate specific cache entry', async () => {
      const testResult = { brokenLinks: [], totalLinks: 1 } as any;
      await cache.set('/test/file.md', 'hash1', testResult, 'config1');

      await cache.invalidate('/test/file.md');

      const cached = await cache.get('/test/file.md', 'hash1', 'config1');
      expect(cached).toBeUndefined();
    });

    it('should clear entire cache', async () => {
      const testResult = { brokenLinks: [], totalLinks: 1 } as any;
      await cache.set('/test/file1.md', 'hash1', testResult, 'config1');
      await cache.set('/test/file2.md', 'hash2', testResult, 'config1');

      await cache.clear();

      const cached1 = await cache.get('/test/file1.md', 'hash1', 'config1');
      const cached2 = await cache.get('/test/file2.md', 'hash2', 'config1');

      expect(cached1).toBeUndefined();
      expect(cached2).toBeUndefined();
    });

    it('should get cache metadata', async () => {
      const testResult = { brokenLinks: [], totalLinks: 3 } as any;
      await cache.set('/test/file1.md', 'hash1', testResult, 'config1');
      await cache.set('/test/file2.md', 'hash2', testResult, 'config1');

      const metadata = await cache.getMetadata();

      expect(metadata.totalFiles).toBeGreaterThan(0);
      expect(metadata.sizeBytes).toBeGreaterThan(0);
      expect(metadata.version).toBeDefined();
    });

    it('should perform cleanup of expired entries', async () => {
      // This test would need mock of shouldRemoveFromCache
      // For now, just test that cleanup runs without error
      const removedCount = await cache.cleanup();
      expect(removedCount).toBeGreaterThanOrEqual(0);
    });
  });

  describe('File Hash Calculation', () => {
    it('should calculate file hash', async () => {
      const testFile = join(tempDir, 'test.md');
      const content = '# Test File\n\nThis is test content.';
      await writeFile(testFile, content, 'utf-8');

      const hash = await calculateFileHash(testFile);

      expect(hash).toBeDefined();
      expect(hash).toHaveLength(64); // SHA-256 hex string
    });

    it('should produce same hash for same content', async () => {
      const testFile1 = join(tempDir, 'test1.md');
      const testFile2 = join(tempDir, 'test2.md');
      const content = '# Same Content\n\nIdentical content.';

      await writeFile(testFile1, content, 'utf-8');
      await writeFile(testFile2, content, 'utf-8');

      const hash1 = await calculateFileHash(testFile1);
      const hash2 = await calculateFileHash(testFile2);

      expect(hash1).toBe(hash2);
    });

    it('should produce different hashes for different content', async () => {
      const testFile1 = join(tempDir, 'test1.md');
      const testFile2 = join(tempDir, 'test2.md');

      await writeFile(testFile1, 'Content 1', 'utf-8');
      await writeFile(testFile2, 'Content 2', 'utf-8');

      const hash1 = await calculateFileHash(testFile1);
      const hash2 = await calculateFileHash(testFile2);

      expect(hash1).not.toBe(hash2);
    });

    it('should handle file reading errors', async () => {
      await expect(calculateFileHash('/nonexistent/file.md')).rejects.toThrow();
    });
  });

  describe('Config Hash Calculation', () => {
    it('should calculate config hash', () => {
      const config = {
        checkExternal: true,
        timeout: 5000,
        linkTypes: ['internal', 'external'],
      };

      const hash = calculateConfigHash(config);

      expect(hash).toBeDefined();
      expect(hash).toHaveLength(64); // SHA-256 hex string
    });

    // Key order must not affect the hash — configs are semantically equal.
    it('should produce same hash for same config', () => {
      const config1 = { a: 1, b: 2, c: 3 };
      const config2 = { c: 3, b: 2, a: 1 }; // Different order

      const hash1 = calculateConfigHash(config1);
      const hash2 = calculateConfigHash(config2);

      expect(hash1).toBe(hash2);
    });

    it('should produce different hashes for different configs', () => {
      const config1 = { checkExternal: true, timeout: 5000 };
      const config2 = { checkExternal: false, timeout: 5000 };

      const hash1 = calculateConfigHash(config1);
      const hash2 = calculateConfigHash(config2);

      expect(hash1).not.toBe(hash2);
    });

    it('should handle complex nested objects', () => {
      const config = {
        options: {
          validation: {
            types: ['internal', 'external'],
            settings: { strict: true, timeout: 1000 }
          }
        },
        features: ['cache', 'git']
      };

      const hash = calculateConfigHash(config);
      expect(hash).toBeDefined();
      expect(hash).toHaveLength(64);
    });
  });

  describe('Error Handling', () => {
    it('should handle cache write failures gracefully', async () => {
      // Create cache with invalid directory (read-only)
      // NOTE(review): '/invalid/readonly/path' relies on the path not being
      // creatable; on systems where mkdir succeeds this would not exercise
      // the failure path — TODO confirm on CI.
      const invalidCache = new ValidationCache({
        cacheDir: '/invalid/readonly/path'
      });

      // Should not throw, just log warning
      await expect(invalidCache.set('/test.md', 'hash', {} as any, 'config')).resolves.not.toThrow();
    });

    it('should handle cache read failures gracefully', async () => {
      const result = await cache.get('/test.md', 'hash', 'config');
      expect(result).toBeUndefined();
    });

    it('should handle malformed cache files', async () => {
      // Create malformed cache file
      // NOTE(review): the cache directory is created lazily, so it may not
      // exist yet here and this writeFile can reject with ENOENT before the
      // cache is exercised — consider `mkdir(cacheDir, { recursive: true })`
      // first.
      const cacheDir = join(tempDir, 'cache');
      await writeFile(join(cacheDir, 'malformed.json'), 'invalid json', 'utf-8');

      // Should handle gracefully
      const result = await cache.get('/test.md', 'hash', 'config');
      expect(result).toBeUndefined();
    });
  });

  describe('Cache Performance', () => {
    it('should handle multiple concurrent operations', async () => {
      const testResult = { brokenLinks: [], totalLinks: 1 } as any;
      const operations = [];

      // Create multiple concurrent cache operations
      for (let i = 0; i < 10; i++) {
        operations.push(
          cache.set(`/test/file${i}.md`, `hash${i}`, testResult, 'config')
        );
      }

      // All operations should complete successfully
      await Promise.all(operations);

      // Verify all entries were stored
      for (let i = 0; i < 10; i++) {
        const cached = await cache.get(`/test/file${i}.md`, `hash${i}`, 'config');
        expect(cached).toBeDefined();
      }
    });

    it('should handle large cache entries', async () => {
      // Create large result object (1000 broken links) to exercise
      // serialization of sizable payloads.
      const largeResult = {
        brokenLinks: Array(1000).fill(null).map((_, i) => ({
          link: { href: `https://example.com/link${i}`, type: 'external' },
          reason: `Test reason ${i}`
        })),
        totalLinks: 1000
      } as any;

      await cache.set('/test/large.md', 'hash', largeResult, 'config');

      const cached = await cache.get('/test/large.md', 'hash', 'config');
      expect(cached?.result.brokenLinks).toHaveLength(1000);
    });
  });
});
/**
 * Cached validation result for a file.
 *
 * @category Utils
 */
export interface CachedValidationResult {
  /** File path that was validated */
  filePath: string;
  /** Hash of file content when validated */
  contentHash: string;
  /** Git commit hash when validated (optional) */
  gitCommit?: string;
  /** Timestamp (epoch milliseconds) when validation was performed */
  timestamp: number;
  /** TTL for external link checks (milliseconds) */
  externalLinksTtl: number;
  /** Validation result */
  result: ValidationResult;
  /** Markmv version used for validation */
  version: string;
  /** Configuration hash used for validation */
  configHash: string;
}

/**
 * Cache metadata and statistics.
 *
 * @category Utils
 */
export interface CacheMetadata {
  /** Total number of cached files */
  totalFiles: number;
  /** Total number of cached links */
  totalLinks: number;
  /** Cache hit rate percentage (0-100) */
  hitRate: number;
  /** Size of cache in bytes */
  sizeBytes: number;
  /** Last cleanup timestamp (epoch milliseconds; 0 if never) */
  lastCleanup: number;
  /** Cache version */
  version: string;
}

/**
 * Cache configuration options.
 *
 * @category Utils
 */
export interface CacheConfig {
  /** Cache directory path */
  cacheDir: string;
  /** TTL for external links in milliseconds */
  externalLinksTtl: number;
  /** Maximum cache size in bytes */
  maxSizeBytes: number;
  /** Enable cache compression */
  // NOTE(review): no use of `compression` is visible in this chunk — confirm
  // it is actually honored by the cache implementation.
  compression: boolean;
  /** Cleanup interval in milliseconds */
  // NOTE(review): likewise, no visible consumer of `cleanupInterval` here.
  cleanupInterval: number;
}

/**
 * Default cache configuration.
 *
 * Used as the base that constructor-supplied partial configs are merged over.
 */
const DEFAULT_CACHE_CONFIG: CacheConfig = {
  cacheDir: '.markmv-cache',
  externalLinksTtl: 24 * 60 * 60 * 1000, // 24 hours
  maxSizeBytes: 100 * 1024 * 1024, // 100MB
  compression: true,
  cleanupInterval: 24 * 60 * 60 * 1000, // 24 hours
};
+ * + * @category Utils + * + * @example + * Basic usage + * ```typescript + * const cache = new ValidationCache(); + * + * // Check for cached result + * const cached = await cache.get('/path/to/file.md', contentHash); + * if (cached) { + * console.log('Using cached validation result'); + * return cached.result; + * } + * + * // Perform validation and cache result + * const result = await validateFile('/path/to/file.md'); + * await cache.set('/path/to/file.md', contentHash, result); + * ``` + * + * @example + * Configuration + * ```typescript + * const cache = new ValidationCache({ + * cacheDir: '.custom-cache', + * externalLinksTtl: 12 * 60 * 60 * 1000, // 12 hours + * maxSizeBytes: 50 * 1024 * 1024, // 50MB + * }); + * ``` + */ +export class ValidationCache { + private config: CacheConfig; + private metadata: CacheMetadata | undefined; + private hits = 0; + private misses = 0; + + constructor(config: Partial = {}) { + this.config = { ...DEFAULT_CACHE_CONFIG, ...config }; + } + + /** + * Get cached validation result for a file. + * + * @param filePath - Path to the file + * @param contentHash - Hash of current file content + * @param configHash - Hash of current validation configuration + * @param gitCommit - Current git commit hash + * @returns Cached result if valid, undefined otherwise + */ + async get( + filePath: string, + contentHash: string, + configHash: string, + gitCommit?: string + ): Promise { + try { + const cacheFile = this.getCacheFilePath(filePath); + if (!existsSync(cacheFile)) { + this.misses++; + return undefined; + } + + const cached = await this.readCacheFile(cacheFile); + if (!cached) { + this.misses++; + return undefined; + } + + // Validate cache entry + if (!this.isCacheValid(cached, contentHash, configHash, gitCommit)) { + this.misses++; + return undefined; + } + + this.hits++; + return cached; + } catch { + this.misses++; + return undefined; + } + } + + /** + * Store validation result in cache. 
+ * + * @param filePath - Path to the file + * @param contentHash - Hash of file content + * @param result - Validation result to cache + * @param configHash - Hash of validation configuration + * @param gitCommit - Current git commit hash + */ + async set( + filePath: string, + contentHash: string, + result: ValidationResult, + configHash: string, + gitCommit?: string + ): Promise { + try { + const cacheFile = this.getCacheFilePath(filePath); + await mkdir(dirname(cacheFile), { recursive: true }); + + const cached: CachedValidationResult = { + filePath, + contentHash, + gitCommit, + timestamp: Date.now(), + externalLinksTtl: this.config.externalLinksTtl, + result, + version: this.getVersion(), + configHash, + }; + + await this.writeCacheFile(cacheFile, cached); + } catch (error) { + // Cache write failures should not break validation + console.warn(`Failed to write cache for ${filePath}:`, error); + } + } + + /** + * Invalidate cache entry for a file. + * + * @param filePath - Path to the file + */ + async invalidate(filePath: string): Promise { + try { + const cacheFile = this.getCacheFilePath(filePath); + if (existsSync(cacheFile)) { + const { unlink } = await import('node:fs/promises'); + await unlink(cacheFile); + } + } catch { + // Ignore errors when invalidating + } + } + + /** + * Clear entire cache. + */ + async clear(): Promise { + try { + if (existsSync(this.config.cacheDir)) { + const { rm } = await import('node:fs/promises'); + await rm(this.config.cacheDir, { recursive: true, force: true }); + } + } catch (error) { + throw new Error(`Failed to clear cache: ${error}`); + } + } + + /** + * Get cache metadata and statistics. 
+ * + * @returns Cache metadata + */ + async getMetadata(): Promise { + if (this.metadata) { + return this.metadata; + } + + try { + let totalFiles = 0; + let totalLinks = 0; + let sizeBytes = 0; + + if (existsSync(this.config.cacheDir)) { + const { readdir } = await import('node:fs/promises'); + const files = await readdir(this.config.cacheDir, { recursive: true }); + + for (const file of files) { + if (typeof file === 'string' && file.endsWith('.json')) { + const filePath = join(this.config.cacheDir, file); + try { + const stats = await stat(filePath); + sizeBytes += stats.size; + + const cached = await this.readCacheFile(filePath); + if (cached) { + totalFiles++; + // Count links from validation result + const linkCount = this.countLinksInResult(cached.result); + totalLinks += linkCount; + } + } catch { + // Skip invalid cache files + } + } + } + } + + const totalRequests = this.hits + this.misses; + const hitRate = totalRequests > 0 ? (this.hits / totalRequests) * 100 : 0; + + this.metadata = { + totalFiles, + totalLinks, + hitRate: Math.round(hitRate * 10) / 10, + sizeBytes, + lastCleanup: 0, + version: this.getVersion(), + }; + + return this.metadata; + } catch (error) { + throw new Error(`Failed to get cache metadata: ${error}`); + } + } + + /** + * Perform cache cleanup - remove expired and invalid entries. 
+ * + * @returns Number of entries removed + */ + async cleanup(): Promise { + try { + let removedCount = 0; + + if (!existsSync(this.config.cacheDir)) { + return removedCount; + } + + const { readdir } = await import('node:fs/promises'); + const files = await readdir(this.config.cacheDir, { recursive: true }); + + for (const file of files) { + if (typeof file === 'string' && file.endsWith('.json')) { + const filePath = join(this.config.cacheDir, file); + try { + const cached = await this.readCacheFile(filePath); + if (cached && this.shouldRemoveFromCache(cached)) { + const { unlink } = await import('node:fs/promises'); + await unlink(filePath); + removedCount++; + } + } catch { + // Remove invalid cache files + try { + const { unlink } = await import('node:fs/promises'); + await unlink(filePath); + removedCount++; + } catch { + // Ignore cleanup errors + } + } + } + } + + // Reset metadata after cleanup + this.metadata = undefined; + + return removedCount; + } catch (error) { + throw new Error(`Failed to cleanup cache: ${error}`); + } + } + + /** + * Check if cache is enabled and accessible. + * + * @returns True if cache can be used + */ + async isEnabled(): Promise { + try { + await mkdir(this.config.cacheDir, { recursive: true }); + return true; + } catch { + return false; + } + } + + /** + * Get cache file path for a given source file. + * + * @private + */ + private getCacheFilePath(filePath: string): string { + const hash = createHash('sha256').update(filePath).digest('hex'); + return join(this.config.cacheDir, `${hash}.json`); + } + + /** + * Read and parse cache file. 
+ * + * @private + */ + private async readCacheFile(cacheFile: string): Promise { + try { + const content = await readFile(cacheFile, 'utf-8'); + const parsed = JSON.parse(content) as CachedValidationResult; + + // Validate structure + if (!parsed.filePath || !parsed.contentHash || !parsed.result) { + return undefined; + } + + return parsed; + } catch { + return undefined; + } + } + + /** + * Write cache file. + * + * @private + */ + private async writeCacheFile(cacheFile: string, cached: CachedValidationResult): Promise { + const content = JSON.stringify(cached, null, this.config.compression ? 0 : 2); + await writeFile(cacheFile, content, 'utf-8'); + } + + /** + * Check if cached result is still valid. + * + * @private + */ + private isCacheValid( + cached: CachedValidationResult, + contentHash: string, + configHash: string, + gitCommit?: string + ): boolean { + // Check content hash + if (cached.contentHash !== contentHash) { + return false; + } + + // Check configuration hash + if (cached.configHash !== configHash) { + return false; + } + + // Check version compatibility + if (cached.version !== this.getVersion()) { + return false; + } + + // Check external links TTL + const now = Date.now(); + const age = now - cached.timestamp; + if (age > cached.externalLinksTtl) { + // Only invalid if there are external links that need re-checking + const hasExternalLinks = this.hasExternalLinks(cached.result); + if (hasExternalLinks) { + return false; + } + } + + return true; + } + + /** + * Check if cache entry should be removed during cleanup. 
+ * + * @private + */ + private shouldRemoveFromCache(cached: CachedValidationResult): boolean { + const now = Date.now(); + const age = now - cached.timestamp; + + // Remove if older than 7 days + const maxAge = 7 * 24 * 60 * 60 * 1000; + if (age > maxAge) { + return true; + } + + // Remove if version mismatch + if (cached.version !== this.getVersion()) { + return true; + } + + // Remove if source file no longer exists + if (!existsSync(cached.filePath)) { + return true; + } + + return false; + } + + /** + * Check if validation result contains external links. + * + * @private + */ + private hasExternalLinks(result: ValidationResult): boolean { + // This would need to be implemented based on the actual ValidationResult structure + // For now, assume it might have external links + return true; + } + + /** + * Count links in validation result. + * + * @private + */ + private countLinksInResult(result: ValidationResult): number { + // This would need to be implemented based on the actual ValidationResult structure + // For now, return a placeholder + return 0; + } + + /** + * Get current markmv version. + * + * @private + */ + private getVersion(): string { + // This would typically read from package.json + return '1.29.0'; + } +} + +/** + * Calculate hash of file content. + * + * @param filePath - Path to the file + * @returns SHA-256 hash of file content + * + * @category Utils + */ +export async function calculateFileHash(filePath: string): Promise { + try { + const content = await readFile(filePath, 'utf-8'); + return createHash('sha256').update(content).digest('hex'); + } catch (error) { + throw new Error(`Failed to calculate hash for ${filePath}: ${error}`); + } +} + +/** + * Calculate hash of configuration object. 
+ * + * @param config - Configuration object + * @returns SHA-256 hash of configuration + * + * @category Utils + */ +export function calculateConfigHash(config: Record): string { + const configString = JSON.stringify(config, Object.keys(config).sort()); + return createHash('sha256').update(configString).digest('hex'); +} \ No newline at end of file