diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 367b9c0..379d7e2 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -3,6 +3,9 @@ on:
   push:
     tags:
       - "v*"
+    branches:
+      - main
+  workflow_dispatch:
 
 permissions:
   contents: write
@@ -10,6 +13,7 @@ permissions:
 
 jobs:
   build:
+    if: startsWith(github.ref, 'refs/tags/v')
     strategy:
       matrix:
         include:
@@ -84,6 +88,7 @@ jobs:
           path: sha-${{ matrix.target }}.txt
 
   release:
+    if: startsWith(github.ref, 'refs/tags/v')
     needs: build
     runs-on: ubuntu-latest
     steps:
@@ -105,8 +110,10 @@ jobs:
           generate_release_notes: true
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
 
   publish:
+    if: startsWith(github.ref, 'refs/tags/v')
     name: Publish to crates.io
     needs: [release]
     runs-on: ubuntu-latest
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 112bf26..4f64d04 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,31 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.13.8] - 2025-01-20
+[0.13.8]: https://github.com/bodo-run/yek/compare/v0.13.7...v0.13.8
+### Bug Fixes
+
+- Use WalkBuilder in streaming mode to respect gitignore
+- Include hidden files in WalkBuilder configuration
+
+### Miscellaneous Tasks
+
+- Fix the release script
+
+### Refactor
+
+- Improve gitignore handling and fix clippy warnings
+- Improve binary file handling and remove duplicate gitignore checks
+
+### Testing
+
+- Add comprehensive gitignore end-to-end tests
+- Fix binary file test assertion
+
+### Ci
+
+- Simpler release script
+
 ## [0.13.7] - 2025-01-19
 [0.13.7]: https://github.com/bodo-run/yek/compare/v0.13.5...v0.13.7
 ### Bug Fixes
diff --git a/Cargo.lock b/Cargo.lock
index ca45e0a..5f9d7a7 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3358,7 +3358,7 @@ dependencies = [
 
 [[package]]
 name = "yek"
-version = "0.13.7"
+version = "0.13.8"
 dependencies = [
  "anyhow",
  "assert_cmd",
diff --git a/Cargo.toml b/Cargo.toml
index cdebb5b..4a9ca74 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "yek"
-version = "0.13.7"
+version = "0.13.8"
 edition = "2021"
 
 [dependencies]
diff --git a/scripts/make-release.sh b/scripts/make-release.sh
index 4943f94..633b8b5 100755
--- a/scripts/make-release.sh
+++ b/scripts/make-release.sh
@@ -45,14 +45,14 @@ echo "Bumping version to: $NEW_VERSION"
 
-# 4. Generate/Update CHANGELOG using cargo-cliff
-# Make sure cargo-cliff is installed (cargo install cargo-cliff)
-cargo cliff --tag "v${NEW_VERSION}" --output CHANGELOG.md
+# 4. Generate/Update CHANGELOG using git-cliff
+# Make sure git-cliff is installed (cargo install git-cliff)
+git cliff --tag "v${NEW_VERSION}" --output CHANGELOG.md
 
 # 5. Update Cargo.toml
 sed -i.bak "s/^version *= *\"${CURRENT_VERSION}\"/version = \"${NEW_VERSION}\"/" Cargo.toml
 rm -f Cargo.toml.bak
 
 # 6. Update Cargo.lock (so that if your package references itself, it's updated)
-cargo update -p "$(cargo pkgid | sed 's|.*#||')"
+cargo update -p yek
 
 # 7. Commit changes
 git add Cargo.toml Cargo.lock CHANGELOG.md
diff --git a/src/lib.rs b/src/lib.rs
index a167c73..034b07e 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,5 +1,6 @@
 use anyhow::Result;
 use ignore::gitignore::GitignoreBuilder;
+use ignore::WalkBuilder;
 use regex::Regex;
 use serde::Deserialize;
 use std::collections::HashMap;
@@ -8,7 +9,6 @@ use std::io::{Read, Write};
 use std::path::{Path, PathBuf};
 use std::process::{Command as SysCommand, Stdio};
 use tracing::debug;
-use walkdir::WalkDir;
 mod parallel;
 use parallel::process_files_parallel;
 use path_slash::PathExt;
@@ -562,7 +562,7 @@ pub fn serialize_repo(
     if gitignore_path.exists() {
         builder.add(&gitignore_path);
     }
-    let gitignore = builder
+    let _gitignore = builder
         .build()
         .unwrap_or_else(|_| GitignoreBuilder::new(base_path).build().unwrap());
 
@@ -595,67 +595,65 @@ pub fn serialize_repo(
 
     // 1) Collect all FileEntry objects
     let mut files: Vec<FileEntry> = Vec::new();
-    for entry in WalkDir::new(base_path)
-        .follow_links(true)
-        .into_iter()
-        .filter_map(|e| e.ok())
-    {
-        let path = entry.path();
-        if !path.is_file() {
-            continue;
-        }
-
-        // Get path relative to base
-        let rel_str = normalize_path(base_path, path);
-
-        // Skip via .gitignore
-        if gitignore
-            .matched(path.strip_prefix(base_path).unwrap_or(path), false)
-            .is_ignore()
-        {
-            debug!("Skipping {} - matched by gitignore", rel_str);
-            continue;
-        }
+    let mut builder = WalkBuilder::new(base_path);
+    builder
+        .follow_links(false)
+        .standard_filters(true)
+        .hidden(false)
+        .git_ignore(true)
+        .git_global(false)
+        .git_exclude(false)
+        .require_git(false);
+
+    for entry in builder.build().flatten() {
+        if entry.file_type().is_some_and(|ft| ft.is_file()) {
+            let path = entry.path();
+            let rel_str = normalize_path(base_path, path);
+
+            // Skip via our ignore regexes
+            if final_config
+                .ignore_patterns
+                .iter()
+                .any(|pat| pat.is_match(&rel_str))
+            {
+                debug!("Skipping {} - matched ignore pattern", rel_str);
+                continue;
+            }
 
-        // Skip via our ignore regexes
-        if final_config
-            .ignore_patterns
-            .iter()
-            .any(|pat| pat.is_match(&rel_str))
-        {
-            debug!("Skipping {} - matched ignore pattern", rel_str);
-            continue;
-        }
+            // Skip .gitignore files
+            if path.file_name().is_some_and(|f| f == ".gitignore") {
+                debug!("Skipping .gitignore file");
+                continue;
+            }
 
-        // Check if text or binary
-        let user_bin_exts = config
-            .as_ref()
-            .map(|c| c.binary_extensions.as_slice())
-            .unwrap_or(&[]);
-        if !is_text_file(path, user_bin_exts) {
-            debug!("Skipping binary file: {}", rel_str);
-            continue;
-        }
-
-        // Calculate priority with recentness boost
-        let mut priority = get_file_priority(
-            &rel_str,
-            &final_config.ignore_patterns,
-            &final_config.priority_list,
-        );
+            // Skip binary files
+            let user_bin_exts = config
+                .as_ref()
+                .map(|c| c.binary_extensions.as_slice())
+                .unwrap_or(&[]);
+            if !is_text_file(path, user_bin_exts) {
+                debug!("Skipping binary file: {}", rel_str);
+                continue;
+            }
 
-        // Apply recentness boost if available
-        if let Some(boost_map) = recentness_boost.as_ref() {
-            if let Some(boost) = boost_map.get(&rel_str) {
-                priority += *boost;
+            // Apply recentness boost to priority
+            let mut priority = get_file_priority(
+                &rel_str,
+                &final_config.ignore_patterns,
+                &final_config.priority_list,
+            );
+            if let Some(boost_map) = &recentness_boost {
+                if let Some(boost) = boost_map.get(&rel_str) {
+                    priority += boost;
+                }
             }
-        }
 
-        files.push(FileEntry {
-            path: path.to_path_buf(),
-            priority,
-            file_index: files.len(),
-        });
+            files.push(FileEntry {
+                path: path.to_path_buf(),
+                priority,
+                file_index: files.len(),
+            });
+        }
     }
 
     // 2) Sort ascending by priority, so the last entries are the most important
diff --git a/src/parallel.rs b/src/parallel.rs
index 3c0f8f4..e023236 100644
--- a/src/parallel.rs
+++ b/src/parallel.rs
@@ -3,7 +3,7 @@ use crate::{
     PriorityPattern, Result, YekConfig,
 };
 use crossbeam::channel::{bounded, Receiver, Sender};
-use ignore::{gitignore::GitignoreBuilder, WalkBuilder};
+use ignore::WalkBuilder;
 use num_cpus::get;
 use regex::Regex;
 use std::{
@@ -218,21 +218,14 @@ fn collect_files(
     priority_list: &[PriorityPattern],
     recentness_boost: Option<&HashMap<String, i32>>,
 ) -> Result<Vec<FileEntry>> {
-    // Build gitignore matcher
-    let mut builder = GitignoreBuilder::new(base_dir);
-    let gitignore_path = base_dir.join(".gitignore");
-    if gitignore_path.exists() {
-        builder.add(&gitignore_path);
-    }
-    let gitignore = builder
-        .build()
-        .unwrap_or_else(|_| GitignoreBuilder::new(base_dir).build().unwrap());
-
     let mut builder = WalkBuilder::new(base_dir);
     builder
         .follow_links(false)
         .standard_filters(true)
-        .add_custom_ignore_filename(".gitignore")
+        .hidden(false)
+        .git_ignore(true)
+        .git_global(false)
+        .git_exclude(false)
         .require_git(false);
 
     let mut results = Vec::new();
@@ -240,15 +233,8 @@ fn collect_files(
 
     for entry in builder.build().flatten() {
         if entry.file_type().is_some_and(|ft| ft.is_file()) {
-            let path = entry.path().to_path_buf();
-            let rel_str = normalize_path(base_dir, &path);
-            let rel_path = path.strip_prefix(base_dir).unwrap_or(&path);
-
-            // Skip via .gitignore
-            if gitignore.matched(rel_path, false).is_ignore() {
-                debug!("Skipping {} - matched by gitignore", rel_str);
-                continue;
-            }
+            let path = entry.path();
+            let rel_str = normalize_path(base_dir, path);
 
             // Skip via our ignore regexes
             if ignore_patterns.iter().any(|p| p.is_match(&rel_str)) {
@@ -256,18 +242,24 @@ fn collect_files(
                 continue;
             }
 
-            // Check if text or binary
+            // Skip .gitignore files
+            if path.file_name().is_some_and(|f| f == ".gitignore") {
+                debug!("Skipping .gitignore file");
+                continue;
+            }
+
+            // Skip binary files
             let user_bin_exts = config
                 .as_ref()
                 .map(|c| c.binary_extensions.as_slice())
                 .unwrap_or(&[]);
-            if !is_text_file(&path, user_bin_exts) {
+            if !is_text_file(path, user_bin_exts) {
                 debug!("Skipping binary file: {}", rel_str);
                 continue;
             }
 
             results.push(FileEntry {
-                path,
+                path: path.to_path_buf(),
                 priority: get_file_priority(&rel_str, ignore_patterns, priority_list),
                 file_index,
             });
diff --git a/tests/test_gitignore_e2e.rs b/tests/test_gitignore_e2e.rs
new file mode 100644
index 0000000..46233ba
--- /dev/null
+++ b/tests/test_gitignore_e2e.rs
@@ -0,0 +1,239 @@
+mod integration_common;
+use assert_cmd::Command;
+use integration_common::{create_file, setup_temp_repo};
+use std::fs;
+
+/// Helper to run yek in streaming mode (pipe to stdout)
+fn run_stream_mode(dir: &std::path::Path) -> String {
+    let output = Command::cargo_bin("yek")
+        .unwrap()
+        .current_dir(dir)
+        .env("TERM", "dumb") // Force non-interactive mode
+        .env("NO_COLOR", "1") // Disable color output
+        .env("CI", "1") // Force CI mode
+        .output()
+        .expect("Failed to execute command");
+
+    String::from_utf8_lossy(&output.stdout).into_owned()
+}
+
+/// Helper to run yek in file mode (write to output directory)
+fn run_file_mode(dir: &std::path::Path) -> String {
+    let output_dir = dir.join("output");
+    let _ = Command::cargo_bin("yek")
+        .unwrap()
+        .current_dir(dir)
+        .arg("--output-dir")
+        .arg(&output_dir)
+        .assert()
+        .success();
+
+    // Read all chunk files
+    let mut content = String::new();
+    let read_dir = fs::read_dir(&output_dir).expect("Failed to read output directory");
+    for entry in read_dir {
+        let entry = entry.expect("Failed to read directory entry");
+        let path = entry.path();
+        content.push_str(
+            &fs::read_to_string(&path)
+                .unwrap_or_else(|_| panic!("Failed to read file: {}", path.display())),
+        );
+    }
+    content
+}
+
+#[test]
+fn basic_gitignore_exclusion() {
+    let repo = setup_temp_repo();
+
+    // Setup test files
+    create_file(repo.path(), ".gitignore", "ignore_me.txt\n");
+    create_file(repo.path(), "ignore_me.txt", "should be ignored");
+    create_file(repo.path(), "keep_me.txt", "should be kept");
+
+    // Test both modes
+    for content in [run_stream_mode(repo.path()), run_file_mode(repo.path())] {
+        // Should exclude ignored file
+        assert!(
+            !content.contains("ignore_me.txt"),
+            "Found ignored file in output: {content}"
+        );
+
+        // Should include kept file
+        assert!(
+            content.contains("keep_me.txt"),
+            "Missing kept file in output: {content}"
+        );
+    }
+}
+
+#[test]
+fn nested_gitignore_in_subdirectory() {
+    let repo = setup_temp_repo();
+
+    // Root gitignore
+    create_file(repo.path(), ".gitignore", "*.temp\n");
+
+    // Subdirectory with its own gitignore
+    let sub_dir = repo.path().join("src");
+    fs::create_dir_all(&sub_dir).unwrap();
+    create_file(&sub_dir, ".gitignore", "secret.conf\n");
+    create_file(&sub_dir, "secret.conf", "password=1234");
+    create_file(&sub_dir, "app.rs", "fn main() {}");
+
+    // Another subdir without gitignore
+    let other_dir = repo.path().join("config");
+    fs::create_dir_all(&other_dir).unwrap();
+    create_file(&other_dir, "settings.temp", "key=value");
+
+    for content in [run_stream_mode(repo.path()), run_file_mode(repo.path())] {
+        // Should exclude nested gitignore entries
+        assert!(
+            !content.contains("secret.conf"),
+            "Found nested gitignore file: {content}"
+        );
+
+        // Should exclude root gitignore pattern
+        assert!(
+            !content.contains("settings.temp"),
+            "Found root gitignore pattern violation: {content}"
+        );
+
+        // Should keep valid files
+        assert!(
+            content.contains("app.rs"),
+            "Missing valid source file: {content}"
+        );
+    }
+}
+
+#[test]
+fn complex_ignore_patterns() {
+    let repo = setup_temp_repo();
+
+    create_file(
+        repo.path(),
+        ".gitignore",
+        "
+        # Comment
+        *.log
+        /build/
+        temp/*
+        !temp/keep.me
+    ",
+    );
+
+    // Create test files
+    create_file(repo.path(), "error.log", "logs");
+    create_file(repo.path(), "build/output.exe", "binary");
+    create_file(repo.path(), "temp/junk.tmp", "tmp");
+    create_file(repo.path(), "temp/keep.me", "important");
+    create_file(repo.path(), "src/main.rs", "fn main() {}");
+
+    for content in [run_stream_mode(repo.path()), run_file_mode(repo.path())] {
+        // Excluded patterns
+        assert!(
+            !content.contains("error.log"),
+            "Found *.log file: {content}"
+        );
+        assert!(
+            !content.contains("build/output.exe"),
+            "Found build dir file: {content}"
+        );
+        assert!(
+            !content.contains("temp/junk.tmp"),
+            "Found temp/* file: {content}"
+        );
+
+        // Included exceptions
+        assert!(
+            content.contains("temp/keep.me"),
+            "Missing !temp/keep.me: {content}"
+        );
+        assert!(
+            content.contains("src/main.rs"),
+            "Missing source file: {content}"
+        );
+    }
+}
+
+#[test]
+fn combined_ignore_rules() {
+    let repo = setup_temp_repo();
+
+    // Main config
+    create_file(
+        repo.path(),
+        "yek.toml",
+        "
+        [ignore_patterns]
+        patterns = [\"^exclude/\"]
+    ",
+    );
+
+    // Gitignore
+    create_file(
+        repo.path(),
+        ".gitignore",
+        "
+        *.tmp
+        /node_modules/
+    ",
+    );
+
+    // Test files
+    create_file(repo.path(), "exclude/secret.txt", "confidential");
+    create_file(repo.path(), "test.tmp", "temporary");
+    create_file(repo.path(), "node_modules/lib.js", "junk");
+    create_file(repo.path(), "src/index.rs", "fn main() {}");
+
+    for content in [run_stream_mode(repo.path()), run_file_mode(repo.path())] {
+        // Should exclude both gitignore and config patterns
+        assert!(
+            !content.contains("exclude/secret.txt"),
+            "Found excluded dir: {content}"
+        );
+        assert!(!content.contains("test.tmp"), "Found *.tmp file: {content}");
+        assert!(
+            !content.contains("node_modules/lib.js"),
+            "Found node_modules: {content}"
+        );
+
+        // Should keep valid files
+        assert!(
+            content.contains("src/index.rs"),
+            "Missing source file: {content}"
+        );
+    }
+}
+
+#[test]
+fn binary_file_exclusion() {
+    let repo = setup_temp_repo();
+
+    // Create files without .gitignore (from_utf8_lossy stores U+FFFD text, not raw bytes)
+    create_file(
+        repo.path(),
+        "binary.jpg",
+        &String::from_utf8_lossy(&[0xFF, 0xD8, 0xFF, 0xE0]),
+    ); // JPEG magic bytes, lossily decoded
+    create_file(repo.path(), "text.txt", "normal text");
+    create_file(repo.path(), "unknown.xyz", "unknown format");
+
+    for content in [run_stream_mode(repo.path()), run_file_mode(repo.path())] {
+        // Should exclude known binary format
+        assert!(
+            !content.contains("binary.jpg"),
+            "Found binary.jpg: {content}"
+        );
+
+        // Should include text files
+        assert!(content.contains("text.txt"), "Missing text.txt: {content}");
+
+        // Should include unknown.xyz since it's text content
+        assert!(
+            content.contains("unknown.xyz"),
+            "Missing unknown.xyz which has text content: {content}"
+        );
+    }
+}