Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
166 changes: 166 additions & 0 deletions .github/workflows/bench.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
# Benchmark and stress-test workflow.
name: Bench

# Runs on pushes to main and on pull requests that target main.
on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

# Environment variables shared by every job in this workflow.
# Values are quoted so they are unambiguously strings (env values are always
# passed to the runner as strings).
env:
  CARGO_TERM_COLOR: "always"
  RUST_BACKTRACE: "short"
  OPENSSL_STATIC: "true"
  PKG_CONFIG_ALLOW_CROSS: "true"

jobs:
  # Ensure various stress tests pass on all platforms
  stress:
    name: Stress
    # Use the runner declared by each matrix entry. A hard-coded
    # `ubuntu-latest` here would make the macOS and Windows entries run on
    # Linux, so the "native" builds would never exercise those platforms.
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        include:
          # Linux builds using cross
          - os: ubuntu-latest
            target: x86_64-unknown-linux-gnu
            use-cross: true
            artifact_name: yek
            asset_name: yek-x86_64-unknown-linux-gnu.tar.gz
          - os: ubuntu-latest
            target: aarch64-unknown-linux-gnu
            use-cross: true
            artifact_name: yek
            asset_name: yek-aarch64-unknown-linux-gnu.tar.gz
          - os: ubuntu-latest
            target: x86_64-unknown-linux-musl
            use-cross: true
            artifact_name: yek
            asset_name: yek-x86_64-unknown-linux-musl.tar.gz
          - os: ubuntu-latest
            target: aarch64-unknown-linux-musl
            use-cross: true
            artifact_name: yek
            asset_name: yek-aarch64-unknown-linux-musl.tar.gz

          # Native macOS builds
          - os: macos-latest
            target: x86_64-apple-darwin
            artifact_name: yek
            asset_name: yek-x86_64-apple-darwin.tar.gz
          - os: macos-latest
            target: aarch64-apple-darwin
            artifact_name: yek
            asset_name: yek-aarch64-apple-darwin.tar.gz

          # Native Windows builds
          - os: windows-latest
            target: x86_64-pc-windows-msvc
            artifact_name: yek.exe
            asset_name: yek-x86_64-pc-windows-msvc.zip
          - os: windows-latest
            target: aarch64-pc-windows-msvc
            artifact_name: yek.exe
            asset_name: yek-aarch64-pc-windows-msvc.zip

    steps:
      - uses: actions/checkout@v4

      # Shallow clone of microsoft/vscode into ./vscode, inside the workspace
      # that the final "Run yek" step executes in.
      - name: Checkout VSCode repository
        uses: actions/checkout@v4
        with:
          repository: microsoft/vscode
          path: vscode
          fetch-depth: 1

      - name: Install cross (Linux)
        if: matrix.use-cross
        run: cargo install cross

      - name: Setup Rust (Native builds)
        if: ${{ !matrix.use-cross }}
        uses: actions-rust-lang/setup-rust-toolchain@v1
        with:
          components: rustfmt,clippy

      - name: Install Rust target
        if: ${{ !matrix.use-cross }}
        run: rustup target add ${{ matrix.target }}

      - name: Build with cross (Linux)
        if: matrix.use-cross
        run: cross build --release --target ${{ matrix.target }}

      - name: Native build (macOS/Windows)
        if: ${{ !matrix.use-cross }}
        run: cargo build --release --target ${{ matrix.target }}

      - name: Install yek
        run: cargo install --path . --all-features

      # The one-minute timeout turns a hang or severe slowdown into a failure.
      - name: Run yek
        timeout-minutes: 1
        run: yek

  # Compare criterion benchmarks between the PR branch and its target branch.
  # One matrix entry per benchmark group so groups run in parallel.
  benchmark:
    name: Benchmark / ${{ matrix.benchmark_group.name }}
    if: github.event_name == 'pull_request'
    runs-on: ubuntu-latest
    strategy:
      # Let the remaining groups finish even if one of them fails.
      fail-fast: false
      matrix:
        benchmark_group:
          - group: "SingleFile_ByteMode"
            name: "Single File Byte Mode"
          - group: "SingleFile_ByteMode_Large"
            name: "Single File Byte Mode Large"
          - group: "SingleFile_TokenMode_Large"
            name: "Single File Token Mode Large"
          - group: "MultipleFiles_Small"
            name: "Multiple Files Small"
          - group: "MultipleFiles_Medium"
            name: "Multiple Files Medium"
          - group: "MultipleFiles_Large"
            name: "Multiple Files Large"
          - group: "MultipleFiles_TokenMode"
            name: "Multiple Files Token Mode"
          - group: "CustomConfig"
            name: "Custom Config"

    # Branch names are passed through the environment instead of being
    # interpolated into the scripts: `github.head_ref` is chosen by the PR
    # author, and expanding it inline would allow shell injection.
    env:
      BASE_REF: ${{ github.base_ref }}
      HEAD_REF: ${{ github.head_ref }}

    steps:
      # Full history so both the base and head branches can be checked out.
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Install Rust
        uses: dtolnay/rust-toolchain@stable

      - uses: Swatinem/rust-cache@v2
        with:
          cache-on-failure: true

      - name: Build benchmarks on target branch
        run: |
          git fetch origin "$BASE_REF"
          git checkout "$BASE_REF"
          cargo bench --bench serialization --no-run

      # Save the target-branch numbers as a criterion baseline named after
      # the base branch.
      - name: Run benchmark on target branch
        run: cargo bench --bench serialization -- --save-baseline "$BASE_REF" '${{ matrix.benchmark_group.group }}/'

      - name: Build benchmarks on PR branch
        run: |
          git checkout "$HEAD_REF"
          cargo bench --bench serialization --no-run

      # Re-run on the PR branch against the saved baseline and publish the
      # output to the job summary.
      - name: Compare benchmarks
        run: |
          cargo bench --bench serialization -- --baseline "$BASE_REF" --noise-threshold 2 '${{ matrix.benchmark_group.group }}/' > benchmark_results.md
          echo "## Benchmark Results for ${{ matrix.benchmark_group.name }}" >> "$GITHUB_STEP_SUMMARY"
          cat benchmark_results.md >> "$GITHUB_STEP_SUMMARY"

      - name: Upload benchmark results
        uses: actions/upload-artifact@v4
        with:
          name: criterion-${{ matrix.benchmark_group.group }}-results
          path: benchmark_results.md
          if-no-files-found: error
64 changes: 0 additions & 64 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -172,70 +172,6 @@ jobs:
path: release-artifacts/${{ matrix.asset_name }}
if-no-files-found: error

# Compare criterion benchmarks between the PR branch and its target branch.
# One matrix entry per benchmark group so groups run in parallel.
benchmark:
  name: Benchmark / ${{ matrix.benchmark_group.name }}
  if: github.event_name == 'pull_request'
  runs-on: ubuntu-latest
  strategy:
    matrix:
      benchmark_group:
        - group: "SingleFile_ByteMode"
          name: "Single File Byte Mode"
        - group: "SingleFile_ByteMode_Large"
          name: "Single File Byte Mode Large"
        - group: "SingleFile_TokenMode_Large"
          name: "Single File Token Mode Large"
        - group: "MultipleFiles_Small"
          name: "Multiple Files Small"
        - group: "MultipleFiles_Medium"
          name: "Multiple Files Medium"
        - group: "MultipleFiles_Large"
          name: "Multiple Files Large"
        - group: "MultipleFiles_TokenMode"
          name: "Multiple Files Token Mode"
        - group: "CustomConfig"
          name: "Custom Config"
    # Let the remaining groups finish even if one of them fails.
    fail-fast: false

  # Branch names are passed through the environment instead of being
  # interpolated into the scripts: `github.head_ref` is chosen by the PR
  # author, and expanding it inline would allow shell injection.
  env:
    BASE_REF: ${{ github.base_ref }}
    HEAD_REF: ${{ github.head_ref }}

  steps:
    # Full history so both the base and head branches can be checked out.
    - uses: actions/checkout@v4
      with:
        fetch-depth: 0

    - name: Install Rust
      uses: dtolnay/rust-toolchain@stable

    - uses: Swatinem/rust-cache@v2
      with:
        cache-on-failure: true

    - name: Build benchmarks on target branch
      run: |
        git fetch origin "$BASE_REF"
        git checkout "$BASE_REF"
        cargo bench --bench serialization --no-run

    # Save the target-branch numbers as a criterion baseline named after
    # the base branch.
    - name: Run benchmark on target branch
      run: cargo bench --bench serialization -- --save-baseline "$BASE_REF" '${{ matrix.benchmark_group.group }}/'

    - name: Build benchmarks on PR branch
      run: |
        git checkout "$HEAD_REF"
        cargo bench --bench serialization --no-run

    # Re-run on the PR branch against the saved baseline and publish the
    # output to the job summary.
    - name: Compare benchmarks
      run: |
        cargo bench --bench serialization -- --baseline "$BASE_REF" --noise-threshold 2 '${{ matrix.benchmark_group.group }}/' > benchmark_results.md
        echo "## Benchmark Results for ${{ matrix.benchmark_group.name }}" >> "$GITHUB_STEP_SUMMARY"
        cat benchmark_results.md >> "$GITHUB_STEP_SUMMARY"

    - name: Upload benchmark results
      uses: actions/upload-artifact@v4
      with:
        name: criterion-${{ matrix.benchmark_group.group }}-results
        path: benchmark_results.md
        if-no-files-found: error

release:
name: Release
needs: [test, lint, build]
Expand Down
5 changes: 5 additions & 0 deletions src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,10 @@ pub struct YekConfig {

/// Final resolved output file path (only used if not streaming)
pub output_file_full_path: Option<String>,

/// Maximum depth to search for Git commit times
#[config_arg(accept_from = "config_only", default_value = "100")]
pub max_git_depth: i32,
}

/// Provide defaults so tests or other callers can create a baseline YekConfig easily.
Expand All @@ -104,6 +108,7 @@ impl Default for YekConfig {
stream: false,
token_mode: false,
output_file_full_path: None,
max_git_depth: 100,
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ pub fn serialize_repo(config: &YekConfig) -> Result<(String, Vec<ProcessedFile>)
.par_iter()
.filter_map(|dir| {
let repo_path = Path::new(dir);
priority::get_recent_commit_times_git2(repo_path)
priority::get_recent_commit_times_git2(repo_path, config.max_git_depth as usize)
})
.flatten()
.collect::<HashMap<String, u64>>();
Expand Down
17 changes: 11 additions & 6 deletions src/priority.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use git2::Repository;
use git2;
use regex;
use serde::{Deserialize, Serialize};
use std::{collections::HashMap, path::Path};
Expand Down Expand Up @@ -61,7 +61,11 @@ pub fn compute_recentness_boost(
/// Get the commit time of the most recent change to each file using git2.
/// Returns a map from file path (relative to the repo root) → last commit Unix time.
/// If Git or .git folder is missing, returns None instead of erroring.
pub fn get_recent_commit_times_git2(repo_path: &Path) -> Option<HashMap<String, u64>> {
/// Only considers up to `max_commits` most recent commits.
pub fn get_recent_commit_times_git2(
repo_path: &Path,
max_commits: usize,
) -> Option<HashMap<String, u64>> {
// Walk up until you find a .git folder but not higher than the base of the given repo_path
let mut current_path = repo_path.to_path_buf();
while current_path.components().count() > 1 {
Expand All @@ -71,7 +75,7 @@ pub fn get_recent_commit_times_git2(repo_path: &Path) -> Option<HashMap<String,
current_path = current_path.parent()?.to_path_buf();
}

let repo = match Repository::open(&current_path) {
let repo = match git2::Repository::open(&current_path) {
Ok(repo) => repo,
Err(_) => {
debug!("Not a Git repository or unable to open: {:?}", current_path);
Expand All @@ -97,14 +101,15 @@ pub fn get_recent_commit_times_git2(repo_path: &Path) -> Option<HashMap<String,
revwalk.set_sorting(git2::Sort::TIME).ok()?;

let mut commit_times = HashMap::new();
for oid in revwalk {
let oid = match oid {
for oid_result in revwalk.take(max_commits) {
let oid = match oid_result {
Ok(oid) => oid,
Err(e) => {
debug!("Error during revwalk iteration: {:?}", e);
continue;
}
};

let commit = match repo.find_commit(oid) {
Ok(commit) => commit,
Err(e) => {
Expand All @@ -119,8 +124,8 @@ pub fn get_recent_commit_times_git2(repo_path: &Path) -> Option<HashMap<String,
continue;
}
};
let time = commit.time().seconds() as u64;

let time = commit.time().seconds() as u64;
tree.walk(git2::TreeWalkMode::PreOrder, |root, entry| {
if let Some(name) = entry.name() {
if entry.kind() == Some(git2::ObjectType::Blob) {
Expand Down
8 changes: 4 additions & 4 deletions tests/priority_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ mod priority_tests {
#[test]
fn test_get_recent_commit_times_no_git() {
let dir = tempdir().unwrap();
let times = get_recent_commit_times_git2(dir.path());
let times = get_recent_commit_times_git2(dir.path(), 100);
assert!(times.is_none());
}

Expand Down Expand Up @@ -216,7 +216,7 @@ mod priority_tests {
.output()
.unwrap();

let times = get_recent_commit_times_git2(repo_path).unwrap();
let times = get_recent_commit_times_git2(repo_path, 100).unwrap();
assert_eq!(times.len(), 2);
assert!(times.contains_key("file1.txt"));
assert!(times.contains_key("file2.txt"));
Expand All @@ -234,7 +234,7 @@ mod priority_tests {
.output()
.unwrap();

let times = get_recent_commit_times_git2(repo_path);
let times = get_recent_commit_times_git2(repo_path, 100);
assert!(times.is_none(), "Expected no times for empty repo");
}

Expand All @@ -254,7 +254,7 @@ mod priority_tests {
fs::remove_dir_all(repo_path.join(".git")).unwrap();
fs::create_dir(repo_path.join(".git")).unwrap(); // Create an empty directory

let times = get_recent_commit_times_git2(repo_path);
let times = get_recent_commit_times_git2(repo_path, 100);
assert!(times.is_none(), "Expected no times on Git failure");
}
}
Loading