From 23d2bed707e0c0cb164f3f279849536688ffa4c1 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 6 Feb 2025 09:58:34 +0100 Subject: [PATCH 1/2] feat: make internal `repo` fields public for ease of use. That way, functions or methods taking such a type as argument have access to the underlying repository so it doesn't need to be passed as separate argument. --- gix/src/filter.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gix/src/filter.rs b/gix/src/filter.rs index 92eafcc2b5e..7c4f93e6f29 100644 --- a/gix/src/filter.rs +++ b/gix/src/filter.rs @@ -70,7 +70,8 @@ pub mod pipeline { pub struct Pipeline<'repo> { inner: gix_filter::Pipeline, cache: gix_worktree::Stack, - repo: &'repo Repository, + /// The repository this pipeline is associated with. + pub repo: &'repo Repository, } /// Lifecycle From 70ebd5f4128e7dcf83175ca05a70741434b71379 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 6 Feb 2025 10:03:38 +0100 Subject: [PATCH 2/2] feat: add `filter::Pipeline::worktree_file_to_object()`. That way it's easier to correctly add whole files into the object database. --- gix/src/filter.rs | 86 ++++++++++++++++++- .../fixtures/generated-archives/.gitignore | 3 +- .../fixtures/repo_with_untracked_files.sh | 9 ++ gix/tests/gix/repository/filter.rs | 49 ++++++++++- 4 files changed, 141 insertions(+), 6 deletions(-) create mode 100755 gix/tests/fixtures/repo_with_untracked_files.sh diff --git a/gix/src/filter.rs b/gix/src/filter.rs index 7c4f93e6f29..5e5e238f5f0 100644 --- a/gix/src/filter.rs +++ b/gix/src/filter.rs @@ -19,7 +19,7 @@ pub mod pipeline { pub mod options { use crate::{bstr::BString, config}; - /// The error returned by [Pipeline::options()][crate::filter::Pipeline::options()]. + /// The error returned by [Pipeline::options()](crate::filter::Pipeline::options()). 
#[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum Error { @@ -39,7 +39,7 @@ pub mod pipeline { /// pub mod convert_to_git { - /// The error returned by [Pipeline::convert_to_git()][crate::filter::Pipeline::convert_to_git()]. + /// The error returned by [Pipeline::convert_to_git()](crate::filter::Pipeline::convert_to_git()). #[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum Error { @@ -52,7 +52,7 @@ pub mod pipeline { /// pub mod convert_to_worktree { - /// The error returned by [Pipeline::convert_to_worktree()][crate::filter::Pipeline::convert_to_worktree()]. + /// The error returned by [Pipeline::convert_to_worktree()](crate::filter::Pipeline::convert_to_worktree()). #[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum Error { @@ -62,6 +62,25 @@ pub mod pipeline { Convert(#[from] gix_filter::pipeline::convert::to_worktree::Error), } } + + /// + pub mod worktree_file_to_object { + use std::path::PathBuf; + + /// The error returned by [Pipeline::worktree_file_to_object()](crate::filter::Pipeline::worktree_file_to_object()). + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error("Cannot add worktree files in bare repositories")] + MissingWorktree, + #[error("Failed to perform IO for object creation for '{}'", path.display())] + IO { source: std::io::Error, path: PathBuf }, + #[error(transparent)] + WriteBlob(#[from] crate::object::write::Error), + #[error(transparent)] + ConvertToGit(#[from] crate::filter::pipeline::convert_to_git::Error), + } + } } /// A git pipeline for transforming data *to-git* and *to-worktree*, based @@ -133,7 +152,7 @@ impl Pipeline<'_> { /// Convert a `src` stream (to be found at `rela_path`, a repo-relative path) to a representation suitable for storage in `git` /// by using all attributes at `rela_path` and configuration of the repository to know exactly which filters apply. 
/// `index` is used in particularly rare cases where the CRLF filter in auto-mode tries to determine whether to apply itself, - /// and it should match the state used when [instantiating this instance][Self::new()]. + /// and it should match the state used when [instantiating this instance](Self::new()). /// Note that the return-type implements [`std::io::Read`]. pub fn convert_to_git( &mut self, @@ -187,6 +206,65 @@ impl Pipeline<'_> { )?) } + /// Add the worktree file at `rela_path` to the object database and return its `(id, entry, symlink_metadata)` for use in a tree or in the index, for instance. + /// + /// `index` is used in particularly rare cases where the CRLF filter in auto-mode tries to determine whether to apply itself, + /// and it should match the state used when [instantiating this instance](Self::new()). + /// + /// Return `Ok(None)` if the file didn't exist in the worktree, or if it was of an untrackable type. + pub fn worktree_file_to_object( + &mut self, + rela_path: &BStr, + index: &gix_index::State, + ) -> Result< + Option<(gix_hash::ObjectId, gix_object::tree::EntryKind, std::fs::Metadata)>, + pipeline::worktree_file_to_object::Error, + > { + use pipeline::worktree_file_to_object::Error; + + let rela_path_as_path = gix_path::from_bstr(rela_path); + let repo = self.repo; + let worktree_dir = repo.work_dir().ok_or(Error::MissingWorktree)?; + let path = worktree_dir.join(&rela_path_as_path); + let md = match std::fs::symlink_metadata(&path) { + Ok(md) => md, + Err(err) => { + if gix_fs::io_err::is_not_found(err.kind(), err.raw_os_error()) { + return Ok(None); + } else { + return Err(Error::IO { source: err, path }); + } + } + }; + let (id, kind) = if md.is_symlink() { + let target = std::fs::read_link(&path).map_err(|source| Error::IO { source, path })?; + let id = repo.write_blob(gix_path::into_bstr(target).as_ref())?; + (id, gix_object::tree::EntryKind::Link) + } else if md.is_file() { + use gix_filter::pipeline::convert::ToGitOutcome; + + let 
file = std::fs::File::open(&path).map_err(|source| Error::IO { source, path })?; + let file_for_git = self.convert_to_git(file, rela_path_as_path.as_ref(), index)?; + let id = match file_for_git { + ToGitOutcome::Unchanged(mut file) => repo.write_blob_stream(&mut file)?, + ToGitOutcome::Buffer(buf) => repo.write_blob(buf)?, + ToGitOutcome::Process(mut read) => repo.write_blob_stream(&mut read)?, + }; + + let kind = if gix_fs::is_executable(&md) { + gix_object::tree::EntryKind::BlobExecutable + } else { + gix_object::tree::EntryKind::Blob + }; + (id, kind) + } else { + // This is probably a type-change to something we can't track. + return Ok(None); + }; + + Ok(Some((id.detach(), kind, md))) + } + /// Retrieve the static context that is made available to the process filters. /// /// The context set here is relevant for the [`convert_to_git()`][Self::convert_to_git()] and diff --git a/gix/tests/fixtures/generated-archives/.gitignore b/gix/tests/fixtures/generated-archives/.gitignore index 5b9b8472077..9279e744abb 100644 --- a/gix/tests/fixtures/generated-archives/.gitignore +++ b/gix/tests/fixtures/generated-archives/.gitignore @@ -7,4 +7,5 @@ /make_core_worktree_repo.tar /make_signatures_repo.tar /make_diff_repos.tar -/make_submodule_with_worktree.tar \ No newline at end of file +/make_submodule_with_worktree.tar +/repo_with_untracked_files.tar \ No newline at end of file diff --git a/gix/tests/fixtures/repo_with_untracked_files.sh b/gix/tests/fixtures/repo_with_untracked_files.sh new file mode 100755 index 00000000000..e3bca58c94b --- /dev/null +++ b/gix/tests/fixtures/repo_with_untracked_files.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +set -eu -o pipefail + +git init -q +echo content >file +ln -s file link + +echo binary >exe && chmod +x exe +mkfifo fifo diff --git a/gix/tests/gix/repository/filter.rs b/gix/tests/gix/repository/filter.rs index ea555b9b5a3..eef0ec8e42b 100644 --- a/gix/tests/gix/repository/filter.rs +++ b/gix/tests/gix/repository/filter.rs @@ -10,7 
+10,7 @@ fn pipeline_in_nonbare_repo_without_index() -> crate::Result { use gix::bstr::ByteSlice; use gix_filter::driver::apply::Delay; -use crate::util::{named_repo, named_subrepo_opts}; +use crate::util::{hex_to_id, named_repo, named_subrepo_opts}; #[test] fn pipeline_in_repo_without_special_options() -> crate::Result { @@ -31,6 +31,53 @@ fn pipeline_in_repo_without_special_options() -> crate::Result { Ok(()) } +#[test] +#[cfg(unix)] +fn pipeline_worktree_file_to_object() -> crate::Result { + let repo = named_repo("repo_with_untracked_files.sh")?; + let (mut pipe, index) = repo.filter_pipeline(None)?; + fn take_two<A, B, C>(t: Option<(A, B, C)>) -> Option<(A, B)> { + t.map(|t| (t.0, t.1)) + } + + assert_eq!( + take_two(pipe.worktree_file_to_object("file".into(), &index)?), + Some(( + hex_to_id("d95f3ad14dee633a758d2e331151e950dd13e4ed"), + gix::object::tree::EntryKind::Blob + )) + ); + assert_eq!( + take_two(pipe.worktree_file_to_object("link".into(), &index)?), + Some(( + hex_to_id("1a010b1c0f081b2e8901d55307a15c29ff30af0e"), + gix::object::tree::EntryKind::Link + )) + ); + assert_eq!( + take_two(pipe.worktree_file_to_object("exe".into(), &index)?), + Some(( + hex_to_id("a9128c283485202893f5af379dd9beccb6e79486"), + gix::object::tree::EntryKind::BlobExecutable + )) + ); + assert_eq!( + take_two(pipe.worktree_file_to_object("missing".into(), &index)?), + None, + "Missing files are specifically typed and no error" + ); + assert!( + repo.work_dir().expect("non-bare").join("fifo").exists(), + "there is a fifo" + ); + assert_eq!( + take_two(pipe.worktree_file_to_object("fifo".into(), &index)?), + None, + "untrackable entries are just ignored as if they didn't exist" + ); + Ok(()) +} + #[test] fn pipeline_with_autocrlf() -> crate::Result { let repo = named_repo("make_config_repo.sh")?;