Skip to content

Commit f8c5d9c

Browse files
committedAug 22, 2024·
fix similarity detection
track all removed tokens and not the most recent batch only
1 parent 25a3f1b commit f8c5d9c

File tree

2 files changed

+36
-66
lines changed

2 files changed

+36
-66
lines changed
 

‎gix-diff/src/rewrites/tracker.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -561,10 +561,10 @@ mod diff {
561561
type Out = usize;
562562

563563
fn process_change(&mut self, before: Range<u32>, _after: Range<u32>) {
564-
self.removed_bytes = self.input.before[before.start as usize..before.end as usize]
564+
self.removed_bytes += self.input.before[before.start as usize..before.end as usize]
565565
.iter()
566566
.map(|token| self.input.interner[*token].len())
567-
.sum();
567+
.sum::<usize>();
568568
}
569569

570570
fn finish(self) -> Self::Out {

‎gix/tests/object/tree/diff.rs

+34-64
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,7 @@ fn tree_named(repo: &gix::Repository, rev_spec: impl AsRef<str>) -> gix::Tree {
175175
}
176176

177177
mod track_rewrites {
178+
use std::collections::HashMap;
178179
use std::convert::Infallible;
179180

180181
use gix::{
@@ -398,34 +399,27 @@ mod track_rewrites {
398399
// It's like the fixture doesn't get setup correctly.
399400
return Ok(());
400401
}
402+
403+
let mut expected = HashMap::<&BStr, (&BStr, u32)>::new();
404+
expected.insert(
405+
"cli/src/commands/file/chmod.rs".into(),
406+
("cli/src/commands/chmod.rs".into(), 90),
407+
);
408+
expected.insert(
409+
"cli/src/commands/file/print.rs".into(),
410+
("cli/src/commands/cat.rs".into(), 90),
411+
);
412+
expected.insert(
413+
"cli/tests/test_file_chmod_command.rs".into(),
414+
("cli/tests/test_chmod_command.rs".into(), 88),
415+
);
416+
expected.insert(
417+
"cli/tests/test_file_print_command.rs".into(),
418+
("cli/tests/test_cat_command.rs".into(), 77),
419+
);
420+
401421
let from = tree_named(&repo, "@~1");
402422
let to = tree_named(&repo, "@");
403-
404-
let mut count = 0;
405-
let expected_location = [
406-
"cli",
407-
"cli/src",
408-
"cli/tests",
409-
"cli/src/commands",
410-
"cli/src/commands/file",
411-
"cli/src/commands/file/print.rs",
412-
"cli/tests/test_file_print_command.rs",
413-
"cli/src/commands/file/chmod.rs",
414-
"cli/src/commands/file/mod.rs",
415-
"cli/tests/test_file_chmod_command.rs",
416-
"CHANGELOG.md",
417-
"cli/src/commands/mod.rs",
418-
"cli/tests/cli-reference@.md.snap",
419-
"cli/tests/runner.rs",
420-
"cli/tests/test_acls.rs",
421-
"cli/tests/test_diffedit_command.rs",
422-
"cli/tests/test_fix_command.rs",
423-
"cli/tests/test_global_opts.rs",
424-
"cli/tests/test_move_command.rs",
425-
"cli/tests/test_new_command.rs",
426-
"cli/tests/test_squash_command.rs",
427-
"cli/tests/test_unsquash_command.rs",
428-
];
429423
let out = from
430424
.changes()?
431425
.track_path()
@@ -441,48 +435,24 @@ mod track_rewrites {
441435
.into(),
442436
)
443437
.for_each_to_obtain_tree(&to, |change| -> Result<_, Infallible> {
444-
// TODO: all rewrites here don't match what Git produces right now.
445-
// dbg!(change);
446-
assert_eq!(change.location, expected_location[count]);
447-
match count {
448-
0 | 1 | 2 | 3 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 => {
449-
assert!(
450-
matches!(change.event, Event::Modification { .. }),
451-
"ignore modifications"
452-
);
453-
}
454-
4 => {
455-
assert!(matches!(change.event, Event::Addition { entry_mode, .. } if entry_mode.is_tree()));
456-
assert_eq!(change.location, "cli/src/commands/file");
457-
}
458-
5 => {
459-
assert_eq!(change.location, "cli/src/commands/file/print.rs");
460-
assert!(matches!(change.event, Event::Rewrite {source_location, ..} if source_location == "cli/tests/test_cat_command.rs"));
461-
}
462-
6 => {
463-
assert_eq!(change.location, "cli/tests/test_file_print_command.rs");
464-
assert!(matches!(change.event, Event::Rewrite {source_location, ..} if source_location == "cli/tests/test_chmod_command.rs"));
465-
}
466-
7 => {
467-
assert_eq!(change.location, "cli/src/commands/file/chmod.rs");
468-
assert!(matches!(change.event, Event::Rewrite {source_location, ..} if source_location == "cli/src/commands/chmod.rs"));
469-
}
470-
8 => {
471-
assert_eq!(change.location, "cli/src/commands/file/mod.rs");
472-
assert!(matches!(change.event, Event::Rewrite {source_location, ..} if source_location == "cli/src/commands/cat.rs"));
473-
}
474-
9 => {
475-
assert_eq!(change.location, "cli/tests/test_file_chmod_command.rs");
476-
assert!(matches!(change.event, Event::Rewrite {source_location, ..} if source_location == "cli/tests/test_immutable_commits.rs"));
477-
}
478-
n => unreachable!("unexpected call: {n}"),
479-
};
480-
count += 1;
438+
if let Event::Rewrite {
439+
source_location,
440+
diff: Some(diff),
441+
..
442+
} = change.event
443+
{
444+
// Round to percentage points to avoid floating point error.
445+
let similarity = (diff.similarity * 100.0) as u32;
446+
let v = expected.remove(change.location);
447+
assert_eq!(v, Some((source_location, similarity)));
448+
}
481449
Ok(Default::default())
482450
})?;
451+
452+
assert_eq!(expected, HashMap::new());
483453
let out = out.rewrites.expect("tracking enabled");
484454
assert_eq!(
485-
out.num_similarity_checks, 8,
455+
out.num_similarity_checks, 21,
486456
"this probably increases once the algorithm improves"
487457
);
488458
assert_eq!(

0 commit comments

Comments
 (0)
Please sign in to comment.