Skip to content

Commit 37894ad

Browse files
committed
git: Use merge-tree in patchsets
Previously patchsets used a hacky invocation of update-index to overlay changed files from old tree to new tree. In addition to being slower than necessary, this has the downside of showing upstream changes in the diff, even those that were not changed in the given review upload. Using git merge-tree for this instead is cleaner, faster, and better. We do this by "cherry-picking" the old branch with git merge-tree, but providing -X ours as the strategy. This provides a tree that looks like the new base with the old changes applied. For cases where the old changes don't apply cleanly, the ours strategy prevents conflicts by taking the version in the old tree. Although this can be a bit confusing when looking at the file as a whole, it provides a succinct way to view a difference between the old version and new version. Fixes: #183
1 parent 920df71 commit 37894ad

File tree

1 file changed

+40
-72
lines changed

1 file changed

+40
-72
lines changed

revup/git.py

+40-72
Original file line numberDiff line numberDiff line change
@@ -558,20 +558,6 @@ async def get_best_base_branch(
558558
ret = c
559559
return ret
560560

561-
async def ls_files(
562-
self, show_conflicts: bool = False, env: Optional[Dict[str, str]] = None
563-
) -> List[Tuple[GitTreeHash, int, str]]:
564-
args = ["ls-files"]
565-
if show_conflicts:
566-
args.append("-u")
567-
else:
568-
args.append("-s")
569-
raw = await self.git(*args, env=env)
570-
return [
571-
(GitTreeHash(m.group("hash")), int(m.group("stage")), m.group("path"))
572-
for m in RE_LS_FILES_LINE.finditer(raw[1])
573-
]
574-
575561
async def commit_tree(self, commit_info: CommitHeader) -> GitCommitHash:
576562
"""
577563
Run git commit-tree with the args in commit_info.
@@ -634,19 +620,28 @@ async def get_diff_summary(
634620

635621
async def merge_tree_commit(
636622
self,
637-
branch1: GitCommitHash,
638-
branch2: GitCommitHash,
623+
tree1: GitCommitHash,
624+
tree2: GitCommitHash,
639625
new_commit_info: CommitHeader,
640-
merge_base: Optional[GitCommitHash] = None,
626+
merge_base: GitCommitHash,
627+
strategy: Optional[str] = None,
628+
ignore_conflicts: bool = False,
641629
) -> GitCommitHash:
642630
"""
643631
Perform a combined git merge-tree and commit-tree, returning a git commit hash. Raises
644632
GitConflictException if there are conflicts while merging the trees.
645633
"""
646-
args = ["merge-tree", "--write-tree", "--messages", "-z"]
647-
if merge_base:
648-
args.extend(["--merge-base", merge_base])
649-
args.extend([branch1, branch2])
634+
args = [
635+
"merge-tree",
636+
"--write-tree",
637+
"--messages",
638+
"-z",
639+
"--merge-base",
640+
merge_base,
641+
]
642+
if strategy is not None:
643+
args.extend([f"-X{strategy}"])
644+
args.extend([tree1, tree2])
650645

651646
ret, stdout = await self.git(*args, no_config=True, raiseonerror=False)
652647

@@ -655,7 +650,7 @@ async def merge_tree_commit(
655650
subsections = [s.split("\0") for s in sections]
656651
tree_hash = GitTreeHash(subsections[0][0])
657652

658-
if ret == 0:
653+
if ret == 0 or ignore_conflicts:
659654
new_commit_info.tree = tree_hash
660655
return await self.commit_tree(new_commit_info)
661656
elif ret == 1:
@@ -679,7 +674,7 @@ async def merge_tree_commit(
679674
i += num_paths + 3
680675
raise e
681676
else:
682-
raise RuntimeError(f"Unexpected / unknown error in git merge-tree {ret}")
677+
raise RuntimeError(f"Unexpected / unknown error in git {args} {ret}")
683678

684679
async def dump_conflict(self, e: GitConflictException) -> None:
685680
"""
@@ -769,58 +764,31 @@ async def make_virtual_diff_target(
769764
) -> GitCommitHash:
770765
"""
771766
Return a commit (optionally on top of parent) that provides a way to get the diff from old
772-
head to new head while accounting for the fact that new base might have been rebased since
773-
old base. This new commit makes an effort to include only files that were actually changed,
774-
while excluding files that were changed upstream as part of the rebase.
775-
776-
We do this by resetting any files that changed in the old_base->old_head diff to their
777-
old_head versions in new_base. The returned tree will thus have the following properties
778-
when diffed against new_head.
779-
780-
For files not touched by old or new, ret->new_head won't show any diff. This is primarily
781-
what allows us to exclude upstream files.
782-
For files touched by both old and new, ret->new_head will show the entire old_head->new_head
783-
diff. This will include upstream changes for these files, which are difficult to untangle.
784-
For files not touched in old but touched by new (regardless of whether it existed in
785-
old_base), diff will show new_base->new_head.
786-
For files touched in old but not touched in new, there are 2 cases. If file exists in
787-
new_base, diff will show old_head->new_base. If file doesn't exist in new_base, diff will
788-
show old_head->(deleted) which isn't perfect since technically new_base->new_head did not
789-
delete the file, but probably the least confusing of the alternatives of showing no diff and
790-
showing the old_head->old_base diff.
791-
"""
792-
new_index: List[str] = []
793-
794-
# Transform diff-tree raw output to ls-files style output, taking only the new version
795-
for m in RE_RAW_DIFF_TREE_LINE.finditer(
796-
await self.git_stdout("diff-tree", "-r", "--no-commit-id", "--raw", old_base, old_head)
797-
):
798-
new_index.append(f"{m.group('new_mode')} {m.group('new_hash')} 0\t{m.group('path')}")
799-
800-
if not new_index:
801-
# No files were actually changed, so no diff needs to be applied to new_base
802-
return new_base
803-
804-
temp_index_path = self.get_scratch_dir() + "/index.temp"
805-
git_env = {
806-
"GIT_INDEX_FILE": temp_index_path,
807-
}
808-
shutil.copy(f"{self.git_dir}/index", temp_index_path)
809-
await self.git("reset", "-q", "--no-refresh", new_base, "--", ":/", env=git_env)
810-
await self.git(
811-
"update-index",
812-
"--index-info",
813-
input_str="\n".join(new_index),
814-
env=git_env,
815-
)
816-
817-
tree = GitTreeHash(await self.git_stdout("write-tree", env=git_env))
818-
new_commit_info = CommitHeader(tree, [parent] if parent else [])
767+
head to new head while accounting for the fact that the base might have been rebased
768+
769+
We do this by "cherry-picking" the old branch with git merge-tree, but providing -X ours as
770+
the strategy. This provides a tree that looks like the new base with old changes applied.
771+
For cases where the old changes don't apply cleanly, the ours strategy prevents conflicts by
772+
taking the version in the old tree. Although this can be a bit confusing when looking at the
773+
file as a whole, it provides a succinct way to view a difference between the old version and
774+
new version.
775+
776+
Note that providing -X ours does not actually ensure the merge succeeds without conflicts,
777+
since there can still be add / remove conflicts, file / dir / symlink conflicts, and mode
778+
conflicts among other more subtle ones. We currently handle this by ignoring them and
779+
committing whatever git has decided to leave in the tree. Since this is purely informative,
780+
deciding what to display in these corner cases can be tweaked as needed.
781+
"""
782+
new_commit_info = CommitHeader(GitTreeHash(""), [parent] if parent else [])
819783
new_commit_info.commit_msg = (
820-
f"revup virtual diff target\n\n{old_base}\n{old_head}\n{new_base}\n{new_head}"
784+
f"revup virtual diff target\n\n{old_base}\n{old_head}\n{new_base}\n{new_head}\n\n"
785+
"revup uses this branch internally to generate github viewable diffs. The commits and "
786+
"diffs between commits are not meaningful or useful."
821787
)
822788

823-
return await self.commit_tree(new_commit_info)
789+
return await self.merge_tree_commit(
790+
old_head, new_base, new_commit_info, old_base, "ours", ignore_conflicts=True
791+
)
824792

825793
async def soft_reset(self, new_commit: GitCommitHash, env: Dict) -> None:
826794
await self.git("reset", "--soft", new_commit, env=env)

0 commit comments

Comments
 (0)