Implement chunk comparison and selective extraction for borg extract (#5638) #8632
@@ -719,6 +719,94 @@ def extract_helper(self, item, path, hlm, *, dry_run=False):
            # In this case, we *want* to extract twice, because there is no other way.
            pass
    def compare_and_extract_chunks(self, item, fs_path, *, st, pi=None):
        """Compare file chunks and patch if needed. Returns True if patching succeeded."""
        if st is None:
            return False
        try:
            # First pass: Build fs chunks list
            fs_chunks = []
            with backup_io("open"):
                fs_file = open(fs_path, "rb")
            with fs_file:
                for chunk in item.chunks:
                    with backup_io("read"):
                        data = fs_file.read(chunk.size)
                    fs_chunks.append(ChunkListEntry(id=self.key.id_hash(data), size=len(data)))

            # Compare chunks and collect needed chunk IDs
            needed_chunks = []
            for fs_chunk, item_chunk in zip(fs_chunks, item.chunks):
                if fs_chunk != item_chunk:
                    needed_chunks.append(item_chunk)

            # Fetch all needed chunks and iterate through ALL of them
            chunk_data_iter = self.pipeline.fetch_many([chunk.id for chunk in needed_chunks], ro_type=ROBJ_FILE_STREAM)

            # Second pass: Update file
            with backup_io("open"):
                fs_file = open(fs_path, "rb+")
            for fs_chunk, item_chunk in zip(fs_chunks, item.chunks):
                if fs_chunk == item_chunk:
                    with backup_io("seek"):
                        fs_file.seek(item_chunk.size, os.SEEK_CUR)
                    size = item_chunk.size
                else:
                    chunk_data = next(chunk_data_iter)
                    with backup_io("write"):
                        fs_file.write(chunk_data)
                    size = len(chunk_data)
                if pi:
                    pi.show(increase=size, info=[remove_surrogates(item.path)])

            with backup_io("truncate_and_attrs"):
                pos = item_chunks_size = fs_file.tell()
                fs_file.truncate(pos)
                fs_file.flush()

                if not self.noacls:
                    try:
                        # Clear ACLs by setting empty ACL entries
                        acl_set(fs_path, {"acl_entries": []}, self.numeric_ids, fd=fs_file.fileno())
                    except OSError as e:
                        if e.errno != errno.ENOTSUP:  # Ignore if ACLs not supported
                            raise
                if not self.noflags:
                    try:
                        # Clear all BSD flags
                        set_flags(fs_path, 0, fd=fs_file.fileno())
                    except OSError:
                        pass
                if not self.noxattrs:
                    try:
                        # Clear ALL xattrs
                        attrs = xattr.listxattr(fs_file.fileno(), follow_symlinks=False)
                        for attr in attrs:
                            try:
                                xattr.setxattr(fs_file.fileno(), attr, b"", follow_symlinks=False)
Member: that does not do what you intended. setting an xattr to an empty bytestring is not the same as removing an xattr.

Member: you are calling …
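To illustrate the reviewer's point, here is a minimal standalone sketch using the stdlib os xattr functions rather than borg's platform wrapper (it assumes Linux and a filesystem that supports user.* xattrs; the attribute name is made up for the example):

    import os
    import tempfile

    # Setting an xattr to b"" leaves the attribute in place (with an empty value);
    # os.removexattr() is what actually deletes it.
    with tempfile.NamedTemporaryFile(dir=".") as f:
        os.setxattr(f.name, "user.demo", b"some value")
        os.setxattr(f.name, "user.demo", b"")      # "cleared", but still present
        print(os.listxattr(f.name))                # ['user.demo']
        os.removexattr(f.name, "user.demo")        # actually removed
        print(os.listxattr(f.name))                # []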
                            except OSError as e:
                                if e.errno != xattr.ENOATTR:  # Ignore if already removed
                                    raise
                    except OSError as e:
                        if e.errno != errno.ENOTSUP:  # Ignore if xattrs not supported
                            raise
                self.restore_attrs(fs_path, item, fd=fs_file.fileno())
Member: you could create a method clear_attrs close to the definition of restore_attrs and move that code you added above to there.
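A hypothetical sketch of such a clear_attrs helper (the name, signature, and the use of stdlib os.listxattr/os.removexattr instead of borg's xattr wrapper are assumptions for illustration, not part of this PR). ACL clearing is omitted here because, as the comment at the bottom of the page suggests, it would have to call acl_set the way borg's platform code actually expects:

    def clear_attrs(self, path, fd=None):
        """Hypothetical helper: remove flags and xattrs so restore_attrs() starts from a clean slate."""
        if not self.noflags:
            try:
                set_flags(path, 0, fd=fd)
            except OSError:
                pass
        if not self.noxattrs:
            target = fd if fd is not None else path
            try:
                for name in os.listxattr(target):
                    os.removexattr(target, name)  # remove the attribute instead of overwriting it with b""
            except OSError as e:
                if e.errno != errno.ENOTSUP:
                    raise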
            if "size" in item:
                item_size = item.size
                if item_size != item_chunks_size:
                    raise BackupError(f"Size inconsistency detected: size {item.size}, chunks size {item_chunks_size}")

            has_damaged_chunks = "chunks_healthy" in item
            if has_damaged_chunks:
                raise BackupError("File has damaged (all-zero) chunks. Try running borg check --repair.")

            return True
        except OSError:
            return False

    def extract_item(
        self,
        item,
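The in-place patching above hinges on ChunkListEntry equality: a stretch of the existing file counts as already correct only if both its hash (id) and its size match the archived chunk entry. A minimal standalone sketch of that decision, with hashlib.sha256 standing in for the repository's keyed id_hash (an assumption made only for this example):

    import hashlib
    from collections import namedtuple

    ChunkListEntry = namedtuple("ChunkListEntry", "id size")  # same two fields as used above

    def id_hash(data: bytes) -> bytes:
        # stand-in for self.key.id_hash(); borg actually uses a keyed hash/MAC
        return hashlib.sha256(data).digest()

    def chunks_to_fetch(fs_data_per_chunk, archived_chunks):
        """Return the archived chunks whose on-disk counterpart differs by id or size."""
        needed = []
        for data, archived in zip(fs_data_per_chunk, archived_chunks):
            if ChunkListEntry(id=id_hash(data), size=len(data)) != archived:
                needed.append(archived)
        return needed

    blob = b"hello world"
    archived = [ChunkListEntry(id=id_hash(blob), size=len(blob))]
    assert chunks_to_fetch([blob], archived) == []                   # unchanged data: nothing to fetch
    assert chunks_to_fetch([b"HELLO world"], archived) == archived   # changed data: fetch that chunk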
@@ -800,6 +888,9 @@ def same_item(item, st):
            st = os.stat(path, follow_symlinks=False)
            if continue_extraction and same_item(item, st):
                return  # done! we already have fully extracted this file in a previous run.
            if stat.S_ISREG(st.st_mode) and not continue_extraction:
                if self.compare_and_extract_chunks(item, path, st=st, pi=pi):
                    return
            elif stat.S_ISDIR(st.st_mode):
                os.rmdir(path)
            else:
@@ -821,6 +912,7 @@ def make_parent(path):
            with self.extract_helper(item, path, hlm) as hardlink_set:
                if hardlink_set:
                    return

                with backup_io("open"):
                    fd = open(path, "wb")
                with fd:
Member: you didn't read the acl_set code, did you?