-
Couldn't load subscription status.
- Fork 4.3k
Add support for decompressing .7z files #27269
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
fa0051d
86270f4
7f9c155
4950332
8e812e8
8603c6e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,105 @@ | ||
| package com.google.devtools.build.lib.bazel.repository.decompressor; | ||
|
|
||
| import static java.nio.charset.StandardCharsets.UTF_8; | ||
|
|
||
| import com.google.common.io.ByteStreams; | ||
| import com.google.devtools.build.lib.bazel.repository.RepositoryFunctionException; | ||
| import com.google.devtools.build.lib.bazel.repository.decompressor.DecompressorValue.Decompressor; | ||
| import com.google.devtools.build.lib.vfs.Path; | ||
| import com.google.devtools.build.lib.vfs.PathFragment; | ||
| import org.apache.commons.compress.archivers.sevenz.SevenZArchiveEntry; | ||
| import org.apache.commons.compress.archivers.sevenz.SevenZFile; | ||
|
|
||
| import javax.annotation.Nullable; | ||
| import java.io.IOException; | ||
| import java.io.InputStream; | ||
| import java.io.OutputStream; | ||
| import java.util.HashSet; | ||
| import java.util.Map; | ||
| import java.util.Optional; | ||
| import java.util.Set; | ||
|
|
||
| /** | ||
| * Creates a repository by decompressing a 7-zip file. This implementation generally follows the | ||
| * logic from {@link ZipDecompressor} with the exception that the 7z format does not support file | ||
| * permissions or symbolic links. | ||
| */ | ||
| public class SevenZDecompressor implements Decompressor { | ||
| public static final Decompressor INSTANCE = new SevenZDecompressor(); | ||
|
|
||
| /** Decompresses the file to directory {@link DecompressorDescriptor#destinationPath()} */ | ||
| @Override | ||
| @Nullable | ||
| public Path decompress(DecompressorDescriptor descriptor) | ||
| throws IOException, RepositoryFunctionException, InterruptedException { | ||
| Path destinationDirectory = descriptor.destinationPath(); | ||
| Optional<String> prefix = descriptor.prefix(); | ||
| Map<String, String> renameFiles = descriptor.renameFiles(); | ||
| boolean foundPrefix = false; | ||
|
|
||
| try (SevenZFile sevenZFile = | ||
| SevenZFile.builder().setFile(descriptor.archivePath().getPathFile()).get()) { | ||
| Iterable<SevenZArchiveEntry> entries = sevenZFile.getEntries(); | ||
| for (SevenZArchiveEntry entry : entries) { | ||
| String entryName = entry.getName(); | ||
| entryName = renameFiles.getOrDefault(entryName, entryName); | ||
| StripPrefixedPath entryPath = | ||
| StripPrefixedPath.maybeDeprefix(entryName.getBytes(UTF_8), prefix); | ||
| foundPrefix = foundPrefix || entryPath.foundPrefix(); | ||
| if (entryPath.skip()) { | ||
| continue; | ||
| } | ||
| extract7zEntry(sevenZFile, entry, destinationDirectory, entryPath.getPathFragment()); | ||
| } | ||
|
|
||
| if (prefix.isPresent() && !foundPrefix) { | ||
| Set<String> prefixes = new HashSet<>(); | ||
| for (SevenZArchiveEntry entry : entries) { | ||
| StripPrefixedPath entryPath = | ||
| StripPrefixedPath.maybeDeprefix(entry.getName().getBytes(UTF_8), Optional.empty()); | ||
| CouldNotFindPrefixException.maybeMakePrefixSuggestion(entryPath.getPathFragment()) | ||
| .ifPresent(prefixes::add); | ||
| } | ||
| throw new CouldNotFindPrefixException(prefix.get(), prefixes); | ||
| } | ||
| } | ||
| return destinationDirectory; | ||
| } | ||
|
|
||
| private static void extract7zEntry( | ||
| SevenZFile sevenZFile, | ||
| SevenZArchiveEntry entry, | ||
| Path destinationDirectory, | ||
| PathFragment strippedRelativePath) | ||
| throws IOException, InterruptedException { | ||
| if (strippedRelativePath.isAbsolute()) { | ||
| throw new IOException( | ||
| String.format( | ||
| "Failed to extract %s, 7-zipped paths cannot be absolute", strippedRelativePath)); | ||
| } | ||
| // Sanity/security check - at this point, uplevel references (..) should be resolved. | ||
| // There shouldn't be any remaining uplevel references, otherwise, the extracted file could | ||
| // "escape" the destination directory. | ||
| if (strippedRelativePath.containsUplevelReferences()) { | ||
| throw new IOException( | ||
| String.format( | ||
| "Failed to extract %s, 7-zipped entry contains uplevel references (..)", | ||
| strippedRelativePath)); | ||
| } | ||
| Path outputPath = destinationDirectory.getRelative(strippedRelativePath); | ||
| outputPath.getParentDirectory().createDirectoryAndParents(); | ||
| boolean isDirectory = entry.isDirectory(); | ||
| if (isDirectory) { | ||
| outputPath.createDirectoryAndParents(); | ||
| } else { | ||
| try (InputStream input = sevenZFile.getInputStream(entry); | ||
| OutputStream output = outputPath.getOutputStream()) { | ||
| ByteStreams.copy(input, output); | ||
| if (Thread.interrupted()) { | ||
| throw new InterruptedException(); | ||
| } | ||
| } | ||
| outputPath.setLastModifiedTime(entry.getLastModifiedTime().toMillis()); | ||
| } | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,142 @@ | ||||||
| package com.google.devtools.build.lib.bazel.repository.decompressor; | ||||||
|
|
||||||
| import com.google.devtools.build.lib.util.StringEncoding; | ||||||
| import com.google.devtools.build.lib.vfs.Dirent; | ||||||
| import com.google.devtools.build.lib.vfs.Path; | ||||||
| import com.google.devtools.build.lib.vfs.Symlinks; | ||||||
| import org.junit.Rule; | ||||||
| import org.junit.Test; | ||||||
| import org.junit.rules.TestName; | ||||||
| import org.junit.runner.RunWith; | ||||||
| import org.junit.runners.JUnit4; | ||||||
|
|
||||||
| import java.util.HashMap; | ||||||
| import java.util.List; | ||||||
| import java.util.stream.Collectors; | ||||||
|
|
||||||
| import static com.google.common.truth.Truth.assertThat; | ||||||
| import static com.google.devtools.build.lib.bazel.repository.decompressor.TestArchiveDescriptor.INNER_FOLDER_NAME; | ||||||
| import static com.google.devtools.build.lib.bazel.repository.decompressor.TestArchiveDescriptor.ROOT_FOLDER_NAME; | ||||||
|
|
||||||
| /** Tests .7z decompression. */ | ||||||
| @RunWith(JUnit4.class) | ||||||
| public class SevenZDecompressorTest { | ||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you also add a test case for Unicode filename handling? You can search for examples of tests containing There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok, see the added test There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This test is failing internally, I'm getting Any idea how to fix? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you modify the test you run internally so that it prints the entries in that directory and possibly also their UTF-8 byte representations? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
It's hard for me to help here when I don't know how internal testing differs from external testing. Your help would be appreciated here!
I did find one thing weird when I was originally writing the test. I had to call
this changed I wonder if that has something to do with it? In the updated test, I now run
I did some test refactoring - see the latest two commits. Now the test does a directory listing and shows what's there and the filename we expected. Hope that helps meteorcloudy@ with more info from the internal testing. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I ended up removing the unicode file test case. Just curious - how does one make a test "Bazel only"?
I couldn't find existing integration tests which tested decompression under I saw you added some to 10169bb#diff-c8e054134e31f6f0307f7dccc79dc185f43d5ce865d9c895232e388cf6992ab9 But that file and those tests ended up getting deleted in 37e0d23 when the workspace->bzlmod transition happened. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
In Java tests, we can check https://cs.opensource.google/bazel/bazel/+/master:src/test/java/com/google/devtools/build/lib/testutil/TestConstants.java;l=31 But I realize this isn't easy since the archive contains the unicode file name. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Tests passing internally now There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Nice find, will add them back There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||||||
| @Rule public TestName name = new TestName(); | ||||||
|
|
||||||
| /** | ||||||
| * .7z file, created with two files: | ||||||
| * | ||||||
| * <ul> | ||||||
| * <li>root_folder/another_folder/regularFile | ||||||
| * <li>root_folder/another_folder/ünïcödëFïlë.txt | ||||||
| * </ul> | ||||||
| * | ||||||
| * Compressed with command "7zz a test_decompress_archive.7z root_folder" | ||||||
| */ | ||||||
| private static final String ARCHIVE_NAME = "test_decompress_archive.7z"; | ||||||
|
|
||||||
| private static final String REGULAR_FILENAME = "regularFile"; | ||||||
| private static final String UNICODE_FILENAME = "ünïcödëFïlë.txt"; | ||||||
|
|
||||||
| /** Provides a test filesystem descriptor for a test. NOTE: unique per individual test ONLY. */ | ||||||
| private TestArchiveDescriptor archiveDescriptor() throws Exception { | ||||||
| return new TestArchiveDescriptor( | ||||||
| ARCHIVE_NAME, | ||||||
| /* outDirName= */ this.getClass().getSimpleName() + "_" + name.getMethodName(), | ||||||
| /* withHardLinks= */ false); | ||||||
| } | ||||||
|
|
||||||
| /** Test decompressing a .7z file without stripping a prefix */ | ||||||
| @Test | ||||||
| public void testDecompressWithoutPrefix() throws Exception { | ||||||
| Path outputDir = decompress(archiveDescriptor().createDescriptorBuilder().build()); | ||||||
|
|
||||||
| Path fileDir = outputDir.getRelative(ROOT_FOLDER_NAME).getRelative(INNER_FOLDER_NAME); | ||||||
| List<String> files = | ||||||
| fileDir.readdir(Symlinks.NOFOLLOW).stream() | ||||||
| .map(Dirent::getName) | ||||||
| .collect(Collectors.toList()); | ||||||
| assertThat(files).contains(REGULAR_FILENAME); | ||||||
| assertThat(fileDir.getRelative(REGULAR_FILENAME).getFileSize()).isNotEqualTo(0); | ||||||
| } | ||||||
|
|
||||||
| /** Test decompressing a .7z file and stripping a prefix. */ | ||||||
| @Test | ||||||
| public void testDecompressWithPrefix() throws Exception { | ||||||
| DecompressorDescriptor.Builder descriptorBuilder = | ||||||
| archiveDescriptor().createDescriptorBuilder().setPrefix(ROOT_FOLDER_NAME); | ||||||
| Path outputDir = decompress(descriptorBuilder.build()); | ||||||
| Path fileDir = outputDir.getRelative(INNER_FOLDER_NAME); | ||||||
|
|
||||||
| List<String> files = | ||||||
| fileDir.readdir(Symlinks.NOFOLLOW).stream() | ||||||
| .map(Dirent::getName) | ||||||
| .collect(Collectors.toList()); | ||||||
| assertThat(files).contains(REGULAR_FILENAME); | ||||||
| } | ||||||
|
|
||||||
| /** Test decompressing a .7z with entries being renamed during the extraction process. */ | ||||||
| @Test | ||||||
| public void testDecompressWithRenamedFiles() throws Exception { | ||||||
| String innerDirName = ROOT_FOLDER_NAME + "/" + INNER_FOLDER_NAME; | ||||||
|
|
||||||
| HashMap<String, String> renameFiles = new HashMap<>(); | ||||||
| renameFiles.put(innerDirName + "/" + REGULAR_FILENAME, innerDirName + "/renamedFile"); | ||||||
| DecompressorDescriptor.Builder descriptorBuilder = | ||||||
| archiveDescriptor().createDescriptorBuilder().setRenameFiles(renameFiles); | ||||||
| Path outputDir = decompress(descriptorBuilder.build()); | ||||||
|
|
||||||
| Path fileDir = outputDir.getRelative(ROOT_FOLDER_NAME).getRelative(INNER_FOLDER_NAME); | ||||||
| List<String> files = | ||||||
| fileDir.readdir(Symlinks.NOFOLLOW).stream() | ||||||
| .map((Dirent::getName)) | ||||||
| .collect(Collectors.toList()); | ||||||
| assertThat(files).contains("renamedFile"); | ||||||
| assertThat(fileDir.getRelative("renamedFile").getFileSize()).isNotEqualTo(0); | ||||||
| } | ||||||
|
|
||||||
| /** Test that entry renaming is applied prior to prefix stripping. */ | ||||||
| @Test | ||||||
| public void testDecompressWithRenamedFilesAndPrefix() throws Exception { | ||||||
| String innerDirName = ROOT_FOLDER_NAME + "/" + INNER_FOLDER_NAME; | ||||||
|
|
||||||
| HashMap<String, String> renameFiles = new HashMap<>(); | ||||||
| renameFiles.put(innerDirName + "/" + REGULAR_FILENAME, innerDirName + "/renamedFile"); | ||||||
| DecompressorDescriptor.Builder descriptorBuilder = | ||||||
| archiveDescriptor() | ||||||
| .createDescriptorBuilder() | ||||||
| .setPrefix(ROOT_FOLDER_NAME) | ||||||
| .setRenameFiles(renameFiles); | ||||||
| Path outputDir = decompress(descriptorBuilder.build()); | ||||||
|
|
||||||
| Path fileDir = outputDir.getRelative(INNER_FOLDER_NAME); | ||||||
| List<String> files = | ||||||
| fileDir.readdir(Symlinks.NOFOLLOW).stream() | ||||||
| .map((Dirent::getName)) | ||||||
| .collect(Collectors.toList()); | ||||||
| assertThat(files).contains("renamedFile"); | ||||||
| assertThat(fileDir.getRelative("renamedFile").getFileSize()).isNotEqualTo(0); | ||||||
| } | ||||||
|
|
||||||
| /** Test that Unicode filenames are handled. **/ | ||||||
| @Test | ||||||
| public void testUnicodeFilename() throws Exception { | ||||||
| Path outputDir = decompress(archiveDescriptor().createDescriptorBuilder().build()); | ||||||
|
|
||||||
| Path path = outputDir.getRelative(ROOT_FOLDER_NAME).getRelative(INNER_FOLDER_NAME); | ||||||
| List<String> filenames = | ||||||
| path.readdir(Symlinks.NOFOLLOW).stream() | ||||||
| .map((Dirent::getName)) | ||||||
| .map(StringEncoding::internalToPlatform) | ||||||
|
||||||
| .map(StringEncoding::internalToPlatform) | |
| .map(StringEncoding::internalToUnicode) |
since you are passing the result to the Truth assertion libraries, which expect regular Java Unicode strings
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ok, reverted this change.
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| Path unicodeFile = path.getRelative(StringEncoding.platformToInternal(UNICODE_FILENAME)); | |
| Path unicodeFile = path.getRelative(StringEncoding.unicodeToInternal(UNICODE_FILENAME)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ok, reverted this change.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is it possible for
strippedRelativePathto contain../andoutputPathwill actually be out ofdestinationDirectory? I think this might already be possible for other decompressors, but it'll be best if we can prevent it for new ones.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When a
PathFragmentis created, it should normalize the path:eg.
a/b/../c->a/cI guess multiple uplevel references could potentially escape the destination directory:
eg.
a/b/../../../../root->../../rootI added a check to make sure there can't be any uplevel references (
..) when we create theoutputPathThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Awesome, thank you!