Skip to content
Draft
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@
import static org.junit.jupiter.api.Assertions.fail;
import static org.mockito.ArgumentMatchers.anyCollection;
import static org.mockito.ArgumentMatchers.anyInt;
import static org.mockito.ArgumentMatchers.anyMap;
import static org.mockito.ArgumentMatchers.anySet;
import static org.mockito.Mockito.any;
import static org.mockito.Mockito.anyBoolean;
Expand Down Expand Up @@ -237,19 +236,19 @@ public void write(int b) throws IOException {
doCallRealMethod().when(omDbCheckpointServletMock)
.writeDbDataToStream(any(), any(), any(), any(), any());
doCallRealMethod().when(omDbCheckpointServletMock)
.writeDBToArchive(any(), any(), any(), any(), any(), any(), anyBoolean());
.collectFilesFromDir(any(), any(), any(), anyBoolean(), any());

when(omDbCheckpointServletMock.getBootstrapStateLock())
.thenReturn(lock);
doCallRealMethod().when(omDbCheckpointServletMock).getCheckpoint(any(), anyBoolean());
assertNull(doCallRealMethod().when(omDbCheckpointServletMock).getBootstrapTempData());
doCallRealMethod().when(omDbCheckpointServletMock).
processMetadataSnapshotRequest(any(), any(), anyBoolean(), anyBoolean());
doCallRealMethod().when(omDbCheckpointServletMock).writeDbDataToStream(any(), any(), any(), any());
doCallRealMethod().when(omDbCheckpointServletMock).writeDbDataToStream(any(), any(), any(), any(), any());
Copy link

Copilot AI Jan 20, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Duplicate mock setup: Line 247 sets up a mock for 'writeDbDataToStream' with 5 parameters, but the same method is already mocked on line 236 with the same signature. This is redundant and one of these lines should be removed.

Suggested change
doCallRealMethod().when(omDbCheckpointServletMock).writeDbDataToStream(any(), any(), any(), any(), any());

Copilot uses AI. Check for mistakes.
doCallRealMethod().when(omDbCheckpointServletMock).getCompactionLogDir();
doCallRealMethod().when(omDbCheckpointServletMock).getSstBackupDir();
doCallRealMethod().when(omDbCheckpointServletMock)
.transferSnapshotData(anySet(), any(), anyCollection(), anyCollection(), any(), any(), anyMap());
.collectSnapshotData(anySet(), anyCollection(), anyCollection(), any(), any());
doCallRealMethod().when(omDbCheckpointServletMock).createAndPrepareCheckpoint(anyBoolean());
doCallRealMethod().when(omDbCheckpointServletMock).getSnapshotDirsFromDB(any(), any(), any());
}
Expand Down Expand Up @@ -424,9 +423,10 @@ public void testWriteDBToArchive(boolean expectOnlySstFiles) throws Exception {
Files.write(nonSstFile, "log content".getBytes(StandardCharsets.UTF_8));
Set<String> sstFilesToExclude = new HashSet<>();
AtomicLong maxTotalSstSize = new AtomicLong(1000000); // Sufficient size
Map<String, String> hardLinkFileMap = new java.util.HashMap<>();
OMDBArchiver omdbArchiver = new OMDBArchiver();
Path tmpDir = folder.resolve("tmp");
Files.createDirectories(tmpDir);
omdbArchiver.setTmpDir(tmpDir);
TarArchiveOutputStream mockArchiveOutputStream = mock(TarArchiveOutputStream.class);
List<String> fileNames = new ArrayList<>();
try (MockedStatic<Archiver> archiverMock = mockStatic(Archiver.class)) {
Expand All @@ -441,9 +441,8 @@ public void testWriteDBToArchive(boolean expectOnlySstFiles) throws Exception {
aos.closeArchiveEntry();
return 100L;
});
boolean success = omDbCheckpointServletMock.writeDBToArchive(
sstFilesToExclude, dbDir, maxTotalSstSize, mockArchiveOutputStream,
tmpDir, hardLinkFileMap, expectOnlySstFiles);
boolean success = omDbCheckpointServletMock.collectFilesFromDir(
sstFilesToExclude, dbDir, maxTotalSstSize, expectOnlySstFiles, omdbArchiver);
assertTrue(success);
verify(mockArchiveOutputStream, times(fileNames.size())).putArchiveEntry(any());
verify(mockArchiveOutputStream, times(fileNames.size())).closeArchiveEntry();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.ozone.om;

import static org.apache.hadoop.hdds.utils.Archiver.includeFile;
import static org.apache.hadoop.hdds.utils.Archiver.tar;
import static org.apache.hadoop.hdds.utils.HddsServerUtil.includeRatisSnapshotCompleteFlag;
import static org.apache.hadoop.ozone.om.OMDBCheckpointServletInodeBasedXfer.writeHardlinkFile;

import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashMap;
import java.util.Map;
import org.apache.commons.compress.archivers.ArchiveOutputStream;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.util.Time;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* Class for handling operations relevant to archiving the OM DB tarball.
* Mainly maintains a map for recording the files collected from reading
* the checkpoint and snapshot DB's. It temporarily creates hardlinks and stores
* the link data in the map to release the bootstrap lock quickly
* and do the actual write at the end outside the lock.
*/
public class OMDBArchiver {

private Path tmpDir;
private Map<String, File> filesToWriteIntoTarball;
private Map<String, String> hardLinkFileMap;
private static final Logger LOG = LoggerFactory.getLogger(OMDBArchiver.class);

public OMDBArchiver() {
this.tmpDir = null;
this.filesToWriteIntoTarball = new HashMap<>();
hardLinkFileMap = null;
}

public void setTmpDir(Path tmpDir) {
this.tmpDir = tmpDir;
}

public Map<String, String> getHardLinkFileMap() {
Copy link

Copilot AI Jan 20, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

getHardLinkFileMap exposes the internal representation stored in field hardLinkFileMap. The value may be modified after this call to getHardLinkFileMap.

Copilot uses AI. Check for mistakes.
return hardLinkFileMap;
}

public void setHardLinkFileMap(Map<String, String> hardLinkFileMap) {
this.hardLinkFileMap = hardLinkFileMap;
}

/**
* @param file the file to create a hardlink and record into the map
* @param entryName name of the entry corresponding to file
* @return the file size
* @throws IOException in case of hardlink failure
*
* Records the given file entry into the map after taking a hardlink.
Comment on lines +90 to +95
Copy link

Copilot AI Jan 20, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Non-standard javadoc format: The javadoc comment uses @param and @return tags that appear after the method description (lines 90-93) instead of following standard javadoc conventions where these tags should come after the main description. While this may still compile, it doesn't follow the standard javadoc format where the description comes first, followed by all @param tags, and then @return and @throws tags.

Suggested change
* @param file the file to create a hardlink and record into the map
* @param entryName name of the entry corresponding to file
* @return the file size
* @throws IOException in case of hardlink failure
*
* Records the given file entry into the map after taking a hardlink.
* Records the given file entry into the map after taking a hardlink.
*
* @param file the file to create a hardlink and record into the map
* @param entryName name of the entry corresponding to file
* @return the file size
* @throws IOException in case of hardlink failure

Copilot uses AI. Check for mistakes.
*/
public long recordFileEntry(File file, String entryName) throws IOException {
Copy link

Copilot AI Jan 20, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing null check for tmpDir: The recordFileEntry method uses tmpDir.resolve() on line 98 without verifying that tmpDir has been set. If recordFileEntry is called before setTmpDir, this will result in a NullPointerException. Consider adding validation to ensure tmpDir is not null before using it.

Suggested change
public long recordFileEntry(File file, String entryName) throws IOException {
public long recordFileEntry(File file, String entryName) throws IOException {
if (tmpDir == null) {
throw new IllegalStateException(
"Temporary directory not set. Call setTmpDir() before recordFileEntry().");
}

Copilot uses AI. Check for mistakes.
File link = tmpDir.resolve(entryName).toFile();
long bytes = 0;
try {
Files.createLink(link.toPath(), file.toPath());
Comment on lines +98 to +101
Copy link

Copilot AI Jan 20, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Potential file name collision in hardlink creation: The method creates a hardlink using only the entryName without considering potential collisions. If recordFileEntry is called multiple times with the same entryName but different files, the second call will fail because Files.createLink will throw FileAlreadyExistsException. The existing hardlink should either be checked and handled, or the code should ensure unique entryNames are passed.

Suggested change
File link = tmpDir.resolve(entryName).toFile();
long bytes = 0;
try {
Files.createLink(link.toPath(), file.toPath());
Path linkPath = tmpDir.resolve(entryName);
File link = linkPath.toFile();
long bytes = 0;
try {
if (Files.exists(linkPath)) {
// If the existing file is already a link to the same source, just reuse it.
if (Files.isSameFile(linkPath, file.toPath())) {
filesToWriteIntoTarball.put(entryName, link);
return file.length();
}
// Otherwise, remove the stale link/entry so we can recreate it.
Files.delete(linkPath);
}
Files.createLink(linkPath, file.toPath());

Copilot uses AI. Check for mistakes.
filesToWriteIntoTarball.put(entryName, link);
bytes = file.length();
} catch (IOException ioe) {
LOG.error("Couldn't create hardlink for file {} while including it in tarball.",
file.getAbsolutePath(), ioe);
throw ioe;
}
return bytes;
}

/**
* @param conf the configuration object to obtain metadata paths
* @param outputStream the tarball archive output stream
* @throws IOException in case of write failure to the archive
*
* Writes all the files captured by the map into the archive and
* also includes the hardlinkFile and the completion marker file.
*/
Comment on lines +112 to +119
Copy link

Copilot AI Jan 20, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Non-standard javadoc format: Similar to the recordFileEntry method, this javadoc uses @param and @throws tags (lines 113-115) appearing before the method description (lines 117-119). The description should come first, followed by the parameter documentation tags.

Copilot uses AI. Check for mistakes.
public void writeToArchive(OzoneConfiguration conf, OutputStream outputStream)
throws IOException {
long bytesWritten = 0;
long lastLoggedTime = Time.now();
Copy link

Copilot AI Jan 20, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Inconsistent time method usage. Line 123 uses 'Time.now()' for initialization, but lines 131 and 134 use 'Time.monotonicNow()' for comparison. Since monotonic time should be used for measuring elapsed time intervals, line 123 should also use 'Time.monotonicNow()' to ensure consistent time measurement.

Suggested change
long lastLoggedTime = Time.now();
long lastLoggedTime = Time.monotonicNow();

Copilot uses AI. Check for mistakes.
long filesWritten = 0;
try (ArchiveOutputStream<TarArchiveEntry> archiveOutput = tar(outputStream)) {
for (Map.Entry<String, File> kv : filesToWriteIntoTarball.entrySet()) {
String entryName = kv.getKey();
File link = kv.getValue();
try {
bytesWritten += includeFile(link, entryName, archiveOutput);
if (Time.monotonicNow() - lastLoggedTime >= 30000) {
LOG.info("Transferred {} KB, #files {} to checkpoint tarball stream...",
bytesWritten / (1024), filesWritten);
lastLoggedTime = Time.monotonicNow();
Comment on lines +124 to +134
Copy link

Copilot AI Jan 20, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The variable 'filesWritten' is incremented but never used in the logging statement. Line 133 references 'filesWritten' in the log message, but it should be incremented after line 130 to track the actual number of files written. Currently, it remains at 0 throughout the loop.

Copilot uses AI. Check for mistakes.
}
} catch (IOException ioe) {
LOG.error("Couldn't create hardlink for file {} while including it in tarball.",
Copy link

Copilot AI Jan 20, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The error message on line 137 is misleading. It states "Couldn't create hardlink for file" but this error occurs during the write phase when including the file in the tarball, not when creating the hardlink. The hardlink was already created earlier in the recordFileEntry method. The error message should accurately describe that the issue is with writing the file to the archive.

Suggested change
LOG.error("Couldn't create hardlink for file {} while including it in tarball.",
LOG.error("Failed to write file {} to checkpoint tarball archive.",

Copilot uses AI. Check for mistakes.
link.getAbsolutePath(), ioe);
throw ioe;
} finally {
Files.deleteIfExists(link.toPath());
}
}
writeHardlinkFile(conf, hardLinkFileMap, archiveOutput);
includeRatisSnapshotCompleteFlag(archiveOutput);
}
}
}
Loading