From a7652a5408ca0963e11f7f485ab8d95d14155cb0 Mon Sep 17 00:00:00 2001 From: Ravi Singal Date: Thu, 3 Apr 2025 13:09:46 +0530 Subject: [PATCH 1/2] remove tar.gz suffix condition from segment file names --- .../batch/common/BaseSegmentPushJobRunner.java | 5 ++--- .../segment/local/utils/SegmentPushUtils.java | 15 +++++++-------- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-common/src/main/java/org/apache/pinot/plugin/ingestion/batch/common/BaseSegmentPushJobRunner.java b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-common/src/main/java/org/apache/pinot/plugin/ingestion/batch/common/BaseSegmentPushJobRunner.java index 821e42d2568d..d45032a9ccd9 100644 --- a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-common/src/main/java/org/apache/pinot/plugin/ingestion/batch/common/BaseSegmentPushJobRunner.java +++ b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-common/src/main/java/org/apache/pinot/plugin/ingestion/batch/common/BaseSegmentPushJobRunner.java @@ -18,7 +18,6 @@ */ package org.apache.pinot.plugin.ingestion.batch.common; -import com.google.common.base.Preconditions; import java.io.File; import java.io.IOException; import java.net.URI; @@ -113,8 +112,8 @@ public List getSegmentsToReplace(Map segmentsUriToTarPat for (String tarFilePath : tarFilePaths) { File tarFile = new File(tarFilePath); String fileName = tarFile.getName(); - Preconditions.checkArgument(fileName.endsWith(Constants.TAR_GZ_FILE_EXT)); - String segmentName = fileName.substring(0, fileName.length() - Constants.TAR_GZ_FILE_EXT.length()); + String segmentName = fileName.endsWith(Constants.TAR_GZ_FILE_EXT) + ? fileName.substring(0, fileName.length() - Constants.TAR_GZ_FILE_EXT.length()) : fileName; segmentNames.add(segmentName); } return segmentNames; diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/SegmentPushUtils.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/SegmentPushUtils.java index 6ca93f24918e..94f863f4ee23 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/SegmentPushUtils.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/SegmentPushUtils.java @@ -18,7 +18,6 @@ */ package org.apache.pinot.segment.local.utils; -import com.google.common.base.Preconditions; import java.io.File; import java.io.InputStream; import java.io.Serializable; @@ -152,8 +151,8 @@ public static void pushSegments(SegmentGenerationJobSpec spec, PinotFS fileSyste URI tarFileURI = URI.create(tarFilePath); File tarFile = new File(tarFilePath); String fileName = tarFile.getName(); - Preconditions.checkArgument(fileName.endsWith(Constants.TAR_GZ_FILE_EXT)); - String segmentName = fileName.substring(0, fileName.length() - Constants.TAR_GZ_FILE_EXT.length()); + String segmentName = fileName.endsWith(Constants.TAR_GZ_FILE_EXT) + ? fileName.substring(0, fileName.length() - Constants.TAR_GZ_FILE_EXT.length()) : fileName; for (PinotClusterSpec pinotClusterSpec : spec.getPinotClusterSpecs()) { URI controllerURI; try { @@ -379,11 +378,11 @@ public static Map getSegmentUriToTarPathMap(URI outputDirURI, Pu // Skip segment metadata tar gz files continue; } - if (uri.getPath().endsWith(Constants.TAR_GZ_FILE_EXT)) { - URI updatedURI = SegmentPushUtils.generateSegmentTarURI(outputDirURI, uri, pushSpec.getSegmentUriPrefix(), - pushSpec.getSegmentUriSuffix()); - segmentUriToTarPathMap.put(updatedURI.toString(), file); - } + //if (uri.getPath().endsWith(Constants.TAR_GZ_FILE_EXT)) { + URI updatedURI = SegmentPushUtils.generateSegmentTarURI(outputDirURI, uri, pushSpec.getSegmentUriPrefix(), + pushSpec.getSegmentUriSuffix()); + segmentUriToTarPathMap.put(updatedURI.toString(), file); + //} } return segmentUriToTarPathMap; } From 001fec0da923d42587b820e16fcf2b34cd36d855 Mon Sep 17 00:00:00 2001 From: Ravi Singal Date: Thu, 3 Apr 2025 16:43:54 +0530 Subject: [PATCH 2/2] log ignored files --- .../org/apache/pinot/segment/local/utils/SegmentPushUtils.java | 1 + 1 file changed, 1 insertion(+) diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/SegmentPushUtils.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/SegmentPushUtils.java index 94f863f4ee23..eadd908b8717 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/SegmentPushUtils.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/SegmentPushUtils.java @@ -369,6 +369,7 @@ public static Map getSegmentUriToTarPathMap(URI outputDirURI, Pu for (String file : files) { if (pushFilePathMatcher != null) { if (!pushFilePathMatcher.matches(Paths.get(file))) { + LOGGER.info("Ignoring file {}", file); continue; } }