From 513417a903dd1b0ee1109c4894bf4de7005f654a Mon Sep 17 00:00:00 2001 From: Qiyuan Dong Date: Thu, 2 Oct 2025 11:39:37 +0000 Subject: [PATCH 1/2] Throw typed exception for Parquet footer error --- .../org/apache/parquet/hadoop/ParquetFileReader.java | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java index 551b1bf6c7..ee55909530 100644 --- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java +++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java @@ -596,7 +596,7 @@ private static final ParquetMetadata readFooter( int FOOTER_LENGTH_SIZE = 4; if (fileLen < MAGIC.length + FOOTER_LENGTH_SIZE + MAGIC.length) { // MAGIC + data + footer + footerIndex + MAGIC - throw new RuntimeException(filePath + " is not a Parquet file (length is too low: " + fileLen + ")"); + throw new ParquetDecodingException(filePath + " is not a Parquet file (length is too low: " + fileLen + ")"); } // Read footer length and magic string - with a single seek @@ -606,7 +606,7 @@ private static final ParquetMetadata readFooter( f.seek(fileMetadataLengthIndex); long readFileMetadataLength = readIntLittleEndian(f) & 0xFFFFFFFFL; if (readFileMetadataLength > Integer.MAX_VALUE) { - throw new RuntimeException("footer is too large: " + readFileMetadataLength + "to be read"); + throw new ParquetDecodingException("footer is too large: " + readFileMetadataLength + "to be read"); } int fileMetadataLength = (int) readFileMetadataLength; @@ -618,14 +618,15 @@ private static final ParquetMetadata readFooter( } else if (Arrays.equals(EFMAGIC, magic)) { encryptedFooterMode = true; } else { - throw new RuntimeException(filePath + " is not a Parquet file. Expected magic number at tail, but found " - + Arrays.toString(magic)); + throw new ParquetDecodingException( + filePath + " is not a Parquet file. Expected magic number at tail, but found " + Arrays.toString(magic)); } long fileMetadataIndex = fileMetadataLengthIndex - fileMetadataLength; LOG.debug("read footer length: {}, footer index: {}", fileMetadataLength, fileMetadataIndex); if (fileMetadataIndex < magic.length || fileMetadataIndex >= fileMetadataLengthIndex) { - throw new RuntimeException("corrupted file: the footer index is not within the file: " + fileMetadataIndex); + throw new ParquetDecodingException( + "corrupted file: the footer index is not within the file: " + fileMetadataIndex); } f.seek(fileMetadataIndex); From fbb67e41d080ea1c9fb20c2c780e780a9ce660a8 Mon Sep 17 00:00:00 2001 From: Qiyuan Dong Date: Thu, 2 Oct 2025 14:32:37 +0000 Subject: [PATCH 2/2] format --- .../org/apache/parquet/hadoop/ParquetFileReader.java | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java index ee55909530..aa3c9a8b54 100644 --- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java +++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java @@ -596,7 +596,8 @@ private static final ParquetMetadata readFooter( int FOOTER_LENGTH_SIZE = 4; if (fileLen < MAGIC.length + FOOTER_LENGTH_SIZE + MAGIC.length) { // MAGIC + data + footer + footerIndex + MAGIC - throw new ParquetDecodingException(filePath + " is not a Parquet file (length is too low: " + fileLen + ")"); + throw new ParquetDecodingException( + filePath + " is not a Parquet file (length is too low: " + fileLen + ")"); } // Read footer length and magic string - with a single seek @@ -618,15 +619,15 @@ private static final ParquetMetadata readFooter( } else if (Arrays.equals(EFMAGIC, magic)) { encryptedFooterMode = true; } else { - throw new ParquetDecodingException( - filePath + " is not a Parquet file. Expected magic number at tail, but found " + Arrays.toString(magic)); + throw new ParquetDecodingException(filePath + + " is not a Parquet file. Expected magic number at tail, but found " + Arrays.toString(magic)); } long fileMetadataIndex = fileMetadataLengthIndex - fileMetadataLength; LOG.debug("read footer length: {}, footer index: {}", fileMetadataLength, fileMetadataIndex); if (fileMetadataIndex < magic.length || fileMetadataIndex >= fileMetadataLengthIndex) { throw new ParquetDecodingException( - "corrupted file: the footer index is not within the file: " + fileMetadataIndex); + "corrupted file: the footer index is not within the file: " + fileMetadataIndex); } f.seek(fileMetadataIndex);