From c7fefc367f992ee54ad08c8115d929f5ca4c9ba1 Mon Sep 17 00:00:00 2001 From: Socrates Date: Fri, 13 Dec 2024 09:57:41 +0800 Subject: [PATCH] [fix](hive) fix block decompressor bug (#45289) ### What problem does this PR solve? Problem Summary: In the block decompressor, when the remaining input is shorter than 4 bytes (the size of the large-block length header), the decompressor should set more_input_bytes instead of returning an error. --- be/src/exec/decompressor.cpp | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/be/src/exec/decompressor.cpp b/be/src/exec/decompressor.cpp index 9365bb00288db1..5da2e6acbb9bdf 100644 --- a/be/src/exec/decompressor.cpp +++ b/be/src/exec/decompressor.cpp @@ -492,15 +492,15 @@ Status Lz4BlockDecompressor::decompress(uint8_t* input, size_t input_len, size_t auto* output_ptr = output; while (input_len > 0) { - //if faild , fall back to large block begin - auto* large_block_input_ptr = input_ptr; - auto* large_block_output_ptr = output_ptr; - if (input_len < sizeof(uint32_t)) { - return Status::InvalidArgument(strings::Substitute( - "fail to do hadoop-lz4 decompress, input_len=$0", input_len)); + *more_input_bytes = sizeof(uint32_t) - input_len; + break; } + //if faild, fall back to large block begin + auto* large_block_input_ptr = input_ptr; + auto* large_block_output_ptr = output_ptr; + uint32_t remaining_decompressed_large_block_len = BigEndian::Load32(input_ptr); input_ptr += sizeof(uint32_t); @@ -609,15 +609,15 @@ Status SnappyBlockDecompressor::decompress(uint8_t* input, size_t input_len, auto* output_ptr = output; while (input_len > 0) { - //if faild , fall back to large block begin - auto* large_block_input_ptr = input_ptr; - auto* large_block_output_ptr = output_ptr; - if (input_len < sizeof(uint32_t)) { - return Status::InvalidArgument(strings::Substitute( - "fail to do hadoop-snappy decompress, input_len=$0", input_len)); + *more_input_bytes = sizeof(uint32_t) - input_len; + break; } + //if faild, 
fall back to large block begin + auto* large_block_input_ptr = input_ptr; + auto* large_block_output_ptr = output_ptr; + uint32_t remaining_decompressed_large_block_len = BigEndian::Load32(input_ptr); input_ptr += sizeof(uint32_t);