Skip to content

Commit 00b25fa

Browse files
authored
Merge pull request #692 from onekey-sec/ui-no-extract-report
Improve unblob "skip-extraction" mode of operation
2 parents bcbf49e + d1b6ccc commit 00b25fa

File tree

4 files changed

+132
-6
lines changed

4 files changed

+132
-6
lines changed

tests/test_cli.py

+34
Original file line numberDiff line numberDiff line change
@@ -333,3 +333,37 @@ def test_skip_extension(
333333
result = runner.invoke(unblob.cli.cli, params)
334334
assert extracted_files_count == len(list(tmp_path.rglob("*")))
335335
assert result.exit_code == 0
336+
337+
338+
@pytest.mark.parametrize(
    "args, skip_extraction, fail_message",
    [
        ([], False, "Should *NOT* have skipped extraction"),
        (["-s"], True, "Should have skipped extraction"),
        (["--skip-extraction"], True, "Should have skipped extraction"),
    ],
)
def test_skip_extraction(
    args: List[str], skip_extraction: bool, fail_message: str, tmp_path: Path
):
    """The -s/--skip-extraction CLI flag must be forwarded into the config passed to process_file."""
    # A small fixture archive that ships with the integration tests.
    archive = Path(__file__).parent.joinpath(
        "integration", "archive", "zip", "regular", "__input__", "apple.zip"
    )
    cli_params = [*args, "--extract-dir", str(tmp_path), str(archive)]

    # Stub out the actual processing — we only care about the config it receives.
    mocked_process_file = mock.MagicMock()
    with mock.patch.object(unblob.cli, "process_file", mocked_process_file):
        invocation = CliRunner().invoke(unblob.cli.cli, cli_params)

    assert invocation.exit_code == 0
    mocked_process_file.assert_called_once()
    config = mocked_process_file.call_args.args[0]
    assert config.skip_extraction == skip_extraction, fail_message

tests/test_processing.py

+31
Original file line numberDiff line numberDiff line change
@@ -447,6 +447,37 @@ def get_all(file_name, report_type: Type[ReportType]) -> List[ReportType]:
447447
)
448448

449449

450+
@pytest.mark.parametrize(
    "skip_extraction, file_count, extracted_file_count",
    [
        (True, 5, 0),
        (False, 5, 6),
    ],
)
def test_skip_extraction(
    skip_extraction: bool,
    file_count: int,
    extracted_file_count: int,
    tmp_path: Path,
    extraction_config: ExtractionConfig,
):
    """With skip_extraction set, processing must report chunks but write nothing to disk."""
    # Build a throwaway zip with `file_count` members as the input blob.
    input_file = tmp_path / "input"
    with zipfile.ZipFile(input_file, "w") as archive:
        for index in range(file_count):
            archive.writestr(f"file{index}", data=b"This is a test file.")

    extraction_config.extract_root = tmp_path / "output"
    extraction_config.skip_extraction = skip_extraction

    process_result = process_file(extraction_config, input_file)
    results_by_path = {result.task.path: result for result in process_result.results}

    # One task for the input file itself, plus one per extracted member (if any).
    assert len(results_by_path) == extracted_file_count + 1
    on_disk = list(extraction_config.extract_root.rglob("**/*"))
    assert len(on_disk) == extracted_file_count
480+
450481
class ConcatenateExtractor(DirectoryExtractor):
451482
def extract(self, paths: List[Path], outdir: Path):
452483
outfile = outdir / "data"

unblob/cli.py

+56-3
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,18 @@
88
import click
99
from rich.console import Console
1010
from rich.panel import Panel
11+
from rich.style import Style
1112
from rich.table import Table
1213
from structlog import get_logger
1314

1415
from unblob.models import DirectoryHandlers, Handlers, ProcessResult
1516
from unblob.plugins import UnblobPluginManager
16-
from unblob.report import ChunkReport, Severity, StatReport, UnknownChunkReport
17+
from unblob.report import (
18+
ChunkReport,
19+
Severity,
20+
StatReport,
21+
UnknownChunkReport,
22+
)
1723

1824
from .cli_options import verbosity_option
1925
from .dependencies import get_dependencies, pretty_format_dependencies
@@ -200,7 +206,7 @@ def __init__(
200206
)
201207
@click.option(
202208
"-s",
203-
"--skip_extraction",
209+
"--skip-extraction",
204210
"skip_extraction",
205211
is_flag=True,
206212
show_default=True,
@@ -279,7 +285,10 @@ def cli(
279285
logger.info("Start processing file", file=file)
280286
process_results = process_file(config, file, report_file)
281287
if verbose == 0:
282-
print_report(process_results)
288+
if skip_extraction:
289+
print_scan_report(process_results)
290+
else:
291+
print_report(process_results)
283292
return process_results
284293

285294

@@ -349,6 +358,50 @@ def get_size_report(task_results: List) -> Tuple[int, int, int, int]:
349358
return total_files, total_dirs, total_links, extracted_size
350359

351360

361+
def print_scan_report(reports: ProcessResult):
    """Print a per-chunk offset table to stderr for scan-only (skip-extraction) runs."""
    output = Console(stderr=True)

    table = Table(
        expand=False,
        show_lines=True,
        show_edge=True,
        style=Style(color="white"),
        header_style=Style(color="white"),
        row_styles=[Style(color="red")],
    )
    for heading in ("Start offset", "End offset", "Size", "Description"):
        table.add_column(heading)

    def append_row(chunk, description, color):
        # Shared row formatting: raw byte offsets plus a human-readable size.
        table.add_row(
            f"{chunk.start_offset:0d}",
            f"{chunk.end_offset:0d}",
            human_size(chunk.size),
            description,
            style=Style(color=color),
        )

    for task_result in reports.results:
        # Collect only chunk-type reports, ordered by where they start in the file.
        chunks = sorted(
            (
                report
                for report in task_result.reports
                if isinstance(report, (ChunkReport, UnknownChunkReport))
            ),
            key=lambda report: report.start_offset,
        )

        for chunk in chunks:
            if isinstance(chunk, ChunkReport):
                # Identified chunks show the handler that recognized them.
                append_row(chunk, chunk.handler_name, "#00FFC8")
            if isinstance(chunk, UnknownChunkReport):
                # Gaps between identified chunks are reported as "unknown".
                append_row(chunk, "unknown", "#008ED5")

    output.print(table)
404+
352405
def print_report(reports: ProcessResult):
353406
total_files, total_dirs, total_links, extracted_size = get_size_report(
354407
reports.results

unblob/processing.py

+11-3
Original file line numberDiff line numberDiff line change
@@ -136,8 +136,9 @@ def process_file(
136136

137137
process_result = _process_task(config, task)
138138

139-
# ensure that the root extraction directory is created even for empty extractions
140-
extract_dir.mkdir(parents=True, exist_ok=True)
139+
if not config.skip_extraction:
140+
# ensure that the root extraction directory is created even for empty extractions
141+
extract_dir.mkdir(parents=True, exist_ok=True)
141142

142143
if report_file:
143144
write_json_report(report_file, process_result)
@@ -475,7 +476,7 @@ def __init__(
475476
def process(self):
476477
logger.debug("Processing file", path=self.task.path, size=self.size)
477478

478-
if self.carve_dir.exists():
479+
if self.carve_dir.exists() and not self.config.skip_extraction:
479480
# Extraction directory is not supposed to exist, it is usually a simple mistake of running
480481
# unblob again without cleaning up or using --force.
481482
# It would cause problems continuing, as it would mix up original and extracted files,
@@ -515,6 +516,13 @@ def _process_chunks(
515516
if unknown_chunks:
516517
logger.warning("Found unknown Chunks", chunks=unknown_chunks)
517518

519+
if self.config.skip_extraction:
520+
for chunk in unknown_chunks:
521+
self.result.add_report(chunk.as_report(entropy=None))
522+
for chunk in outer_chunks:
523+
self.result.add_report(chunk.as_report(extraction_reports=[]))
524+
return
525+
518526
for chunk in unknown_chunks:
519527
carved_unknown_path = carve_unknown_chunk(self.carve_dir, file, chunk)
520528
entropy = self._calculate_entropy(carved_unknown_path)

0 commit comments

Comments
 (0)