From ae266d8fd71009211302b4afa35b54912a77269b Mon Sep 17 00:00:00 2001 From: Revaa Rathore Date: Mon, 26 Jan 2026 10:43:23 +0530 Subject: [PATCH] feat: Add check-data command for fast, side-effect-free dataset validation. --- Atif/Gsoc-HealingStones/.gitignore | 3 + Atif/Gsoc-HealingStones/README.md | 2 + Atif/Gsoc-HealingStones/README_ONBOARDING.md | 32 ++++++++ Atif/Gsoc-HealingStones/batch_processor.py | 11 +++ Atif/Gsoc-HealingStones/cli_utils.py | 77 ++++++++++++++++++++ Atif/Gsoc-HealingStones/main_pipeline.py | 26 +++++++ 6 files changed, 151 insertions(+) create mode 100644 Atif/Gsoc-HealingStones/.gitignore create mode 100644 Atif/Gsoc-HealingStones/README_ONBOARDING.md create mode 100644 Atif/Gsoc-HealingStones/cli_utils.py diff --git a/Atif/Gsoc-HealingStones/.gitignore b/Atif/Gsoc-HealingStones/.gitignore new file mode 100644 index 0000000..b908d4c --- /dev/null +++ b/Atif/Gsoc-HealingStones/.gitignore @@ -0,0 +1,3 @@ +__pycache__/ +*.pyc +.DS_Store diff --git a/Atif/Gsoc-HealingStones/README.md b/Atif/Gsoc-HealingStones/README.md index 3b96ac2..b8fcb7a 100644 --- a/Atif/Gsoc-HealingStones/README.md +++ b/Atif/Gsoc-HealingStones/README.md @@ -1,3 +1,5 @@ +> 📌 New contributors: see README_ONBOARDING.md for setup and usage instructions. + # mayan_stele_readme # Mayan Stele Fragment Reconstruction System diff --git a/Atif/Gsoc-HealingStones/README_ONBOARDING.md b/Atif/Gsoc-HealingStones/README_ONBOARDING.md new file mode 100644 index 0000000..ac0fdd5 --- /dev/null +++ b/Atif/Gsoc-HealingStones/README_ONBOARDING.md @@ -0,0 +1,32 @@ +# Mayan Stele Reconstruction Pipeline - Onboarding + +## Running via CLI + +### Data Validation +Before running expensive processing steps, you can quickly validate your dataset structure: +```bash +python main_pipeline.py check-data +``` +> [!TIP] +> **check-data** is a fast, read-only check that ensures the directory is accessible and contains `.ply` files. Use it to verify your paths. 
+> +> **validate** (in `batch_processor.py`) is a heavy-duty check that analyzes the actual mesh quality (vertex counts, colors, etc.). + +### Reconstruction Pipeline +To run the main reconstruction pipeline: +```bash +python main_pipeline.py +``` + +### Batch Processor +The batch processor supports validation, preprocessing, and data checking: +```bash +# Check dataset structure +python batch_processor.py check-data + +# Validate PLY files (heavy processing) +python batch_processor.py validate + +# Preprocess PLY files +python batch_processor.py preprocess +``` diff --git a/Atif/Gsoc-HealingStones/batch_processor.py b/Atif/Gsoc-HealingStones/batch_processor.py index c0d5625..f9affe0 100644 --- a/Atif/Gsoc-HealingStones/batch_processor.py +++ b/Atif/Gsoc-HealingStones/batch_processor.py @@ -8,6 +8,7 @@ import argparse from typing import List, Dict, Tuple, Optional import cv2 +from cli_utils import validate_dataset, print_validation_report class PLYValidator: """ @@ -502,6 +503,11 @@ def main(): parser = argparse.ArgumentParser(description="PLY Preprocessing and Validation Tools") subparsers = parser.add_subparsers(dest='command', help='Available commands') + # Check-data command + check_parser = subparsers.add_parser('check-data', help='Validate dataset structure and inputs') + check_parser.add_argument('input', help='Directory containing PLY files to validate') + check_parser.add_argument('--output', help='Optional output directory to check writability') + # Validation command validate_parser = subparsers.add_parser('validate', help='Validate PLY files') validate_parser.add_argument('input', help='PLY file or directory to validate') @@ -519,6 +525,11 @@ def main(): args = parser.parse_args() + if args.command == 'check-data': + results, success = validate_dataset(args.input, args.output) + print_validation_report(results, "BATCH PROCESSOR DATA CHECK") + sys.exit(0 if success else 1) + if args.command == 'validate': validator = PLYValidator() diff --git 
# NOTE: the surrounding file is a whitespace-collapsed git patch. The lines
# replaced here contain the complete new file cli_utils.py together with
# neighbouring patch scaffolding; the scaffolding text is retained in comments
# (here and at the bottom) so that no part of the patch is lost.
#
# a/Atif/Gsoc-HealingStones/cli_utils.py b/Atif/Gsoc-HealingStones/cli_utils.py
# new file mode 100644
# index 0000000..87955ca
# --- /dev/null
# +++ b/Atif/Gsoc-HealingStones/cli_utils.py
# @@ -0,0 +1,77 @@
"""Read-only dataset checks shared by the pipeline command-line tools."""

import glob
import os
import sys
from pathlib import Path


def _count_ply_files(directory):
    """Return how many regular files directly inside *directory* match ``*.ply``."""
    # glob.escape() stops characters such as "[" or "*" in the directory name
    # from being interpreted as wildcards; only "*.ply" is a real pattern.
    pattern = os.path.join(glob.escape(str(directory)), "*.ply")
    # A sub-directory that happens to be called "something.ply" is not a mesh.
    return sum(1 for match in glob.glob(pattern) if os.path.isfile(match))


def _find_output_issue(output_path):
    """Return a message describing why *output_path* is unusable, or ``None``."""
    target = Path(output_path).resolve()

    # Walk up to the nearest ancestor that already exists. Nothing is created,
    # so the check stays free of side effects.
    anchor = target
    while not anchor.exists() and anchor != anchor.parent:
        anchor = anchor.parent

    # A regular file in the way means the output directory can never be
    # created there, even when that file itself is writable.
    if not anchor.is_dir():
        return f"Output location is not a directory: {anchor}"
    # Creating entries inside a directory needs write and search permission.
    if not os.access(anchor, os.W_OK | os.X_OK):
        return f"Output location is not writable: {anchor}"
    return None


def _emit(text):
    """Print *text*, degrading unencodable characters instead of crashing."""
    try:
        print(text)
    except UnicodeEncodeError:
        # stdout cannot represent the emoji (e.g. redirected output using a
        # legacy code page); replace them rather than abort the report.
        encoding = getattr(sys.stdout, "encoding", None) or "ascii"
        print(text.encode(encoding, errors="replace").decode(encoding))


def validate_dataset(input_path, output_path=None):
    """
    Validate dataset structure and inputs without side effects.

    Args:
        input_path: Path to the directory containing .ply files
        output_path: Optional path to an output directory. It does not have
            to exist yet; its nearest existing ancestor is checked instead.

    Returns:
        tuple: (results_dict, success_bool). ``results_dict`` has the keys
        ``input_path``, ``exists``, ``is_dir``, ``ply_count``,
        ``output_writable`` and ``issues`` (a list of messages);
        ``success_bool`` is True exactly when ``issues`` is empty.
    """
    resolved_input = Path(input_path).resolve()
    results = {
        'input_path': str(input_path),
        'exists': False,
        'is_dir': False,
        'ply_count': 0,
        # Nothing to check counts as "writable"; kept consistent with the
        # truthiness test that guards the output check below.
        'output_writable': not output_path,
        'issues': [],
    }
    issues = results['issues']

    # Check input path (strictly read-only)
    if not resolved_input.exists():
        issues.append(f"Input path does not exist: {input_path}")
    else:
        results['exists'] = True
        if not resolved_input.is_dir():
            issues.append(f"Input path is not a directory: {input_path}")
        else:
            results['is_dir'] = True
            if not os.access(resolved_input, os.R_OK | os.X_OK):
                # glob silently returns nothing for unreadable directories,
                # which would otherwise be reported as "no .ply files".
                issues.append(
                    f"Error accessing input directory: {input_path} is not readable"
                )
            else:
                try:
                    results['ply_count'] = _count_ply_files(resolved_input)
                except OSError as e:
                    issues.append(f"Error accessing input directory: {e}")
                else:
                    if results['ply_count'] == 0:
                        issues.append(f"No .ply files found in: {input_path}")

    # Check output path writability (strictly read-only)
    if output_path:
        output_issue = _find_output_issue(output_path)
        if output_issue is None:
            results['output_writable'] = True
        else:
            issues.append(output_issue)

    return results, not issues


def print_validation_report(results, command_name="DATASET CHECK"):
    """
    Print a standardized validation report summary.

    Args:
        results: Dictionary returned by ``validate_dataset``; the keys
            ``input_path``, ``ply_count`` and ``issues`` are read.
        command_name: Heading shown at the top of the report.
    """
    _emit(f"\n🔍 {command_name}")
    _emit("=" * 60)
    _emit(f"Input Path: {results['input_path']}")
    _emit(f"PLY Files: {results['ply_count']}")

    if results['issues']:
        _emit("\n❌ ISSUES DETECTED:")
        for issue in results['issues']:
            _emit(f" - {issue}")
        _emit("\nRESULT: FAIL ❌")
    else:
        _emit("\nRESULT: PASS ✅")
        _emit("Dataset is ready for processing.")

    _emit("=" * 60)


# Remainder of the replaced patch lines: the start of the main_pipeline.py
# hunk, whose continuation follows on the next line of the patch. Text kept
# as found; only the line breaks are restored.
#
# diff --git a/Atif/Gsoc-HealingStones/main_pipeline.py b/Atif/Gsoc-HealingStones/main_pipeline.py
# index 151706d..353a2b0 100644
# --- a/Atif/Gsoc-HealingStones/main_pipeline.py
# +++ b/Atif/Gsoc-HealingStones/main_pipeline.py
# @@ -26,6 +26,7 @@ from surface_matcher import SurfaceMatcher
#  from fragment_aligner import FragmentAligner
#  from reconstruction_visualizer import ReconstructionVisualizer
# +from cli_utils import validate_dataset, print_validation_report
#
#  class ReconstructionPipeline:
#      """
# @@ -904,6 +905,31 @@ def main():
#          description="Mayan Stele Fragment Reconstruction Pipeline"
#      )
#
# +    subparsers = parser.add_subparsers(dest="command", help="Available commands")
# +
# +    # Check-data command
# +    check_parser = subparsers.add_parser("check-data", help="Validate dataset structure and inputs")
# +    check_parser.add_argument("input_dir", help="Directory containing PLY files to validate")
# +    check_parser.add_argument("--output_dir", help="Optional output directory to check writability")
# +
# +    # To maintain backward compatibility, we can't easily use subparsers for the main run
# +    # if it doesn't have a keyword. However, we can handle it manually or by
# +    # checking if the first argument is a command.
# +
# +    # Re-adding the original arguments as a "run" command or handling them if no command is given
# +    # But clean CLI usually uses subcommands. Let's check for "check-data" specifically.
+ + if len(sys.argv) > 1 and sys.argv[1] == "check-data": + args = parser.parse_args() + results, success = validate_dataset(args.input_dir, args.output_dir) + print_validation_report(results, "MAIN PIPELINE DATA CHECK") + sys.exit(0 if success else 1) + + # If not check-data, use the original parser logic + parser = argparse.ArgumentParser( + description="Mayan Stele Fragment Reconstruction Pipeline" + ) + parser.add_argument( "input_dir", help="Directory containing PLY files with colored break surfaces"