From 9535bb24a797a1bff0830477a1ba0c4082dff014 Mon Sep 17 00:00:00 2001 From: vividf Date: Fri, 26 Sep 2025 16:54:38 +0900 Subject: [PATCH 01/19] feat: add onnx and tensorrt evaluation and int8 optimization Signed-off-by: vividf --- .../DockerfileOpt | 38 + .../configs/deploy/resnet18_5ch.py | 6 +- .../deploy/main.py | 113 ++- tools/calibration_classification/README.md | 72 +- .../evaluate_inference.py | 667 ++++++++++++++++++ .../visualize_lidar_camera_projection.py | 197 +++++- 6 files changed, 1033 insertions(+), 60 deletions(-) create mode 100644 projects/CalibrationStatusClassification/DockerfileOpt create mode 100644 tools/calibration_classification/evaluate_inference.py diff --git a/projects/CalibrationStatusClassification/DockerfileOpt b/projects/CalibrationStatusClassification/DockerfileOpt new file mode 100644 index 00000000..e0510174 --- /dev/null +++ b/projects/CalibrationStatusClassification/DockerfileOpt @@ -0,0 +1,38 @@ + +# Use an NVIDIA CUDA base image with Python +FROM nvidia/cuda:12.6.0-cudnn-devel-ubuntu22.04 + +# Prevents prompts during apt installs +ENV DEBIAN_FRONTEND=noninteractive \ + PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 + +# Update system and install basic dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + python3 python3-dev python3-pip \ + build-essential git curl ca-certificates \ + && rm -rf /var/lib/apt/lists/* + +# Upgrade pip +RUN python3 -m pip install --upgrade pip setuptools wheel + + +RUN pip3 install --no-cache-dir \ + "numpy<2" \ + "protobuf<5" \ + "onnx==1.16.2" \ + "onnxruntime==1.23.0" \ + "ml_dtypes==0.5.3" + +# Install nvidia-modelopt from NVIDIA PyPI +# RUN pip3 install --no-cache-dir --extra-index-url https://pypi.nvidia.com nvidia-modelopt +RUN pip3 install --no-cache-dir --extra-index-url https://pypi.nvidia.com \ + onnx-graphsurgeon \ + nvidia-modelopt==0.33.1 + + +# Set working directory +WORKDIR /workspace + +# Default command +CMD ["/bin/bash"] diff --git a/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py b/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py index 0318272b..a32b3823 100644 --- a/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py +++ b/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py @@ -2,7 +2,11 @@ backend_config = dict( type="tensorrt", - common_config=dict(max_workspace_size=1 << 30), + common_config=dict( + max_workspace_size=1 << 30, + fp16_mode=True, + strongly_typed=False, + ), model_inputs=[ dict( input_shapes=dict( diff --git a/projects/CalibrationStatusClassification/deploy/main.py b/projects/CalibrationStatusClassification/deploy/main.py index cef136d7..8a19b20a 100644 --- a/projects/CalibrationStatusClassification/deploy/main.py +++ b/projects/CalibrationStatusClassification/deploy/main.py @@ -179,6 +179,7 @@ def tensorrt_settings(self) -> Dict: "max_workspace_size": common_config.get("max_workspace_size", DEFAULT_WORKSPACE_SIZE), "fp16_mode": common_config.get("fp16_mode", False), "model_inputs": self.backend_config.get("model_inputs", []), + "strongly_typed": common_config.get("strongly_typed", False), } @@ -197,6 +198,11 @@ def parse_args() -> argparse.Namespace: parser.add_argument("--device", default="cpu", help="device for conversion") parser.add_argument("--log-level", default="INFO", choices=list(logging._nameToLevel.keys()), help="logging level") parser.add_argument("--verify", action="store_true", help="verify model outputs") + parser.add_argument("--onnx", 
action="store_true", help="export to ONNX format") + parser.add_argument("--tensorrt", action="store_true", help="export to TensorRT format") + parser.add_argument( + "--onnx-file", help="path to existing ONNX file (required when --tensorrt is specified without --onnx)" + ) return parser.parse_args() @@ -267,7 +273,17 @@ def export_to_tensorrt( builder = trt.Builder(TRT_LOGGER) # Create network and config - network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)) + strongly_typed = settings.get("strongly_typed", False) + + if strongly_typed: + flags = (1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)) | ( + 1 << int(trt.NetworkDefinitionCreationFlag.STRONGLY_TYPED) + ) + network = builder.create_network(flags) + logger.info("Using strongly typed TensorRT network creation") + else: + network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)) + logger.info("Using standard TensorRT network creation (EXPLICIT_BATCH only)") config_trt = builder.create_builder_config() config_trt.set_memory_pool_limit(pool=trt.MemoryPoolType.WORKSPACE, pool_size=settings["max_workspace_size"]) @@ -563,6 +579,18 @@ def main(): args = parse_args() logger = setup_logging(args.log_level) + # Check if at least one export format is specified + if not args.onnx and not args.tensorrt: + logger.error("Please specify at least one export format: --onnx or --tensorrt") + return + + # Check if TensorRT is requested but no ONNX file is provided and ONNX export is not requested + if args.tensorrt and not args.onnx and not args.onnx_file: + logger.error( + "TensorRT export requires either --onnx (to export ONNX first) or --onnx-file (to use existing ONNX file)" + ) + return + # Setup mmengine.mkdir_or_exist(osp.abspath(args.work_dir)) @@ -575,12 +603,35 @@ def main(): model_cfg = Config.fromfile(args.model_cfg) # Setup file paths - onnx_settings = config.onnx_settings - onnx_path = osp.join(args.work_dir, onnx_settings["save_file"]) - trt_path: Optional[str] = None - trt_file = onnx_settings["save_file"].replace(".onnx", ".engine") - trt_path = osp.join(args.work_dir, trt_file) - + onnx_path = None + trt_path = None + trt_file = None + + if args.onnx: + onnx_settings = config.onnx_settings + onnx_path = osp.join(args.work_dir, onnx_settings["save_file"]) + + if args.tensorrt: + if args.onnx_file: + # Use provided ONNX file path + onnx_path = args.onnx_file + if not osp.exists(onnx_path): + logger.error(f"Provided ONNX file does not exist: {onnx_path}") + return + logger.info(f"Using existing ONNX file: {onnx_path}") + elif args.onnx: + # ONNX will be exported first, use the path from config + onnx_settings = config.onnx_settings + onnx_path = osp.join(args.work_dir, onnx_settings["save_file"]) + else: + # This should not happen due to validation above, but just in case + logger.error("No ONNX file path available for TensorRT conversion") + return + + # Set TensorRT output path + onnx_settings = config.onnx_settings + trt_file = onnx_settings["save_file"].replace(".onnx", ".engine") + trt_path = osp.join(args.work_dir, trt_file) # Load model logger.info(f"Loading model from checkpoint: {args.checkpoint}") device = torch.device(args.device) @@ -596,22 +647,23 @@ def main(): ) # Export models - export_to_onnx(model, input_tensor_calibrated, onnx_path, config, logger) - - logger.info("Converting ONNX to TensorRT...") - - # Ensure CUDA device for TensorRT - if args.device == "cpu": - logger.warning("TensorRT requires CUDA device, switching to cuda") - 
device = torch.device("cuda") - input_tensor_calibrated = input_tensor_calibrated.to(device) - input_tensor_miscalibrated = input_tensor_miscalibrated.to(device) - - success = export_to_tensorrt(onnx_path, trt_path, input_tensor_calibrated, config, logger) - if success: - logger.info(f"TensorRT conversion successful: {trt_path}") - else: - logger.error("TensorRT conversion failed, keeping ONNX model") + if args.onnx: + export_to_onnx(model, input_tensor_calibrated, onnx_path, config, logger) + if args.tensorrt: + logger.info("Converting ONNX to TensorRT...") + + # Ensure CUDA device for TensorRT + if args.device == "cpu": + logger.warning("TensorRT requires CUDA device, switching to cuda") + device = torch.device("cuda") + input_tensor_calibrated = input_tensor_calibrated.to(device) + input_tensor_miscalibrated = input_tensor_miscalibrated.to(device) + + success = export_to_tensorrt(onnx_path, trt_path, input_tensor_calibrated, config, logger) + if success: + logger.info(f"TensorRT conversion successful: {trt_path}") + else: + logger.error("TensorRT conversion failed") # Run verification if requested if args.verify: @@ -619,10 +671,23 @@ def main(): "Running verification for miscalibrated and calibrated samples with an output array [SCORE_MISCALIBRATED, SCORE_CALIBRATED]..." ) input_tensors = {"0": input_tensor_miscalibrated, "1": input_tensor_calibrated} - run_verification(model, onnx_path, trt_path, input_tensors, logger) + # Only verify formats that were exported + onnx_path_for_verify = onnx_path if (args.onnx or (args.tensorrt and args.onnx_file)) else None + trt_path_for_verify = trt_path if args.tensorrt else None + + run_verification(model, onnx_path_for_verify, trt_path_for_verify, input_tensors, logger) logger.info("Deployment completed successfully!") + # Log what was exported + exported_formats = [] + if args.onnx: + exported_formats.append("ONNX") + if args.tensorrt: + exported_formats.append("TensorRT") + + logger.info(f"Exported formats: {', '.join(exported_formats)}") + # TODO: make deployment script inherit from awml base deploy script or use awml deploy script directly if __name__ == "__main__": diff --git a/tools/calibration_classification/README.md b/tools/calibration_classification/README.md index 33fffda8..11126fdf 100644 --- a/tools/calibration_classification/README.md +++ b/tools/calibration_classification/README.md @@ -223,7 +223,7 @@ tensorboard --logdir work_dirs --bind_all ``` ## 5. Analyze -### 5.1. Evaluation +### 5.1. Evaluation Pytorch - Evaluation @@ -234,3 +234,73 @@ python tools/calibration_classification/test.py {config_file} {checkpoint_file} ```sh python tools/calibration_classification/test.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py epoch_25.pth --out {output_file} ``` + + + +## 6. Deployment +This directory contains scripts for evaluating calibration classification models in different formats. 
+
+### 6.1 Convert to ONNX and TensorRT
+```sh
+python projects/CalibrationStatusClassification/deploy/main.py projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb8-25e_j6gen2.py checkpoint.pth --info_pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl --sample_idx 0 --device cuda:0 --work-dir /workspace/work_dirs/ --verify --onnx --tensorrt
+```
+
+### 6.1.1 Convert to ONNX
+```sh
+python projects/CalibrationStatusClassification/deploy/main.py projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb8-25e_j6gen2.py checkpoint.pth --info_pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl --sample_idx 0 --device cuda:0 --work-dir /workspace/work_dirs/ --verify --onnx
+```
+
+### 6.1.2 Convert to TensorRT
+```sh
+python projects/CalibrationStatusClassification/deploy/main.py projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb8-25e_j6gen2.py checkpoint.pth --info_pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl --sample_idx 0 --device cuda:0 --work-dir /workspace/work_dirs/ --verify --tensorrt --onnx-file model.onnx
+```
+
+### 6.2 Evaluate ONNX and TensorRT
+
+#### ONNX Model Evaluation
+```bash
+python tools/calibration_classification/evaluate_inference.py --onnx work_dirs/end2end.onnx --model-cfg projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb8-25e_j6gen2.py --info-pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl
+```
+
+#### TensorRT Model Evaluation
+```bash
+python tools/calibration_classification/evaluate_inference.py --tensorrt work_dirs/end2end.quant.engine --model-cfg projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb8-25e_j6gen2.py --info-pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl
+```
+
+### 6.3 INT8 Optimization
+
+Choose the number of samples used for calibration with your available memory in mind:
+each sample is 1860 x 2880 x 5 channels x 4 bytes (~107 MB), so 32 GB of RAM holds roughly 300 samples at most.
+
+Limit the calibration data accordingly with the `--indices` parameter:
+
+```sh
+python tools/calibration_classification/visualize_lidar_camera_projection.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb8-25e_j6gen2.py --info_pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_train.pkl --data_root data/t4dataset --output_dir /vis --save_npz --npz_output_path calibration_file.npz --indices 200
+```
+
+```sh
+DOCKER_BUILDKIT=1 docker build -t autoware-ml-calib-opt -f projects/CalibrationStatusClassification/DockerfileOpt .
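+# (The image built above bundles onnx, onnxruntime, onnx-graphsurgeon and nvidia-modelopt; the
+#  next command starts it with the repository and data directories mounted under /workspace.)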
+docker run -it --rm --gpus all --shm-size=32g --name awml-opt -p 6006:6006 -v $PWD/:/workspace -v $PWD/data:/workspace/data autoware-ml-calib-opt +# Access the optimization docker +python3 -m modelopt.onnx.quantization --onnx_path=end2end.onnx --quantize_mode=int8 --calibration_data_path=calibration_file.npz + +# After getting the end2en2.quant.onnx, you can evaluate with quant onnx or engine +``` + + + +#### Command Line Arguments + +- `--onnx`: Path to ONNX model file (mutually exclusive with `--tensorrt`) +- `--tensorrt`: Path to TensorRT engine file (mutually exclusive with `--onnx`) +- `--model-cfg`: Path to model config file (default: `projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb8-25e_j6gen2.py`) +- `--device`: Device to use for inference (`cpu` or `cuda`, default: `cpu`) +- `--log-level`: Logging level (`DEBUG`, `INFO`, `WARNING`, `ERROR`, default: `INFO`) diff --git a/tools/calibration_classification/evaluate_inference.py b/tools/calibration_classification/evaluate_inference.py new file mode 100644 index 00000000..b367e0d6 --- /dev/null +++ b/tools/calibration_classification/evaluate_inference.py @@ -0,0 +1,667 @@ +#!/usr/bin/env python3 +""" +Integrated model evaluation script for calibration classification. +Supports both ONNX and TensorRT inference modes. +Simplified version that directly uses info.pkl files instead of dataset. +""" + +import argparse +import gc +import logging +import os +import pickle +import signal +import sys +import time +from typing import Any, Dict, Optional, Tuple + +import numpy as np +import torch +import torch.nn.functional as F +from mmengine.config import Config + +# Now we can import the class directly +from autoware_ml.calibration_classification.datasets.transforms.calibration_classification_transform import ( + CalibrationClassificationTransform, +) + +# TensorRT imports (only if needed) - match original test_tensorrt.py +try: + import pycuda.autoinit + import pycuda.driver as cuda + import tensorrt as trt + + TENSORRT_AVAILABLE = True +except ImportError: + TENSORRT_AVAILABLE = False + +# Constants +LABELS = {"0": "miscalibrated", "1": "calibrated"} + + +def signal_handler(signum, frame): + """Handle segmentation faults and other signals gracefully.""" + print(f"\nReceived signal {signum}. Cleaning up...") + try: + if torch.cuda.is_available(): + torch.cuda.empty_cache() + gc.collect() + print("Cleanup completed.") + except: + pass + sys.exit(1) + + +# Register signal handlers +signal.signal(signal.SIGSEGV, signal_handler) +signal.signal(signal.SIGINT, signal_handler) + + +def setup_logging(level: str = "INFO") -> logging.Logger: + """Setup logging configuration.""" + logging.basicConfig(level=getattr(logging, level.upper()), format="%(asctime)s - %(levelname)s - %(message)s") + return logging.getLogger(__name__) + + +def load_info_pkl_data(info_pkl_path: str, sample_idx: int = 0) -> Dict[str, Any]: + """ + Load a single sample from info.pkl file. 
+ Args: + info_pkl_path: Path to the info.pkl file + sample_idx: Index of the sample to load (default: 0) + Returns: + Sample dictionary with the required structure for CalibrationClassificationTransform + Raises: + FileNotFoundError: If info.pkl file doesn't exist + ValueError: If data format is unexpected or sample index is invalid + """ + if not os.path.exists(info_pkl_path): + raise FileNotFoundError(f"Info.pkl file not found: {info_pkl_path}") + + try: + with open(info_pkl_path, "rb") as f: + info_data = pickle.load(f) + except Exception as e: + raise ValueError(f"Failed to load info.pkl file: {e}") + + # Extract samples from info.pkl + if isinstance(info_data, dict): + if "data_list" in info_data: + samples_list = info_data["data_list"] + else: + raise ValueError(f"Expected 'data_list' key in info_data, found keys: {list(info_data.keys())}") + else: + raise ValueError(f"Expected dict format, got {type(info_data)}") + + if not samples_list: + raise ValueError("No samples found in info.pkl") + + if sample_idx >= len(samples_list): + raise ValueError(f"Sample index {sample_idx} out of range (0-{len(samples_list)-1})") + + sample = samples_list[sample_idx] + + # Validate sample structure + required_keys = ["image", "lidar_points"] + if not all(key in sample for key in required_keys): + raise ValueError(f"Sample {sample_idx} has invalid structure. Required keys: {required_keys}") + + return sample + + +def load_sample_data_from_info_pkl( + info_pkl_path: str, + model_cfg: Config, + sample_idx: int = 0, + force_generate_miscalibration: bool = False, + device: str = "cpu", + transform_test: Optional[CalibrationClassificationTransform] = None, +) -> torch.Tensor: + """ + Load and preprocess sample data from info.pkl using CalibrationClassificationTransform. 
+ Args: + info_pkl_path: Path to the info.pkl file + model_cfg: Model configuration containing data_root setting + sample_idx: Index of the sample to load (default: 0) + force_generate_miscalibration: Whether to force generation of miscalibration + device: Device to load tensor on + transform_test: Pre-created test transform instance (optional) + Returns: + Preprocessed tensor ready for model inference + """ + # Load sample data from info.pkl + sample_data = load_info_pkl_data(info_pkl_path, sample_idx) + + # Get data_root from model config + data_root = model_cfg.get("data_root", None) + if data_root is None: + raise ValueError("data_root not found in model configuration") + + # Use pre-created transform or create new one (always use test mode) + if transform_test is None: + transform = CalibrationClassificationTransform( + transform_config=model_cfg.get("transform_config", None), + mode="test", + data_root=data_root, + projection_vis_dir=None, + results_vis_dir=None, + enable_augmentation=False, + ) + else: + transform = transform_test + + # Apply transform with miscalibration control + results = transform.transform(sample_data, force_generate_miscalibration=force_generate_miscalibration) + input_data_processed = results["fused_img"] # (H, W, 5) + + # Convert to tensor + input_tensor = torch.from_numpy(input_data_processed).permute(2, 0, 1).float() # (5, H, W) + input_tensor = input_tensor.unsqueeze(0).to(device) # (1, 5, H, W) + + return input_tensor + + +def create_test_transform(model_cfg: Config) -> CalibrationClassificationTransform: + """Create test transform instance once for reuse.""" + data_root = model_cfg.get("data_root", None) + if data_root is None: + raise ValueError("data_root not found in model configuration") + + transform_test = CalibrationClassificationTransform( + transform_config=model_cfg.get("transform_config", None), + mode="test", + data_root=data_root, + projection_vis_dir=None, + results_vis_dir=None, + enable_augmentation=False, + ) + + return transform_test + + +def run_onnx_inference( + onnx_path: str, + input_tensor: torch.Tensor, + logger: logging.Logger, +) -> Tuple[np.ndarray, float]: + """Run ONNX inference directly and return output and latency.""" + try: + import onnxruntime as ort + except ImportError: + raise ImportError( + "onnxruntime is required for ONNX inference. 
Please install it with: pip install onnxruntime" + ) + + # Clear GPU cache if available + if torch.cuda.is_available(): + torch.cuda.empty_cache() + + # Convert input tensor to float32 + input_tensor = input_tensor.float() + + # Debug: Print input tensor info before preprocessing + logger.debug( + f"Input tensor before preprocessing - Shape: {input_tensor.shape}, Dtype: {input_tensor.dtype}, Min: {input_tensor.min():.4f}, Max: {input_tensor.max():.4f}" + ) + + # Add batch dimension if needed (ONNX expects 4D input: batch, channels, height, width) + if input_tensor.dim() == 3: + input_tensor = input_tensor.unsqueeze(0) # Add batch dimension + logger.debug(f"Added batch dimension - Shape: {input_tensor.shape}") + + # ONNX inference with timing + providers = ["CPUExecutionProvider"] + ort_session = ort.InferenceSession(onnx_path, providers=providers) + + # Debug: Print ONNX model input info + input_name = ort_session.get_inputs()[0].name + input_shape = ort_session.get_inputs()[0].shape + input_type = ort_session.get_inputs()[0].type + logger.debug(f"ONNX model expects - Input name: {input_name}, Shape: {input_shape}, Type: {input_type}") + + onnx_input = {input_name: input_tensor.cpu().numpy().astype(np.float32)} + + start_time = time.perf_counter() + onnx_output = ort_session.run(None, onnx_input)[0] + end_time = time.perf_counter() + onnx_latency = (end_time - start_time) * 1000 + + logger.info(f"ONNX inference latency: {onnx_latency:.2f} ms") + + # Ensure onnx_output is numpy array + if not isinstance(onnx_output, np.ndarray): + logger.error(f"Unexpected ONNX output type: {type(onnx_output)}") + return None, 0.0 + + return onnx_output, onnx_latency + + +def run_tensorrt_inference(engine, input_tensor: torch.Tensor, logger: logging.Logger) -> Tuple[np.ndarray, float]: + """Run TensorRT inference and return output with timing.""" + if not TENSORRT_AVAILABLE: + raise ImportError("TensorRT and PyCUDA are required for TensorRT inference. 
Please install them.") + + context = None + stream = None + start = None + end = None + d_input = None + d_output = None + + try: + context = engine.create_execution_context() + stream = cuda.Stream() + start = cuda.Event() + end = cuda.Event() + + # Get tensor names and shapes + input_name, output_name = None, None + for i in range(engine.num_io_tensors): + tensor_name = engine.get_tensor_name(i) + if engine.get_tensor_mode(tensor_name) == trt.TensorIOMode.INPUT: + input_name = tensor_name + elif engine.get_tensor_mode(tensor_name) == trt.TensorIOMode.OUTPUT: + output_name = tensor_name + + if input_name is None or output_name is None: + raise RuntimeError("Could not find input/output tensor names") + + # Prepare arrays + input_np = input_tensor.numpy().astype(np.float32) + if not input_np.flags["C_CONTIGUOUS"]: + input_np = np.ascontiguousarray(input_np) + + # Validate input shape + expected_shape = engine.get_tensor_shape(input_name) + if input_np.shape != tuple(expected_shape): + logger.warning(f"Input shape mismatch: expected {expected_shape}, got {input_np.shape}") + # Try to reshape if possible + if len(input_np.shape) == len(expected_shape): + try: + input_np = input_np.reshape(expected_shape) + logger.info(f"Reshaped input to {input_np.shape}") + except Exception as e: + raise RuntimeError(f"Cannot reshape input from {input_np.shape} to {expected_shape}: {e}") + elif len(input_np.shape) == len(expected_shape) - 1 and expected_shape[0] == 1: + # Add batch dimension if missing + try: + input_np = input_np.reshape(expected_shape) + logger.info(f"Added batch dimension to input: {input_np.shape}") + except Exception as e: + raise RuntimeError( + f"Cannot add batch dimension to input from {input_np.shape} to {expected_shape}: {e}" + ) + else: + raise RuntimeError( + f"Input shape mismatch: expected {expected_shape}, got {input_np.shape}. Please ensure input has correct batch dimension." + ) + + context.set_input_shape(input_name, input_np.shape) + output_shape = context.get_tensor_shape(output_name) + output_np = np.empty(output_shape, dtype=np.float32) + if not output_np.flags["C_CONTIGUOUS"]: + output_np = np.ascontiguousarray(output_np) + + # Allocate GPU memory + d_input = cuda.mem_alloc(input_np.nbytes) + d_output = cuda.mem_alloc(output_np.nbytes) + + # Set tensor addresses + context.set_tensor_address(input_name, int(d_input)) + context.set_tensor_address(output_name, int(d_output)) + + # Run inference with timing + cuda.memcpy_htod_async(d_input, input_np, stream) + start.record(stream) + context.execute_async_v3(stream_handle=stream.handle) + end.record(stream) + cuda.memcpy_dtoh_async(output_np, d_output, stream) + stream.synchronize() + + latency = end.time_since(start) + return output_np, latency + + except Exception as e: + logger.error(f"TensorRT inference failed: {e}") + raise + finally: + # Cleanup with better error handling + try: + if d_input is not None: + d_input.free() + except Exception as e: + logger.warning(f"Failed to free input memory: {e}") + + try: + if d_output is not None: + d_output.free() + except Exception as e: + logger.warning(f"Failed to free output memory: {e}") + + # Note: Don't try to free stream, start, end, or context as they are managed by TensorRT + # and may cause issues if freed manually + + +def load_tensorrt_engine(engine_path: str, logger: logging.Logger): + """Load TensorRT engine from file.""" + if not TENSORRT_AVAILABLE: + raise ImportError("TensorRT is required for TensorRT inference. 
Please install it.") + + trt_logger = trt.Logger(trt.Logger.WARNING) + + try: + with open(engine_path, "rb") as f: + engine_bytes = f.read() + engine = trt.Runtime(trt_logger).deserialize_cuda_engine(engine_bytes) + if engine is None: + raise RuntimeError("Failed to deserialize TensorRT engine") + return engine + except Exception as e: + logger.error(f"Error loading TensorRT engine: {e}") + logger.error("This might be due to TensorRT version incompatibility.") + logger.error("Please rebuild the TensorRT engine with the current TensorRT version.") + raise + + +def evaluate_model( + model_path: str, + model_type: str, + model_cfg_path: str, + info_pkl_path: str, + logger: logging.Logger, + device: str = "cpu", + num_samples: int = 10, + verbose: bool = False, +) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: + """Evaluate model using info.pkl data and return predictions, ground truth, probabilities, and latencies.""" + + # Load model config + model_cfg = Config.fromfile(model_cfg_path) + + # Load info.pkl data + try: + with open(info_pkl_path, "rb") as f: + info_data = pickle.load(f) + + if "data_list" not in info_data: + raise ValueError("Expected 'data_list' key in info.pkl") + + samples_list = info_data["data_list"] + logger.info(f"Loaded {len(samples_list)} samples from info.pkl") + + # Limit number of samples for evaluation + num_samples = min(num_samples, len(samples_list)) + logger.info(f"Evaluating {num_samples} samples") + + except Exception as e: + logger.error(f"Failed to load info.pkl: {e}") + raise + + # Create transform instances once + transform_test = create_test_transform(model_cfg) + + # Initialize inference engine + if model_type == "onnx": + logger.info(f"Using ONNX model: {model_path}") + inference_func = lambda input_tensor: run_onnx_inference(model_path, input_tensor, logger) + elif model_type == "tensorrt": + logger.info(f"Using TensorRT model: {model_path}") + engine = load_tensorrt_engine(model_path, logger) + inference_func = lambda input_tensor: run_tensorrt_inference(engine, input_tensor, logger) + else: + raise ValueError(f"Unsupported model type: {model_type}") + + # Lists to store results + all_predictions = [] + all_ground_truth = [] + all_probabilities = [] + all_latencies = [] + + # Evaluate samples + for sample_idx in range(num_samples): + if sample_idx % 5 == 0: + logger.info(f"Processing sample {sample_idx + 1}/{num_samples}") + + try: + # Load sample data directly from info.pkl + input_tensor_calibrated = load_sample_data_from_info_pkl( + info_pkl_path, + model_cfg, + sample_idx, + force_generate_miscalibration=False, + device=device, + transform_test=transform_test, + ) + input_tensor_miscalibrated = load_sample_data_from_info_pkl( + info_pkl_path, + model_cfg, + sample_idx, + force_generate_miscalibration=True, + device=device, + transform_test=transform_test, + ) + + # Test both calibrated and miscalibrated samples + test_samples = [ + (input_tensor_calibrated, 1), # calibrated sample + (input_tensor_miscalibrated, 0), # miscalibrated sample + ] + + for input_tensor, gt_label in test_samples: + # Debug: Print input tensor info only in verbose mode + if verbose: + logger.info(f"Sample {sample_idx + 1} (GT={gt_label}) input tensor:") + logger.info(f" Dtype: {input_tensor.dtype}") + + # Run inference + output_np, latency = inference_func(input_tensor) + + if output_np is None: + logger.error(f"Failed to get output for sample {sample_idx}") + continue + + # Convert logits to probabilities + logits = torch.from_numpy(output_np) + probabilities 
= F.softmax(logits, dim=-1) + predicted_class = torch.argmax(probabilities, dim=-1).item() + confidence = probabilities.max().item() + + # Store results + all_predictions.append(predicted_class) + all_ground_truth.append(gt_label) + all_probabilities.append(probabilities.cpu().numpy()) + all_latencies.append(latency) + + # Print sample results only in verbose mode + if verbose: + logger.info( + f"Sample {sample_idx + 1} (GT={gt_label}): Pred={predicted_class}, Confidence={confidence:.4f}, Latency={latency:.2f}ms" + ) + + # Clear GPU memory periodically for TensorRT + if model_type == "tensorrt" and sample_idx % 10 == 0: + if torch.cuda.is_available(): + torch.cuda.empty_cache() + gc.collect() + + except Exception as e: + logger.error(f"Error processing sample {sample_idx}: {e}") + continue + + return ( + np.array(all_predictions), + np.array(all_ground_truth), + np.array(all_probabilities), + np.array(all_latencies), + ) + + +def print_results( + all_predictions: np.ndarray, + all_ground_truth: np.ndarray, + all_probabilities: np.ndarray, + all_latencies: np.ndarray, + model_type: str, + logger: logging.Logger, +): + """Print evaluation results.""" + if len(all_predictions) == 0: + logger.error("No samples were processed successfully.") + return + + correct_predictions = (all_predictions == all_ground_truth).sum() + total_samples = len(all_predictions) + accuracy = correct_predictions / total_samples + avg_latency = np.mean(all_latencies) + + # Print results + logger.info(f"\n{'='*50}") + logger.info(f"{model_type.upper()} Model Evaluation Results") + logger.info(f"{'='*50}") + logger.info(f"Total samples: {total_samples}") + logger.info(f"Correct predictions: {correct_predictions}") + logger.info(f"Accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)") + logger.info(f"Average latency: {avg_latency:.2f} ms") + + # Calculate per-class accuracy + unique_classes = np.unique(all_ground_truth) + logger.info(f"\nPer-class accuracy:") + for cls in unique_classes: + cls_mask = all_ground_truth == cls + cls_correct = (all_predictions[cls_mask] == all_ground_truth[cls_mask]).sum() + cls_total = cls_mask.sum() + cls_accuracy = cls_correct / cls_total if cls_total > 0 else 0 + logger.info( + f" Class {cls} ({LABELS[str(cls)]}): {cls_correct}/{cls_total} = {cls_accuracy:.4f} ({cls_accuracy*100:.2f}%)" + ) + + # Calculate average confidence + avg_confidence = np.mean([prob.max() for prob in all_probabilities]) + logger.info(f"\nAverage confidence: {avg_confidence:.4f}") + + # Calculate latency statistics + min_latency = np.min(all_latencies) + max_latency = np.max(all_latencies) + std_latency = np.std(all_latencies) + + logger.info(f"\nLatency Statistics:") + logger.info(f" Average latency: {avg_latency:.2f} ms") + logger.info(f" Min latency: {min_latency:.2f} ms") + logger.info(f" Max latency: {max_latency:.2f} ms") + logger.info(f" Std latency: {std_latency:.2f} ms") + + # Show confusion matrix + logger.info(f"\nConfusion Matrix:") + logger.info(f"Predicted ->") + logger.info(f"Actual 0 1") + for true_cls in unique_classes: + row = [] + for pred_cls in unique_classes: + count = ((all_ground_truth == true_cls) & (all_predictions == pred_cls)).sum() + row.append(f"{count:4d}") + logger.info(f" {true_cls} {' '.join(row)}") + + logger.info(f"\n{model_type.upper()} model evaluation completed successfully!") + logger.info(f"Model accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)") + logger.info(f"Average latency: {avg_latency:.2f} ms") + + +def main(): + """Main function.""" + parser = 
argparse.ArgumentParser(description="Evaluate calibration classification model using info.pkl") + parser.add_argument("--onnx", type=str, help="Path to ONNX model file") + parser.add_argument("--tensorrt", type=str, help="Path to TensorRT engine file") + parser.add_argument( + "--model-cfg", + type=str, + default="projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb8-25e_j6gen2.py", + help="Path to model config file", + ) + parser.add_argument( + "--info-pkl", + type=str, + required=True, + help="Path to info.pkl file containing calibration data", + ) + parser.add_argument( + "--num-samples", + type=int, + default=10, + help="Number of samples to evaluate (default: 10)", + ) + parser.add_argument( + "--device", type=str, default="cpu", choices=["cpu", "cuda"], help="Device to use for inference" + ) + parser.add_argument( + "--log-level", type=str, default="INFO", choices=["DEBUG", "INFO", "WARNING", "ERROR"], help="Logging level" + ) + parser.add_argument( + "--verbose", action="store_true", help="Enable verbose logging for detailed sample information" + ) + + args = parser.parse_args() + + # Validate arguments + if not args.onnx and not args.tensorrt: + parser.error("Either --onnx or --tensorrt must be specified") + + if args.onnx and args.tensorrt: + parser.error("Only one of --onnx or --tensorrt can be specified") + + # Setup logging + logger = setup_logging(args.log_level) + + try: + # Determine model type and path + if args.onnx: + model_type = "onnx" + model_path = args.onnx + else: + model_type = "tensorrt" + model_path = args.tensorrt + + # Check if files exist + if not os.path.exists(model_path): + raise FileNotFoundError(f"Model file not found: {model_path}") + if not os.path.exists(args.model_cfg): + raise FileNotFoundError(f"Model config file not found: {args.model_cfg}") + if not os.path.exists(args.info_pkl): + raise FileNotFoundError(f"Info.pkl file not found: {args.info_pkl}") + + logger.info(f"Starting {model_type.upper()} model evaluation...") + logger.info(f"Model path: {model_path}") + logger.info(f"Model config: {args.model_cfg}") + logger.info(f"Info.pkl: {args.info_pkl}") + logger.info(f"Device: {args.device}") + logger.info(f"Number of samples: {args.num_samples}") + logger.info(f"Verbose logging: {args.verbose}") + + # Evaluate model + all_predictions, all_ground_truth, all_probabilities, all_latencies = evaluate_model( + model_path, model_type, args.model_cfg, args.info_pkl, logger, args.device, args.num_samples, args.verbose + ) + + # Print results + print_results(all_predictions, all_ground_truth, all_probabilities, all_latencies, model_type, logger) + + # Cleanup + if torch.cuda.is_available(): + torch.cuda.empty_cache() + gc.collect() + + logger.info("Script completed successfully!") + + except Exception as e: + logger.error(f"Error in main execution: {e}") + # Cleanup on error + try: + if torch.cuda.is_available(): + torch.cuda.empty_cache() + gc.collect() + except: + pass + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/tools/calibration_classification/visualize_lidar_camera_projection.py b/tools/calibration_classification/visualize_lidar_camera_projection.py index d0739ff8..78e692df 100644 --- a/tools/calibration_classification/visualize_lidar_camera_projection.py +++ b/tools/calibration_classification/visualize_lidar_camera_projection.py @@ -6,6 +6,7 @@ import traceback from typing import Any, Dict, List, Optional, Union +import numpy as np from mmengine.config import Config from mmengine.logging import MMLogger @@ 
-14,22 +15,24 @@ ) -class CalibrationVisualizer: +class CalibrationToolkit: """ - A comprehensive tool for visualizing LiDAR-camera calibration data. + A comprehensive tool for processing LiDAR-camera calibration data. This class provides functionality to load calibration data from info.pkl files - and generate visualizations using the CalibrationClassificationTransform. + and optionally generate visualizations or save results as NPZ files. Attributes: transform: The calibration classification transform instance data_root: Root directory for data files - output_dir: Directory for saving visualizations + output_dir: Directory for saving visualizations (only used if visualize=True) logger: MMLogger instance for logging + collected_results: List to store processed results for NPZ saving """ def __init__(self, model_cfg: Config, data_root: Optional[str] = None, output_dir: Optional[str] = None): """ - Initialize the CalibrationVisualizer. + Initialize the CalibrationToolkit. Args: + model_cfg: Model configuration data_root: Root directory for data files. If None, absolute paths are used. output_dir: Directory for saving visualizations. If None, no visualizations are saved. """ @@ -37,7 +40,8 @@ def __init__(self, model_cfg: Config, data_root: Optional[str] = None, output_di self.data_root = data_root self.output_dir = output_dir self.transform = None - self.logger = MMLogger.get_instance(name="calibration_visualizer") + self.logger = MMLogger.get_instance(name="calibration_toolkit") + self.collected_results = [] self._initialize_transform() def _initialize_transform(self) -> None: @@ -46,12 +50,15 @@ def _initialize_transform(self) -> None: if transform_config is None: raise ValueError("transform_config not found in model configuration") + # Only set projection_vis_dir if output_dir is provided (for visualization) + projection_vis_dir = self.output_dir if self.output_dir else None + self.transform = CalibrationClassificationTransform( transform_config=transform_config, mode="test", undistort=True, data_root=self.data_root, - projection_vis_dir=self.output_dir, + projection_vis_dir=projection_vis_dir, results_vis_dir=None, enable_augmentation=False, ) @@ -110,12 +117,15 @@ def _validate_sample_structure(self, sample: Dict[str, Any]) -> bool: required_keys = ["image", "lidar_points"] return all(key in sample for key in required_keys) - def process_single_sample(self, sample: Dict[str, Any], sample_idx: int) -> Optional[Dict[str, Any]]: + def process_single_sample( + self, sample: Dict[str, Any], sample_idx: int, save_npz: bool = False + ) -> Optional[Dict[str, Any]]: """ Process a single sample using the transform. Args: sample: Sample dictionary to process. sample_idx: Index of the sample for logging purposes. + save_npz: Whether to collect result for NPZ saving. Returns: Transformed sample data if successful, None otherwise. 
""" @@ -125,6 +135,11 @@ def process_single_sample(self, sample: Dict[str, Any], sample_idx: int) -> Opti return None result = self.transform(sample) + + # Store the result for NPZ saving only if requested + if save_npz: + self.collected_results.append(result["fused_img"]) + self.logger.info(f"Successfully processed sample {sample_idx}") return result @@ -133,12 +148,46 @@ def process_single_sample(self, sample: Dict[str, Any], sample_idx: int) -> Opti self.logger.debug(traceback.format_exc()) return None - def visualize_samples(self, info_pkl_path: str, indices: Optional[List[int]] = None) -> None: + def save_npz_file(self, output_path: str) -> None: """ - Visualize multiple samples from info.pkl file. + Save all collected results as an NPZ file with the correct structure. + Args: + output_path: Path where to save the NPZ file. + """ + if not self.collected_results: + self.logger.warning("No results collected to save as NPZ") + return + + try: + # Convert list of arrays to a single array with shape (number_of_samples, 5, 1860, 2880) + # Each result['fused_img'] has shape (1860, 2880, 5), so we need to transpose + input_array = np.array([result.transpose(2, 0, 1) for result in self.collected_results], dtype=np.float32) + + # Save as NPZ file + np.savez(output_path, input=input_array) + + self.logger.info(f"Saved NPZ file with shape {input_array.shape} to {output_path}") + + except Exception as e: + self.logger.error(f"Failed to save NPZ file: {e}") + self.logger.debug(traceback.format_exc()) + + def process_samples( + self, + info_pkl_path: str, + indices: Optional[List[int]] = None, + visualize: bool = False, + save_npz: bool = False, + npz_output_path: Optional[str] = None, + ) -> None: + """ + Process multiple samples from info.pkl file. Args: info_pkl_path: Path to the info.pkl file. indices: Optional list of sample indices to process. If None, all samples are processed. + visualize: Whether to generate visualizations (requires output_dir to be set). + save_npz: Whether to collect results for NPZ saving. + npz_output_path: Path for saving NPZ file (only used if save_npz=True). 
""" try: samples_list = self.load_info_pkl(info_pkl_path) @@ -147,6 +196,9 @@ def visualize_samples(self, info_pkl_path: str, indices: Optional[List[int]] = N self.logger.warning("No samples found in info.pkl") return + # Clear previous results + self.collected_results = [] + # Determine which samples to process if indices is not None: samples_to_process = [samples_list[i] for i in indices if i < len(samples_list)] @@ -154,23 +206,41 @@ def visualize_samples(self, info_pkl_path: str, indices: Optional[List[int]] = N samples_to_process = samples_list self.logger.info(f"Processing {len(samples_to_process)} samples") + if visualize: + self.logger.info("Visualization enabled") + if save_npz: + self.logger.info("NPZ saving enabled") # Process each sample for i, sample in enumerate(samples_to_process): - self.process_single_sample(sample, i + 1) + self.process_single_sample(sample, i + 1, save_npz=save_npz) + + # Save NPZ file if requested + if save_npz and npz_output_path: + self.save_npz_file(npz_output_path) self.logger.info("Finished processing all samples") except Exception as e: - self.logger.error(f"Failed to visualize samples: {e}") + self.logger.error(f"Failed to process samples: {e}") self.logger.debug(traceback.format_exc()) - def visualize_single_sample(self, info_pkl_path: str, sample_idx: int) -> Optional[Dict[str, Any]]: + def process_single_sample_from_file( + self, + info_pkl_path: str, + sample_idx: int, + visualize: bool = False, + save_npz: bool = False, + npz_output_path: Optional[str] = None, + ) -> Optional[Dict[str, Any]]: """ - Visualize a single sample from info.pkl file. + Process a single sample from info.pkl file. Args: info_pkl_path: Path to the info.pkl file. sample_idx: Index of the sample to process (0-based). + visualize: Whether to generate visualizations (requires output_dir to be set). + save_npz: Whether to collect results for NPZ saving. + npz_output_path: Path for saving NPZ file (only used if save_npz=True). Returns: Transformed sample data if successful, None otherwise. 
""" @@ -182,17 +252,27 @@ def visualize_single_sample(self, info_pkl_path: str, sample_idx: int) -> Option return None self.logger.info(f"Processing sample {sample_idx} from {len(samples_list)} total samples") + if visualize: + self.logger.info("Visualization enabled") + if save_npz: + self.logger.info("NPZ saving enabled") + + # Clear previous results for single sample processing + self.collected_results = [] sample = samples_list[sample_idx] - result = self.process_single_sample(sample, sample_idx) + result = self.process_single_sample(sample, sample_idx, save_npz=save_npz) - if result and self.output_dir: + if result and visualize and self.output_dir: self.logger.info(f"Visualizations saved to directory: {self.output_dir}") + # Save NPZ file if requested + if save_npz and npz_output_path and result: + self.save_npz_file(npz_output_path) return result except Exception as e: - self.logger.error(f"Failed to visualize single sample: {e}") + self.logger.error(f"Failed to process single sample: {e}") self.logger.debug(traceback.format_exc()) return None @@ -204,25 +284,42 @@ def create_argument_parser() -> argparse.ArgumentParser: """ examples = """ Examples: - # Process all samples - python visualize_calibration_and_image.py --info_pkl data/info.pkl --data_root data/ --output_dir /vis - # Process specific sample - python visualize_calibration_and_image.py --info_pkl data/info.pkl --data_root data/ --output_dir /vis --sample_idx 0 - # Process specific indices - python visualize_calibration_and_image.py --info_pkl data/info.pkl --data_root data/ --output_dir /vis --indices 0 1 2 + # Process all samples without visualization or NPZ saving + python toolkit.py model_config.py --info_pkl data/info.pkl --data_root data/ + # Process all samples with visualization + python toolkit.py model_config.py --info_pkl data/info.pkl --data_root data/ --output_dir /vis --visualize + # Process all samples and save as NPZ + python toolkit.py model_config.py --info_pkl data/info.pkl --data_root data/ --npz_output_path results.npz --save_npz + # Process all samples with both visualization and NPZ saving + python toolkit.py model_config.py --info_pkl data/info.pkl --data_root data/ --output_dir /vis --visualize --npz_output_path results.npz --save_npz + # Process specific sample with visualization + python toolkit.py model_config.py --info_pkl data/info.pkl --data_root data/ --output_dir /vis --visualize --sample_idx 0 + # Process specific indices with NPZ saving + python toolkit.py model_config.py --info_pkl data/info.pkl --data_root data/ --save_npz --npz_output_path results.npz --indices 0 1 2 + # Process first 5 samples (indices 0, 1, 2, 3, 4) with both features + python toolkit.py model_config.py --info_pkl data/info.pkl --data_root data/ --output_dir /vis --visualize --save_npz --npz_output_path results.npz --indices 5 """ parser = argparse.ArgumentParser( - description="Visualize LiDAR points projected on camera images using info.pkl", + description="Process LiDAR-camera calibration data with optional visualization and NPZ saving", formatter_class=argparse.RawDescriptionHelpFormatter, epilog=examples, ) + parser.add_argument("model_cfg", help="model config path") parser.add_argument("--info_pkl", required=True, help="Path to info.pkl file containing calibration data") - parser.add_argument("--output_dir", help="Output directory for saving visualizations") + parser.add_argument("--output_dir", help="Output directory for saving visualizations (only used with --visualize)") parser.add_argument("--data_root", 
help="Root directory for data files (images, point clouds, etc.)") parser.add_argument("--sample_idx", type=int, help="Specific sample index to process (0-based)") - parser.add_argument("--indices", nargs="+", type=int, help="Specific sample indices to process (0-based)") + parser.add_argument( + "--indices", + nargs="+", + type=int, + help="Specific sample indices to process (0-based), or a single number N to process indices 0 to N-1", + ) + parser.add_argument("--visualize", action="store_true", help="Enable visualization (requires --output_dir)") + parser.add_argument("--save_npz", action="store_true", help="Enable NPZ saving (requires --npz_output_path)") + parser.add_argument("--npz_output_path", help="Path for saving NPZ file (only used with --save_npz)") parser.add_argument( "--show_point_details", action="store_true", help="Show detailed point cloud field information" ) @@ -232,26 +329,58 @@ def create_argument_parser() -> argparse.ArgumentParser: def main() -> None: """ - Main entry point for the calibration visualization script. - Parses command line arguments and runs the appropriate visualization mode. - Supports both single sample and batch processing modes. + Main entry point for the calibration toolkit script. + Parses command line arguments and runs the appropriate processing mode. + Supports both single sample and batch processing modes with optional features. """ parser = create_argument_parser() args = parser.parse_args() + # Validate argument combinations + if args.visualize and not args.output_dir: + parser.error("--visualize requires --output_dir to be specified") + + if args.save_npz and not args.npz_output_path: + parser.error("--save_npz requires --npz_output_path to be specified") + # Load model configuration model_cfg = Config.fromfile(args.model_cfg) - # Initialize visualizer - visualizer = CalibrationVisualizer(model_cfg=model_cfg, data_root=args.data_root, output_dir=args.output_dir) + # Initialize toolkit + toolkit = CalibrationToolkit(model_cfg=model_cfg, data_root=args.data_root, output_dir=args.output_dir) + + # Process indices argument + processed_indices = None + if args.indices is not None: + if len(args.indices) == 1: + # If only one number provided, treat it as range 0 to N-1 + n = args.indices[0] + processed_indices = list(range(n)) + toolkit.logger.info(f"Processing indices 0 to {n-1} (total: {n} samples)") + else: + # If multiple numbers provided, use them as specific indices + processed_indices = args.indices + toolkit.logger.info(f"Processing specific indices: {processed_indices}") - # Run appropriate visualization mode + # Run appropriate processing mode if args.sample_idx is not None: # Process single sample - visualizer.visualize_single_sample(args.info_pkl, args.sample_idx) + toolkit.process_single_sample_from_file( + args.info_pkl, + args.sample_idx, + visualize=args.visualize, + save_npz=args.save_npz, + npz_output_path=args.npz_output_path, + ) else: # Process all samples or specific indices - visualizer.visualize_samples(args.info_pkl, args.indices) + toolkit.process_samples( + args.info_pkl, + processed_indices, + visualize=args.visualize, + save_npz=args.save_npz, + npz_output_path=args.npz_output_path, + ) if __name__ == "__main__": From de3400aabb99b3c9898b81cace6e08c6c4f73629 Mon Sep 17 00:00:00 2001 From: vividf Date: Fri, 26 Sep 2025 17:08:38 +0900 Subject: [PATCH 02/19] chore: remove force miscalibration Signed-off-by: vividf --- tools/calibration_classification/evaluate_inference.py | 6 +----- 1 file changed, 1 insertion(+), 5 
deletions(-) diff --git a/tools/calibration_classification/evaluate_inference.py b/tools/calibration_classification/evaluate_inference.py index b367e0d6..95f7fd1a 100644 --- a/tools/calibration_classification/evaluate_inference.py +++ b/tools/calibration_classification/evaluate_inference.py @@ -113,7 +113,6 @@ def load_sample_data_from_info_pkl( info_pkl_path: str, model_cfg: Config, sample_idx: int = 0, - force_generate_miscalibration: bool = False, device: str = "cpu", transform_test: Optional[CalibrationClassificationTransform] = None, ) -> torch.Tensor: @@ -123,7 +122,6 @@ def load_sample_data_from_info_pkl( info_pkl_path: Path to the info.pkl file model_cfg: Model configuration containing data_root setting sample_idx: Index of the sample to load (default: 0) - force_generate_miscalibration: Whether to force generation of miscalibration device: Device to load tensor on transform_test: Pre-created test transform instance (optional) Returns: @@ -151,7 +149,7 @@ def load_sample_data_from_info_pkl( transform = transform_test # Apply transform with miscalibration control - results = transform.transform(sample_data, force_generate_miscalibration=force_generate_miscalibration) + results = transform.transform(sample_data) input_data_processed = results["fused_img"] # (H, W, 5) # Convert to tensor @@ -428,7 +426,6 @@ def evaluate_model( info_pkl_path, model_cfg, sample_idx, - force_generate_miscalibration=False, device=device, transform_test=transform_test, ) @@ -436,7 +433,6 @@ def evaluate_model( info_pkl_path, model_cfg, sample_idx, - force_generate_miscalibration=True, device=device, transform_test=transform_test, ) From b25012a3e2cfc28e2a7a60a6b42bb66b89532b84 Mon Sep 17 00:00:00 2001 From: vividf Date: Wed, 1 Oct 2025 14:01:21 +0900 Subject: [PATCH 03/19] chore: fix documentation and refactor export to tensorrt Signed-off-by: vividf --- .../deploy/main.py | 72 +++++++++++-------- tools/calibration_classification/README.md | 6 +- 2 files changed, 47 insertions(+), 31 deletions(-) diff --git a/projects/CalibrationStatusClassification/deploy/main.py b/projects/CalibrationStatusClassification/deploy/main.py index 8a19b20a..2c136afd 100644 --- a/projects/CalibrationStatusClassification/deploy/main.py +++ b/projects/CalibrationStatusClassification/deploy/main.py @@ -127,7 +127,7 @@ def load_sample_data_from_info_pkl( transform = CalibrationClassificationTransform( transform_config=transform_config, mode="test", - lidar_range=model_cfg.get("lidar_range", 128.0), + max_depth=model_cfg.get("max_depth", 128.0), dilation_size=model_cfg.get("dilation_size", 1), undistort=True, miscalibration_probability=miscalibration_probability, @@ -263,42 +263,59 @@ def _optimize_onnx_model(onnx_path: str, logger: logging.Logger) -> None: def export_to_tensorrt( onnx_path: str, output_path: str, input_tensor: torch.Tensor, config: DeploymentConfig, logger: logging.Logger ) -> bool: - """Export ONNX model to TensorRT format.""" + """Export ONNX model to a TensorRT engine. + + Notes: + - Always uses EXPLICIT_BATCH. + - If strongly_typed=True, disallows precision-changing builder flags (FP16/BF16/FP8/INT8/INT4/FP4). + Only TF32 is permitted (controls Tensor Cores for FP32, not dtype). 
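+
+    Example (illustrative `common_config` of the deploy config; these are the keys read here,
+    the values are placeholders rather than a recommendation):
+        common_config = dict(
+            max_workspace_size=1 << 30,
+            fp16_mode=False,       # ignored when strongly_typed=True
+            tf32_mode=False,       # allow TF32 Tensor Cores for FP32 layers
+            strongly_typed=False,  # when True, dtypes come from the ONNX graph (Q/DQ, Cast)
+        )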
+ """ settings = config.tensorrt_settings + strongly_typed = settings.get("strongly_typed", False) + fp16_mode = settings.get("fp16_mode", False) + tf32_mode = bool(settings.get("tf32_mode", False)) # Initialize TensorRT - TRT_LOGGER = trt.Logger(trt.Logger.WARNING) - trt.init_libnvinfer_plugins(TRT_LOGGER, "") - runtime = trt.Runtime(TRT_LOGGER) - builder = trt.Builder(TRT_LOGGER) + trt_logger = trt.Logger(trt.Logger.WARNING) + trt.init_libnvinfer_plugins(trt_logger, "") - # Create network and config - strongly_typed = settings.get("strongly_typed", False) + builder = trt.Builder(trt_logger) + builder_config = builder.create_builder_config() + builder_config.set_memory_pool_limit(pool=trt.MemoryPoolType.WORKSPACE, pool_size=settings["max_workspace_size"]) + # Create network and config + flags = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) if strongly_typed: - flags = (1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)) | ( - 1 << int(trt.NetworkDefinitionCreationFlag.STRONGLY_TYPED) - ) - network = builder.create_network(flags) + flags |= 1 << int(trt.NetworkDefinitionCreationFlag.STRONGLY_TYPED) logger.info("Using strongly typed TensorRT network creation") + network = builder.create_network(flags) + + def _warn_if(flag_name: str, enabled: bool): + if enabled: + logger.warning( + f"'{flag_name}' requested but will be IGNORED because strongly_typed=True. " + "Strongly typed mode must derive dtypes from the model/Q-DQ/Cast." + ) + + if strongly_typed: + # Disallow precision-changing flags in strongly typed mode + _warn_if("FP16", fp16_mode) else: - network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)) - logger.info("Using standard TensorRT network creation (EXPLICIT_BATCH only)") - config_trt = builder.create_builder_config() - config_trt.set_memory_pool_limit(pool=trt.MemoryPoolType.WORKSPACE, pool_size=settings["max_workspace_size"]) + if fp16_mode: + builder_config.set_flag(trt.BuilderFlag.FP16) + logger.info("BuilderFlag.FP16 enabled") - # Enable FP16 if specified - if settings["fp16_mode"]: - config_trt.set_flag(trt.BuilderFlag.FP16) - logger.info("FP16 mode enabled") + if tf32_mode and hasattr(trt.BuilderFlag, "TF32"): + builder_config.set_flag(trt.BuilderFlag.TF32) + logger.info("BuilderFlag.TF32 enabled") # Setup optimization profile profile = builder.create_optimization_profile() _configure_input_shapes(profile, input_tensor, settings["model_inputs"], logger) - config_trt.add_optimization_profile(profile) + builder_config.add_optimization_profile(profile) # Parse ONNX model - parser = trt.OnnxParser(network, TRT_LOGGER) + parser = trt.OnnxParser(network, trt_logger) with open(onnx_path, "rb") as f: if not parser.parse(f.read()): _log_parser_errors(parser, logger) @@ -307,7 +324,7 @@ def export_to_tensorrt( # Build engine logger.info("Building TensorRT engine...") - serialized_engine = builder.build_serialized_network(network, config_trt) + serialized_engine = builder.build_serialized_network(network, builder_config) if serialized_engine is None: logger.error("Failed to build TensorRT engine") @@ -317,9 +334,8 @@ def export_to_tensorrt( with open(output_path, "wb") as f: f.write(serialized_engine) - workspace_gb = settings["max_workspace_size"] / (1024**3) logger.info(f"TensorRT engine saved to {output_path}") - logger.info(f"Engine max workspace size: {workspace_gb:.2f} GB") + logger.info(f"Engine max workspace size: {settings['max_workspace_size'] / (1024**3):.2f} GB") return True @@ -425,9 +441,9 @@ def 
run_tensorrt_inference( torch.cuda.empty_cache() # Load TensorRT engine - TRT_LOGGER = trt.Logger(trt.Logger.WARNING) - trt.init_libnvinfer_plugins(TRT_LOGGER, "") - runtime = trt.Runtime(TRT_LOGGER) + trt_logger = trt.Logger(trt.Logger.WARNING) + trt.init_libnvinfer_plugins(trt_logger, "") + runtime = trt.Runtime(trt_logger) with open(tensorrt_path, "rb") as f: engine = runtime.deserialize_cuda_engine(f.read()) diff --git a/tools/calibration_classification/README.md b/tools/calibration_classification/README.md index 11126fdf..6c515f66 100644 --- a/tools/calibration_classification/README.md +++ b/tools/calibration_classification/README.md @@ -242,17 +242,17 @@ This directory contains scripts for evaluating calibration classification models ### 6.1 Convert to ONNX and TensorRT ```sh -python projects/CalibrationStatusClassification/deploy/main.py projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb8-25e_j6gen2.py checkpoint.pth --info_pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl --sample_idx 0 --device cuda:0 --work-dir /workspace/work_dirs/ --verify --onnx --tensorrt +python projects/CalibrationStatusClassification/deploy/main.py projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py checkpoint.pth --info_pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl --sample_idx 0 --device cuda:0 --work-dir /workspace/work_dirs/ --verify --onnx --tensorrt ``` ### 6.1.1 Convert to ONNX ```sh -python projects/CalibrationStatusClassification/deploy/main.py projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb8-25e_j6gen2.py checkpoint.pth --info_pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl --sample_idx 0 --device cuda:0 --work-dir /workspace/work_dirs/ --verify --onnx +python projects/CalibrationStatusClassification/deploy/main.py projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py checkpoint.pth --info_pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl --sample_idx 0 --device cuda:0 --work-dir /workspace/work_dirs/ --verify --onnx ``` ### 6.1.2 Convert to TensorRT ```sh -python projects/CalibrationStatusClassification/deploy/main.py projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb8-25e_j6gen2.py checkpoint.pth --info_pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl --sample_idx 0 --device cuda:0 --work-dir /workspace/work_dirs/ --verify --tensorrt --onnx-file model.onnx +python projects/CalibrationStatusClassification/deploy/main.py projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py checkpoint.pth --info_pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl --sample_idx 0 --device cuda:0 --work-dir /workspace/work_dirs/ --verify --tensorrt --onnx-file model.onnx ``` From dbe27cc0928c571d16a98baa98caed597af92f84 Mon Sep 17 00:00:00 2001 From: vividf Date: Wed, 1 Oct 2025 15:30:22 +0900 Subject: [PATCH 04/19] chore: update readme Signed-off-by: vividf --- 
.../configs/deploy/resnet18_5ch.py | 3 ++- tools/calibration_classification/README.md | 14 ++++++++++---- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py b/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py index a32b3823..b16c2346 100644 --- a/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py +++ b/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py @@ -4,7 +4,8 @@ type="tensorrt", common_config=dict( max_workspace_size=1 << 30, - fp16_mode=True, + fp16_mode=False, + tf32_mode=False, strongly_typed=False, ), model_inputs=[ diff --git a/tools/calibration_classification/README.md b/tools/calibration_classification/README.md index 6c515f66..4ebcd141 100644 --- a/tools/calibration_classification/README.md +++ b/tools/calibration_classification/README.md @@ -240,21 +240,27 @@ python tools/calibration_classification/test.py projects/CalibrationStatusClassi ## 6. Deployment This directory contains scripts for evaluating calibration classification models in different formats. -### 6.1 Convert to ONNX and TensorRT +### 6.1 Convert from Pytorch to ONNX and TensorRT ```sh python projects/CalibrationStatusClassification/deploy/main.py projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py checkpoint.pth --info_pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl --sample_idx 0 --device cuda:0 --work-dir /workspace/work_dirs/ --verify --onnx --tensorrt ``` -### 6.1.1 Convert to ONNX +### 6.1.1 Convert from Pytorch to ONNX ```sh python projects/CalibrationStatusClassification/deploy/main.py projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py checkpoint.pth --info_pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl --sample_idx 0 --device cuda:0 --work-dir /workspace/work_dirs/ --verify --onnx ``` -### 6.1.2 Convert to TensorRT +### 6.1.2 Convert from ONNX to TensorRT ```sh -python projects/CalibrationStatusClassification/deploy/main.py projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py checkpoint.pth --info_pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl --sample_idx 0 --device cuda:0 --work-dir /workspace/work_dirs/ --verify --tensorrt --onnx-file model.onnx +# Without verification (checkpoint not needed) +python projects/CalibrationStatusClassification/deploy/main.py projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py dummy.pth --info_pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl --sample_idx 0 --device cuda:0 --work-dir /workspace/work_dirs/ --tensorrt --onnx-file model.onnx + +# With verification (checkpoint required for reference) +python projects/CalibrationStatusClassification/deploy/main.py projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py checkpoint.pth --info_pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl --sample_idx 0 --device cuda:0 --work-dir /workspace/work_dirs/ --verify --tensorrt 
--onnx-file model.onnx ``` +**Note:** When converting from ONNX to TensorRT without `--verify`, the checkpoint file is not actually used (you can use a dummy filename). However, with `--verify`, a valid checkpoint is required to compare outputs. + From 216d75a5475b39067c7ce71ba552bfc4073479b4 Mon Sep 17 00:00:00 2001 From: vividf Date: Wed, 1 Oct 2025 17:09:15 +0900 Subject: [PATCH 05/19] chore: fix config Signed-off-by: vividf --- .../configs/deploy/resnet18_5ch.py | 112 ++++++- .../deploy/main.py | 275 +++++++++++------- tools/calibration_classification/README.md | 170 +++++++---- 3 files changed, 384 insertions(+), 173 deletions(-) diff --git a/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py b/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py index b16c2346..729b0ed7 100644 --- a/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py +++ b/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py @@ -1,35 +1,117 @@ +# projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py +# Deployment configuration for ResNet18 5-channel calibration classification model +# +# 1. export: Controls export behavior (mode, verification, device, output directory) +# 2. runtime_io: Runtime I/O configuration (data paths, sample selection) +# 3. Backend configs: ONNX and TensorRT specific settings + +# ============================================================================== +# Export Configuration +# ============================================================================== +export = dict( + mode="both", # Export mode: "onnx", "trt", or "both" + # - "onnx": Export to ONNX only + # - "trt": Convert to TensorRT only (requires onnx_file in runtime_io) + # - "both": Export to ONNX then convert to TensorRT + verify=True, # Run verification comparing PyTorch/ONNX/TRT outputs + device="cuda:0", # Device for export (use "cuda:0" or "cpu") + # Note: TensorRT always requires CUDA, will auto-switch if needed + work_dir="/workspace/work_dirs", # Output directory for exported models +) + +# ============================================================================== +# Runtime I/O Configuration +# ============================================================================== +runtime_io = dict( + info_pkl="data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl", + sample_idx=0, # Sample index to use for export and verification + onnx_file=None, # Optional: Path to existing ONNX file + # - If provided with mode="trt", will convert this ONNX to TensorRT + # - If None with mode="both", will export ONNX first then convert +) + +# ============================================================================== +# Codebase Configuration +# ============================================================================== codebase_config = dict(type="mmpretrain", task="Classification", model_type="end2end") +# ============================================================================== +# TensorRT Backend Configuration +# ============================================================================== backend_config = dict( type="tensorrt", common_config=dict( - max_workspace_size=1 << 30, - fp16_mode=False, - tf32_mode=False, - strongly_typed=False, + max_workspace_size=1 << 30, # 1 GiB workspace for TensorRT + # Precision policy controls how TensorRT handles numerical precision: + # - "auto": TensorRT automatically selects precision (default) + # - "fp16": Enable FP16 mode for faster inference with slight accuracy 
trade-off + # - "fp32_tf32": Enable TF32 mode (Tensor Cores for FP32 operations on Ampere+) + # - "strongly_typed": Enforce strict type checking (prevents automatic precision conversion) + precision_policy="fp32_tf32", ), + # Dynamic shape configuration for different input resolutions model_inputs=[ dict( input_shapes=dict( input=dict( - min_shape=[1, 5, 1080, 1920], - opt_shape=[1, 5, 1860, 2880], - max_shape=[1, 5, 2160, 3840], + min_shape=[1, 5, 1080, 1920], # Minimum supported input shape + opt_shape=[1, 5, 1860, 2880], # Optimal shape for performance tuning + max_shape=[1, 5, 2160, 3840], # Maximum supported input shape ), ) ) ], ) +# ============================================================================== +# ONNX Export Configuration +# ============================================================================== onnx_config = dict( type="onnx", - export_params=True, - keep_initializers_as_inputs=False, - opset_version=16, - do_constant_folding=True, - save_file="end2end.onnx", - input_names=["input"], - output_names=["output"], - dynamic_axes={"input": {0: "batch_size", 2: "height", 3: "width"}, "output": {0: "batch_size"}}, + export_params=True, # Include trained parameters in the model + keep_initializers_as_inputs=False, # Don't expose initializers as inputs + opset_version=16, # ONNX opset version (16 recommended for modern features) + do_constant_folding=True, # Optimize by folding constant expressions + save_file="end2end.onnx", # Output filename + input_names=["input"], # Name of input tensor + output_names=["output"], # Name of output tensor + # Dynamic axes for variable input dimensions + dynamic_axes={ + "input": {0: "batch_size", 2: "height", 3: "width"}, # Batch, H, W are dynamic + "output": {0: "batch_size"}, # Batch is dynamic + }, input_shape=None, ) + +# ============================================================================== +# Usage Examples +# ============================================================================== +# +# Basic usage (export to both ONNX and TensorRT): +# python projects/CalibrationStatusClassification/deploy/main.py \ +# projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \ +# projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ +# checkpoint.pth +# +# Export to ONNX only: +# Set export.mode = "onnx" in this config, then run the command above +# +# Convert existing ONNX to TensorRT: +# 1) Set export.mode = "trt" +# 2) Set runtime_io.onnx_file = "/path/to/existing/model.onnx" +# 3) Run the command above +# +# Override config settings via command line: +# python projects/CalibrationStatusClassification/deploy/main.py \ +# \ +# --work-dir ./custom_output \ +# --device cuda:1 \ +# --info-pkl /path/to/custom/info.pkl \ +# --sample-idx 5 +# +# Available precision policies: +# - auto: Let TensorRT decide (default, good balance) +# - fp16: Faster inference, ~2x speedup, small accuracy loss +# - fp32_tf32: Use Tensor Cores on Ampere+ GPUs for FP32 +# - explicit_int8: INT8 quantization (requires calibration dataset) +# - strongly_typed: Strict type enforcement, no automatic conversion diff --git a/projects/CalibrationStatusClassification/deploy/main.py b/projects/CalibrationStatusClassification/deploy/main.py index 2c136afd..bcfadc3b 100644 --- a/projects/CalibrationStatusClassification/deploy/main.py +++ b/projects/CalibrationStatusClassification/deploy/main.py @@ -41,6 +41,15 @@ EXPECTED_CHANNELS = 5 # RGB + Depth + Intensity LABELS = {"0": "miscalibrated", "1": "calibrated"} 
+# Precision policy mapping +PRECISION_POLICIES = { + "auto": {}, # No special flags, TensorRT decides + "fp16": {"FP16": True}, + "fp32_tf32": {"TF32": True}, # TF32 for FP32 operations + "explicit_int8": {"INT8": True}, + "strongly_typed": {"STRONGLY_TYPED": True}, # Network creation flag +} + def load_info_pkl_data(info_pkl_path: str, sample_idx: int = 0) -> Dict[str, Any]: """ @@ -150,36 +159,105 @@ def load_sample_data_from_info_pkl( class DeploymentConfig: - """Configuration container for deployment settings.""" + """Enhanced configuration container for deployment settings with validation.""" def __init__(self, deploy_cfg: Config): self.deploy_cfg = deploy_cfg - self.backend_config = deploy_cfg.get("backend_config", {}) - self.onnx_config = deploy_cfg.get("onnx_config", {}) + self._validate_config() + + def _validate_config(self) -> None: + """Validate configuration structure and required fields.""" + if "export" not in self.deploy_cfg: + raise ValueError( + "Missing 'export' section in deploy config. " + "Please update your config to the new format with 'export', 'runtime_io' sections." + ) + + if "runtime_io" not in self.deploy_cfg: + raise ValueError("Missing 'runtime_io' section in deploy config.") + + # Validate export mode + valid_modes = ["onnx", "trt", "both"] + mode = self.export_config.get("mode", "both") + if mode not in valid_modes: + raise ValueError(f"Invalid export mode '{mode}'. Must be one of {valid_modes}") + + # Validate precision policy if present + backend_cfg = self.deploy_cfg.get("backend_config", {}) + common_cfg = backend_cfg.get("common_config", {}) + precision_policy = common_cfg.get("precision_policy", "auto") + if precision_policy not in PRECISION_POLICIES: + raise ValueError( + f"Invalid precision_policy '{precision_policy}'. 
" f"Must be one of {list(PRECISION_POLICIES.keys())}" + ) + + @property + def export_config(self) -> Dict: + """Get export configuration (mode, verify, device, work_dir).""" + return self.deploy_cfg.get("export", {}) + + @property + def runtime_io_config(self) -> Dict: + """Get runtime I/O configuration (info_pkl, sample_idx, onnx_file).""" + return self.deploy_cfg.get("runtime_io", {}) + + @property + def should_export_onnx(self) -> bool: + """Check if ONNX export is requested.""" + mode = self.export_config.get("mode", "both") + return mode in ["onnx", "both"] + + @property + def should_export_tensorrt(self) -> bool: + """Check if TensorRT export is requested.""" + mode = self.export_config.get("mode", "both") + return mode in ["trt", "both"] + + @property + def should_verify(self) -> bool: + """Check if verification is requested.""" + return self.export_config.get("verify", False) + + @property + def device(self) -> str: + """Get device for export.""" + return self.export_config.get("device", "cuda:0") + + @property + def work_dir(self) -> str: + """Get working directory.""" + return self.export_config.get("work_dir", os.getcwd()) @property def onnx_settings(self) -> Dict: """Get ONNX export settings.""" + onnx_config = self.deploy_cfg.get("onnx_config", {}) return { - "opset_version": self.onnx_config.get("opset_version", 11), - "do_constant_folding": self.onnx_config.get("do_constant_folding", True), - "input_names": self.onnx_config.get("input_names", ["input"]), - "output_names": self.onnx_config.get("output_names", ["output"]), - "dynamic_axes": self.onnx_config.get("dynamic_axes"), - "export_params": self.onnx_config.get("export_params", True), - "keep_initializers_as_inputs": self.onnx_config.get("keep_initializers_as_inputs", False), - "save_file": self.onnx_config.get("save_file", "calibration_classifier.onnx"), + "opset_version": onnx_config.get("opset_version", 16), + "do_constant_folding": onnx_config.get("do_constant_folding", True), + "input_names": onnx_config.get("input_names", ["input"]), + "output_names": onnx_config.get("output_names", ["output"]), + "dynamic_axes": onnx_config.get("dynamic_axes"), + "export_params": onnx_config.get("export_params", True), + "keep_initializers_as_inputs": onnx_config.get("keep_initializers_as_inputs", False), + "save_file": onnx_config.get("save_file", "calibration_classifier.onnx"), } @property def tensorrt_settings(self) -> Dict: - """Get TensorRT export settings.""" - common_config = self.backend_config.get("common_config", {}) + """Get TensorRT export settings with precision policy support.""" + backend_config = self.deploy_cfg.get("backend_config", {}) + common_config = backend_config.get("common_config", {}) + + # Get precision policy + precision_policy = common_config.get("precision_policy", "auto") + policy_flags = PRECISION_POLICIES.get(precision_policy, {}) + return { "max_workspace_size": common_config.get("max_workspace_size", DEFAULT_WORKSPACE_SIZE), - "fp16_mode": common_config.get("fp16_mode", False), - "model_inputs": self.backend_config.get("model_inputs", []), - "strongly_typed": common_config.get("strongly_typed", False), + "precision_policy": precision_policy, + "policy_flags": policy_flags, + "model_inputs": backend_config.get("model_inputs", []), } @@ -189,20 +267,16 @@ def parse_args() -> argparse.Namespace: description="Export CalibrationStatusClassification model to ONNX/TensorRT.", formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) - parser.add_argument("deploy_cfg", help="deploy config path") - 
parser.add_argument("model_cfg", help="model config path") - parser.add_argument("checkpoint", help="model checkpoint path") - parser.add_argument("--info_pkl", required=True, help="info.pkl file path containing calibration data") - parser.add_argument("--sample_idx", type=int, default=0, help="sample index from info.pkl (default: 0)") - parser.add_argument("--work-dir", default=os.getcwd(), help="output directory") - parser.add_argument("--device", default="cpu", help="device for conversion") - parser.add_argument("--log-level", default="INFO", choices=list(logging._nameToLevel.keys()), help="logging level") - parser.add_argument("--verify", action="store_true", help="verify model outputs") - parser.add_argument("--onnx", action="store_true", help="export to ONNX format") - parser.add_argument("--tensorrt", action="store_true", help="export to TensorRT format") - parser.add_argument( - "--onnx-file", help="path to existing ONNX file (required when --tensorrt is specified without --onnx)" - ) + parser.add_argument("deploy_cfg", help="Deploy config path") + parser.add_argument("model_cfg", help="Model config path") + parser.add_argument("checkpoint", help="Model checkpoint path") + + # Optional overrides + parser.add_argument("--work-dir", help="Override output directory from config") + parser.add_argument("--device", help="Override device from config") + parser.add_argument("--log-level", default="INFO", choices=list(logging._nameToLevel.keys()), help="Logging level") + parser.add_argument("--info-pkl", help="Override info.pkl path from config") + parser.add_argument("--sample-idx", type=int, help="Override sample index from config") return parser.parse_args() @@ -263,17 +337,12 @@ def _optimize_onnx_model(onnx_path: str, logger: logging.Logger) -> None: def export_to_tensorrt( onnx_path: str, output_path: str, input_tensor: torch.Tensor, config: DeploymentConfig, logger: logging.Logger ) -> bool: - """Export ONNX model to a TensorRT engine. - - Notes: - - Always uses EXPLICIT_BATCH. - - If strongly_typed=True, disallows precision-changing builder flags (FP16/BF16/FP8/INT8/INT4/FP4). - Only TF32 is permitted (controls Tensor Cores for FP32, not dtype). - """ + """Export ONNX model to a TensorRT engine with precision policy support.""" settings = config.tensorrt_settings - strongly_typed = settings.get("strongly_typed", False) - fp16_mode = settings.get("fp16_mode", False) - tf32_mode = bool(settings.get("tf32_mode", False)) + precision_policy = settings["precision_policy"] + policy_flags = settings["policy_flags"] + + logger.info(f"Building TensorRT engine with precision policy: {precision_policy}") # Initialize TensorRT trt_logger = trt.Logger(trt.Logger.WARNING) @@ -283,31 +352,23 @@ def export_to_tensorrt( builder_config = builder.create_builder_config() builder_config.set_memory_pool_limit(pool=trt.MemoryPoolType.WORKSPACE, pool_size=settings["max_workspace_size"]) - # Create network and config + # Create network with appropriate flags flags = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - if strongly_typed: + + # Handle strongly typed flag (network creation flag, not builder flag) + if policy_flags.get("STRONGLY_TYPED"): flags |= 1 << int(trt.NetworkDefinitionCreationFlag.STRONGLY_TYPED) logger.info("Using strongly typed TensorRT network creation") - network = builder.create_network(flags) - def _warn_if(flag_name: str, enabled: bool): - if enabled: - logger.warning( - f"'{flag_name}' requested but will be IGNORED because strongly_typed=True. 
" - "Strongly typed mode must derive dtypes from the model/Q-DQ/Cast." - ) - - if strongly_typed: - # Disallow precision-changing flags in strongly typed mode - _warn_if("FP16", fp16_mode) - else: - if fp16_mode: - builder_config.set_flag(trt.BuilderFlag.FP16) - logger.info("BuilderFlag.FP16 enabled") + network = builder.create_network(flags) - if tf32_mode and hasattr(trt.BuilderFlag, "TF32"): - builder_config.set_flag(trt.BuilderFlag.TF32) - logger.info("BuilderFlag.TF32 enabled") + # Apply precision flags to builder config + for flag_name, enabled in policy_flags.items(): + if flag_name == "STRONGLY_TYPED": + continue # Already handled as network creation flag above + if enabled and hasattr(trt.BuilderFlag, flag_name): + builder_config.set_flag(getattr(trt.BuilderFlag, flag_name)) + logger.info(f"BuilderFlag.{flag_name} enabled") # Setup optimization profile profile = builder.create_optimization_profile() @@ -595,21 +656,6 @@ def main(): args = parse_args() logger = setup_logging(args.log_level) - # Check if at least one export format is specified - if not args.onnx and not args.tensorrt: - logger.error("Please specify at least one export format: --onnx or --tensorrt") - return - - # Check if TensorRT is requested but no ONNX file is provided and ONNX export is not requested - if args.tensorrt and not args.onnx and not args.onnx_file: - logger.error( - "TensorRT export requires either --onnx (to export ONNX first) or --onnx-file (to use existing ONNX file)" - ) - return - - # Setup - mmengine.mkdir_or_exist(osp.abspath(args.work_dir)) - # Load configurations logger.info(f"Loading deploy config from: {args.deploy_cfg}") deploy_cfg = Config.fromfile(args.deploy_cfg) @@ -618,58 +664,69 @@ def main(): logger.info(f"Loading model config from: {args.model_cfg}") model_cfg = Config.fromfile(args.model_cfg) - # Setup file paths + # Get settings from config with CLI overrides + work_dir = args.work_dir if args.work_dir else config.work_dir + device = args.device if args.device else config.device + info_pkl = args.info_pkl if args.info_pkl else config.runtime_io_config.get("info_pkl") + sample_idx = args.sample_idx if args.sample_idx is not None else config.runtime_io_config.get("sample_idx", 0) + existing_onnx = config.runtime_io_config.get("onnx_file") + + # Validate required parameters + if not info_pkl: + logger.error("info_pkl path must be provided either in config or via --info-pkl") + return + + # Setup working directory + mmengine.mkdir_or_exist(osp.abspath(work_dir)) + logger.info(f"Working directory: {work_dir}") + logger.info(f"Device: {device}") + logger.info(f"Export mode: {config.export_config.get('mode', 'both')}") + + # Determine export paths onnx_path = None trt_path = None - trt_file = None - if args.onnx: + if config.should_export_onnx: onnx_settings = config.onnx_settings - onnx_path = osp.join(args.work_dir, onnx_settings["save_file"]) + onnx_path = osp.join(work_dir, onnx_settings["save_file"]) - if args.tensorrt: - if args.onnx_file: - # Use provided ONNX file path - onnx_path = args.onnx_file + if config.should_export_tensorrt: + # Use existing ONNX if provided, otherwise use the one we'll export + if existing_onnx and not config.should_export_onnx: + onnx_path = existing_onnx if not osp.exists(onnx_path): logger.error(f"Provided ONNX file does not exist: {onnx_path}") return logger.info(f"Using existing ONNX file: {onnx_path}") - elif args.onnx: - # ONNX will be exported first, use the path from config - onnx_settings = config.onnx_settings - onnx_path = 
osp.join(args.work_dir, onnx_settings["save_file"]) - else: - # This should not happen due to validation above, but just in case - logger.error("No ONNX file path available for TensorRT conversion") + elif not onnx_path: + # Need ONNX for TensorRT but neither export nor existing file specified + logger.error("TensorRT export requires ONNX file. Set mode='both' or provide onnx_file in config.") return # Set TensorRT output path onnx_settings = config.onnx_settings trt_file = onnx_settings["save_file"].replace(".onnx", ".engine") - trt_path = osp.join(args.work_dir, trt_file) + trt_path = osp.join(work_dir, trt_file) + # Load model logger.info(f"Loading model from checkpoint: {args.checkpoint}") - device = torch.device(args.device) + device = torch.device(device) model = get_model(model_cfg, args.checkpoint, device=device) # Load sample data - logger.info(f"Loading sample data from info.pkl: {args.info_pkl}") - input_tensor_calibrated = load_sample_data_from_info_pkl( - args.info_pkl, model_cfg, 0.0, args.sample_idx, device=args.device - ) - input_tensor_miscalibrated = load_sample_data_from_info_pkl( - args.info_pkl, model_cfg, 1.0, args.sample_idx, device=args.device - ) + logger.info(f"Loading sample data from info.pkl: {info_pkl}") + input_tensor_calibrated = load_sample_data_from_info_pkl(info_pkl, model_cfg, 0.0, sample_idx, device=device) + input_tensor_miscalibrated = load_sample_data_from_info_pkl(info_pkl, model_cfg, 1.0, sample_idx, device=device) # Export models - if args.onnx: + if config.should_export_onnx: export_to_onnx(model, input_tensor_calibrated, onnx_path, config, logger) - if args.tensorrt: + + if config.should_export_tensorrt: logger.info("Converting ONNX to TensorRT...") # Ensure CUDA device for TensorRT - if args.device == "cpu": + if not device.startswith("cuda"): logger.warning("TensorRT requires CUDA device, switching to cuda") device = torch.device("cuda") input_tensor_calibrated = input_tensor_calibrated.to(device) @@ -682,14 +739,15 @@ def main(): logger.error("TensorRT conversion failed") # Run verification if requested - if args.verify: + if config.should_verify: logger.info( "Running verification for miscalibrated and calibrated samples with an output array [SCORE_MISCALIBRATED, SCORE_CALIBRATED]..." 
) input_tensors = {"0": input_tensor_miscalibrated, "1": input_tensor_calibrated} - # Only verify formats that were exported - onnx_path_for_verify = onnx_path if (args.onnx or (args.tensorrt and args.onnx_file)) else None - trt_path_for_verify = trt_path if args.tensorrt else None + + # Only verify formats that were exported or provided + onnx_path_for_verify = onnx_path if (config.should_export_onnx or existing_onnx) else None + trt_path_for_verify = trt_path if config.should_export_tensorrt else None run_verification(model, onnx_path_for_verify, trt_path_for_verify, input_tensors, logger) @@ -697,14 +755,13 @@ def main(): # Log what was exported exported_formats = [] - if args.onnx: + if config.should_export_onnx: exported_formats.append("ONNX") - if args.tensorrt: + if config.should_export_tensorrt: exported_formats.append("TensorRT") logger.info(f"Exported formats: {', '.join(exported_formats)}") -# TODO: make deployment script inherit from awml base deploy script or use awml deploy script directly if __name__ == "__main__": main() diff --git a/tools/calibration_classification/README.md b/tools/calibration_classification/README.md index 4ebcd141..7e2ce683 100644 --- a/tools/calibration_classification/README.md +++ b/tools/calibration_classification/README.md @@ -5,14 +5,14 @@ It contains training, evaluation, and visualization for Calibration classificati - [Support priority](https://github.com/tier4/AWML/blob/main/docs/design/autoware_ml_design.md#support-priority): Tier B - Supported dataset - - [] NuScenes + - [ ] NuScenes - [x] T4dataset - Other supported feature - [ ] Add unit test ## 1. Setup environment -Please follow the [installation tutorial](/docs/tutorial/tutorial_detection_3d.md)to set up the environment. +Please follow the [installation tutorial](/docs/tutorial/tutorial_detection_3d.md) to set up the environment. ## 2. Prepare dataset @@ -26,7 +26,7 @@ Prepare the dataset you use. 
docker run -it --rm --gpus --shm-size=64g --name awml -p 6006:6006 -v $PWD/:/workspace -v $PWD/data:/workspace/data autoware-ml ``` -- Make info files for T4dataset X2 Gen2 +- Create info files for T4dataset X2 Gen2 ```sh python tools/calibration_classification/create_data_t4dataset.py --config /workspace/autoware_ml/configs/calibration_classification/dataset/t4dataset/gen2_base.py --version gen2_base --root_path ./data/t4dataset -o ./data/t4dataset/calibration_info/ @@ -124,19 +124,19 @@ Each file contains calibration information including: ```sh # Process all samples from info.pkl -python tools/calibration_classification/visualize_lidar_camera_projection.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py --info_pkl data/info.pkl --data_root data/ --output_dir ./work_dirs/calibration_visualization +python tools/calibration_classification/toolkit.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py --info_pkl data/info.pkl --data_root data/ --output_dir ./work_dirs/calibration_visualization # Process specific sample -python tools/calibration_classification/visualize_lidar_camera_projection.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py --info_pkl data/info.pkl --data_root data/ --output_dir ./work_dirs/calibration_visualization --sample_idx 0 +python tools/calibration_classification/toolkit.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py --info_pkl data/info.pkl --data_root data/ --output_dir ./work_dirs/calibration_visualization --sample_idx 0 # Process specific indices -python tools/calibration_classification/visualize_lidar_camera_projection.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py --info_pkl data/info.pkl --data_root data/ --output_dir ./work_dirs/calibration_visualization --indices 0 1 2 +python tools/calibration_classification/toolkit.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py --info_pkl data/info.pkl --data_root data/ --output_dir ./work_dirs/calibration_visualization --indices 0 1 2 ``` - For T4dataset visualization: ```sh -python tools/calibration_classification/visualize_lidar_camera_projection.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py --info_pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl --data_root data/t4dataset --output_dir ./work_dirs/calibration_visualization +python tools/calibration_classification/toolkit.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py --info_pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl --data_root data/t4dataset --output_dir ./work_dirs/calibration_visualization ``` ## 3. Visualization Settings (During training, validation, testing) @@ -174,7 +174,7 @@ dict( ### 3.3. Usage Strategy -**Training/Validataion Phase:** +**Training/Validation Phase:** - Disable visualization for efficiency: `projection_vis_dir=None`, `results_vis_dir=None` - Focus on model training performance @@ -188,8 +188,8 @@ dict( - `results_vis_dir/`: Classification results with predicted labels ## 4. Train -### 4.1. Environment set up -Set `CUBLAS_WORKSPACE_CONFIG` for the deterministic behavior, plese check this [nvidia doc](https://docs.nvidia.com/cuda/cublas/index.html#results-reproducibility) for more info +### 4.1. 
Environment Setup +Set `CUBLAS_WORKSPACE_CONFIG` for deterministic behavior, please check this [nvidia doc](https://docs.nvidia.com/cuda/cublas/index.html#results-reproducibility) for more info ```sh export CUBLAS_WORKSPACE_CONFIG=:4096:8 @@ -214,16 +214,16 @@ python tools/calibration_classification/train.py projects/CalibrationStatusClass docker run -it --rm --gpus --name autoware-ml --shm-size=64g -d -v $PWD/:/workspace -v $PWD/data:/workspace/data autoware-ml bash -c 'python tools/calibration_classification/train.py {config_file}' ``` -### 4.3. Log analysis by Tensorboard +### 4.3. Log Analysis with TensorBoard -- Run the TensorBoard and navigate to http://127.0.0.1:6006/ +- Run TensorBoard and navigate to http://127.0.0.1:6006/ ```sh tensorboard --logdir work_dirs --bind_all ``` -## 5. Analyze -### 5.1. Evaluation Pytorch +## 5. Evaluation +### 5.1. PyTorch Model Evaluation - Evaluation @@ -238,56 +238,138 @@ python tools/calibration_classification/test.py projects/CalibrationStatusClassi ## 6. Deployment -This directory contains scripts for evaluating calibration classification models in different formats. -### 6.1 Convert from Pytorch to ONNX and TensorRT -```sh -python projects/CalibrationStatusClassification/deploy/main.py projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py checkpoint.pth --info_pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl --sample_idx 0 --device cuda:0 --work-dir /workspace/work_dirs/ --verify --onnx --tensorrt +The deployment system supports exporting models to ONNX and TensorRT formats with a unified configuration approach. + +### 6.1. Deployment Configuration + +The deployment uses a **config-first approach** where all settings are defined in the deployment config file. The config has three main sections: + +#### Configuration Structure + +```python +# Export configuration - controls export behavior +export = dict( + mode="both", # "onnx", "trt", or "both" + verify=True, # Run verification comparing outputs + device="cuda:0", # Device for export + work_dir="/workspace/work_dirs", # Output directory +) + +# Runtime I/O configuration - data paths +runtime_io = dict( + info_pkl="data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl", + sample_idx=0, # Sample for export/verification + onnx_file=None, # Optional: existing ONNX path +) + +# TensorRT backend configuration +backend_config = dict( + type="tensorrt", + common_config=dict( + max_workspace_size=1 << 30, # 1 GiB + precision_policy="auto", # Precision policy (see below) + ), + model_inputs=[...], # Dynamic shape configuration +) ``` -### 6.1.1 Convert from Pytorch to ONNX +#### Precision Policies + +Control TensorRT inference precision with the `precision_policy` parameter: + +| Policy | Description | Use Case | +|--------|-------------|----------| +| `auto` | TensorRT decides automatically | Default, FP32 | +| `fp16` | Half-precision floating point | 2x faster, small accuracy loss | +| `fp32_tf32` | Tensor Cores for FP32 | Ampere+ GPUs, FP32 speedup | +| `strongly_typed` | Enforces explicit type constraints, preserves QDQ (Quantize-Dequantize) nodes | INT8 quantized models from QAT or PTQ with explicit Q/DQ ops | + +### 6.2. 
Basic Usage + +#### Export to Both ONNX and TensorRT + +All settings come from the config file: + ```sh -python projects/CalibrationStatusClassification/deploy/main.py projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py checkpoint.pth --info_pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl --sample_idx 0 --device cuda:0 --work-dir /workspace/work_dirs/ --verify --onnx +python projects/CalibrationStatusClassification/deploy/main.py \ + projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \ + projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ + checkpoint.pth ``` -### 6.1.2 Convert from ONNX to TensorRT -```sh -# Without verification (checkpoint not needed) -python projects/CalibrationStatusClassification/deploy/main.py projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py dummy.pth --info_pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl --sample_idx 0 --device cuda:0 --work-dir /workspace/work_dirs/ --tensorrt --onnx-file model.onnx +#### Override Config Settings via CLI -# With verification (checkpoint required for reference) -python projects/CalibrationStatusClassification/deploy/main.py projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py checkpoint.pth --info_pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl --sample_idx 0 --device cuda:0 --work-dir /workspace/work_dirs/ --verify --tensorrt --onnx-file model.onnx +```sh +python projects/CalibrationStatusClassification/deploy/main.py \ + projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \ + projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ + checkpoint.pth \ + --work-dir ./custom_output \ + --device cuda:1 \ + --info-pkl /path/to/custom/info.pkl \ + --sample-idx 5 ``` -**Note:** When converting from ONNX to TensorRT without `--verify`, the checkpoint file is not actually used (you can use a dummy filename). However, with `--verify`, a valid checkpoint is required to compare outputs. +### 6.3. Export Modes + +#### Export to ONNX Only + +In config file, set `export.mode = "onnx"`. + + +#### Convert Existing ONNX to TensorRT + +In config file: +- Set `export.mode = "trt"` +- Set `runtime_io.onnx_file = "/path/to/model.onnx"` + +**Note:** The checkpoint is used for verification. To skip verification, set `export.verify = False` in config. -### 6.2 Evaluate ONNX and tensorrt -##### ONNX Model Evaluation + + +### 6.4. 
Evaluate ONNX and TensorRT Models + +#### ONNX Model Evaluation + ```bash -python tools/calibration_classification/evaluate_inference.py --onnx work_dirs/end2end.onnx --model-cfg projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb8-25e_j6gen2.py --info-pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl +python tools/calibration_classification/evaluate_inference.py \ + --onnx work_dirs/end2end.onnx \ + --model-cfg projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb8-25e_j6gen2.py \ + --info-pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl ``` -##### TensorRT Model Evaluation +#### TensorRT Model Evaluation + ```bash -python tools/calibration_classification/evaluate_inference.py --tensorrt work_dirs/end2end.quant.engine --model-cfg projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb8-25e_j6gen2.py --info-pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl +python tools/calibration_classification/evaluate_inference.py \ + --tensorrt work_dirs/end2end.engine \ + --model-cfg projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb8-25e_j6gen2.py \ + --info-pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl ``` +#### Command Line Arguments +- `--onnx`: Path to ONNX model file (mutually exclusive with `--tensorrt`) +- `--tensorrt`: Path to TensorRT engine file (mutually exclusive with `--onnx`) +- `--model-cfg`: Path to model config file +- `--info-pkl`: Path to dataset info file +- `--device`: Device to use for inference (`cpu` or `cuda`, default: `cpu`) +- `--log-level`: Logging level (`DEBUG`, `INFO`, `WARNING`, `ERROR`, default: `INFO`) +### 6.5. INT8 Quantization -### 6.3 INT8 Optimization - -Set the number of image you want for calibration. Note that you need to consider the size of your memory. -For 32 GB, the most you can do is 1860 x 2880 x 5 x 4 Bytes / 32 GB +Set the number of images you want for calibration. Note that you need to consider the size of your memory. 
+For 32 GB memory, the maximum you can use is approximately: 1860 x 2880 x 5 x 4 Bytes / 32 GB -Thus, please limit the calibration data with the indices parameters +Therefore, please limit the calibration data using the indices parameter ```python -python tools/calibration_classification/visualize_lidar_camera_projection.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb8-25e_j6gen2.py --info_pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_train.pkl --data_root data/t4dataset --output_dir /vis --npz_output_path calibration_file.npz --indices 200 +python tools/calibration_classification/toolkit.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb8-25e_j6gen2.py --info_pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_train.pkl --data_root data/t4dataset --output_dir /vis --npz_output_path calibration_file.npz --indices 200 ``` @@ -298,15 +380,5 @@ docker run -it --rm --gpus all --shm-size=32g --name awml-opt -p 6006:6006 -v $P # Access the optimization docker python3 -m modelopt.onnx.quantization --onnx_path=end2end.onnx --quantize_mode=int8 --calibration_data_path=calibration_file.npz -# After getting the end2en2.quant.onnx, you can evaluate with quant onnx or engine +# After getting the end2end.quant.onnx, you can evaluate with quant onnx or conver to int8 engine by following section 6.3 ``` - - - -#### Command Line Arguments - -- `--onnx`: Path to ONNX model file (mutually exclusive with `--tensorrt`) -- `--tensorrt`: Path to TensorRT engine file (mutually exclusive with `--onnx`) -- `--model-cfg`: Path to model config file (default: `projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb8-25e_j6gen2.py`) -- `--device`: Device to use for inference (`cpu` or `cuda`, default: `cpu`) -- `--log-level`: Logging level (`DEBUG`, `INFO`, `WARNING`, `ERROR`, default: `INFO`) From b11f2a57200006eaf142e1184089ca1c2d2270f6 Mon Sep 17 00:00:00 2001 From: vividf Date: Wed, 1 Oct 2025 17:38:22 +0900 Subject: [PATCH 06/19] chore: fix readme Signed-off-by: vividf --- .../deploy/main.py | 2 +- tools/calibration_classification/README.md | 6 +- .../evaluate_inference.py | 76 +++++++++++-------- 3 files changed, 47 insertions(+), 37 deletions(-) diff --git a/projects/CalibrationStatusClassification/deploy/main.py b/projects/CalibrationStatusClassification/deploy/main.py index bcfadc3b..32084dc4 100644 --- a/projects/CalibrationStatusClassification/deploy/main.py +++ b/projects/CalibrationStatusClassification/deploy/main.py @@ -726,7 +726,7 @@ def main(): logger.info("Converting ONNX to TensorRT...") # Ensure CUDA device for TensorRT - if not device.startswith("cuda"): + if device.type != "cuda": logger.warning("TensorRT requires CUDA device, switching to cuda") device = torch.device("cuda") input_tensor_calibrated = input_tensor_calibrated.to(device) diff --git a/tools/calibration_classification/README.md b/tools/calibration_classification/README.md index 7e2ce683..9de32e1a 100644 --- a/tools/calibration_classification/README.md +++ b/tools/calibration_classification/README.md @@ -339,7 +339,7 @@ In config file: ```bash python tools/calibration_classification/evaluate_inference.py \ --onnx work_dirs/end2end.onnx \ - --model-cfg projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb8-25e_j6gen2.py \ + --model-cfg projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ --info-pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl ``` @@ 
-348,7 +348,7 @@ python tools/calibration_classification/evaluate_inference.py \ ```bash python tools/calibration_classification/evaluate_inference.py \ --tensorrt work_dirs/end2end.engine \ - --model-cfg projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb8-25e_j6gen2.py \ + --model-cfg projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ --info-pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl ``` @@ -369,7 +369,7 @@ For 32 GB memory, the maximum you can use is approximately: 1860 x 2880 x 5 x 4 Therefore, please limit the calibration data using the indices parameter ```python -python tools/calibration_classification/toolkit.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb8-25e_j6gen2.py --info_pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_train.pkl --data_root data/t4dataset --output_dir /vis --npz_output_path calibration_file.npz --indices 200 +python tools/calibration_classification/toolkit.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py --info_pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_train.pkl --data_root data/t4dataset --output_dir /vis --npz_output_path calibration_file.npz --indices 200 ``` diff --git a/tools/calibration_classification/evaluate_inference.py b/tools/calibration_classification/evaluate_inference.py index 95f7fd1a..1842ef01 100644 --- a/tools/calibration_classification/evaluate_inference.py +++ b/tools/calibration_classification/evaluate_inference.py @@ -178,22 +178,11 @@ def create_test_transform(model_cfg: Config) -> CalibrationClassificationTransfo def run_onnx_inference( - onnx_path: str, + ort_session, input_tensor: torch.Tensor, logger: logging.Logger, ) -> Tuple[np.ndarray, float]: """Run ONNX inference directly and return output and latency.""" - try: - import onnxruntime as ort - except ImportError: - raise ImportError( - "onnxruntime is required for ONNX inference. 
Please install it with: pip install onnxruntime" - ) - - # Clear GPU cache if available - if torch.cuda.is_available(): - torch.cuda.empty_cache() - # Convert input tensor to float32 input_tensor = input_tensor.float() @@ -207,16 +196,8 @@ def run_onnx_inference( input_tensor = input_tensor.unsqueeze(0) # Add batch dimension logger.debug(f"Added batch dimension - Shape: {input_tensor.shape}") - # ONNX inference with timing - providers = ["CPUExecutionProvider"] - ort_session = ort.InferenceSession(onnx_path, providers=providers) - - # Debug: Print ONNX model input info + # Get input name from session input_name = ort_session.get_inputs()[0].name - input_shape = ort_session.get_inputs()[0].shape - input_type = ort_session.get_inputs()[0].type - logger.debug(f"ONNX model expects - Input name: {input_name}, Shape: {input_shape}, Type: {input_type}") - onnx_input = {input_name: input_tensor.cpu().numpy().astype(np.float32)} start_time = time.perf_counter() @@ -271,19 +252,23 @@ def run_tensorrt_inference(engine, input_tensor: torch.Tensor, logger: logging.L # Validate input shape expected_shape = engine.get_tensor_shape(input_name) - if input_np.shape != tuple(expected_shape): - logger.warning(f"Input shape mismatch: expected {expected_shape}, got {input_np.shape}") - # Try to reshape if possible - if len(input_np.shape) == len(expected_shape): - try: - input_np = input_np.reshape(expected_shape) - logger.info(f"Reshaped input to {input_np.shape}") - except Exception as e: - raise RuntimeError(f"Cannot reshape input from {input_np.shape} to {expected_shape}: {e}") - elif len(input_np.shape) == len(expected_shape) - 1 and expected_shape[0] == 1: + + # Check if shapes are compatible (considering -1 as dynamic dimension) + def shapes_compatible(actual_shape, expected_shape): + """Check if actual shape is compatible with expected shape (which may contain -1 for dynamic dims).""" + if len(actual_shape) != len(expected_shape): + return False + for actual_dim, expected_dim in zip(actual_shape, expected_shape): + if expected_dim != -1 and actual_dim != expected_dim: + return False + return True + + if not shapes_compatible(input_np.shape, expected_shape): + # Only warn/error if shapes are truly incompatible + if len(input_np.shape) == len(expected_shape) - 1 and expected_shape[0] == 1: # Add batch dimension if missing try: - input_np = input_np.reshape(expected_shape) + input_np = np.expand_dims(input_np, axis=0) logger.info(f"Added batch dimension to input: {input_np.shape}") except Exception as e: raise RuntimeError( @@ -293,6 +278,10 @@ def run_tensorrt_inference(engine, input_tensor: torch.Tensor, logger: logging.L raise RuntimeError( f"Input shape mismatch: expected {expected_shape}, got {input_np.shape}. Please ensure input has correct batch dimension." ) + else: + # Shapes are compatible (dynamic dimensions match), use actual shape + if tuple(expected_shape) != input_np.shape: + logger.debug(f"Using dynamic input shape {input_np.shape} (engine expects {expected_shape})") context.set_input_shape(input_name, input_np.shape) output_shape = context.get_tensor_shape(output_name) @@ -400,8 +389,29 @@ def evaluate_model( # Initialize inference engine if model_type == "onnx": + try: + import onnxruntime as ort + except ImportError: + raise ImportError( + "onnxruntime is required for ONNX inference. 
Please install it with: pip install onnxruntime" + ) + logger.info(f"Using ONNX model: {model_path}") - inference_func = lambda input_tensor: run_onnx_inference(model_path, input_tensor, logger) + logger.info("Creating ONNX InferenceSession (one-time setup)...") + + # Create session once and reuse it + providers = ["CPUExecutionProvider"] + ort_session = ort.InferenceSession(model_path, providers=providers) + + # Debug: Print ONNX model input info + input_name = ort_session.get_inputs()[0].name + input_shape = ort_session.get_inputs()[0].shape + input_type = ort_session.get_inputs()[0].type + logger.info(f"ONNX model expects - Input name: {input_name}, Shape: {input_shape}, Type: {input_type}") + logger.info("ONNX InferenceSession created successfully.") + + # Bind session to closure for reuse + inference_func = lambda input_tensor: run_onnx_inference(ort_session, input_tensor, logger) elif model_type == "tensorrt": logger.info(f"Using TensorRT model: {model_path}") engine = load_tensorrt_engine(model_path, logger) From a8298b4b36051e2dce48dd16f3b2b0e825983c50 Mon Sep 17 00:00:00 2001 From: vividf Date: Wed, 1 Oct 2025 19:35:36 +0900 Subject: [PATCH 07/19] chore: fix for latest update Signed-off-by: vividf --- .../evaluate_inference.py | 137 +++++++++++------- 1 file changed, 82 insertions(+), 55 deletions(-) diff --git a/tools/calibration_classification/evaluate_inference.py b/tools/calibration_classification/evaluate_inference.py index 1842ef01..b4dcf9f8 100644 --- a/tools/calibration_classification/evaluate_inference.py +++ b/tools/calibration_classification/evaluate_inference.py @@ -111,70 +111,41 @@ def load_info_pkl_data(info_pkl_path: str, sample_idx: int = 0) -> Dict[str, Any def load_sample_data_from_info_pkl( info_pkl_path: str, - model_cfg: Config, + transform: CalibrationClassificationTransform, sample_idx: int = 0, device: str = "cpu", - transform_test: Optional[CalibrationClassificationTransform] = None, -) -> torch.Tensor: + miscalibration_probability: float = 0.0, +) -> Tuple[torch.Tensor, int]: """ Load and preprocess sample data from info.pkl using CalibrationClassificationTransform. 
Args: info_pkl_path: Path to the info.pkl file - model_cfg: Model configuration containing data_root setting + transform: Reusable CalibrationClassificationTransform instance sample_idx: Index of the sample to load (default: 0) device: Device to load tensor on - transform_test: Pre-created test transform instance (optional) + miscalibration_probability: Probability to generate miscalibrated samples (0.0=calibrated, 1.0=miscalibrated) Returns: - Preprocessed tensor ready for model inference + Tuple of (preprocessed tensor ready for model inference, ground truth label) """ # Load sample data from info.pkl sample_data = load_info_pkl_data(info_pkl_path, sample_idx) - # Get data_root from model config - data_root = model_cfg.get("data_root", None) - if data_root is None: - raise ValueError("data_root not found in model configuration") + # Ensure sample_idx is in the data + sample_data["sample_idx"] = sample_idx - # Use pre-created transform or create new one (always use test mode) - if transform_test is None: - transform = CalibrationClassificationTransform( - transform_config=model_cfg.get("transform_config", None), - mode="test", - data_root=data_root, - projection_vis_dir=None, - results_vis_dir=None, - enable_augmentation=False, - ) - else: - transform = transform_test + # Update transform's miscalibration probability (reuse existing transform instance) + transform.miscalibration_probability = miscalibration_probability # Apply transform with miscalibration control results = transform.transform(sample_data) input_data_processed = results["fused_img"] # (H, W, 5) + gt_label = results["gt_label"] # 0=miscalibrated, 1=calibrated # Convert to tensor input_tensor = torch.from_numpy(input_data_processed).permute(2, 0, 1).float() # (5, H, W) input_tensor = input_tensor.unsqueeze(0).to(device) # (1, 5, H, W) - return input_tensor - - -def create_test_transform(model_cfg: Config) -> CalibrationClassificationTransform: - """Create test transform instance once for reuse.""" - data_root = model_cfg.get("data_root", None) - if data_root is None: - raise ValueError("data_root not found in model configuration") - - transform_test = CalibrationClassificationTransform( - transform_config=model_cfg.get("transform_config", None), - mode="test", - data_root=data_root, - projection_vis_dir=None, - results_vis_dir=None, - enable_augmentation=False, - ) - - return transform_test + return input_tensor, gt_label def run_onnx_inference( @@ -246,7 +217,12 @@ def run_tensorrt_inference(engine, input_tensor: torch.Tensor, logger: logging.L raise RuntimeError("Could not find input/output tensor names") # Prepare arrays - input_np = input_tensor.numpy().astype(np.float32) + # Convert torch tensor to numpy (handle both CPU and CUDA tensors) + if hasattr(input_tensor, "cpu"): + input_np = input_tensor.cpu().numpy().astype(np.float32) + else: + input_np = input_tensor.astype(np.float32) + if not input_np.flags["C_CONTIGUOUS"]: input_np = np.ascontiguousarray(input_np) @@ -384,8 +360,20 @@ def evaluate_model( logger.error(f"Failed to load info.pkl: {e}") raise - # Create transform instances once - transform_test = create_test_transform(model_cfg) + # Initialize transform once and reuse it for all samples + data_root = model_cfg.get("data_root", None) + if data_root is None: + raise ValueError("data_root not found in model configuration") + + transform = CalibrationClassificationTransform( + transform_config=model_cfg.get("transform_config", None), + mode="test", + data_root=data_root, + projection_vis_dir=None, + 
results_vis_dir=None, + enable_augmentation=False, + miscalibration_probability=0.0, # Will be updated per sample + ) # Initialize inference engine if model_type == "onnx": @@ -399,10 +387,37 @@ def evaluate_model( logger.info(f"Using ONNX model: {model_path}") logger.info("Creating ONNX InferenceSession (one-time setup)...") + # Configure execution providers based on device + if device == "cuda": + # Try to use CUDA provider, fall back to CPU if not available + providers = ["CUDAExecutionProvider", "CPUExecutionProvider"] + logger.info("Attempting to use CUDA acceleration for ONNX inference...") + try: + available_providers = ort.get_available_providers() + if "CUDAExecutionProvider" not in available_providers: + logger.warning( + "CUDAExecutionProvider not available. Available providers: {}. " + "Install onnxruntime-gpu for CUDA acceleration: pip install onnxruntime-gpu".format( + available_providers + ) + ) + logger.warning("Falling back to CPU execution") + providers = ["CPUExecutionProvider"] + else: + logger.info("CUDAExecutionProvider is available") + except Exception as e: + logger.warning(f"Error checking CUDA provider: {e}. Falling back to CPU") + providers = ["CPUExecutionProvider"] + else: + providers = ["CPUExecutionProvider"] + logger.info("Using CPU for ONNX inference") + # Create session once and reuse it - providers = ["CPUExecutionProvider"] ort_session = ort.InferenceSession(model_path, providers=providers) + # Log which provider is actually being used + logger.info(f"ONNX session using providers: {ort_session.get_providers()}") + # Debug: Print ONNX model input info input_name = ort_session.get_inputs()[0].name input_shape = ort_session.get_inputs()[0].shape @@ -414,6 +429,10 @@ def evaluate_model( inference_func = lambda input_tensor: run_onnx_inference(ort_session, input_tensor, logger) elif model_type == "tensorrt": logger.info(f"Using TensorRT model: {model_path}") + # TensorRT has its own CUDA memory management, force CPU device for data preparation + # if device != "cpu": + # logger.warning(f"TensorRT inference requires CPU device for data preparation. 
Overriding device from '{device}' to 'cpu'") + # device = "cpu" engine = load_tensorrt_engine(model_path, logger) inference_func = lambda input_tensor: run_tensorrt_inference(engine, input_tensor, logger) else: @@ -431,26 +450,28 @@ def evaluate_model( logger.info(f"Processing sample {sample_idx + 1}/{num_samples}") try: - # Load sample data directly from info.pkl - input_tensor_calibrated = load_sample_data_from_info_pkl( + # Load sample data directly from info.pkl - generate calibrated sample + input_tensor_calibrated, gt_label_calibrated = load_sample_data_from_info_pkl( info_pkl_path, - model_cfg, + transform, sample_idx, device=device, - transform_test=transform_test, + miscalibration_probability=0.0, # Always calibrated ) - input_tensor_miscalibrated = load_sample_data_from_info_pkl( + + # Load sample data again - generate miscalibrated sample + input_tensor_miscalibrated, gt_label_miscalibrated = load_sample_data_from_info_pkl( info_pkl_path, - model_cfg, + transform, sample_idx, device=device, - transform_test=transform_test, + miscalibration_probability=1.0, # Always miscalibrated ) # Test both calibrated and miscalibrated samples test_samples = [ - (input_tensor_calibrated, 1), # calibrated sample - (input_tensor_miscalibrated, 0), # miscalibrated sample + (input_tensor_calibrated, gt_label_calibrated), + (input_tensor_miscalibrated, gt_label_miscalibrated), ] for input_tensor, gt_label in test_samples: @@ -483,6 +504,8 @@ def evaluate_model( logger.info( f"Sample {sample_idx + 1} (GT={gt_label}): Pred={predicted_class}, Confidence={confidence:.4f}, Latency={latency:.2f}ms" ) + logger.info(f" Raw logits: {logits.squeeze()}") + logger.info(f" Probabilities: {probabilities.squeeze()}") # Clear GPU memory periodically for TensorRT if model_type == "tensorrt" and sample_idx % 10 == 0: @@ -596,7 +619,11 @@ def main(): help="Number of samples to evaluate (default: 10)", ) parser.add_argument( - "--device", type=str, default="cpu", choices=["cpu", "cuda"], help="Device to use for inference" + "--device", + type=str, + default="cpu", + choices=["cpu", "cuda"], + help="Device to use for inference. For ONNX: enables CUDA acceleration (requires onnxruntime-gpu). For TensorRT: always uses CUDA internally regardless of this setting.", ) parser.add_argument( "--log-level", type=str, default="INFO", choices=["DEBUG", "INFO", "WARNING", "ERROR"], help="Logging level" From 3cb94ce7771aa49d7e067d36084d3bfdcb514a9a Mon Sep 17 00:00:00 2001 From: vividf Date: Wed, 1 Oct 2025 20:49:51 +0900 Subject: [PATCH 08/19] chore: combine evaluation script with deploy script Signed-off-by: vividf --- .../CalibrationStatusClassification/README.md | 72 +++- .../configs/deploy/resnet18_5ch.py | 31 +- .../deploy/main.py | 396 +++++++++++++++++- .../evaluate_inference.py | 47 +-- 4 files changed, 511 insertions(+), 35 deletions(-) diff --git a/projects/CalibrationStatusClassification/README.md b/projects/CalibrationStatusClassification/README.md index 3790ea2b..76ed49b1 100644 --- a/projects/CalibrationStatusClassification/README.md +++ b/projects/CalibrationStatusClassification/README.md @@ -66,14 +66,80 @@ python tools/calibration_classification/train.py projects/CalibrationStatusClass ### 5. Deploy -Example commands for deployment (modify paths if needed): +The deployment script supports model export, verification, and evaluation. 
-- Custom script (with verification): +#### 5.1 Basic Export (ONNX and TensorRT) + +Export model to ONNX and TensorRT formats: + +```sh +python projects/CalibrationStatusClassification/deploy/main.py \ + projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \ + projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ + checkpoint.pth \ + --info-pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl \ + --device cuda:0 \ + --work-dir ./work_dirs/ +``` + +#### 5.2 Export with Verification + +Verify exported models against PyTorch reference on single sample: + +```sh +python projects/CalibrationStatusClassification/deploy/main.py \ + projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \ + projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ + checkpoint.pth \ + --info-pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl \ + --sample-idx 0 \ + --device cuda:0 \ + --work-dir ./work_dirs/ \ + --verify +``` + +#### 5.3 Full Model Evaluation + +Evaluate exported models on multiple samples with comprehensive metrics: ```sh -python projects/CalibrationStatusClassification/deploy/main.py projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py checkpoint.pth --info_pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl --sample_idx 0 --device cuda:0 --work-dir /workspace/work_dirs/ --verify +python projects/CalibrationStatusClassification/deploy/main.py \ + projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \ + projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ + checkpoint.pth \ + --info-pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl \ + --device cuda:0 \ + --work-dir ./work_dirs/ \ + --evaluate \ + --num-samples 50 \ + --verbose ``` +**Evaluation output includes:** +- Overall accuracy and per-class accuracy +- Confusion matrix +- Latency statistics (mean, median, P95, P99, min, max, std) +- Throughput (samples/sec) + +#### 5.4 Evaluate Existing Models (without re-export) + +To evaluate previously exported models without re-exporting: + +```sh +# Evaluate ONNX model +python projects/CalibrationStatusClassification/deploy/main.py \ + projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \ + projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ + checkpoint.pth \ + --info-pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl \ + --device cuda:0 \ + --work-dir ./work_dirs/ \ + --evaluate \ + --num-samples 100 +``` + +**Note:** The script will automatically detect and evaluate existing ONNX/TensorRT models in the work directory. 
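+
+#### 5.5 Interpreting the Reported Metrics
+
+The metrics in the evaluation report are plain aggregations over the per-sample results. The following is a minimal sketch of the same computation, assuming the predictions, ground-truth labels, and per-sample latencies have already been collected as NumPy arrays (the variable names below are placeholders, not part of the script's API):
+
+```python
+import numpy as np
+
+# Placeholder per-sample results; replace with the values collected during evaluation
+predictions = np.array([1, 0, 1, 1])
+ground_truth = np.array([1, 0, 0, 1])
+latencies_ms = np.array([4.2, 3.9, 4.1, 4.0])
+
+# Accuracy: fraction of samples where the prediction matches the label
+accuracy = (predictions == ground_truth).mean()
+
+# Latency statistics and throughput, as listed in the evaluation summary
+p50, p95, p99 = np.percentile(latencies_ms, [50, 95, 99])
+throughput = 1000.0 / latencies_ms.mean()  # samples per second
+
+print(f"accuracy={accuracy:.4f}  P50={p50:.2f} ms  P95={p95:.2f} ms  "
+      f"P99={p99:.2f} ms  throughput={throughput:.2f} samples/s")
+```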
+ ## Troubleshooting ## Reference diff --git a/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py b/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py index 729b0ed7..02d66642 100644 --- a/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py +++ b/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py @@ -30,6 +30,20 @@ # - If None with mode="both", will export ONNX first then convert ) +# ============================================================================== +# Evaluation Configuration +# ============================================================================== +evaluation = dict( + enabled=True, # Enable full model evaluation (set to True to run evaluation) + num_samples=100, # Number of samples to evaluate from info.pkl + verbose=True, # Enable verbose logging showing per-sample results + # Optional: Specify models to evaluate (if None, uses exported models from work_dir) + onnx_model="/workspace/work_dirs/end2end.onnx", # Path to ONNX model file to evaluate (e.g., "/path/to/model.onnx") + tensorrt_model="/workspace/work_dirs/end2end.engine", # Path to TensorRT engine file to evaluate (e.g., "/path/to/model.engine") + # Note: If models are None, will automatically detect and evaluate exported models + # Note: Command line args (--evaluate, --num-samples, --verbose) override these settings +) + # ============================================================================== # Codebase Configuration # ============================================================================== @@ -101,13 +115,28 @@ # 2) Set runtime_io.onnx_file = "/path/to/existing/model.onnx" # 3) Run the command above # +# Enable evaluation in config: +# 1) Set evaluation.enabled = True +# 2) Set evaluation.num_samples = 100 # Adjust as needed +# 3) Optionally set evaluation.onnx_model and/or evaluation.tensorrt_model +# 4) Run the command above +# +# Evaluate specific models: +# 1) Set evaluation.enabled = True +# 2) Set evaluation.onnx_model = "/path/to/model.onnx" +# 3) Set evaluation.tensorrt_model = "/path/to/model.engine" +# 4) Run the command above +# # Override config settings via command line: # python projects/CalibrationStatusClassification/deploy/main.py \ # \ # --work-dir ./custom_output \ # --device cuda:1 \ # --info-pkl /path/to/custom/info.pkl \ -# --sample-idx 5 +# --sample-idx 5 \ +# --evaluate \ +# --num-samples 50 \ +# --verbose # # Available precision policies: # - auto: Let TensorRT decide (default, good balance) diff --git a/projects/CalibrationStatusClassification/deploy/main.py b/projects/CalibrationStatusClassification/deploy/main.py index 32084dc4..1700917d 100644 --- a/projects/CalibrationStatusClassification/deploy/main.py +++ b/projects/CalibrationStatusClassification/deploy/main.py @@ -6,12 +6,15 @@ Features: - ONNX export with optimization -- TensorRT conversion -- Dual verification (ONNX + TensorRT) -- Performance benchmarking +- TensorRT conversion with precision policy support +- Dual verification (ONNX + TensorRT) on single samples +- Full model evaluation on multiple samples with metrics +- Performance benchmarking with latency statistics +- Confusion matrix and per-class accuracy analysis """ import argparse +import gc import logging import os import os.path as osp @@ -28,6 +31,7 @@ import pycuda.driver as cuda import tensorrt as trt import torch +import torch.nn.functional as F from mmengine.config import Config from mmpretrain.apis import get_model @@ -201,6 +205,11 @@ def 
runtime_io_config(self) -> Dict: """Get runtime I/O configuration (info_pkl, sample_idx, onnx_file).""" return self.deploy_cfg.get("runtime_io", {}) + @property + def evaluation_config(self) -> Dict: + """Get evaluation configuration (enabled, num_samples, verbose, model paths).""" + return self.deploy_cfg.get("evaluation", {}) + @property def should_export_onnx(self) -> bool: """Check if ONNX export is requested.""" @@ -277,6 +286,14 @@ def parse_args() -> argparse.Namespace: parser.add_argument("--log-level", default="INFO", choices=list(logging._nameToLevel.keys()), help="Logging level") parser.add_argument("--info-pkl", help="Override info.pkl path from config") parser.add_argument("--sample-idx", type=int, help="Override sample index from config") + + # Evaluation mode + parser.add_argument("--evaluate", action="store_true", help="Run full evaluation on multiple samples") + parser.add_argument( + "--num-samples", type=int, default=10, help="Number of samples to evaluate (only with --evaluate)" + ) + parser.add_argument("--verbose", action="store_true", help="Enable verbose logging during evaluation") + return parser.parse_args() @@ -651,6 +668,292 @@ def run_verification( logger.info("=" * 50) +def evaluate_exported_model( + model_path: str, + model_type: str, + model_cfg: Config, + info_pkl_path: str, + logger: logging.Logger, + device: str = "cpu", + num_samples: int = 10, + verbose: bool = False, +) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: + """ + Evaluate exported model (ONNX or TensorRT) using info.pkl data. + + Args: + model_path: Path to model file (.onnx or .engine) + model_type: Type of model ("onnx" or "tensorrt") + model_cfg: Model configuration + info_pkl_path: Path to info.pkl file + logger: Logger instance + device: Device for preprocessing + num_samples: Number of samples to evaluate + verbose: Enable verbose logging + + Returns: + Tuple of (predictions, ground_truth, probabilities, latencies) + """ + + # Load info.pkl data + try: + with open(info_pkl_path, "rb") as f: + info_data = pickle.load(f) + + if "data_list" not in info_data: + raise ValueError("Expected 'data_list' key in info.pkl") + + samples_list = info_data["data_list"] + logger.info(f"Loaded {len(samples_list)} samples from info.pkl") + + # Limit number of samples for evaluation + num_samples = min(num_samples, len(samples_list)) + logger.info(f"Evaluating {num_samples} samples") + + except Exception as e: + logger.error(f"Failed to load info.pkl: {e}") + raise + + # Initialize transform once and reuse it for all samples + data_root = model_cfg.get("data_root", None) + if data_root is None: + raise ValueError("data_root not found in model configuration") + + transform_config = model_cfg.get("transform_config", None) + if transform_config is None: + raise ValueError("transform_config not found in model configuration") + + transform = CalibrationClassificationTransform( + transform_config=transform_config, + mode="test", + max_depth=model_cfg.get("max_depth", 128.0), + dilation_size=model_cfg.get("dilation_size", 1), + undistort=True, + miscalibration_probability=0.0, # Will be updated per sample + enable_augmentation=False, + data_root=data_root, + projection_vis_dir=None, + results_vis_dir=None, + binary_save_dir=None, + ) + + # Initialize inference engine + if model_type == "onnx": + logger.info(f"Using ONNX model: {model_path}") + logger.info("Creating ONNX InferenceSession...") + + # Configure execution providers + if device.startswith("cuda"): + providers = ["CUDAExecutionProvider", 
"CPUExecutionProvider"] + logger.info("Attempting to use CUDA acceleration for ONNX inference...") + try: + available_providers = ort.get_available_providers() + if "CUDAExecutionProvider" not in available_providers: + logger.warning( + f"CUDAExecutionProvider not available. Available: {available_providers}. " + "Install onnxruntime-gpu for CUDA acceleration" + ) + providers = ["CPUExecutionProvider"] + else: + logger.info("CUDAExecutionProvider is available") + except Exception as e: + logger.warning(f"Error checking CUDA provider: {e}. Falling back to CPU") + providers = ["CPUExecutionProvider"] + else: + providers = ["CPUExecutionProvider"] + logger.info("Using CPU for ONNX inference") + + # Create session once and reuse it + ort_session = ort.InferenceSession(model_path, providers=providers) + logger.info(f"ONNX session using providers: {ort_session.get_providers()}") + + # Define inference function + def inference_func(input_tensor): + input_name = ort_session.get_inputs()[0].name + onnx_input = {input_name: input_tensor.cpu().numpy()} + start_time = time.perf_counter() + onnx_output = ort_session.run(None, onnx_input)[0] + latency = (time.perf_counter() - start_time) * 1000 + return onnx_output, latency + + elif model_type == "tensorrt": + logger.info(f"Using TensorRT model: {model_path}") + + # Load TensorRT engine + trt_logger = trt.Logger(trt.Logger.WARNING) + trt.init_libnvinfer_plugins(trt_logger, "") + runtime = trt.Runtime(trt_logger) + + with open(model_path, "rb") as f: + engine = runtime.deserialize_cuda_engine(f.read()) + + if engine is None: + raise RuntimeError("Failed to deserialize TensorRT engine") + + # Define inference function + def inference_func(input_tensor): + return _run_tensorrt_inference(engine, input_tensor.cpu(), logger) + + else: + raise ValueError(f"Unsupported model type: {model_type}") + + # Lists to store results + all_predictions = [] + all_ground_truth = [] + all_probabilities = [] + all_latencies = [] + + # Evaluate samples + for sample_idx in range(num_samples): + if sample_idx % 5 == 0: + logger.info(f"Processing sample {sample_idx + 1}/{num_samples}") + + try: + # Test both calibrated and miscalibrated versions + for miscalibration_prob, expected_label in [(0.0, 1), (1.0, 0)]: + # Update transform's miscalibration probability + transform.miscalibration_probability = miscalibration_prob + + # Load and preprocess sample + sample_data = load_info_pkl_data(info_pkl_path, sample_idx) + sample_data["sample_idx"] = sample_idx + + results = transform.transform(sample_data) + input_data_processed = results["fused_img"] + gt_label = results["gt_label"] + + # Convert to tensor + input_tensor = torch.from_numpy(input_data_processed).permute(2, 0, 1).float() + input_tensor = input_tensor.unsqueeze(0).to(device) + + # Run inference + output_np, latency = inference_func(input_tensor) + + if output_np is None: + logger.error(f"Failed to get output for sample {sample_idx}") + continue + + # Convert logits to probabilities + logits = torch.from_numpy(output_np) + probabilities = F.softmax(logits, dim=-1) + predicted_class = torch.argmax(probabilities, dim=-1).item() + confidence = probabilities.max().item() + + # Store results + all_predictions.append(predicted_class) + all_ground_truth.append(gt_label) + all_probabilities.append(probabilities.cpu().numpy()) + all_latencies.append(latency) + + # Print sample results only in verbose mode + if verbose: + logger.info( + f"Sample {sample_idx + 1} (GT={gt_label}): " + f"Pred={predicted_class}, 
Confidence={confidence:.4f}, Latency={latency:.2f}ms" + ) + + # Clear GPU memory periodically + if model_type == "tensorrt" and sample_idx % 10 == 0: + if torch.cuda.is_available(): + torch.cuda.empty_cache() + gc.collect() + + except Exception as e: + logger.error(f"Error processing sample {sample_idx}: {e}") + continue + + return ( + np.array(all_predictions), + np.array(all_ground_truth), + np.array(all_probabilities), + np.array(all_latencies), + ) + + +def print_evaluation_results( + all_predictions: np.ndarray, + all_ground_truth: np.ndarray, + all_probabilities: np.ndarray, + all_latencies: np.ndarray, + model_type: str, + logger: logging.Logger, +) -> None: + """Print comprehensive evaluation results with metrics and statistics.""" + + if len(all_predictions) == 0: + logger.error("No samples were processed successfully.") + return + + correct_predictions = (all_predictions == all_ground_truth).sum() + total_samples = len(all_predictions) + accuracy = correct_predictions / total_samples + avg_latency = np.mean(all_latencies) + + # Print header + logger.info(f"\n{'='*60}") + logger.info(f"{model_type.upper()} Model Evaluation Results") + logger.info(f"{'='*60}") + logger.info(f"Total samples: {total_samples}") + logger.info(f"Correct predictions: {correct_predictions}") + logger.info(f"Accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)") + logger.info(f"Average latency: {avg_latency:.2f} ms") + + # Calculate per-class accuracy + unique_classes = np.unique(all_ground_truth) + logger.info(f"\nPer-class accuracy:") + for cls in unique_classes: + cls_mask = all_ground_truth == cls + cls_correct = (all_predictions[cls_mask] == all_ground_truth[cls_mask]).sum() + cls_total = cls_mask.sum() + cls_accuracy = cls_correct / cls_total if cls_total > 0 else 0 + logger.info( + f" Class {cls} ({LABELS[str(cls)]}): " + f"{cls_correct}/{cls_total} = {cls_accuracy:.4f} ({cls_accuracy*100:.2f}%)" + ) + + # Calculate average confidence + avg_confidence = np.mean([prob.max() for prob in all_probabilities]) + logger.info(f"\nAverage confidence: {avg_confidence:.4f}") + + # Calculate latency statistics + min_latency = np.min(all_latencies) + max_latency = np.max(all_latencies) + std_latency = np.std(all_latencies) + p50_latency = np.percentile(all_latencies, 50) + p95_latency = np.percentile(all_latencies, 95) + p99_latency = np.percentile(all_latencies, 99) + + logger.info(f"\nLatency Statistics:") + logger.info(f" Average: {avg_latency:.2f} ms") + logger.info(f" Median (P50): {p50_latency:.2f} ms") + logger.info(f" P95: {p95_latency:.2f} ms") + logger.info(f" P99: {p99_latency:.2f} ms") + logger.info(f" Min: {min_latency:.2f} ms") + logger.info(f" Max: {max_latency:.2f} ms") + logger.info(f" Std: {std_latency:.2f} ms") + + # Show confusion matrix + logger.info(f"\nConfusion Matrix:") + logger.info(f"{'':>10} Predicted") + logger.info(f"{'Actual':>10} {'0 (misc)':>10} {'1 (calib)':>10}") + logger.info(f"{'-'*32}") + for true_cls in unique_classes: + row_label = f"{true_cls} ({LABELS[str(true_cls)][:4]})" + row = [f"{row_label:>10}"] + for pred_cls in unique_classes: + count = ((all_ground_truth == true_cls) & (all_predictions == pred_cls)).sum() + row.append(f"{count:>10}") + logger.info(" ".join(row)) + + logger.info(f"\n{'='*60}") + logger.info(f"Evaluation Summary:") + logger.info(f" Model Type: {model_type.upper()}") + logger.info(f" Accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)") + logger.info(f" Avg Latency: {avg_latency:.2f} ms") + logger.info(f" Throughput: {1000/avg_latency:.2f} samples/sec") + 
logger.info(f"{'='*60}\n") + + def main(): """Main deployment function.""" args = parse_args() @@ -751,6 +1054,93 @@ def main(): run_verification(model, onnx_path_for_verify, trt_path_for_verify, input_tensors, logger) + # Get evaluation settings from config and CLI + eval_cfg = config.evaluation_config + should_evaluate = args.evaluate or eval_cfg.get("enabled", False) + num_samples = args.num_samples if args.num_samples != 10 else eval_cfg.get("num_samples", 10) + verbose_mode = args.verbose or eval_cfg.get("verbose", False) + + # Run full evaluation if requested + if should_evaluate: + logger.info(f"\n{'='*60}") + logger.info("Starting full model evaluation...") + logger.info(f"{'='*60}") + + # Determine which models to evaluate + models_to_evaluate = [] + + # Check for ONNX model (config-specified or auto-detected) + eval_onnx_path = eval_cfg.get("onnx_model") + if eval_onnx_path: + # Use config-specified ONNX model + if osp.exists(eval_onnx_path): + models_to_evaluate.append(("onnx", eval_onnx_path)) + logger.info(f"Using config-specified ONNX model: {eval_onnx_path}") + else: + logger.warning(f"Config-specified ONNX model not found: {eval_onnx_path}") + elif config.should_export_onnx or existing_onnx: + # Auto-detect exported ONNX model + if onnx_path and osp.exists(onnx_path): + models_to_evaluate.append(("onnx", onnx_path)) + else: + logger.warning(f"ONNX model not found at {onnx_path}, skipping ONNX evaluation") + + # Check for TensorRT model (config-specified or auto-detected) + eval_trt_path = eval_cfg.get("tensorrt_model") + if eval_trt_path: + # Use config-specified TensorRT model + if osp.exists(eval_trt_path): + models_to_evaluate.append(("tensorrt", eval_trt_path)) + logger.info(f"Using config-specified TensorRT model: {eval_trt_path}") + else: + logger.warning(f"Config-specified TensorRT model not found: {eval_trt_path}") + elif config.should_export_tensorrt: + # Auto-detect exported TensorRT model + if trt_path and osp.exists(trt_path): + models_to_evaluate.append(("tensorrt", trt_path)) + else: + logger.warning(f"TensorRT model not found at {trt_path}, skipping TensorRT evaluation") + + if not models_to_evaluate: + logger.error( + "No models available for evaluation. Please export models first or specify model paths in config." 
+ ) + else: + # Evaluate each model + for model_type, model_path in models_to_evaluate: + logger.info(f"\nEvaluating {model_type.upper()} model...") + logger.info(f"Model path: {model_path}") + logger.info(f"Number of samples: {num_samples}") + logger.info(f"Verbose mode: {verbose_mode}") + + try: + # Run evaluation + predictions, ground_truth, probabilities, latencies = evaluate_exported_model( + model_path=model_path, + model_type=model_type, + model_cfg=model_cfg, + info_pkl_path=info_pkl, + logger=logger, + device=device.type if isinstance(device, torch.device) else device, + num_samples=num_samples, + verbose=verbose_mode, + ) + + # Print results + print_evaluation_results(predictions, ground_truth, probabilities, latencies, model_type, logger) + + # Cleanup + if torch.cuda.is_available(): + torch.cuda.empty_cache() + gc.collect() + + except Exception as e: + logger.error(f"Evaluation failed for {model_type.upper()} model: {e}") + import traceback + + logger.error(traceback.format_exc()) + continue + logger.info("Deployment completed successfully!") # Log what was exported diff --git a/tools/calibration_classification/evaluate_inference.py b/tools/calibration_classification/evaluate_inference.py index b4dcf9f8..a32b6127 100644 --- a/tools/calibration_classification/evaluate_inference.py +++ b/tools/calibration_classification/evaluate_inference.py @@ -154,30 +154,17 @@ def run_onnx_inference( logger: logging.Logger, ) -> Tuple[np.ndarray, float]: """Run ONNX inference directly and return output and latency.""" - # Convert input tensor to float32 - input_tensor = input_tensor.float() - - # Debug: Print input tensor info before preprocessing - logger.debug( - f"Input tensor before preprocessing - Shape: {input_tensor.shape}, Dtype: {input_tensor.dtype}, Min: {input_tensor.min():.4f}, Max: {input_tensor.max():.4f}" - ) - - # Add batch dimension if needed (ONNX expects 4D input: batch, channels, height, width) - if input_tensor.dim() == 3: - input_tensor = input_tensor.unsqueeze(0) # Add batch dimension - logger.debug(f"Added batch dimension - Shape: {input_tensor.shape}") - # Get input name from session input_name = ort_session.get_inputs()[0].name - onnx_input = {input_name: input_tensor.cpu().numpy().astype(np.float32)} + + # Convert to numpy (input_tensor is already in correct shape from load_sample_data_from_info_pkl) + onnx_input = {input_name: input_tensor.cpu().numpy()} start_time = time.perf_counter() onnx_output = ort_session.run(None, onnx_input)[0] end_time = time.perf_counter() onnx_latency = (end_time - start_time) * 1000 - logger.info(f"ONNX inference latency: {onnx_latency:.2f} ms") - # Ensure onnx_output is numpy array if not isinstance(onnx_output, np.ndarray): logger.error(f"Unexpected ONNX output type: {type(onnx_output)}") @@ -217,11 +204,18 @@ def run_tensorrt_inference(engine, input_tensor: torch.Tensor, logger: logging.L raise RuntimeError("Could not find input/output tensor names") # Prepare arrays - # Convert torch tensor to numpy (handle both CPU and CUDA tensors) - if hasattr(input_tensor, "cpu"): - input_np = input_tensor.cpu().numpy().astype(np.float32) - else: - input_np = input_tensor.astype(np.float32) + # Convert torch tensor to numpy (input_tensor is guaranteed to be on CPU) + input_np = input_tensor.numpy().astype(np.float32) + + # Validate input data quality + if np.isnan(input_np).any(): + raise ValueError(f"Input contains NaN values") + if np.isinf(input_np).any(): + raise ValueError(f"Input contains Inf values") + + logger.debug( + f"Input data 
stats: min={input_np.min():.4f}, max={input_np.max():.4f}, mean={input_np.mean():.4f}" + ) if not input_np.flags["C_CONTIGUOUS"]: input_np = np.ascontiguousarray(input_np) @@ -429,12 +423,9 @@ def evaluate_model( inference_func = lambda input_tensor: run_onnx_inference(ort_session, input_tensor, logger) elif model_type == "tensorrt": logger.info(f"Using TensorRT model: {model_path}") - # TensorRT has its own CUDA memory management, force CPU device for data preparation - # if device != "cpu": - # logger.warning(f"TensorRT inference requires CPU device for data preparation. Overriding device from '{device}' to 'cpu'") - # device = "cpu" engine = load_tensorrt_engine(model_path, logger) - inference_func = lambda input_tensor: run_tensorrt_inference(engine, input_tensor, logger) + # TensorRT requires CPU tensors - it manages GPU memory internally + inference_func = lambda input_tensor: run_tensorrt_inference(engine, input_tensor.cpu(), logger) else: raise ValueError(f"Unsupported model type: {model_type}") @@ -622,8 +613,8 @@ def main(): "--device", type=str, default="cpu", - choices=["cpu", "cuda"], - help="Device to use for inference. For ONNX: enables CUDA acceleration (requires onnxruntime-gpu). For TensorRT: always uses CUDA internally regardless of this setting.", + choices=["cpu", "cuda:0"], + help="Device to use for data preprocessing. For ONNX: also enables CUDA acceleration if onnxruntime-gpu is installed. For TensorRT: data is always transferred to CPU before inference (TensorRT manages GPU memory internally).", ) parser.add_argument( "--log-level", type=str, default="INFO", choices=["DEBUG", "INFO", "WARNING", "ERROR"], help="Logging level" From befb3dffcfdaf3b4f76fab4f8ef2456d83a36805 Mon Sep 17 00:00:00 2001 From: vividf Date: Wed, 1 Oct 2025 20:56:21 +0900 Subject: [PATCH 09/19] chore: add onnxruntime with cuda Signed-off-by: vividf --- projects/CalibrationStatusClassification/Dockerfile | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/projects/CalibrationStatusClassification/Dockerfile b/projects/CalibrationStatusClassification/Dockerfile index 77153382..e4c5578a 100644 --- a/projects/CalibrationStatusClassification/Dockerfile +++ b/projects/CalibrationStatusClassification/Dockerfile @@ -2,6 +2,13 @@ ARG AWML_BASE_IMAGE="autoware-ml:latest" FROM ${AWML_BASE_IMAGE} ARG TRT_VERSION=10.8.0.43 +# Install system dependencies +RUN apt-get update && apt-get install -y \ + libcudnn9-cuda-12 \ + libcudnn9-dev-cuda-12 \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + # Install pip dependencies RUN python3 -m pip --no-cache-dir install \ onnxruntime-gpu --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/ \ From 878bc8b2ffe8caf18933cea14bcdf8832de62087 Mon Sep 17 00:00:00 2001 From: vividf Date: Fri, 3 Oct 2025 10:53:11 +0900 Subject: [PATCH 10/19] chore: fix procedure Signed-off-by: vividf --- README.md | 1 + ...orial_calibration_status_classification.md | 77 ++++++ docs/tutorial/tutorial_detection_3d.md | 1 + .../CalibrationStatusClassification/README.md | 72 +----- .../configs/deploy/resnet18_5ch.py | 20 +- .../deploy/main.py | 168 +++++++------ tools/calibration_classification/README.md | 233 ++++++++++++++---- .../visualize_lidar_camera_projection.py | 44 ++-- 8 files changed, 397 insertions(+), 219 deletions(-) create mode 100644 docs/tutorial/tutorial_calibration_status_classification.md diff --git a/README.md b/README.md index a4c280d9..9e720af4 100644 --- a/README.md +++ b/README.md @@ -62,6 +62,7 @@ 
We hope that `AWML` promotes the community between Autoware and ML researchers a ## Get started - [Start training for 3D object detection](/docs/tutorial/tutorial_detection_3d.md) +- [Start training for Classification Status Classifier](/docs/tutorial/tutorial_calibration_status_classification.md) ## Docs ### Design documents diff --git a/docs/tutorial/tutorial_calibration_status_classification.md b/docs/tutorial/tutorial_calibration_status_classification.md new file mode 100644 index 00000000..8aae7511 --- /dev/null +++ b/docs/tutorial/tutorial_calibration_status_classification.md @@ -0,0 +1,77 @@ +# Tutorial: Calibration Status Classification + +## 1. Setup environment + +See [tutorial_installation](/docs/tutorial/tutorial_installation.md) to set up the environment. + +### Build Docker for CalibrationStatusClassification + +```sh +DOCKER_BUILDKIT=1 docker build -t autoware-ml-calib projects/CalibrationStatusClassification/ +``` + +## 2. Train and evaluation + +In this tutorial, we use [CalibrationStatusClassification](/projects/CalibrationStatusClassification/). +If you want to know the tools in detail, please see [calibration_classification](/tools/calibration_classification/) and [CalibrationStatusClassification](/projects/CalibrationStatusClassification/). + +### 2.1. Prepare T4dataset + +- Run docker + +```sh +docker run -it --rm --gpus all --shm-size=64g --name awml-calib -p 6006:6006 -v $PWD/:/workspace -v $PWD/data:/workspace/data autoware-ml-calib +``` + +- Create info files for T4dataset + +```sh +python tools/calibration_classification/create_data_t4dataset.py --config /workspace/autoware_ml/configs/calibration_classification/dataset/t4dataset/gen2_base.py --version gen2_base --root_path ./data/t4dataset -o ./data/t4dataset/calibration_info/ +``` + +- (Optional) Process only specific cameras + +```sh +python tools/calibration_classification/create_data_t4dataset.py --config /workspace/autoware_ml/configs/calibration_classification/dataset/t4dataset/gen2_base.py --version gen2_base --root_path ./data/t4dataset -o ./data/t4dataset/calibration_info/ --target_cameras CAM_FRONT CAM_LEFT CAM_RIGHT +``` + +### 2.2. Visualization (Optional) + +Visualize calibration data before training to verify sensor alignment: + +```sh +python tools/calibration_classification/toolkit.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py --info_pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl --data_root data/t4dataset --output_dir ./work_dirs/calibration_visualization +``` + +### 2.3. Train + +- Single GPU: + +```sh +python tools/calibration_classification/train.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py +``` + +- Multi GPU (example with 2 GPUs): + +```sh +./tools/calibration_classification/dist_train.sh projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py 2 +``` + +### 2.4. Evaluation + +```sh +python tools/calibration_classification/test.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py epoch_25.pth --out results.pkl +``` + +### 2.5. 
Deploy the ONNX file + +Export model to ONNX and TensorRT: + +```sh +python projects/CalibrationStatusClassification/deploy/main.py \ + projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \ + projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ + checkpoint.pth +``` + +For INT8 quantization and advanced deployment options, see [Deployment guide](/tools/calibration_classification/README.md#6-deployment). diff --git a/docs/tutorial/tutorial_detection_3d.md b/docs/tutorial/tutorial_detection_3d.md index 4f2728e0..ef4cdfba 100644 --- a/docs/tutorial/tutorial_detection_3d.md +++ b/docs/tutorial/tutorial_detection_3d.md @@ -1,3 +1,4 @@ +# Tutorial: Detection 3D ## 1. Setup environment diff --git a/projects/CalibrationStatusClassification/README.md b/projects/CalibrationStatusClassification/README.md index 76ed49b1..0940e7ad 100644 --- a/projects/CalibrationStatusClassification/README.md +++ b/projects/CalibrationStatusClassification/README.md @@ -64,81 +64,27 @@ python tools/calibration_classification/train.py projects/CalibrationStatusClass ./tools/calibration_classification/dist_train.sh projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py 2 ``` -### 5. Deploy - -The deployment script supports model export, verification, and evaluation. - -#### 5.1 Basic Export (ONNX and TensorRT) - -Export model to ONNX and TensorRT formats: +### 5. Test +Run testing ```sh -python projects/CalibrationStatusClassification/deploy/main.py \ - projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \ - projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ - checkpoint.pth \ - --info-pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl \ - --device cuda:0 \ - --work-dir ./work_dirs/ +python tools/calibration_classification/test.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py epoch_25.pth --out {output_file} ``` -#### 5.2 Export with Verification -Verify exported models against PyTorch reference on single sample: - -```sh -python projects/CalibrationStatusClassification/deploy/main.py \ - projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \ - projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ - checkpoint.pth \ - --info-pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl \ - --sample-idx 0 \ - --device cuda:0 \ - --work-dir ./work_dirs/ \ - --verify -``` +### 6. 
Deploy -#### 5.3 Full Model Evaluation - -Evaluate exported models on multiple samples with comprehensive metrics: +The deployment script provides model export, verification, and evaluation support: ```sh python projects/CalibrationStatusClassification/deploy/main.py \ - projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \ - projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ - checkpoint.pth \ - --info-pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl \ - --device cuda:0 \ - --work-dir ./work_dirs/ \ - --evaluate \ - --num-samples 50 \ - --verbose + projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \ + projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ + checkpoint.pth ``` -**Evaluation output includes:** -- Overall accuracy and per-class accuracy -- Confusion matrix -- Latency statistics (mean, median, P95, P99, min, max, std) -- Throughput (samples/sec) - -#### 5.4 Evaluate Existing Models (without re-export) - -To evaluate previously exported models without re-exporting: - -```sh -# Evaluate ONNX model -python projects/CalibrationStatusClassification/deploy/main.py \ - projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \ - projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ - checkpoint.pth \ - --info-pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl \ - --device cuda:0 \ - --work-dir ./work_dirs/ \ - --evaluate \ - --num-samples 100 -``` +For more details on configuration options such as INT8 quantization and custom settings, please refer to the [Deployment guide](../../tools/calibration_classification/README.md#6-deployment) -**Note:** The script will automatically detect and evaluate existing ONNX/TensorRT models in the work directory. 
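+
+For example, to also evaluate the exported models on a number of test samples, the same entry point accepts evaluation flags (paths as in the command above; adjust them to your setup):
+
+```sh
+python projects/CalibrationStatusClassification/deploy/main.py \
+    projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \
+    projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \
+    checkpoint.pth \
+    --evaluate \
+    --num-samples 100
+```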
## Troubleshooting diff --git a/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py b/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py index 02d66642..4888538e 100644 --- a/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py +++ b/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py @@ -9,10 +9,12 @@ # Export Configuration # ============================================================================== export = dict( - mode="both", # Export mode: "onnx", "trt", or "both" + mode="none", # Export mode: "onnx", "trt", "both", or "none" # - "onnx": Export to ONNX only # - "trt": Convert to TensorRT only (requires onnx_file in runtime_io) # - "both": Export to ONNX then convert to TensorRT + # - "none": Skip export, only run evaluation on existing models + # (requires evaluation.onnx_model and/or evaluation.tensorrt_model) verify=True, # Run verification comparing PyTorch/ONNX/TRT outputs device="cuda:0", # Device for export (use "cuda:0" or "cpu") # Note: TensorRT always requires CUDA, will auto-switch if needed @@ -36,7 +38,7 @@ evaluation = dict( enabled=True, # Enable full model evaluation (set to True to run evaluation) num_samples=100, # Number of samples to evaluate from info.pkl - verbose=True, # Enable verbose logging showing per-sample results + verbose=False, # Enable verbose logging showing per-sample results # Optional: Specify models to evaluate (if None, uses exported models from work_dir) onnx_model="/workspace/work_dirs/end2end.onnx", # Path to ONNX model file to evaluate (e.g., "/path/to/model.onnx") tensorrt_model="/workspace/work_dirs/end2end.engine", # Path to TensorRT engine file to evaluate (e.g., "/path/to/model.engine") @@ -121,11 +123,15 @@ # 3) Optionally set evaluation.onnx_model and/or evaluation.tensorrt_model # 4) Run the command above # -# Evaluate specific models: -# 1) Set evaluation.enabled = True -# 2) Set evaluation.onnx_model = "/path/to/model.onnx" -# 3) Set evaluation.tensorrt_model = "/path/to/model.engine" -# 4) Run the command above +# Evaluate specific models (without export): +# 1) Set export.mode = "none" +# 2) Set evaluation.enabled = True +# 3) Set evaluation.onnx_model = "/path/to/model.onnx" +# 4) Set evaluation.tensorrt_model = "/path/to/model.engine" +# 5) Run the command WITHOUT checkpoint: +# python projects/CalibrationStatusClassification/deploy/main.py \ +# projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \ +# projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py # # Override config settings via command line: # python projects/CalibrationStatusClassification/deploy/main.py \ diff --git a/projects/CalibrationStatusClassification/deploy/main.py b/projects/CalibrationStatusClassification/deploy/main.py index 1700917d..3989b3b9 100644 --- a/projects/CalibrationStatusClassification/deploy/main.py +++ b/projects/CalibrationStatusClassification/deploy/main.py @@ -181,7 +181,7 @@ def _validate_config(self) -> None: raise ValueError("Missing 'runtime_io' section in deploy config.") # Validate export mode - valid_modes = ["onnx", "trt", "both"] + valid_modes = ["onnx", "trt", "both", "none"] mode = self.export_config.get("mode", "both") if mode not in valid_modes: raise ValueError(f"Invalid export mode '{mode}'. 
Must be one of {valid_modes}") @@ -278,7 +278,9 @@ def parse_args() -> argparse.Namespace: ) parser.add_argument("deploy_cfg", help="Deploy config path") parser.add_argument("model_cfg", help="Model config path") - parser.add_argument("checkpoint", help="Model checkpoint path") + parser.add_argument( + "checkpoint", nargs="?", default=None, help="Model checkpoint path (optional when mode='none')" + ) # Optional overrides parser.add_argument("--work-dir", help="Override output directory from config") @@ -805,7 +807,7 @@ def inference_func(input_tensor): # Evaluate samples for sample_idx in range(num_samples): - if sample_idx % 5 == 0: + if sample_idx % 100 == 0: logger.info(f"Processing sample {sample_idx + 1}/{num_samples}") try: @@ -983,76 +985,90 @@ def main(): mmengine.mkdir_or_exist(osp.abspath(work_dir)) logger.info(f"Working directory: {work_dir}") logger.info(f"Device: {device}") - logger.info(f"Export mode: {config.export_config.get('mode', 'both')}") + export_mode = config.export_config.get("mode", "both") + logger.info(f"Export mode: {export_mode}") + + # Check if eval-only mode + is_eval_only = export_mode == "none" + + # Validate checkpoint requirement + if not is_eval_only and not args.checkpoint: + logger.error("Checkpoint is required when export mode is not 'none'") + return # Determine export paths onnx_path = None trt_path = None - if config.should_export_onnx: - onnx_settings = config.onnx_settings - onnx_path = osp.join(work_dir, onnx_settings["save_file"]) - - if config.should_export_tensorrt: - # Use existing ONNX if provided, otherwise use the one we'll export - if existing_onnx and not config.should_export_onnx: - onnx_path = existing_onnx - if not osp.exists(onnx_path): - logger.error(f"Provided ONNX file does not exist: {onnx_path}") + if not is_eval_only: + if config.should_export_onnx: + onnx_settings = config.onnx_settings + onnx_path = osp.join(work_dir, onnx_settings["save_file"]) + + if config.should_export_tensorrt: + # Use existing ONNX if provided, otherwise use the one we'll export + if existing_onnx and not config.should_export_onnx: + onnx_path = existing_onnx + if not osp.exists(onnx_path): + logger.error(f"Provided ONNX file does not exist: {onnx_path}") + return + logger.info(f"Using existing ONNX file: {onnx_path}") + elif not onnx_path: + # Need ONNX for TensorRT but neither export nor existing file specified + logger.error("TensorRT export requires ONNX file. Set mode='both' or provide onnx_file in config.") return - logger.info(f"Using existing ONNX file: {onnx_path}") - elif not onnx_path: - # Need ONNX for TensorRT but neither export nor existing file specified - logger.error("TensorRT export requires ONNX file. 
Set mode='both' or provide onnx_file in config.") - return - - # Set TensorRT output path - onnx_settings = config.onnx_settings - trt_file = onnx_settings["save_file"].replace(".onnx", ".engine") - trt_path = osp.join(work_dir, trt_file) - - # Load model - logger.info(f"Loading model from checkpoint: {args.checkpoint}") - device = torch.device(device) - model = get_model(model_cfg, args.checkpoint, device=device) - - # Load sample data - logger.info(f"Loading sample data from info.pkl: {info_pkl}") - input_tensor_calibrated = load_sample_data_from_info_pkl(info_pkl, model_cfg, 0.0, sample_idx, device=device) - input_tensor_miscalibrated = load_sample_data_from_info_pkl(info_pkl, model_cfg, 1.0, sample_idx, device=device) - - # Export models - if config.should_export_onnx: - export_to_onnx(model, input_tensor_calibrated, onnx_path, config, logger) - - if config.should_export_tensorrt: - logger.info("Converting ONNX to TensorRT...") - - # Ensure CUDA device for TensorRT - if device.type != "cuda": - logger.warning("TensorRT requires CUDA device, switching to cuda") - device = torch.device("cuda") - input_tensor_calibrated = input_tensor_calibrated.to(device) - input_tensor_miscalibrated = input_tensor_miscalibrated.to(device) - - success = export_to_tensorrt(onnx_path, trt_path, input_tensor_calibrated, config, logger) - if success: - logger.info(f"TensorRT conversion successful: {trt_path}") - else: - logger.error("TensorRT conversion failed") - # Run verification if requested - if config.should_verify: - logger.info( - "Running verification for miscalibrated and calibrated samples with an output array [SCORE_MISCALIBRATED, SCORE_CALIBRATED]..." + # Set TensorRT output path + onnx_settings = config.onnx_settings + trt_file = onnx_settings["save_file"].replace(".onnx", ".engine") + trt_path = osp.join(work_dir, trt_file) + + # Load model + logger.info(f"Loading model from checkpoint: {args.checkpoint}") + device = torch.device(device) + model = get_model(model_cfg, args.checkpoint, device=device) + + # Load sample data + logger.info(f"Loading sample data from info.pkl: {info_pkl}") + input_tensor_calibrated = load_sample_data_from_info_pkl(info_pkl, model_cfg, 0.0, sample_idx, device=device) + input_tensor_miscalibrated = load_sample_data_from_info_pkl( + info_pkl, model_cfg, 1.0, sample_idx, device=device ) - input_tensors = {"0": input_tensor_miscalibrated, "1": input_tensor_calibrated} - # Only verify formats that were exported or provided - onnx_path_for_verify = onnx_path if (config.should_export_onnx or existing_onnx) else None - trt_path_for_verify = trt_path if config.should_export_tensorrt else None + # Export models + if config.should_export_onnx: + export_to_onnx(model, input_tensor_calibrated, onnx_path, config, logger) + + if config.should_export_tensorrt: + logger.info("Converting ONNX to TensorRT...") - run_verification(model, onnx_path_for_verify, trt_path_for_verify, input_tensors, logger) + # Ensure CUDA device for TensorRT + if device.type != "cuda": + logger.warning("TensorRT requires CUDA device, switching to cuda") + device = torch.device("cuda") + input_tensor_calibrated = input_tensor_calibrated.to(device) + input_tensor_miscalibrated = input_tensor_miscalibrated.to(device) + + success = export_to_tensorrt(onnx_path, trt_path, input_tensor_calibrated, config, logger) + if success: + logger.info(f"TensorRT conversion successful: {trt_path}") + else: + logger.error("TensorRT conversion failed") + + # Run verification if requested + if config.should_verify: + 
logger.info( + "Running verification for miscalibrated and calibrated samples with an output array [SCORE_MISCALIBRATED, SCORE_CALIBRATED]..." + ) + input_tensors = {"0": input_tensor_miscalibrated, "1": input_tensor_calibrated} + + # Only verify formats that were exported or provided + onnx_path_for_verify = onnx_path if (config.should_export_onnx or existing_onnx) else None + trt_path_for_verify = trt_path if config.should_export_tensorrt else None + + run_verification(model, onnx_path_for_verify, trt_path_for_verify, input_tensors, logger) + else: + logger.info("Evaluation-only mode: Skipping model loading and export") # Get evaluation settings from config and CLI eval_cfg = config.evaluation_config @@ -1078,8 +1094,8 @@ def main(): logger.info(f"Using config-specified ONNX model: {eval_onnx_path}") else: logger.warning(f"Config-specified ONNX model not found: {eval_onnx_path}") - elif config.should_export_onnx or existing_onnx: - # Auto-detect exported ONNX model + elif not is_eval_only and (config.should_export_onnx or existing_onnx): + # Auto-detect exported ONNX model (only if we just exported) if onnx_path and osp.exists(onnx_path): models_to_evaluate.append(("onnx", onnx_path)) else: @@ -1094,8 +1110,8 @@ def main(): logger.info(f"Using config-specified TensorRT model: {eval_trt_path}") else: logger.warning(f"Config-specified TensorRT model not found: {eval_trt_path}") - elif config.should_export_tensorrt: - # Auto-detect exported TensorRT model + elif not is_eval_only and config.should_export_tensorrt: + # Auto-detect exported TensorRT model (only if we just exported) if trt_path and osp.exists(trt_path): models_to_evaluate.append(("tensorrt", trt_path)) else: @@ -1144,13 +1160,17 @@ def main(): logger.info("Deployment completed successfully!") # Log what was exported - exported_formats = [] - if config.should_export_onnx: - exported_formats.append("ONNX") - if config.should_export_tensorrt: - exported_formats.append("TensorRT") - - logger.info(f"Exported formats: {', '.join(exported_formats)}") + if not is_eval_only: + exported_formats = [] + if config.should_export_onnx: + exported_formats.append("ONNX") + if config.should_export_tensorrt: + exported_formats.append("TensorRT") + + if exported_formats: + logger.info(f"Exported formats: {', '.join(exported_formats)}") + else: + logger.info("Evaluation-only mode: No models were exported") if __name__ == "__main__": diff --git a/tools/calibration_classification/README.md b/tools/calibration_classification/README.md index 9de32e1a..f7188dbf 100644 --- a/tools/calibration_classification/README.md +++ b/tools/calibration_classification/README.md @@ -1,6 +1,6 @@ # tools/calibration_classification -The pipeline to make the model. +The pipeline to develop the calibration classification model. It contains training, evaluation, and visualization for Calibration classification. - [Support priority](https://github.com/tier4/AWML/blob/main/docs/design/autoware_ml_design.md#support-priority): Tier B @@ -23,7 +23,7 @@ Prepare the dataset you use. 
- Run docker ```sh -docker run -it --rm --gpus --shm-size=64g --name awml -p 6006:6006 -v $PWD/:/workspace -v $PWD/data:/workspace/data autoware-ml +docker run -it --rm --gpus all --shm-size=64g --name awml-calib -p 6006:6006 -v $PWD/:/workspace -v $PWD/data:/workspace/data autoware-ml-calib ``` - Create info files for T4dataset X2 Gen2 @@ -159,17 +159,6 @@ Understanding visualization configuration is crucial for calibration classificat # In config file test_projection_vis_dir = "./test_projection_vis_t4dataset/" test_results_vis_dir = "./test_results_vis_t4dataset/" - -# In transform pipeline -dict( - type="CalibrationClassificationTransform", - mode="test", - undistort=True, - enable_augmentation=False, - data_root=data_root, - projection_vis_dir=test_projection_vis_dir, # LiDAR projection visualization - results_vis_dir=test_results_vis_dir, # Model prediction visualization -), ``` ### 3.3. Usage Strategy @@ -187,7 +176,7 @@ dict( - `projection_vis_dir/`: LiDAR points overlaid on camera images - `results_vis_dir/`: Classification results with predicted labels -## 4. Train +## 4. Training ### 4.1. Environment Setup Set `CUBLAS_WORKSPACE_CONFIG` for deterministic behavior, please check this [nvidia doc](https://docs.nvidia.com/cuda/cublas/index.html#results-reproducibility) for more info @@ -195,9 +184,9 @@ Set `CUBLAS_WORKSPACE_CONFIG` for deterministic behavior, please check this [nvi export CUBLAS_WORKSPACE_CONFIG=:4096:8 ``` -### 4.2. Train +### 4.2. Run Training -- Train in general by below command. +- Run training with the following command: ```sh python tools/calibration_classification/train.py {config_file} @@ -208,7 +197,7 @@ python tools/calibration_classification/train.py {config_file} python tools/calibration_classification/train.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py ``` -- You can use docker command for training as below. +- Alternatively, you can use Docker: ```sh docker run -it --rm --gpus --name autoware-ml --shm-size=64g -d -v $PWD/:/workspace -v $PWD/data:/workspace/data autoware-ml bash -c 'python tools/calibration_classification/train.py {config_file}' @@ -243,9 +232,9 @@ The deployment system supports exporting models to ONNX and TensorRT formats wit ### 6.1. Deployment Configuration -The deployment uses a **config-first approach** where all settings are defined in the deployment config file. The config has three main sections: #### Configuration Structure +The config has four main sections: ```python # Export configuration - controls export behavior @@ -272,6 +261,15 @@ backend_config = dict( ), model_inputs=[...], # Dynamic shape configuration ) + +# Evaluation configuration - for model evaluation +evaluation = dict( + enabled=False, # Enable evaluation by default + num_samples=100, # Number of samples to evaluate + verbose=False, # Enable detailed logging + onnx_path=None, # Optional: path to existing ONNX model + tensorrt_path=None, # Optional: path to existing TensorRT engine +) ``` #### Precision Policies @@ -328,57 +326,196 @@ In config file: **Note:** The checkpoint is used for verification. To skip verification, set `export.verify = False` in config. +### 6.4. 
Evaluate ONNX and TensorRT Models +#### Evaluate Exported Models +```sh +python projects/CalibrationStatusClassification/deploy/main.py \ + projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \ + projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ + checkpoint.pth \ + --evaluate \ + --num-samples 100 \ + --verbose +``` +#### Command Line Arguments -### 6.4. Evaluate ONNX and TensorRT Models +- `--evaluate`: Enable full model evaluation on multiple samples +- `--num-samples`: Number of samples to evaluate (default: 10) +- `--verbose`: Enable verbose logging during evaluation + +## 7. INT8 Quantization Guide + +INT8 quantization reduces model size and improves inference speed by converting 32-bit floating-point weights to 8-bit integers. This guide covers Post-Training Quantization (PTQ) using NVIDIA's ModelOpt tool. + +### 7.1. Prerequisites + +Before starting quantization, ensure you have: -#### ONNX Model Evaluation +1. **Trained model checkpoint** (e.g., `epoch_25.pth`) +2. **ONNX model** exported using Section 6 +3. **Dataset info files** (from Section 2.1) -```bash -python tools/calibration_classification/evaluate_inference.py \ - --onnx work_dirs/end2end.onnx \ - --model-cfg projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ - --info-pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl +--- + +### 7.2. Generate Calibration Data + +Calibration data is used to determine optimal quantization parameters. Use representative samples from your training dataset. + +#### 7.2.1. Basic Usage + +```sh +python tools/calibration_classification/toolkit.py \ + projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ + --info_pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_train.pkl \ + --data_root data/t4dataset \ + --npz_output_path calibration_file.npz \ + --indices 200 ``` -#### TensorRT Model Evaluation +#### 7.2.2. Memory Requirements + +Calibration data is loaded entirely into GPU memory during quantization. Calculate required memory: +For 32 GB memory, the maximum you can use is approximately: 32 GB / 1860 x 2880 x 5 x 4 Bytes, around 321 images. +Use `--indices 200` to limit to first 200 samples. + + +### 7.3. Build Optimization Docker Environment + +The quantization requires NVIDIA ModelOpt, which is provided in a separate Docker image. -```bash -python tools/calibration_classification/evaluate_inference.py \ - --tensorrt work_dirs/end2end.engine \ - --model-cfg projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ - --info-pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl +```sh +DOCKER_BUILDKIT=1 docker build -t autoware-ml-calib-opt \ + -f projects/CalibrationStatusClassification/DockerfileOpt . ``` -#### Command Line Arguments +**What's included:** +- CUDA 12.6 + cuDNN +- ONNX 1.16.2 + ONNXRuntime 1.23.0 +- NVIDIA ModelOpt 0.33.1 + +### 7.4. Launch Optimization Container + +```sh +docker run -it --rm --gpus all --shm-size=32g \ + --name awml-opt -p 6006:6006 \ + -v $PWD:/workspace \ + -v $PWD/data:/workspace/data \ + autoware-ml-calib-opt +``` + +Remember to put your calibration.npz to the workspace/data + +### 7.5. 
Run INT8 Quantization + +Inside the Docker container, run the quantization: + +```sh +python3 -m modelopt.onnx.quantization \ + --onnx_path=work_dirs/end2end.onnx \ + --quantize_mode=int8 \ + --calibration_data_path=calibration_file.npz +``` -- `--onnx`: Path to ONNX model file (mutually exclusive with `--tensorrt`) -- `--tensorrt`: Path to TensorRT engine file (mutually exclusive with `--onnx`) -- `--model-cfg`: Path to model config file -- `--info-pkl`: Path to dataset info file -- `--device`: Device to use for inference (`cpu` or `cuda`, default: `cpu`) -- `--log-level`: Logging level (`DEBUG`, `INFO`, `WARNING`, `ERROR`, default: `INFO`) +**Output**: `work_dirs/end2end.quant.onnx` -### 6.5. INT8 Quantization -Set the number of images you want for calibration. Note that you need to consider the size of your memory. -For 32 GB memory, the maximum you can use is approximately: 1860 x 2880 x 5 x 4 Bytes / 32 GB +### 7.6. Evaluate Quantized Model -Therefore, please limit the calibration data using the indices parameter +#### 7.6.1. Evaluate ONNX INT8 Model +```sh +python projects/CalibrationStatusClassification/deploy/main.py \ + projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \ + projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ + epoch_25.pth \ + --evaluate \ + --num-samples 100 +``` + +**Note**: Configure the deployment config to use the quantized ONNX model: +```python +runtime_io = dict( + onnx_file="work_dirs/end2end.quant.onnx", # Point to quantized ONNX + # ... +) +``` + +#### 7.6.2. Convert to TensorRT INT8 Engine + +For optimal performance, convert the quantized ONNX to TensorRT: + +**In deployment config** (`projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py`): ```python -python tools/calibration_classification/toolkit.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py --info_pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_train.pkl --data_root data/t4dataset --output_dir /vis --npz_output_path calibration_file.npz --indices 200 +export = dict( + mode="trt", # Convert ONNX to TensorRT + verify=True, + device="cuda:0", + work_dir="/workspace/work_dirs", +) + +runtime_io = dict( + onnx_file="work_dirs/end2end.quant.onnx", # Use quantized ONNX + info_pkl="data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl", + sample_idx=0, +) + +backend_config = dict( + type="tensorrt", + common_config=dict( + max_workspace_size=1 << 30, + precision_policy="strongly_typed", # Preserve INT8 quantization + ), + # ... model_inputs configuration +) +``` + +**Run conversion:** +```sh +python projects/CalibrationStatusClassification/deploy/main.py \ + projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \ + projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ + epoch_25.pth # For verification only ``` +**Output**: `work_dirs/end2end.engine` (INT8 TensorRT engine) +#### 7.6.3. 
Evaluate TensorRT INT8 Engine + +```sh +python projects/CalibrationStatusClassification/deploy/main.py \ + projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \ + projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ + epoch_25.pth \ + --evaluate \ + --num-samples 100 +``` +**Note**: The deployment config should point to the TensorRT engine: ```python -DOCKER_BUILDKIT=1 docker build -t autoware-ml-calib-opt -f projects/CalibrationStatusClassification/DockerfileOpt . -docker run -it --rm --gpus all --shm-size=32g --name awml-opt -p 6006:6006 -v $PWD/:/workspace -v $PWD/data:/workspace/data autoware-ml-calib-opt -# Access the optimization docker -python3 -m modelopt.onnx.quantization --onnx_path=end2end.onnx --quantize_mode=int8 --calibration_data_path=calibration_file.npz +runtime_io = dict( + onnx_file="work_dirs/end2end.quant.onnx", + # ... +) -# After getting the end2end.quant.onnx, you can evaluate with quant onnx or conver to int8 engine by following section 6.3 +# TensorRT engine will be automatically loaded from work_dirs/end2end.engine ``` + +### 7.7. Expected Results + +**Typical performance improvements:** + +| Metric | FP32 ONNX | INT8 ONNX | INT8 TensorRT | +|-----------------|-----------|-----------|---------------| +| Model Size | 100% | ~25% | ~25% | +| Inference Speed | 1.0× | 1.5-2.0× | 2.5-4.0× | +| Accuracy Loss | baseline | <1% | <1% | +| Memory Usage | 100% | ~40% | ~30% | + +**Note**: Actual results depend on: +- Model architecture (ResNet18, etc.) +- Input resolution +- Hardware (GPU generation) +- Calibration data quality diff --git a/tools/calibration_classification/visualize_lidar_camera_projection.py b/tools/calibration_classification/visualize_lidar_camera_projection.py index 78e692df..2a2bf85d 100644 --- a/tools/calibration_classification/visualize_lidar_camera_projection.py +++ b/tools/calibration_classification/visualize_lidar_camera_projection.py @@ -118,14 +118,14 @@ def _validate_sample_structure(self, sample: Dict[str, Any]) -> bool: return all(key in sample for key in required_keys) def process_single_sample( - self, sample: Dict[str, Any], sample_idx: int, save_npz: bool = False + self, sample: Dict[str, Any], sample_idx: int, npz_output_path: Optional[str] = None ) -> Optional[Dict[str, Any]]: """ Process a single sample using the transform. Args: sample: Sample dictionary to process. sample_idx: Index of the sample for logging purposes. - save_npz: Whether to collect result for NPZ saving. + npz_output_path: Path for NPZ output. If provided, result will be collected for later saving. Returns: Transformed sample data if successful, None otherwise. """ @@ -136,8 +136,8 @@ def process_single_sample( result = self.transform(sample) - # Store the result for NPZ saving only if requested - if save_npz: + # Store the result for NPZ saving if output path is provided + if npz_output_path: self.collected_results.append(result["fused_img"]) self.logger.info(f"Successfully processed sample {sample_idx}") @@ -177,7 +177,6 @@ def process_samples( info_pkl_path: str, indices: Optional[List[int]] = None, visualize: bool = False, - save_npz: bool = False, npz_output_path: Optional[str] = None, ) -> None: """ @@ -186,8 +185,7 @@ def process_samples( info_pkl_path: Path to the info.pkl file. indices: Optional list of sample indices to process. If None, all samples are processed. visualize: Whether to generate visualizations (requires output_dir to be set). - save_npz: Whether to collect results for NPZ saving. 
- npz_output_path: Path for saving NPZ file (only used if save_npz=True). + npz_output_path: Path for saving NPZ file. If provided, NPZ saving is automatically enabled. """ try: samples_list = self.load_info_pkl(info_pkl_path) @@ -208,15 +206,15 @@ def process_samples( self.logger.info(f"Processing {len(samples_to_process)} samples") if visualize: self.logger.info("Visualization enabled") - if save_npz: + if npz_output_path: self.logger.info("NPZ saving enabled") # Process each sample for i, sample in enumerate(samples_to_process): - self.process_single_sample(sample, i + 1, save_npz=save_npz) + self.process_single_sample(sample, i + 1, npz_output_path=npz_output_path) # Save NPZ file if requested - if save_npz and npz_output_path: + if npz_output_path: self.save_npz_file(npz_output_path) self.logger.info("Finished processing all samples") @@ -230,7 +228,6 @@ def process_single_sample_from_file( info_pkl_path: str, sample_idx: int, visualize: bool = False, - save_npz: bool = False, npz_output_path: Optional[str] = None, ) -> Optional[Dict[str, Any]]: """ @@ -239,8 +236,7 @@ def process_single_sample_from_file( info_pkl_path: Path to the info.pkl file. sample_idx: Index of the sample to process (0-based). visualize: Whether to generate visualizations (requires output_dir to be set). - save_npz: Whether to collect results for NPZ saving. - npz_output_path: Path for saving NPZ file (only used if save_npz=True). + npz_output_path: Path for saving NPZ file. If provided, NPZ saving is automatically enabled. Returns: Transformed sample data if successful, None otherwise. """ @@ -254,20 +250,20 @@ def process_single_sample_from_file( self.logger.info(f"Processing sample {sample_idx} from {len(samples_list)} total samples") if visualize: self.logger.info("Visualization enabled") - if save_npz: + if npz_output_path: self.logger.info("NPZ saving enabled") # Clear previous results for single sample processing self.collected_results = [] sample = samples_list[sample_idx] - result = self.process_single_sample(sample, sample_idx, save_npz=save_npz) + result = self.process_single_sample(sample, sample_idx, npz_output_path=npz_output_path) if result and visualize and self.output_dir: self.logger.info(f"Visualizations saved to directory: {self.output_dir}") # Save NPZ file if requested - if save_npz and npz_output_path and result: + if npz_output_path and result: self.save_npz_file(npz_output_path) return result @@ -289,15 +285,15 @@ def create_argument_parser() -> argparse.ArgumentParser: # Process all samples with visualization python toolkit.py model_config.py --info_pkl data/info.pkl --data_root data/ --output_dir /vis --visualize # Process all samples and save as NPZ - python toolkit.py model_config.py --info_pkl data/info.pkl --data_root data/ --npz_output_path results.npz --save_npz + python toolkit.py model_config.py --info_pkl data/info.pkl --data_root data/ --npz_output_path results.npz # Process all samples with both visualization and NPZ saving - python toolkit.py model_config.py --info_pkl data/info.pkl --data_root data/ --output_dir /vis --visualize --npz_output_path results.npz --save_npz + python toolkit.py model_config.py --info_pkl data/info.pkl --data_root data/ --output_dir /vis --visualize --npz_output_path results.npz # Process specific sample with visualization python toolkit.py model_config.py --info_pkl data/info.pkl --data_root data/ --output_dir /vis --visualize --sample_idx 0 # Process specific indices with NPZ saving - python toolkit.py model_config.py --info_pkl data/info.pkl 
--data_root data/ --save_npz --npz_output_path results.npz --indices 0 1 2 + python toolkit.py model_config.py --info_pkl data/info.pkl --data_root data/ --npz_output_path results.npz --indices 0 1 2 # Process first 5 samples (indices 0, 1, 2, 3, 4) with both features - python toolkit.py model_config.py --info_pkl data/info.pkl --data_root data/ --output_dir /vis --visualize --save_npz --npz_output_path results.npz --indices 5 + python toolkit.py model_config.py --info_pkl data/info.pkl --data_root data/ --output_dir /vis --visualize --npz_output_path results.npz --indices 5 """ parser = argparse.ArgumentParser( @@ -318,8 +314,7 @@ def create_argument_parser() -> argparse.ArgumentParser: help="Specific sample indices to process (0-based), or a single number N to process indices 0 to N-1", ) parser.add_argument("--visualize", action="store_true", help="Enable visualization (requires --output_dir)") - parser.add_argument("--save_npz", action="store_true", help="Enable NPZ saving (requires --npz_output_path)") - parser.add_argument("--npz_output_path", help="Path for saving NPZ file (only used with --save_npz)") + parser.add_argument("--npz_output_path", help="Path for saving NPZ file (automatically enables NPZ saving)") parser.add_argument( "--show_point_details", action="store_true", help="Show detailed point cloud field information" ) @@ -340,9 +335,6 @@ def main() -> None: if args.visualize and not args.output_dir: parser.error("--visualize requires --output_dir to be specified") - if args.save_npz and not args.npz_output_path: - parser.error("--save_npz requires --npz_output_path to be specified") - # Load model configuration model_cfg = Config.fromfile(args.model_cfg) @@ -369,7 +361,6 @@ def main() -> None: args.info_pkl, args.sample_idx, visualize=args.visualize, - save_npz=args.save_npz, npz_output_path=args.npz_output_path, ) else: @@ -378,7 +369,6 @@ def main() -> None: args.info_pkl, processed_indices, visualize=args.visualize, - save_npz=args.save_npz, npz_output_path=args.npz_output_path, ) From 76df38473d57708e839526227d406e6d4616e7b3 Mon Sep 17 00:00:00 2001 From: vividf Date: Fri, 3 Oct 2025 11:04:17 +0900 Subject: [PATCH 11/19] chore: remove file Signed-off-by: vividf --- .../evaluate_inference.py | 691 ------------------ 1 file changed, 691 deletions(-) delete mode 100644 tools/calibration_classification/evaluate_inference.py diff --git a/tools/calibration_classification/evaluate_inference.py b/tools/calibration_classification/evaluate_inference.py deleted file mode 100644 index a32b6127..00000000 --- a/tools/calibration_classification/evaluate_inference.py +++ /dev/null @@ -1,691 +0,0 @@ -#!/usr/bin/env python3 -""" -Integrated model evaluation script for calibration classification. -Supports both ONNX and TensorRT inference modes. -Simplified version that directly uses info.pkl files instead of dataset. 
-""" - -import argparse -import gc -import logging -import os -import pickle -import signal -import sys -import time -from typing import Any, Dict, Optional, Tuple - -import numpy as np -import torch -import torch.nn.functional as F -from mmengine.config import Config - -# Now we can import the class directly -from autoware_ml.calibration_classification.datasets.transforms.calibration_classification_transform import ( - CalibrationClassificationTransform, -) - -# TensorRT imports (only if needed) - match original test_tensorrt.py -try: - import pycuda.autoinit - import pycuda.driver as cuda - import tensorrt as trt - - TENSORRT_AVAILABLE = True -except ImportError: - TENSORRT_AVAILABLE = False - -# Constants -LABELS = {"0": "miscalibrated", "1": "calibrated"} - - -def signal_handler(signum, frame): - """Handle segmentation faults and other signals gracefully.""" - print(f"\nReceived signal {signum}. Cleaning up...") - try: - if torch.cuda.is_available(): - torch.cuda.empty_cache() - gc.collect() - print("Cleanup completed.") - except: - pass - sys.exit(1) - - -# Register signal handlers -signal.signal(signal.SIGSEGV, signal_handler) -signal.signal(signal.SIGINT, signal_handler) - - -def setup_logging(level: str = "INFO") -> logging.Logger: - """Setup logging configuration.""" - logging.basicConfig(level=getattr(logging, level.upper()), format="%(asctime)s - %(levelname)s - %(message)s") - return logging.getLogger(__name__) - - -def load_info_pkl_data(info_pkl_path: str, sample_idx: int = 0) -> Dict[str, Any]: - """ - Load a single sample from info.pkl file. - Args: - info_pkl_path: Path to the info.pkl file - sample_idx: Index of the sample to load (default: 0) - Returns: - Sample dictionary with the required structure for CalibrationClassificationTransform - Raises: - FileNotFoundError: If info.pkl file doesn't exist - ValueError: If data format is unexpected or sample index is invalid - """ - if not os.path.exists(info_pkl_path): - raise FileNotFoundError(f"Info.pkl file not found: {info_pkl_path}") - - try: - with open(info_pkl_path, "rb") as f: - info_data = pickle.load(f) - except Exception as e: - raise ValueError(f"Failed to load info.pkl file: {e}") - - # Extract samples from info.pkl - if isinstance(info_data, dict): - if "data_list" in info_data: - samples_list = info_data["data_list"] - else: - raise ValueError(f"Expected 'data_list' key in info_data, found keys: {list(info_data.keys())}") - else: - raise ValueError(f"Expected dict format, got {type(info_data)}") - - if not samples_list: - raise ValueError("No samples found in info.pkl") - - if sample_idx >= len(samples_list): - raise ValueError(f"Sample index {sample_idx} out of range (0-{len(samples_list)-1})") - - sample = samples_list[sample_idx] - - # Validate sample structure - required_keys = ["image", "lidar_points"] - if not all(key in sample for key in required_keys): - raise ValueError(f"Sample {sample_idx} has invalid structure. Required keys: {required_keys}") - - return sample - - -def load_sample_data_from_info_pkl( - info_pkl_path: str, - transform: CalibrationClassificationTransform, - sample_idx: int = 0, - device: str = "cpu", - miscalibration_probability: float = 0.0, -) -> Tuple[torch.Tensor, int]: - """ - Load and preprocess sample data from info.pkl using CalibrationClassificationTransform. 
- Args: - info_pkl_path: Path to the info.pkl file - transform: Reusable CalibrationClassificationTransform instance - sample_idx: Index of the sample to load (default: 0) - device: Device to load tensor on - miscalibration_probability: Probability to generate miscalibrated samples (0.0=calibrated, 1.0=miscalibrated) - Returns: - Tuple of (preprocessed tensor ready for model inference, ground truth label) - """ - # Load sample data from info.pkl - sample_data = load_info_pkl_data(info_pkl_path, sample_idx) - - # Ensure sample_idx is in the data - sample_data["sample_idx"] = sample_idx - - # Update transform's miscalibration probability (reuse existing transform instance) - transform.miscalibration_probability = miscalibration_probability - - # Apply transform with miscalibration control - results = transform.transform(sample_data) - input_data_processed = results["fused_img"] # (H, W, 5) - gt_label = results["gt_label"] # 0=miscalibrated, 1=calibrated - - # Convert to tensor - input_tensor = torch.from_numpy(input_data_processed).permute(2, 0, 1).float() # (5, H, W) - input_tensor = input_tensor.unsqueeze(0).to(device) # (1, 5, H, W) - - return input_tensor, gt_label - - -def run_onnx_inference( - ort_session, - input_tensor: torch.Tensor, - logger: logging.Logger, -) -> Tuple[np.ndarray, float]: - """Run ONNX inference directly and return output and latency.""" - # Get input name from session - input_name = ort_session.get_inputs()[0].name - - # Convert to numpy (input_tensor is already in correct shape from load_sample_data_from_info_pkl) - onnx_input = {input_name: input_tensor.cpu().numpy()} - - start_time = time.perf_counter() - onnx_output = ort_session.run(None, onnx_input)[0] - end_time = time.perf_counter() - onnx_latency = (end_time - start_time) * 1000 - - # Ensure onnx_output is numpy array - if not isinstance(onnx_output, np.ndarray): - logger.error(f"Unexpected ONNX output type: {type(onnx_output)}") - return None, 0.0 - - return onnx_output, onnx_latency - - -def run_tensorrt_inference(engine, input_tensor: torch.Tensor, logger: logging.Logger) -> Tuple[np.ndarray, float]: - """Run TensorRT inference and return output with timing.""" - if not TENSORRT_AVAILABLE: - raise ImportError("TensorRT and PyCUDA are required for TensorRT inference. 
Please install them.") - - context = None - stream = None - start = None - end = None - d_input = None - d_output = None - - try: - context = engine.create_execution_context() - stream = cuda.Stream() - start = cuda.Event() - end = cuda.Event() - - # Get tensor names and shapes - input_name, output_name = None, None - for i in range(engine.num_io_tensors): - tensor_name = engine.get_tensor_name(i) - if engine.get_tensor_mode(tensor_name) == trt.TensorIOMode.INPUT: - input_name = tensor_name - elif engine.get_tensor_mode(tensor_name) == trt.TensorIOMode.OUTPUT: - output_name = tensor_name - - if input_name is None or output_name is None: - raise RuntimeError("Could not find input/output tensor names") - - # Prepare arrays - # Convert torch tensor to numpy (input_tensor is guaranteed to be on CPU) - input_np = input_tensor.numpy().astype(np.float32) - - # Validate input data quality - if np.isnan(input_np).any(): - raise ValueError(f"Input contains NaN values") - if np.isinf(input_np).any(): - raise ValueError(f"Input contains Inf values") - - logger.debug( - f"Input data stats: min={input_np.min():.4f}, max={input_np.max():.4f}, mean={input_np.mean():.4f}" - ) - - if not input_np.flags["C_CONTIGUOUS"]: - input_np = np.ascontiguousarray(input_np) - - # Validate input shape - expected_shape = engine.get_tensor_shape(input_name) - - # Check if shapes are compatible (considering -1 as dynamic dimension) - def shapes_compatible(actual_shape, expected_shape): - """Check if actual shape is compatible with expected shape (which may contain -1 for dynamic dims).""" - if len(actual_shape) != len(expected_shape): - return False - for actual_dim, expected_dim in zip(actual_shape, expected_shape): - if expected_dim != -1 and actual_dim != expected_dim: - return False - return True - - if not shapes_compatible(input_np.shape, expected_shape): - # Only warn/error if shapes are truly incompatible - if len(input_np.shape) == len(expected_shape) - 1 and expected_shape[0] == 1: - # Add batch dimension if missing - try: - input_np = np.expand_dims(input_np, axis=0) - logger.info(f"Added batch dimension to input: {input_np.shape}") - except Exception as e: - raise RuntimeError( - f"Cannot add batch dimension to input from {input_np.shape} to {expected_shape}: {e}" - ) - else: - raise RuntimeError( - f"Input shape mismatch: expected {expected_shape}, got {input_np.shape}. Please ensure input has correct batch dimension." 
- ) - else: - # Shapes are compatible (dynamic dimensions match), use actual shape - if tuple(expected_shape) != input_np.shape: - logger.debug(f"Using dynamic input shape {input_np.shape} (engine expects {expected_shape})") - - context.set_input_shape(input_name, input_np.shape) - output_shape = context.get_tensor_shape(output_name) - output_np = np.empty(output_shape, dtype=np.float32) - if not output_np.flags["C_CONTIGUOUS"]: - output_np = np.ascontiguousarray(output_np) - - # Allocate GPU memory - d_input = cuda.mem_alloc(input_np.nbytes) - d_output = cuda.mem_alloc(output_np.nbytes) - - # Set tensor addresses - context.set_tensor_address(input_name, int(d_input)) - context.set_tensor_address(output_name, int(d_output)) - - # Run inference with timing - cuda.memcpy_htod_async(d_input, input_np, stream) - start.record(stream) - context.execute_async_v3(stream_handle=stream.handle) - end.record(stream) - cuda.memcpy_dtoh_async(output_np, d_output, stream) - stream.synchronize() - - latency = end.time_since(start) - return output_np, latency - - except Exception as e: - logger.error(f"TensorRT inference failed: {e}") - raise - finally: - # Cleanup with better error handling - try: - if d_input is not None: - d_input.free() - except Exception as e: - logger.warning(f"Failed to free input memory: {e}") - - try: - if d_output is not None: - d_output.free() - except Exception as e: - logger.warning(f"Failed to free output memory: {e}") - - # Note: Don't try to free stream, start, end, or context as they are managed by TensorRT - # and may cause issues if freed manually - - -def load_tensorrt_engine(engine_path: str, logger: logging.Logger): - """Load TensorRT engine from file.""" - if not TENSORRT_AVAILABLE: - raise ImportError("TensorRT is required for TensorRT inference. 
Please install it.") - - trt_logger = trt.Logger(trt.Logger.WARNING) - - try: - with open(engine_path, "rb") as f: - engine_bytes = f.read() - engine = trt.Runtime(trt_logger).deserialize_cuda_engine(engine_bytes) - if engine is None: - raise RuntimeError("Failed to deserialize TensorRT engine") - return engine - except Exception as e: - logger.error(f"Error loading TensorRT engine: {e}") - logger.error("This might be due to TensorRT version incompatibility.") - logger.error("Please rebuild the TensorRT engine with the current TensorRT version.") - raise - - -def evaluate_model( - model_path: str, - model_type: str, - model_cfg_path: str, - info_pkl_path: str, - logger: logging.Logger, - device: str = "cpu", - num_samples: int = 10, - verbose: bool = False, -) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: - """Evaluate model using info.pkl data and return predictions, ground truth, probabilities, and latencies.""" - - # Load model config - model_cfg = Config.fromfile(model_cfg_path) - - # Load info.pkl data - try: - with open(info_pkl_path, "rb") as f: - info_data = pickle.load(f) - - if "data_list" not in info_data: - raise ValueError("Expected 'data_list' key in info.pkl") - - samples_list = info_data["data_list"] - logger.info(f"Loaded {len(samples_list)} samples from info.pkl") - - # Limit number of samples for evaluation - num_samples = min(num_samples, len(samples_list)) - logger.info(f"Evaluating {num_samples} samples") - - except Exception as e: - logger.error(f"Failed to load info.pkl: {e}") - raise - - # Initialize transform once and reuse it for all samples - data_root = model_cfg.get("data_root", None) - if data_root is None: - raise ValueError("data_root not found in model configuration") - - transform = CalibrationClassificationTransform( - transform_config=model_cfg.get("transform_config", None), - mode="test", - data_root=data_root, - projection_vis_dir=None, - results_vis_dir=None, - enable_augmentation=False, - miscalibration_probability=0.0, # Will be updated per sample - ) - - # Initialize inference engine - if model_type == "onnx": - try: - import onnxruntime as ort - except ImportError: - raise ImportError( - "onnxruntime is required for ONNX inference. Please install it with: pip install onnxruntime" - ) - - logger.info(f"Using ONNX model: {model_path}") - logger.info("Creating ONNX InferenceSession (one-time setup)...") - - # Configure execution providers based on device - if device == "cuda": - # Try to use CUDA provider, fall back to CPU if not available - providers = ["CUDAExecutionProvider", "CPUExecutionProvider"] - logger.info("Attempting to use CUDA acceleration for ONNX inference...") - try: - available_providers = ort.get_available_providers() - if "CUDAExecutionProvider" not in available_providers: - logger.warning( - "CUDAExecutionProvider not available. Available providers: {}. " - "Install onnxruntime-gpu for CUDA acceleration: pip install onnxruntime-gpu".format( - available_providers - ) - ) - logger.warning("Falling back to CPU execution") - providers = ["CPUExecutionProvider"] - else: - logger.info("CUDAExecutionProvider is available") - except Exception as e: - logger.warning(f"Error checking CUDA provider: {e}. 
Falling back to CPU") - providers = ["CPUExecutionProvider"] - else: - providers = ["CPUExecutionProvider"] - logger.info("Using CPU for ONNX inference") - - # Create session once and reuse it - ort_session = ort.InferenceSession(model_path, providers=providers) - - # Log which provider is actually being used - logger.info(f"ONNX session using providers: {ort_session.get_providers()}") - - # Debug: Print ONNX model input info - input_name = ort_session.get_inputs()[0].name - input_shape = ort_session.get_inputs()[0].shape - input_type = ort_session.get_inputs()[0].type - logger.info(f"ONNX model expects - Input name: {input_name}, Shape: {input_shape}, Type: {input_type}") - logger.info("ONNX InferenceSession created successfully.") - - # Bind session to closure for reuse - inference_func = lambda input_tensor: run_onnx_inference(ort_session, input_tensor, logger) - elif model_type == "tensorrt": - logger.info(f"Using TensorRT model: {model_path}") - engine = load_tensorrt_engine(model_path, logger) - # TensorRT requires CPU tensors - it manages GPU memory internally - inference_func = lambda input_tensor: run_tensorrt_inference(engine, input_tensor.cpu(), logger) - else: - raise ValueError(f"Unsupported model type: {model_type}") - - # Lists to store results - all_predictions = [] - all_ground_truth = [] - all_probabilities = [] - all_latencies = [] - - # Evaluate samples - for sample_idx in range(num_samples): - if sample_idx % 5 == 0: - logger.info(f"Processing sample {sample_idx + 1}/{num_samples}") - - try: - # Load sample data directly from info.pkl - generate calibrated sample - input_tensor_calibrated, gt_label_calibrated = load_sample_data_from_info_pkl( - info_pkl_path, - transform, - sample_idx, - device=device, - miscalibration_probability=0.0, # Always calibrated - ) - - # Load sample data again - generate miscalibrated sample - input_tensor_miscalibrated, gt_label_miscalibrated = load_sample_data_from_info_pkl( - info_pkl_path, - transform, - sample_idx, - device=device, - miscalibration_probability=1.0, # Always miscalibrated - ) - - # Test both calibrated and miscalibrated samples - test_samples = [ - (input_tensor_calibrated, gt_label_calibrated), - (input_tensor_miscalibrated, gt_label_miscalibrated), - ] - - for input_tensor, gt_label in test_samples: - # Debug: Print input tensor info only in verbose mode - if verbose: - logger.info(f"Sample {sample_idx + 1} (GT={gt_label}) input tensor:") - logger.info(f" Dtype: {input_tensor.dtype}") - - # Run inference - output_np, latency = inference_func(input_tensor) - - if output_np is None: - logger.error(f"Failed to get output for sample {sample_idx}") - continue - - # Convert logits to probabilities - logits = torch.from_numpy(output_np) - probabilities = F.softmax(logits, dim=-1) - predicted_class = torch.argmax(probabilities, dim=-1).item() - confidence = probabilities.max().item() - - # Store results - all_predictions.append(predicted_class) - all_ground_truth.append(gt_label) - all_probabilities.append(probabilities.cpu().numpy()) - all_latencies.append(latency) - - # Print sample results only in verbose mode - if verbose: - logger.info( - f"Sample {sample_idx + 1} (GT={gt_label}): Pred={predicted_class}, Confidence={confidence:.4f}, Latency={latency:.2f}ms" - ) - logger.info(f" Raw logits: {logits.squeeze()}") - logger.info(f" Probabilities: {probabilities.squeeze()}") - - # Clear GPU memory periodically for TensorRT - if model_type == "tensorrt" and sample_idx % 10 == 0: - if torch.cuda.is_available(): - 
torch.cuda.empty_cache() - gc.collect() - - except Exception as e: - logger.error(f"Error processing sample {sample_idx}: {e}") - continue - - return ( - np.array(all_predictions), - np.array(all_ground_truth), - np.array(all_probabilities), - np.array(all_latencies), - ) - - -def print_results( - all_predictions: np.ndarray, - all_ground_truth: np.ndarray, - all_probabilities: np.ndarray, - all_latencies: np.ndarray, - model_type: str, - logger: logging.Logger, -): - """Print evaluation results.""" - if len(all_predictions) == 0: - logger.error("No samples were processed successfully.") - return - - correct_predictions = (all_predictions == all_ground_truth).sum() - total_samples = len(all_predictions) - accuracy = correct_predictions / total_samples - avg_latency = np.mean(all_latencies) - - # Print results - logger.info(f"\n{'='*50}") - logger.info(f"{model_type.upper()} Model Evaluation Results") - logger.info(f"{'='*50}") - logger.info(f"Total samples: {total_samples}") - logger.info(f"Correct predictions: {correct_predictions}") - logger.info(f"Accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)") - logger.info(f"Average latency: {avg_latency:.2f} ms") - - # Calculate per-class accuracy - unique_classes = np.unique(all_ground_truth) - logger.info(f"\nPer-class accuracy:") - for cls in unique_classes: - cls_mask = all_ground_truth == cls - cls_correct = (all_predictions[cls_mask] == all_ground_truth[cls_mask]).sum() - cls_total = cls_mask.sum() - cls_accuracy = cls_correct / cls_total if cls_total > 0 else 0 - logger.info( - f" Class {cls} ({LABELS[str(cls)]}): {cls_correct}/{cls_total} = {cls_accuracy:.4f} ({cls_accuracy*100:.2f}%)" - ) - - # Calculate average confidence - avg_confidence = np.mean([prob.max() for prob in all_probabilities]) - logger.info(f"\nAverage confidence: {avg_confidence:.4f}") - - # Calculate latency statistics - min_latency = np.min(all_latencies) - max_latency = np.max(all_latencies) - std_latency = np.std(all_latencies) - - logger.info(f"\nLatency Statistics:") - logger.info(f" Average latency: {avg_latency:.2f} ms") - logger.info(f" Min latency: {min_latency:.2f} ms") - logger.info(f" Max latency: {max_latency:.2f} ms") - logger.info(f" Std latency: {std_latency:.2f} ms") - - # Show confusion matrix - logger.info(f"\nConfusion Matrix:") - logger.info(f"Predicted ->") - logger.info(f"Actual 0 1") - for true_cls in unique_classes: - row = [] - for pred_cls in unique_classes: - count = ((all_ground_truth == true_cls) & (all_predictions == pred_cls)).sum() - row.append(f"{count:4d}") - logger.info(f" {true_cls} {' '.join(row)}") - - logger.info(f"\n{model_type.upper()} model evaluation completed successfully!") - logger.info(f"Model accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)") - logger.info(f"Average latency: {avg_latency:.2f} ms") - - -def main(): - """Main function.""" - parser = argparse.ArgumentParser(description="Evaluate calibration classification model using info.pkl") - parser.add_argument("--onnx", type=str, help="Path to ONNX model file") - parser.add_argument("--tensorrt", type=str, help="Path to TensorRT engine file") - parser.add_argument( - "--model-cfg", - type=str, - default="projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb8-25e_j6gen2.py", - help="Path to model config file", - ) - parser.add_argument( - "--info-pkl", - type=str, - required=True, - help="Path to info.pkl file containing calibration data", - ) - parser.add_argument( - "--num-samples", - type=int, - default=10, - help="Number of samples to evaluate 
(default: 10)", - ) - parser.add_argument( - "--device", - type=str, - default="cpu", - choices=["cpu", "cuda:0"], - help="Device to use for data preprocessing. For ONNX: also enables CUDA acceleration if onnxruntime-gpu is installed. For TensorRT: data is always transferred to CPU before inference (TensorRT manages GPU memory internally).", - ) - parser.add_argument( - "--log-level", type=str, default="INFO", choices=["DEBUG", "INFO", "WARNING", "ERROR"], help="Logging level" - ) - parser.add_argument( - "--verbose", action="store_true", help="Enable verbose logging for detailed sample information" - ) - - args = parser.parse_args() - - # Validate arguments - if not args.onnx and not args.tensorrt: - parser.error("Either --onnx or --tensorrt must be specified") - - if args.onnx and args.tensorrt: - parser.error("Only one of --onnx or --tensorrt can be specified") - - # Setup logging - logger = setup_logging(args.log_level) - - try: - # Determine model type and path - if args.onnx: - model_type = "onnx" - model_path = args.onnx - else: - model_type = "tensorrt" - model_path = args.tensorrt - - # Check if files exist - if not os.path.exists(model_path): - raise FileNotFoundError(f"Model file not found: {model_path}") - if not os.path.exists(args.model_cfg): - raise FileNotFoundError(f"Model config file not found: {args.model_cfg}") - if not os.path.exists(args.info_pkl): - raise FileNotFoundError(f"Info.pkl file not found: {args.info_pkl}") - - logger.info(f"Starting {model_type.upper()} model evaluation...") - logger.info(f"Model path: {model_path}") - logger.info(f"Model config: {args.model_cfg}") - logger.info(f"Info.pkl: {args.info_pkl}") - logger.info(f"Device: {args.device}") - logger.info(f"Number of samples: {args.num_samples}") - logger.info(f"Verbose logging: {args.verbose}") - - # Evaluate model - all_predictions, all_ground_truth, all_probabilities, all_latencies = evaluate_model( - model_path, model_type, args.model_cfg, args.info_pkl, logger, args.device, args.num_samples, args.verbose - ) - - # Print results - print_results(all_predictions, all_ground_truth, all_probabilities, all_latencies, model_type, logger) - - # Cleanup - if torch.cuda.is_available(): - torch.cuda.empty_cache() - gc.collect() - - logger.info("Script completed successfully!") - - except Exception as e: - logger.error(f"Error in main execution: {e}") - # Cleanup on error - try: - if torch.cuda.is_available(): - torch.cuda.empty_cache() - gc.collect() - except: - pass - sys.exit(1) - - -if __name__ == "__main__": - main() From 524ada7cf3c9dbc51c9b2d627dfa10933e222b90 Mon Sep 17 00:00:00 2001 From: vividf Date: Mon, 6 Oct 2025 15:48:38 +0900 Subject: [PATCH 12/19] chore: update evaluation Signed-off-by: vividf --- .../deploy/main.py | 52 ++++++++----------- tools/calibration_classification/README.md | 5 -- 2 files changed, 23 insertions(+), 34 deletions(-) diff --git a/projects/CalibrationStatusClassification/deploy/main.py b/projects/CalibrationStatusClassification/deploy/main.py index 3989b3b9..b030d0b2 100644 --- a/projects/CalibrationStatusClassification/deploy/main.py +++ b/projects/CalibrationStatusClassification/deploy/main.py @@ -1085,37 +1085,31 @@ def main(): # Determine which models to evaluate models_to_evaluate = [] - # Check for ONNX model (config-specified or auto-detected) + # Get model paths from config eval_onnx_path = eval_cfg.get("onnx_model") - if eval_onnx_path: - # Use config-specified ONNX model - if osp.exists(eval_onnx_path): - models_to_evaluate.append(("onnx", eval_onnx_path)) - 
logger.info(f"Using config-specified ONNX model: {eval_onnx_path}") - else: - logger.warning(f"Config-specified ONNX model not found: {eval_onnx_path}") - elif not is_eval_only and (config.should_export_onnx or existing_onnx): - # Auto-detect exported ONNX model (only if we just exported) - if onnx_path and osp.exists(onnx_path): - models_to_evaluate.append(("onnx", onnx_path)) - else: - logger.warning(f"ONNX model not found at {onnx_path}, skipping ONNX evaluation") - - # Check for TensorRT model (config-specified or auto-detected) eval_trt_path = eval_cfg.get("tensorrt_model") - if eval_trt_path: - # Use config-specified TensorRT model - if osp.exists(eval_trt_path): - models_to_evaluate.append(("tensorrt", eval_trt_path)) - logger.info(f"Using config-specified TensorRT model: {eval_trt_path}") - else: - logger.warning(f"Config-specified TensorRT model not found: {eval_trt_path}") - elif not is_eval_only and config.should_export_tensorrt: - # Auto-detect exported TensorRT model (only if we just exported) - if trt_path and osp.exists(trt_path): - models_to_evaluate.append(("tensorrt", trt_path)) - else: - logger.warning(f"TensorRT model not found at {trt_path}, skipping TensorRT evaluation") + + # If both paths are None, skip evaluation (no auto-detection) + if eval_onnx_path is None and eval_trt_path is None: + logger.warning("Both onnx_model and tensorrt_model are None. Skipping evaluation.") + logger.warning("To enable evaluation, specify at least one model path in the config.") + else: + # Only evaluate models that are explicitly specified (not None) + if eval_onnx_path is not None: + # Evaluate ONNX model + if osp.exists(eval_onnx_path): + models_to_evaluate.append(("onnx", eval_onnx_path)) + logger.info(f"Using config-specified ONNX model: {eval_onnx_path}") + else: + logger.warning(f"Config-specified ONNX model not found: {eval_onnx_path}") + + if eval_trt_path is not None: + # Evaluate TensorRT model + if osp.exists(eval_trt_path): + models_to_evaluate.append(("tensorrt", eval_trt_path)) + logger.info(f"Using config-specified TensorRT model: {eval_trt_path}") + else: + logger.warning(f"Config-specified TensorRT model not found: {eval_trt_path}") if not models_to_evaluate: logger.error( diff --git a/tools/calibration_classification/README.md b/tools/calibration_classification/README.md index f7188dbf..45d35d76 100644 --- a/tools/calibration_classification/README.md +++ b/tools/calibration_classification/README.md @@ -335,15 +335,10 @@ python projects/CalibrationStatusClassification/deploy/main.py \ projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \ projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ checkpoint.pth \ - --evaluate \ - --num-samples 100 \ --verbose ``` #### Command Line Arguments - -- `--evaluate`: Enable full model evaluation on multiple samples -- `--num-samples`: Number of samples to evaluate (default: 10) - `--verbose`: Enable verbose logging during evaluation ## 7. 
INT8 Quantization Guide From 7cde05a87c9aba114ffa989075e8e1e8cb14a3ea Mon Sep 17 00:00:00 2001 From: vividf Date: Mon, 6 Oct 2025 17:28:58 +0900 Subject: [PATCH 13/19] chore: update readme Signed-off-by: vividf --- tools/calibration_classification/README.md | 130 ++++++--------------- 1 file changed, 35 insertions(+), 95 deletions(-) diff --git a/tools/calibration_classification/README.md b/tools/calibration_classification/README.md index 45d35d76..713238e2 100644 --- a/tools/calibration_classification/README.md +++ b/tools/calibration_classification/README.md @@ -283,38 +283,14 @@ Control TensorRT inference precision with the `precision_policy` parameter: | `fp32_tf32` | Tensor Cores for FP32 | Ampere+ GPUs, FP32 speedup | | `strongly_typed` | Enforces explicit type constraints, preserves QDQ (Quantize-Dequantize) nodes | INT8 quantized models from QAT or PTQ with explicit Q/DQ ops | -### 6.2. Basic Usage - -#### Export to Both ONNX and TensorRT - -All settings come from the config file: - -```sh -python projects/CalibrationStatusClassification/deploy/main.py \ - projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \ - projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ - checkpoint.pth -``` - -#### Override Config Settings via CLI - -```sh -python projects/CalibrationStatusClassification/deploy/main.py \ - projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \ - projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ - checkpoint.pth \ - --work-dir ./custom_output \ - --device cuda:1 \ - --info-pkl /path/to/custom/info.pkl \ - --sample-idx 5 -``` - ### 6.3. Export Modes -#### Export to ONNX Only +#### Export from checkpoint to Both ONNX and TensorRT +In config file, set `export.mode = "both"`. -In config file, set `export.mode = "onnx"`. +#### Export from checkpoint to ONNX Only +In config file, set `export.mode = "onnx"`. #### Convert Existing ONNX to TensorRT @@ -322,9 +298,16 @@ In config file: - Set `export.mode = "trt"` - Set `runtime_io.onnx_file = "/path/to/model.onnx"` - **Note:** The checkpoint is used for verification. To skip verification, set `export.verify = False` in config. +#### Export +```sh +python projects/CalibrationStatusClassification/deploy/main.py \ + projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \ + projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ + checkpoint.pth +``` + ### 6.4. Evaluate ONNX and TensorRT Models @@ -334,32 +317,20 @@ In config file: python projects/CalibrationStatusClassification/deploy/main.py \ projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \ projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ - checkpoint.pth \ - --verbose + checkpoint.pth ``` -#### Command Line Arguments -- `--verbose`: Enable verbose logging during evaluation ## 7. INT8 Quantization Guide INT8 quantization reduces model size and improves inference speed by converting 32-bit floating-point weights to 8-bit integers. This guide covers Post-Training Quantization (PTQ) using NVIDIA's ModelOpt tool. -### 7.1. Prerequisites - -Before starting quantization, ensure you have: - -1. **Trained model checkpoint** (e.g., `epoch_25.pth`) -2. **ONNX model** exported using Section 6 -3. **Dataset info files** (from Section 2.1) - ---- -### 7.2. Generate Calibration Data +### 7.1. 
Generate Calibration Data Calibration data is used to determine optimal quantization parameters. Use representative samples from your training dataset. -#### 7.2.1. Basic Usage +#### 7.1.1. Basic Usage ```sh python tools/calibration_classification/toolkit.py \ @@ -376,6 +347,8 @@ Calibration data is loaded entirely into GPU memory during quantization. Calcula For 32 GB memory, the maximum you can use is approximately: 32 GB / 1860 x 2880 x 5 x 4 Bytes, around 321 images. Use `--indices 200` to limit to first 200 samples. +Additionally, ensure sufficient disk storage is available on the target device for calibration file output. + ### 7.3. Build Optimization Docker Environment @@ -419,26 +392,7 @@ python3 -m modelopt.onnx.quantization \ ### 7.6. Evaluate Quantized Model -#### 7.6.1. Evaluate ONNX INT8 Model - -```sh -python projects/CalibrationStatusClassification/deploy/main.py \ - projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \ - projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ - epoch_25.pth \ - --evaluate \ - --num-samples 100 -``` - -**Note**: Configure the deployment config to use the quantized ONNX model: -```python -runtime_io = dict( - onnx_file="work_dirs/end2end.quant.onnx", # Point to quantized ONNX - # ... -) -``` - -#### 7.6.2. Convert to TensorRT INT8 Engine +#### 7.6.1. Convert to TensorRT INT8 Engine For optimal performance, convert the quantized ONNX to TensorRT: @@ -472,45 +426,31 @@ backend_config = dict( python projects/CalibrationStatusClassification/deploy/main.py \ projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \ projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ - epoch_25.pth # For verification only + epoch_25.pth ``` **Output**: `work_dirs/end2end.engine` (INT8 TensorRT engine) #### 7.6.3. Evaluate TensorRT INT8 Engine -```sh -python projects/CalibrationStatusClassification/deploy/main.py \ - projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \ - projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ - epoch_25.pth \ - --evaluate \ - --num-samples 100 -``` - -**Note**: The deployment config should point to the TensorRT engine: +In config file, set `export.mode = "none"` and `"evaluation.enabled=True"`. ```python -runtime_io = dict( - onnx_file="work_dirs/end2end.quant.onnx", - # ... +export = dict( + mode="none", # Export mode: "onnx", "trt", "both", or "none" + ... ) -# TensorRT engine will be automatically loaded from work_dirs/end2end.engine +evaluation = dict( + enabled=True, + ... + onnx_model=None, + tensorrt_model="/workspace/work_dirs/end2end.engine", +) ``` -### 7.7. Expected Results - -**Typical performance improvements:** - -| Metric | FP32 ONNX | INT8 ONNX | INT8 TensorRT | -|-----------------|-----------|-----------|---------------| -| Model Size | 100% | ~25% | ~25% | -| Inference Speed | 1.0× | 1.5-2.0× | 2.5-4.0× | -| Accuracy Loss | baseline | <1% | <1% | -| Memory Usage | 100% | ~40% | ~30% | - -**Note**: Actual results depend on: -- Model architecture (ResNet18, etc.) 
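
For reference, the memory bound quoted in the Memory Requirements note above ("32 GB / 1860 x 2880 x 5 x 4 Bytes, around 321 images") can be reproduced with a short helper. This is a minimal sketch, assuming the 1860 x 2880 x 5 float32 fused input described in this README and reading "32 GB" as GiB; the helper name is illustrative and not part of the tooling:

```python
# Minimal sizing sketch for INT8 calibration data (assumptions noted above).
BYTES_PER_SAMPLE = 1860 * 2880 * 5 * 4  # H x W x channels x sizeof(float32), ~107 MB per fused image


def max_calibration_samples(gpu_mem_gib: float = 32.0) -> int:
    """Rough upper bound on how many calibration samples fit in GPU memory."""
    return int(gpu_mem_gib * (1 << 30)) // BYTES_PER_SAMPLE


print(max_calibration_samples(32.0))  # -> 320, consistent with the ~321 figure quoted above
```

The same per-sample size also roughly approximates the on-disk footprint of the saved `calibration_file.npz`, so limiting the sample count with `--indices` keeps both GPU memory and disk usage bounded.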
-- Input resolution -- Hardware (GPU generation) -- Calibration data quality +**Evaluate INT8 engine** +```sh +python projects/CalibrationStatusClassification/deploy/main.py \ + projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \ + projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py +``` From 19b0093a0e25a2519b4a39851d0eecec47a5b495 Mon Sep 17 00:00:00 2001 From: vividf Date: Mon, 6 Oct 2025 17:42:43 +0900 Subject: [PATCH 14/19] chore: change naming Signed-off-by: vividf --- .../deploy/main.py | 25 ++++++------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/projects/CalibrationStatusClassification/deploy/main.py b/projects/CalibrationStatusClassification/deploy/main.py index b030d0b2..5efcffba 100644 --- a/projects/CalibrationStatusClassification/deploy/main.py +++ b/projects/CalibrationStatusClassification/deploy/main.py @@ -163,7 +163,7 @@ def load_sample_data_from_info_pkl( class DeploymentConfig: - """Enhanced configuration container for deployment settings with validation.""" + """Configuration container for deployment settings.""" def __init__(self, deploy_cfg: Config): self.deploy_cfg = deploy_cfg @@ -287,14 +287,6 @@ def parse_args() -> argparse.Namespace: parser.add_argument("--device", help="Override device from config") parser.add_argument("--log-level", default="INFO", choices=list(logging._nameToLevel.keys()), help="Logging level") parser.add_argument("--info-pkl", help="Override info.pkl path from config") - parser.add_argument("--sample-idx", type=int, help="Override sample index from config") - - # Evaluation mode - parser.add_argument("--evaluate", action="store_true", help="Run full evaluation on multiple samples") - parser.add_argument( - "--num-samples", type=int, default=10, help="Number of samples to evaluate (only with --evaluate)" - ) - parser.add_argument("--verbose", action="store_true", help="Enable verbose logging during evaluation") return parser.parse_args() @@ -969,11 +961,10 @@ def main(): logger.info(f"Loading model config from: {args.model_cfg}") model_cfg = Config.fromfile(args.model_cfg) - # Get settings from config with CLI overrides - work_dir = args.work_dir if args.work_dir else config.work_dir - device = args.device if args.device else config.device - info_pkl = args.info_pkl if args.info_pkl else config.runtime_io_config.get("info_pkl") - sample_idx = args.sample_idx if args.sample_idx is not None else config.runtime_io_config.get("sample_idx", 0) + work_dir = config.work_dir + device = config.device + info_pkl = config.runtime_io_config.get("info_pkl") + sample_idx = config.runtime_io_config.get("sample_idx", 0) existing_onnx = config.runtime_io_config.get("onnx_file") # Validate required parameters @@ -1072,9 +1063,9 @@ def main(): # Get evaluation settings from config and CLI eval_cfg = config.evaluation_config - should_evaluate = args.evaluate or eval_cfg.get("enabled", False) - num_samples = args.num_samples if args.num_samples != 10 else eval_cfg.get("num_samples", 10) - verbose_mode = args.verbose or eval_cfg.get("verbose", False) + should_evaluate = eval_cfg.get("enabled", False) + num_samples = eval_cfg.get("num_samples", 10) + verbose_mode = eval_cfg.get("verbose", False) # Run full evaluation if requested if should_evaluate: From 2a816e427911319ea6ae66f92c3327b31e291a46 Mon Sep 17 00:00:00 2001 From: vividf Date: Tue, 7 Oct 2025 11:53:05 +0900 Subject: [PATCH 15/19] chore: update deploy config Signed-off-by: vividf --- .../configs/deploy/resnet18_5ch.py | 
80 +++---------------- 1 file changed, 13 insertions(+), 67 deletions(-) diff --git a/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py b/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py index 4888538e..54496f6b 100644 --- a/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py +++ b/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py @@ -1,4 +1,3 @@ -# projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py # Deployment configuration for ResNet18 5-channel calibration classification model # # 1. export: Controls export behavior (mode, verification, device, output directory) @@ -27,7 +26,7 @@ runtime_io = dict( info_pkl="data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl", sample_idx=0, # Sample index to use for export and verification - onnx_file=None, # Optional: Path to existing ONNX file + onnx_file="/workspace/work_dirs/end2end.onnx", # Optional: Path to existing ONNX file # - If provided with mode="trt", will convert this ONNX to TensorRT # - If None with mode="both", will export ONNX first then convert ) @@ -37,8 +36,8 @@ # ============================================================================== evaluation = dict( enabled=True, # Enable full model evaluation (set to True to run evaluation) - num_samples=100, # Number of samples to evaluate from info.pkl - verbose=False, # Enable verbose logging showing per-sample results + num_samples=10, # Number of samples to evaluate from info.pkl + verbose=True, # Enable verbose logging showing per-sample results # Optional: Specify models to evaluate (if None, uses exported models from work_dir) onnx_model="/workspace/work_dirs/end2end.onnx", # Path to ONNX model file to evaluate (e.g., "/path/to/model.onnx") tensorrt_model="/workspace/work_dirs/end2end.engine", # Path to TensorRT engine file to evaluate (e.g., "/path/to/model.engine") @@ -63,7 +62,7 @@ # - "fp16": Enable FP16 mode for faster inference with slight accuracy trade-off # - "fp32_tf32": Enable TF32 mode (Tensor Cores for FP32 operations on Ampere+) # - "strongly_typed": Enforce strict type checking (prevents automatic precision conversion) - precision_policy="fp32_tf32", + precision_policy="fp16", ), # Dynamic shape configuration for different input resolutions model_inputs=[ @@ -84,69 +83,16 @@ # ============================================================================== onnx_config = dict( type="onnx", - export_params=True, # Include trained parameters in the model - keep_initializers_as_inputs=False, # Don't expose initializers as inputs - opset_version=16, # ONNX opset version (16 recommended for modern features) - do_constant_folding=True, # Optimize by folding constant expressions - save_file="end2end.onnx", # Output filename - input_names=["input"], # Name of input tensor - output_names=["output"], # Name of output tensor - # Dynamic axes for variable input dimensions + export_params=True, + keep_initializers_as_inputs=False, + opset_version=16, + do_constant_folding=True, + save_file="end2end.onnx", + input_names=["input"], + output_names=["output"], dynamic_axes={ - "input": {0: "batch_size", 2: "height", 3: "width"}, # Batch, H, W are dynamic - "output": {0: "batch_size"}, # Batch is dynamic + "input": {0: "batch_size", 2: "height", 3: "width"}, + "output": {0: "batch_size"}, }, input_shape=None, ) - -# ============================================================================== -# Usage Examples -# 
============================================================================== -# -# Basic usage (export to both ONNX and TensorRT): -# python projects/CalibrationStatusClassification/deploy/main.py \ -# projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \ -# projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ -# checkpoint.pth -# -# Export to ONNX only: -# Set export.mode = "onnx" in this config, then run the command above -# -# Convert existing ONNX to TensorRT: -# 1) Set export.mode = "trt" -# 2) Set runtime_io.onnx_file = "/path/to/existing/model.onnx" -# 3) Run the command above -# -# Enable evaluation in config: -# 1) Set evaluation.enabled = True -# 2) Set evaluation.num_samples = 100 # Adjust as needed -# 3) Optionally set evaluation.onnx_model and/or evaluation.tensorrt_model -# 4) Run the command above -# -# Evaluate specific models (without export): -# 1) Set export.mode = "none" -# 2) Set evaluation.enabled = True -# 3) Set evaluation.onnx_model = "/path/to/model.onnx" -# 4) Set evaluation.tensorrt_model = "/path/to/model.engine" -# 5) Run the command WITHOUT checkpoint: -# python projects/CalibrationStatusClassification/deploy/main.py \ -# projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \ -# projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py -# -# Override config settings via command line: -# python projects/CalibrationStatusClassification/deploy/main.py \ -# \ -# --work-dir ./custom_output \ -# --device cuda:1 \ -# --info-pkl /path/to/custom/info.pkl \ -# --sample-idx 5 \ -# --evaluate \ -# --num-samples 50 \ -# --verbose -# -# Available precision policies: -# - auto: Let TensorRT decide (default, good balance) -# - fp16: Faster inference, ~2x speedup, small accuracy loss -# - fp32_tf32: Use Tensor Cores on Ampere+ GPUs for FP32 -# - explicit_int8: INT8 quantization (requires calibration dataset) -# - strongly_typed: Strict type enforcement, no automatic conversion From 10daf63f60a6887fd6b0bb58b4871b8ab3bf545a Mon Sep 17 00:00:00 2001 From: vividf Date: Tue, 7 Oct 2025 11:55:59 +0900 Subject: [PATCH 16/19] chore: refactor code Signed-off-by: vividf --- .../configs/deploy/resnet18_5ch.py | 2 +- .../deploy/__init__.py | 21 + .../deploy/config.py | 161 +++ .../deploy/evaluator.py | 435 +++++++ .../deploy/exporters.py | 680 ++++++++++ .../deploy/main.py | 1132 +---------------- tools/calibration_classification/README.md | 8 +- .../visualize_lidar_camera_projection.py | 11 +- 8 files changed, 1372 insertions(+), 1078 deletions(-) create mode 100644 projects/CalibrationStatusClassification/deploy/__init__.py create mode 100644 projects/CalibrationStatusClassification/deploy/config.py create mode 100644 projects/CalibrationStatusClassification/deploy/evaluator.py create mode 100644 projects/CalibrationStatusClassification/deploy/exporters.py diff --git a/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py b/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py index 54496f6b..0c4673d8 100644 --- a/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py +++ b/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py @@ -8,7 +8,7 @@ # Export Configuration # ============================================================================== export = dict( - mode="none", # Export mode: "onnx", "trt", "both", or "none" + mode="both", # Export mode: "onnx", "trt", "both", or "none" # - "onnx": Export 
to ONNX only # - "trt": Convert to TensorRT only (requires onnx_file in runtime_io) # - "both": Export to ONNX then convert to TensorRT diff --git a/projects/CalibrationStatusClassification/deploy/__init__.py b/projects/CalibrationStatusClassification/deploy/__init__.py new file mode 100644 index 00000000..38b7c326 --- /dev/null +++ b/projects/CalibrationStatusClassification/deploy/__init__.py @@ -0,0 +1,21 @@ +""" +Calibration Status Classification Model Deployment Package + +This package provides utilities for exporting, verifying, and evaluating +CalibrationStatusClassification models in ONNX and TensorRT formats. +""" + +from .config import DeploymentConfig, parse_args, setup_logging +from .evaluator import evaluate_exported_model, print_evaluation_results +from .exporters import export_to_onnx, export_to_tensorrt, run_verification + +__all__ = [ + "DeploymentConfig", + "parse_args", + "setup_logging", + "export_to_onnx", + "export_to_tensorrt", + "run_verification", + "evaluate_exported_model", + "print_evaluation_results", +] diff --git a/projects/CalibrationStatusClassification/deploy/config.py b/projects/CalibrationStatusClassification/deploy/config.py new file mode 100644 index 00000000..e0071066 --- /dev/null +++ b/projects/CalibrationStatusClassification/deploy/config.py @@ -0,0 +1,161 @@ +""" +Configuration classes and constants for deployment. +""" + +import argparse +import logging +import os +import os.path as osp +from typing import Dict + +from mmengine.config import Config + +# Constants +DEFAULT_VERIFICATION_TOLERANCE = 1e-3 +DEFAULT_WORKSPACE_SIZE = 1 << 30 # 1 GB +EXPECTED_CHANNELS = 5 # RGB + Depth + Intensity +LABELS = {"0": "miscalibrated", "1": "calibrated"} + +# Precision policy mapping +PRECISION_POLICIES = { + "auto": {}, # No special flags, TensorRT decides + "fp16": {"FP16": True}, + "fp32_tf32": {"TF32": True}, # TF32 for FP32 operations + "explicit_int8": {"INT8": True}, + "strongly_typed": {"STRONGLY_TYPED": True}, # Network creation flag +} + + +class DeploymentConfig: + """Configuration container for deployment settings.""" + + def __init__(self, deploy_cfg: Config): + self.deploy_cfg = deploy_cfg + self._validate_config() + + def _validate_config(self) -> None: + """Validate configuration structure and required fields.""" + if "export" not in self.deploy_cfg: + raise ValueError( + "Missing 'export' section in deploy config. " + "Please update your config to the new format with 'export', 'runtime_io' sections." + ) + + if "runtime_io" not in self.deploy_cfg: + raise ValueError("Missing 'runtime_io' section in deploy config.") + + # Validate export mode + valid_modes = ["onnx", "trt", "both", "none"] + mode = self.export_config.get("mode", "both") + if mode not in valid_modes: + raise ValueError(f"Invalid export mode '{mode}'. Must be one of {valid_modes}") + + # Validate precision policy if present + backend_cfg = self.deploy_cfg.get("backend_config", {}) + common_cfg = backend_cfg.get("common_config", {}) + precision_policy = common_cfg.get("precision_policy", "auto") + if precision_policy not in PRECISION_POLICIES: + raise ValueError( + f"Invalid precision_policy '{precision_policy}'. 
" f"Must be one of {list(PRECISION_POLICIES.keys())}" + ) + + @property + def export_config(self) -> Dict: + """Get export configuration (mode, verify, device, work_dir).""" + return self.deploy_cfg.get("export", {}) + + @property + def runtime_io_config(self) -> Dict: + """Get runtime I/O configuration (info_pkl, sample_idx, onnx_file).""" + return self.deploy_cfg.get("runtime_io", {}) + + @property + def evaluation_config(self) -> Dict: + """Get evaluation configuration (enabled, num_samples, verbose, model paths).""" + return self.deploy_cfg.get("evaluation", {}) + + @property + def should_export_onnx(self) -> bool: + """Check if ONNX export is requested.""" + mode = self.export_config.get("mode", "both") + return mode in ["onnx", "both"] + + @property + def should_export_tensorrt(self) -> bool: + """Check if TensorRT export is requested.""" + mode = self.export_config.get("mode", "both") + return mode in ["trt", "both"] + + @property + def should_verify(self) -> bool: + """Check if verification is requested.""" + return self.export_config.get("verify", False) + + @property + def device(self) -> str: + """Get device for export.""" + return self.export_config.get("device", "cuda:0") + + @property + def work_dir(self) -> str: + """Get working directory.""" + return self.export_config.get("work_dir", os.getcwd()) + + @property + def onnx_settings(self) -> Dict: + """Get ONNX export settings.""" + onnx_config = self.deploy_cfg.get("onnx_config", {}) + return { + "opset_version": onnx_config.get("opset_version", 16), + "do_constant_folding": onnx_config.get("do_constant_folding", True), + "input_names": onnx_config.get("input_names", ["input"]), + "output_names": onnx_config.get("output_names", ["output"]), + "dynamic_axes": onnx_config.get("dynamic_axes"), + "export_params": onnx_config.get("export_params", True), + "keep_initializers_as_inputs": onnx_config.get("keep_initializers_as_inputs", False), + "save_file": onnx_config.get("save_file", "calibration_classifier.onnx"), + } + + @property + def tensorrt_settings(self) -> Dict: + """Get TensorRT export settings with precision policy support.""" + backend_config = self.deploy_cfg.get("backend_config", {}) + common_config = backend_config.get("common_config", {}) + + # Get precision policy + precision_policy = common_config.get("precision_policy", "auto") + policy_flags = PRECISION_POLICIES.get(precision_policy, {}) + + return { + "max_workspace_size": common_config.get("max_workspace_size", DEFAULT_WORKSPACE_SIZE), + "precision_policy": precision_policy, + "policy_flags": policy_flags, + "model_inputs": backend_config.get("model_inputs", []), + } + + +def parse_args() -> argparse.Namespace: + """Parse command line arguments.""" + parser = argparse.ArgumentParser( + description="Export CalibrationStatusClassification model to ONNX/TensorRT.", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument("deploy_cfg", help="Deploy config path") + parser.add_argument("model_cfg", help="Model config path") + parser.add_argument( + "checkpoint", nargs="?", default=None, help="Model checkpoint path (optional when mode='none')" + ) + + # Optional overrides + parser.add_argument("--work-dir", help="Override output directory from config") + parser.add_argument("--device", help="Override device from config") + parser.add_argument("--log-level", default="INFO", choices=list(logging._nameToLevel.keys()), help="Logging level") + parser.add_argument("--info-pkl", help="Override info.pkl path from config") + + return 
parser.parse_args() + + +def setup_logging(level: str) -> logging.Logger: + """Setup logging configuration.""" + logging.basicConfig(level=getattr(logging, level), format="%(levelname)s:%(name)s:%(message)s") + return logging.getLogger("mmdeploy") diff --git a/projects/CalibrationStatusClassification/deploy/evaluator.py b/projects/CalibrationStatusClassification/deploy/evaluator.py new file mode 100644 index 00000000..efc7dd45 --- /dev/null +++ b/projects/CalibrationStatusClassification/deploy/evaluator.py @@ -0,0 +1,435 @@ +""" +Model evaluation utilities for exported models. +""" + +import gc +import logging +import os.path as osp +import time +from typing import Any, Tuple + +import numpy as np +import onnxruntime as ort +import pycuda.autoinit +import pycuda.driver as cuda +import tensorrt as trt +import torch +import torch.nn.functional as F +from config import LABELS +from exporters import ( + _clear_gpu_memory, + _create_transform_with_config, + _load_info_pkl_file, + _numpy_to_tensor, + _run_tensorrt_inference, + load_info_pkl_data, +) +from mmengine.config import Config + + +def _load_samples_from_info_pkl(info_pkl_path: str, num_samples: int, logger: logging.Logger) -> list: + """Load and validate samples from info.pkl file.""" + try: + samples_list = _load_info_pkl_file(info_pkl_path) + logger.info(f"Loaded {len(samples_list)} samples from info.pkl") + + # Limit number of samples + num_samples = min(num_samples, len(samples_list)) + logger.info(f"Evaluating {num_samples} samples") + + return samples_list[:num_samples] + + except Exception as e: + logger.error(f"Failed to load info.pkl: {e}") + raise + + +def _create_transform(model_cfg: Config): + """Create and configure CalibrationClassificationTransform for evaluation.""" + return _create_transform_with_config( + model_cfg=model_cfg, + miscalibration_probability=0.0, + projection_vis_dir=None, + results_vis_dir=None, + binary_save_dir=None, + ) + + +def _create_onnx_inference_func(model_path: str, device: str, logger: logging.Logger): + """Create ONNX inference function with configured providers.""" + logger.info(f"Using ONNX model: {model_path}") + logger.info("Creating ONNX InferenceSession...") + + # Configure execution providers + if device.startswith("cuda"): + providers = ["CUDAExecutionProvider", "CPUExecutionProvider"] + logger.info("Attempting to use CUDA acceleration for ONNX inference...") + try: + available_providers = ort.get_available_providers() + if "CUDAExecutionProvider" not in available_providers: + logger.warning( + f"CUDAExecutionProvider not available. Available: {available_providers}. " + "Install onnxruntime-gpu for CUDA acceleration" + ) + providers = ["CPUExecutionProvider"] + else: + logger.info("CUDAExecutionProvider is available") + except Exception as e: + logger.warning(f"Error checking CUDA provider: {e}. 
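# Example invocation of the refactored entrypoint using the arguments defined
# by parse_args() above; paths other than the deploy config are placeholders,
# and the positional checkpoint can be dropped when export.mode == "none".
#
#   python projects/CalibrationStatusClassification/deploy/main.py \
#       projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \
#       /path/to/model_cfg.py /path/to/checkpoint.pth \
#       --work-dir ./work_dirs/deploy --device cuda:0 \
#       --info-pkl /path/to/info.pkl --log-level INFO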
Falling back to CPU") + providers = ["CPUExecutionProvider"] + else: + providers = ["CPUExecutionProvider"] + logger.info("Using CPU for ONNX inference") + + # Create session + ort_session = ort.InferenceSession(model_path, providers=providers) + logger.info(f"ONNX session using providers: {ort_session.get_providers()}") + + # Return inference function + def inference_func(input_tensor): + input_name = ort_session.get_inputs()[0].name + onnx_input = {input_name: input_tensor.cpu().numpy()} + start_time = time.perf_counter() + onnx_output = ort_session.run(None, onnx_input)[0] + latency = (time.perf_counter() - start_time) * 1000 + return onnx_output, latency + + return inference_func + + +def _create_tensorrt_inference_func(model_path: str, logger: logging.Logger): + """Create TensorRT inference function with loaded engine.""" + logger.info(f"Using TensorRT model: {model_path}") + + # Load TensorRT engine + trt_logger = trt.Logger(trt.Logger.WARNING) + trt.init_libnvinfer_plugins(trt_logger, "") + runtime = trt.Runtime(trt_logger) + + with open(model_path, "rb") as f: + engine = runtime.deserialize_cuda_engine(f.read()) + + if engine is None: + raise RuntimeError("Failed to deserialize TensorRT engine") + + # Return inference function + def inference_func(input_tensor): + return _run_tensorrt_inference(engine, input_tensor.cpu(), logger) + + return inference_func + + +def _process_single_sample( + sample_idx: int, + info_pkl_path: str, + transform, + inference_func, + device: str, + logger: logging.Logger, + verbose: bool = False, +) -> Tuple[list, list, list, list]: + """Process a single sample with both calibrated and miscalibrated versions. + + Returns: + Tuple of (predictions, ground_truth, probabilities, latencies) for this sample + """ + predictions = [] + ground_truth = [] + probabilities = [] + latencies = [] + + # Test both calibrated and miscalibrated versions + for miscalibration_prob, expected_label in [(0.0, 1), (1.0, 0)]: + # Update transform's miscalibration probability + transform.miscalibration_probability = miscalibration_prob + + # Load and preprocess sample + sample_data = load_info_pkl_data(info_pkl_path, sample_idx) + sample_data["sample_idx"] = sample_idx + + results = transform.transform(sample_data) + input_data_processed = results["fused_img"] + gt_label = results["gt_label"] + + # Convert to tensor + input_tensor = _numpy_to_tensor(input_data_processed, device) + + # Run inference + output_np, latency = inference_func(input_tensor) + + if output_np is None: + logger.error(f"Failed to get output for sample {sample_idx}") + continue + + # Convert logits to probabilities + logits = torch.from_numpy(output_np) + probs = F.softmax(logits, dim=-1) + predicted_class = torch.argmax(probs, dim=-1).item() + confidence = probs.max().item() + + # Store results + predictions.append(predicted_class) + ground_truth.append(gt_label) + probabilities.append(probs.cpu().numpy()) + latencies.append(latency) + + # Print sample results only in verbose mode + if verbose: + logger.info( + f"Sample {sample_idx + 1} (GT={gt_label}): " + f"Pred={predicted_class}, Confidence={confidence:.4f}, Latency={latency:.2f}ms" + ) + + return predictions, ground_truth, probabilities, latencies + + +def evaluate_exported_model( + model_path: str, + model_type: str, + model_cfg: Config, + info_pkl_path: str, + logger: logging.Logger, + device: str = "cpu", + num_samples: int = 10, + verbose: bool = False, +) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: + """ + Evaluate exported model (ONNX 
or TensorRT) using info.pkl data. + + Args: + model_path: Path to model file (.onnx or .engine) + model_type: Type of model ("onnx" or "tensorrt") + model_cfg: Model configuration + info_pkl_path: Path to info.pkl file + logger: Logger instance + device: Device for preprocessing + num_samples: Number of samples to evaluate + verbose: Enable verbose logging + + Returns: + Tuple of (predictions, ground_truth, probabilities, latencies) + """ + # Load samples + samples_list = _load_samples_from_info_pkl(info_pkl_path, num_samples, logger) + + # Create transform + transform = _create_transform(model_cfg) + + # Create inference function based on model type + if model_type == "onnx": + inference_func = _create_onnx_inference_func(model_path, device, logger) + elif model_type == "tensorrt": + inference_func = _create_tensorrt_inference_func(model_path, logger) + else: + raise ValueError(f"Unsupported model type: {model_type}") + + # Process all samples + all_predictions = [] + all_ground_truth = [] + all_probabilities = [] + all_latencies = [] + + for sample_idx in range(len(samples_list)): + if sample_idx % 100 == 0: + logger.info(f"Processing sample {sample_idx + 1}/{len(samples_list)}") + + try: + predictions, ground_truth, probabilities, latencies = _process_single_sample( + sample_idx=sample_idx, + info_pkl_path=info_pkl_path, + transform=transform, + inference_func=inference_func, + device=device, + logger=logger, + verbose=verbose, + ) + + all_predictions.extend(predictions) + all_ground_truth.extend(ground_truth) + all_probabilities.extend(probabilities) + all_latencies.extend(latencies) + + # Clear GPU memory periodically + if model_type == "tensorrt" and sample_idx % 10 == 0: + _clear_gpu_memory() + + except Exception as e: + logger.error(f"Error processing sample {sample_idx}: {e}") + continue + + return ( + np.array(all_predictions), + np.array(all_ground_truth), + np.array(all_probabilities), + np.array(all_latencies), + ) + + +def print_evaluation_results( + all_predictions: np.ndarray, + all_ground_truth: np.ndarray, + all_probabilities: np.ndarray, + all_latencies: np.ndarray, + model_type: str, + logger: logging.Logger, +) -> None: + """Print comprehensive evaluation results with metrics and statistics.""" + + if len(all_predictions) == 0: + logger.error("No samples were processed successfully.") + return + + correct_predictions = (all_predictions == all_ground_truth).sum() + total_samples = len(all_predictions) + accuracy = correct_predictions / total_samples + avg_latency = np.mean(all_latencies) + + # Print header + logger.info(f"\n{'='*60}") + logger.info(f"{model_type.upper()} Model Evaluation Results") + logger.info(f"{'='*60}") + logger.info(f"Total samples: {total_samples}") + logger.info(f"Correct predictions: {correct_predictions}") + logger.info(f"Accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)") + logger.info(f"Average latency: {avg_latency:.2f} ms") + + # Calculate per-class accuracy + unique_classes = np.unique(all_ground_truth) + logger.info(f"\nPer-class accuracy:") + for cls in unique_classes: + cls_mask = all_ground_truth == cls + cls_correct = (all_predictions[cls_mask] == all_ground_truth[cls_mask]).sum() + cls_total = cls_mask.sum() + cls_accuracy = cls_correct / cls_total if cls_total > 0 else 0 + logger.info( + f" Class {cls} ({LABELS[str(cls)]}): " + f"{cls_correct}/{cls_total} = {cls_accuracy:.4f} ({cls_accuracy*100:.2f}%)" + ) + + # Calculate average confidence + avg_confidence = np.mean([prob.max() for prob in all_probabilities]) + logger.info(f"\nAverage 
confidence: {avg_confidence:.4f}") + + # Calculate latency statistics + min_latency = np.min(all_latencies) + max_latency = np.max(all_latencies) + std_latency = np.std(all_latencies) + p50_latency = np.percentile(all_latencies, 50) + p95_latency = np.percentile(all_latencies, 95) + p99_latency = np.percentile(all_latencies, 99) + + logger.info(f"\nLatency Statistics:") + logger.info(f" Average: {avg_latency:.2f} ms") + logger.info(f" Median (P50): {p50_latency:.2f} ms") + logger.info(f" P95: {p95_latency:.2f} ms") + logger.info(f" P99: {p99_latency:.2f} ms") + logger.info(f" Min: {min_latency:.2f} ms") + logger.info(f" Max: {max_latency:.2f} ms") + logger.info(f" Std: {std_latency:.2f} ms") + + # Show confusion matrix + logger.info(f"\nConfusion Matrix:") + logger.info(f"{'':>10} Predicted") + logger.info(f"{'Actual':>10} {'0 (misc)':>10} {'1 (calib)':>10}") + logger.info(f"{'-'*32}") + for true_cls in unique_classes: + row_label = f"{true_cls} ({LABELS[str(true_cls)][:4]})" + row = [f"{row_label:>10}"] + for pred_cls in unique_classes: + count = ((all_ground_truth == true_cls) & (all_predictions == pred_cls)).sum() + row.append(f"{count:>10}") + logger.info(" ".join(row)) + + logger.info(f"\n{'='*60}") + logger.info(f"Evaluation Summary:") + logger.info(f" Model Type: {model_type.upper()}") + logger.info(f" Accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)") + logger.info(f" Avg Latency: {avg_latency:.2f} ms") + logger.info(f" Throughput: {1000/avg_latency:.2f} samples/sec") + logger.info(f"{'='*60}\n") + + +def get_models_to_evaluate(eval_cfg: dict, logger: logging.Logger) -> list: + """Determine which models to evaluate based on config. + + Returns: + List of (model_type, model_path) tuples + """ + models_to_evaluate = [] + + # Get model paths from config + eval_onnx_path = eval_cfg.get("onnx_model") + eval_trt_path = eval_cfg.get("tensorrt_model") + + # If both paths are None, skip evaluation + if eval_onnx_path is None and eval_trt_path is None: + logger.warning("Both onnx_model and tensorrt_model are None. Skipping evaluation.") + logger.warning("To enable evaluation, specify at least one model path in the config.") + return models_to_evaluate + + # Only evaluate models that are explicitly specified (not None) + if eval_onnx_path is not None: + if osp.exists(eval_onnx_path): + models_to_evaluate.append(("onnx", eval_onnx_path)) + logger.info(f"Using config-specified ONNX model: {eval_onnx_path}") + else: + logger.warning(f"Config-specified ONNX model not found: {eval_onnx_path}") + + if eval_trt_path is not None: + if osp.exists(eval_trt_path): + models_to_evaluate.append(("tensorrt", eval_trt_path)) + logger.info(f"Using config-specified TensorRT model: {eval_trt_path}") + else: + logger.warning(f"Config-specified TensorRT model not found: {eval_trt_path}") + + return models_to_evaluate + + +def run_full_evaluation( + models_to_evaluate: list, + model_cfg: Config, + info_pkl: str, + device: Any, + num_samples: int, + verbose_mode: bool, + logger: logging.Logger, +) -> None: + """Run evaluation for all specified models.""" + if not models_to_evaluate: + logger.error( + "No models available for evaluation. Please export models first or specify model paths in config." 
+ ) + return + + # Evaluate each model + for model_type, model_path in models_to_evaluate: + logger.info(f"\nEvaluating {model_type.upper()} model...") + logger.info(f"Model path: {model_path}") + logger.info(f"Number of samples: {num_samples}") + logger.info(f"Verbose mode: {verbose_mode}") + + try: + # Run evaluation + predictions, ground_truth, probabilities, latencies = evaluate_exported_model( + model_path=model_path, + model_type=model_type, + model_cfg=model_cfg, + info_pkl_path=info_pkl, + logger=logger, + device=device.type if isinstance(device, torch.device) else device, + num_samples=num_samples, + verbose=verbose_mode, + ) + + # Print results + print_evaluation_results(predictions, ground_truth, probabilities, latencies, model_type, logger) + + # Cleanup + _clear_gpu_memory() + + except Exception as e: + logger.error(f"Evaluation failed for {model_type.upper()} model: {e}") + import traceback + + logger.error(traceback.format_exc()) + continue diff --git a/projects/CalibrationStatusClassification/deploy/exporters.py b/projects/CalibrationStatusClassification/deploy/exporters.py new file mode 100644 index 00000000..6d5655ff --- /dev/null +++ b/projects/CalibrationStatusClassification/deploy/exporters.py @@ -0,0 +1,680 @@ +""" +Model export and inference verification utilities. +""" + +import gc +import logging +import os +import os.path as osp +import pickle +import time +from typing import Any, Dict, Optional, Tuple + +import numpy as np +import onnx +import onnxruntime as ort +import onnxsim +import pycuda.autoinit +import pycuda.driver as cuda +import tensorrt as trt +import torch +from config import DEFAULT_VERIFICATION_TOLERANCE, LABELS, DeploymentConfig +from mmengine.config import Config + +from autoware_ml.calibration_classification.datasets.transforms.calibration_classification_transform import ( + CalibrationClassificationTransform, +) + +# ============================================================================ +# Data Loading Utilities +# ============================================================================ + + +def _load_info_pkl_file(info_pkl_path: str) -> list: + """ + Load and parse info.pkl file. + + Args: + info_pkl_path: Path to the info.pkl file + + Returns: + List of samples from data_list + + Raises: + FileNotFoundError: If info.pkl file doesn't exist + ValueError: If data format is unexpected + """ + if not os.path.exists(info_pkl_path): + raise FileNotFoundError(f"Info.pkl file not found: {info_pkl_path}") + + try: + with open(info_pkl_path, "rb") as f: + info_data = pickle.load(f) + except Exception as e: + raise ValueError(f"Failed to load info.pkl file: {e}") + + # Extract samples from info.pkl + if isinstance(info_data, dict): + if "data_list" in info_data: + samples_list = info_data["data_list"] + else: + raise ValueError(f"Expected 'data_list' key in info_data, found keys: {list(info_data.keys())}") + else: + raise ValueError(f"Expected dict format, got {type(info_data)}") + + if not samples_list: + raise ValueError("No samples found in info.pkl") + + return samples_list + + +def load_info_pkl_data(info_pkl_path: str, sample_idx: int = 0) -> Dict[str, Any]: + """ + Load a single sample from info.pkl file. 
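# Minimal sketch of the info.pkl layout these loaders expect: a dict with a
# non-empty "data_list", where each sample carries at least the "image" and
# "lidar_points" keys. The nested fields are dataset-specific and are left as
# empty placeholders here.
import pickle

info = {
    "data_list": [
        {
            "image": {},         # camera record (contents not reproduced here)
            "lidar_points": {},  # point-cloud record (contents not reproduced here)
        },
    ],
}

with open("/tmp/info_example.pkl", "wb") as f:
    pickle.dump(info, f)

sample = info["data_list"][0]
assert {"image", "lidar_points"} <= sample.keys()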
+ + Args: + info_pkl_path: Path to the info.pkl file + sample_idx: Index of the sample to load (default: 0) + + Returns: + Sample dictionary with the required structure for CalibrationClassificationTransform + + Raises: + FileNotFoundError: If info.pkl file doesn't exist + ValueError: If data format is unexpected or sample index is invalid + """ + samples_list = _load_info_pkl_file(info_pkl_path) + + if sample_idx >= len(samples_list): + raise ValueError(f"Sample index {sample_idx} out of range (0-{len(samples_list)-1})") + + sample = samples_list[sample_idx] + + # Validate sample structure + required_keys = ["image", "lidar_points"] + if not all(key in sample for key in required_keys): + raise ValueError(f"Sample {sample_idx} has invalid structure. Required keys: {required_keys}") + + return sample + + +def _create_transform_with_config( + model_cfg: Config, + miscalibration_probability: float = 0.0, + projection_vis_dir: Optional[str] = None, + results_vis_dir: Optional[str] = None, + binary_save_dir: Optional[str] = None, +) -> CalibrationClassificationTransform: + """ + Create CalibrationClassificationTransform with model configuration. + + Args: + model_cfg: Model configuration + miscalibration_probability: Probability of miscalibration + projection_vis_dir: Optional projection visualization directory + results_vis_dir: Optional results visualization directory + binary_save_dir: Optional binary save directory + + Returns: + Configured CalibrationClassificationTransform + """ + data_root = model_cfg.get("data_root") + if data_root is None: + raise ValueError("data_root not found in model configuration") + + transform_config = model_cfg.get("transform_config") + if transform_config is None: + raise ValueError("transform_config not found in model configuration") + + return CalibrationClassificationTransform( + transform_config=transform_config, + mode="test", + max_depth=model_cfg.get("max_depth", 128.0), + dilation_size=model_cfg.get("dilation_size", 1), + undistort=True, + miscalibration_probability=miscalibration_probability, + enable_augmentation=False, + data_root=data_root, + projection_vis_dir=projection_vis_dir, + results_vis_dir=results_vis_dir, + binary_save_dir=binary_save_dir, + ) + + +def _numpy_to_tensor(numpy_array: np.ndarray, device: str = "cpu") -> torch.Tensor: + """ + Convert numpy array (H, W, C) to tensor (1, C, H, W). + + Args: + numpy_array: Input numpy array with shape (H, W, C) + device: Device to load tensor on + + Returns: + Tensor with shape (1, C, H, W) + """ + tensor = torch.from_numpy(numpy_array).permute(2, 0, 1).float() # (C, H, W) + return tensor.unsqueeze(0).to(device) # (1, C, H, W) + + +def _clear_gpu_memory() -> None: + """Clear GPU cache and run garbage collection.""" + if torch.cuda.is_available(): + torch.cuda.empty_cache() + gc.collect() + + +def load_sample_data_from_info_pkl( + info_pkl_path: str, + model_cfg: Config, + miscalibration_probability: float, + sample_idx: int = 0, + device: str = "cpu", +) -> torch.Tensor: + """ + Load and preprocess sample data from info.pkl using CalibrationClassificationTransform. 
+ + Args: + info_pkl_path: Path to the info.pkl file + model_cfg: Model configuration containing data_root setting + miscalibration_probability: Probability of loading a miscalibrated sample + sample_idx: Index of the sample to load (default: 0) + device: Device to load tensor on + + Returns: + Preprocessed tensor ready for model inference + """ + # Load sample data from info.pkl + sample_data = load_info_pkl_data(info_pkl_path, sample_idx) + + # Create transform + transform = _create_transform_with_config( + model_cfg=model_cfg, + miscalibration_probability=miscalibration_probability, + projection_vis_dir=model_cfg.get("test_projection_vis_dir", None), + results_vis_dir=model_cfg.get("test_results_vis_dir", None), + binary_save_dir=model_cfg.get("binary_save_dir", None), + ) + + # Apply transform + results = transform.transform(sample_data) + input_data_processed = results["fused_img"] # (H, W, 5) + + # Convert to tensor + return _numpy_to_tensor(input_data_processed, device) + + +# ============================================================================ +# Model Export Functions +# ============================================================================ + + +def export_to_onnx( + model: torch.nn.Module, + input_tensor: torch.Tensor, + output_path: str, + config: DeploymentConfig, + logger: logging.Logger, +) -> None: + """Export model to ONNX format.""" + settings = config.onnx_settings + model.eval() + + logger.info("Exporting model to ONNX format...") + logger.info(f"Input shape: {input_tensor.shape}") + logger.info(f"Output path: {output_path}") + logger.info(f"ONNX opset version: {settings['opset_version']}") + + with torch.no_grad(): + torch.onnx.export( + model, + input_tensor, + output_path, + export_params=settings["export_params"], + keep_initializers_as_inputs=settings["keep_initializers_as_inputs"], + opset_version=settings["opset_version"], + do_constant_folding=settings["do_constant_folding"], + input_names=settings["input_names"], + output_names=settings["output_names"], + dynamic_axes=settings["dynamic_axes"], + verbose=False, + ) + + logger.info(f"ONNX export completed: {output_path}") + + # Optional model simplification + _optimize_onnx_model(output_path, logger) + + +def _optimize_onnx_model(onnx_path: str, logger: logging.Logger) -> None: + """Optimize ONNX model using onnxsim.""" + logger.info("Simplifying ONNX model...") + model_simplified, success = onnxsim.simplify(onnx_path) + if success: + onnx.save(model_simplified, onnx_path) + logger.info(f"ONNX model simplified successfully. 
Saved to {onnx_path}") + else: + logger.warning("ONNX model simplification failed") + + +def export_to_tensorrt( + onnx_path: str, output_path: str, input_tensor: torch.Tensor, config: DeploymentConfig, logger: logging.Logger +) -> bool: + """Export ONNX model to a TensorRT engine with precision policy support.""" + settings = config.tensorrt_settings + precision_policy = settings["precision_policy"] + policy_flags = settings["policy_flags"] + + logger.info(f"Building TensorRT engine with precision policy: {precision_policy}") + + # Initialize TensorRT + trt_logger = trt.Logger(trt.Logger.WARNING) + trt.init_libnvinfer_plugins(trt_logger, "") + + builder = trt.Builder(trt_logger) + builder_config = builder.create_builder_config() + builder_config.set_memory_pool_limit(pool=trt.MemoryPoolType.WORKSPACE, pool_size=settings["max_workspace_size"]) + + # Create network with appropriate flags + flags = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + + # Handle strongly typed flag (network creation flag, not builder flag) + if policy_flags.get("STRONGLY_TYPED"): + flags |= 1 << int(trt.NetworkDefinitionCreationFlag.STRONGLY_TYPED) + logger.info("Using strongly typed TensorRT network creation") + + network = builder.create_network(flags) + + # Apply precision flags to builder config + for flag_name, enabled in policy_flags.items(): + if flag_name == "STRONGLY_TYPED": + continue # Already handled as network creation flag above + if enabled and hasattr(trt.BuilderFlag, flag_name): + builder_config.set_flag(getattr(trt.BuilderFlag, flag_name)) + logger.info(f"BuilderFlag.{flag_name} enabled") + + # Setup optimization profile + profile = builder.create_optimization_profile() + _configure_input_shapes(profile, input_tensor, settings["model_inputs"], logger) + builder_config.add_optimization_profile(profile) + + # Parse ONNX model + parser = trt.OnnxParser(network, trt_logger) + with open(onnx_path, "rb") as f: + if not parser.parse(f.read()): + _log_parser_errors(parser, logger) + return False + logger.info("Successfully parsed the ONNX file") + + # Build engine + logger.info("Building TensorRT engine...") + serialized_engine = builder.build_serialized_network(network, builder_config) + + if serialized_engine is None: + logger.error("Failed to build TensorRT engine") + return False + + # Save engine + with open(output_path, "wb") as f: + f.write(serialized_engine) + + logger.info(f"TensorRT engine saved to {output_path}") + logger.info(f"Engine max workspace size: {settings['max_workspace_size'] / (1024**3):.2f} GB") + + return True + + +def _configure_input_shapes(profile, input_tensor: torch.Tensor, model_inputs: list, logger: logging.Logger) -> None: + """Configure input shapes for TensorRT optimization profile.""" + if model_inputs: + input_shapes = model_inputs[0].get("input_shapes", {}) + for input_name, shapes in input_shapes.items(): + min_shape = shapes.get("min_shape", list(input_tensor.shape)) + opt_shape = shapes.get("opt_shape", list(input_tensor.shape)) + max_shape = shapes.get("max_shape", list(input_tensor.shape)) + + logger.info(f"Setting input shapes - min: {min_shape}, opt: {opt_shape}, max: {max_shape}") + profile.set_shape(input_name, min_shape, opt_shape, max_shape) + else: + # Default shapes based on input tensor + input_shape = list(input_tensor.shape) + logger.info(f"Using default input shape: {input_shape}") + profile.set_shape("input", input_shape, input_shape, input_shape) + + +def _log_parser_errors(parser, logger: logging.Logger) -> None: + """Log TensorRT 
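# Sketch of the backend_config["model_inputs"] entry consumed by
# _configure_input_shapes above. The key layout mirrors what that helper reads
# (input name -> min/opt/max shapes); the 5-channel dimension matches
# EXPECTED_CHANNELS, while batch size and spatial resolution are placeholders.
model_inputs = [
    dict(
        input_shapes=dict(
            input=dict(  # name must match onnx_config["input_names"]
                min_shape=[1, 5, 544, 960],
                opt_shape=[1, 5, 544, 960],
                max_shape=[1, 5, 544, 960],
            ),
        ),
    ),
]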
parser errors.""" + logger.error("Failed to parse ONNX model") + for error in range(parser.num_errors): + logger.error(f"Parser error: {parser.get_error(error)}") + + +# ============================================================================ +# Inference and Verification Functions +# ============================================================================ + + +def run_pytorch_inference( + model: torch.nn.Module, input_tensor: torch.Tensor, logger: logging.Logger +) -> Tuple[torch.Tensor, float]: + """Run PyTorch inference on CPU for verification and return output with latency.""" + # Move to CPU to avoid GPU memory issues + model_cpu = model.cpu() + input_cpu = input_tensor.cpu() + + model_cpu.eval() + with torch.no_grad(): + # Measure inference time + start_time = time.perf_counter() + output = model_cpu(input_cpu) + end_time = time.perf_counter() + + latency = (end_time - start_time) * 1000 # Convert to milliseconds + + # Handle different output formats + if hasattr(output, "output"): + output = output.output + elif isinstance(output, dict) and "output" in output: + output = output["output"] + + if not isinstance(output, torch.Tensor): + raise ValueError(f"Unexpected PyTorch output type: {type(output)}") + + logger.info(f"PyTorch inference latency: {latency:.2f} ms") + logger.info(f"Output verification:") + logger.info(f" Output: {output.cpu().numpy()}") + return output, latency + + +def run_onnx_inference( + onnx_path: str, + input_tensor: torch.Tensor, + ref_output: torch.Tensor, + logger: logging.Logger, +) -> bool: + """Verify ONNX model output against PyTorch model.""" + # Clear GPU cache + _clear_gpu_memory() + + # ONNX inference with timing + providers = ["CPUExecutionProvider"] + ort_session = ort.InferenceSession(onnx_path, providers=providers) + onnx_input = {ort_session.get_inputs()[0].name: input_tensor.cpu().numpy()} + + start_time = time.perf_counter() + onnx_output = ort_session.run(None, onnx_input)[0] + end_time = time.perf_counter() + onnx_latency = (end_time - start_time) * 1000 + + logger.info(f"ONNX inference latency: {onnx_latency:.2f} ms") + + # Ensure onnx_output is numpy array before comparison + if not isinstance(onnx_output, np.ndarray): + logger.error(f"Unexpected ONNX output type: {type(onnx_output)}") + return False + + # Compare outputs + return _compare_outputs(ref_output.cpu().numpy(), onnx_output, "ONNX", logger) + + +def run_tensorrt_inference( + tensorrt_path: str, + input_tensor: torch.Tensor, + ref_output: torch.Tensor, + logger: logging.Logger, +) -> bool: + """Verify TensorRT model output against PyTorch model.""" + # Clear GPU cache + _clear_gpu_memory() + + # Load TensorRT engine + trt_logger = trt.Logger(trt.Logger.WARNING) + trt.init_libnvinfer_plugins(trt_logger, "") + runtime = trt.Runtime(trt_logger) + + with open(tensorrt_path, "rb") as f: + engine = runtime.deserialize_cuda_engine(f.read()) + + if engine is None: + logger.error("Failed to deserialize TensorRT engine") + return False + + # Run TensorRT inference with timing + trt_output, latency = _run_tensorrt_inference(engine, input_tensor.cpu(), logger) + logger.info(f"TensorRT inference latency: {latency:.2f} ms") + + # Compare outputs + return _compare_outputs(ref_output.cpu().numpy(), trt_output, "TensorRT", logger) + + +def _run_tensorrt_inference(engine, input_tensor: torch.Tensor, logger: logging.Logger) -> Tuple[np.ndarray, float]: + """Run TensorRT inference and return output with timing.""" + context = engine.create_execution_context() + stream = cuda.Stream() + start = 
cuda.Event() + end = cuda.Event() + + # Get tensor names and shapes + input_name, output_name = None, None + for i in range(engine.num_io_tensors): + tensor_name = engine.get_tensor_name(i) + if engine.get_tensor_mode(tensor_name) == trt.TensorIOMode.INPUT: + input_name = tensor_name + elif engine.get_tensor_mode(tensor_name) == trt.TensorIOMode.OUTPUT: + output_name = tensor_name + + if input_name is None or output_name is None: + raise RuntimeError("Could not find input/output tensor names") + + # Prepare arrays + input_np = input_tensor.numpy().astype(np.float32) + if not input_np.flags["C_CONTIGUOUS"]: + input_np = np.ascontiguousarray(input_np) + + context.set_input_shape(input_name, input_np.shape) + output_shape = context.get_tensor_shape(output_name) + output_np = np.empty(output_shape, dtype=np.float32) + if not output_np.flags["C_CONTIGUOUS"]: + output_np = np.ascontiguousarray(output_np) + + # Allocate GPU memory + d_input = cuda.mem_alloc(input_np.nbytes) + d_output = cuda.mem_alloc(output_np.nbytes) + + try: + # Set tensor addresses + context.set_tensor_address(input_name, int(d_input)) + context.set_tensor_address(output_name, int(d_output)) + + # Run inference with timing + cuda.memcpy_htod_async(d_input, input_np, stream) + start.record(stream) + context.execute_async_v3(stream_handle=stream.handle) + end.record(stream) + cuda.memcpy_dtoh_async(output_np, d_output, stream) + stream.synchronize() + + latency = end.time_since(start) + return output_np, latency + + finally: + # Cleanup + try: + d_input.free() + d_output.free() + except: + pass + + +def _compare_outputs( + pytorch_output: np.ndarray, backend_output: np.ndarray, backend_name: str, logger: logging.Logger +) -> bool: + """Compare outputs between PyTorch and backend.""" + if not isinstance(backend_output, np.ndarray): + logger.error(f"Unexpected {backend_name} output type: {type(backend_output)}") + return False + + max_diff = np.abs(pytorch_output - backend_output).max() + mean_diff = np.abs(pytorch_output - backend_output).mean() + + logger.info(f"Output verification:") + logger.info(f" {backend_name} output: {backend_output}") + logger.info(f" Max difference with PyTorch: {max_diff:.6f}") + logger.info(f" Mean difference with PyTorch: {mean_diff:.6f}") + + success = max_diff < DEFAULT_VERIFICATION_TOLERANCE + if not success: + logger.warning(f"Large difference detected: {max_diff:.6f}") + + return success + + +def run_verification( + model: torch.nn.Module, + onnx_path: str, + trt_path: Optional[str], + input_tensors: Dict[str, torch.Tensor], + logger: logging.Logger, +) -> None: + """Run model verification for available backends.""" + _clear_gpu_memory() + + for key, input_tensor in input_tensors.items(): + logger.info("=" * 50) + logger.info(f"Verifying {LABELS[key]} sample...") + logger.info("-" * 50) + logger.info("Verifying PyTorch model...") + pytorch_output, pytorch_latency = run_pytorch_inference(model, input_tensor, logger) + logger.info( + f"PyTorch output for {LABELS[key]}: [SCORE_MISCALIBRATED, SCORE_CALIBRATED] = {pytorch_output.cpu().numpy()}" + ) + score_calibrated = pytorch_output.cpu().numpy()[0, 1] - pytorch_output.cpu().numpy()[0, 0] + if key == "0" and score_calibrated < 0: + logger.info(f"Negative calibration score detected for {LABELS[key]} sample: {score_calibrated:.6f}") + elif key == "0" and score_calibrated > 0: + logger.warning(f"Positive calibration score detected for {LABELS[key]} sample: {score_calibrated:.6f}") + elif key == "1" and score_calibrated > 0: + logger.info(f"Positive 
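# Tiny numeric sketch of how the raw [SCORE_MISCALIBRATED, SCORE_CALIBRATED]
# logits are interpreted, mirroring the softmax/argmax used in evaluator.py and
# the score difference checked below; the logit values are invented.
import numpy as np

logits = np.array([[-1.2, 3.4]])                       # shape (1, 2)
probs = np.exp(logits) / np.exp(logits).sum(axis=-1, keepdims=True)
predicted = int(np.argmax(probs, axis=-1)[0])          # 1 -> "calibrated", 0 -> "miscalibrated"
confidence = float(probs.max())
score_calibrated = float(logits[0, 1] - logits[0, 0])  # positive favours "calibrated"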
calibration score detected for {LABELS[key]} sample: {score_calibrated:.6f}") + elif key == "1" and score_calibrated < 0: + logger.warning(f"Negative calibration score detected for {LABELS[key]} sample: {score_calibrated:.6f}") + + if onnx_path and osp.exists(onnx_path): + logger.info("-" * 50) + logger.info("Verifying ONNX model...") + if run_onnx_inference(onnx_path, input_tensor, pytorch_output, logger): + logger.info("ONNX model verification passed!") + else: + logger.error("ONNX model verification failed!") + + if trt_path and osp.exists(trt_path): + logger.info("-" * 50) + logger.info("Verifying TensorRT model...") + if run_tensorrt_inference(trt_path, input_tensor, pytorch_output, logger): + logger.info("TensorRT model verification passed!") + else: + logger.error("TensorRT model verification failed!") + logger.info("=" * 50) + + +# ============================================================================ +# Export Pipeline Helper Functions +# ============================================================================ + + +def validate_and_prepare_paths( + config: DeploymentConfig, work_dir: str, existing_onnx: Optional[str], logger: logging.Logger +) -> Tuple[Optional[str], Optional[str]]: + """Determine and validate export paths for ONNX and TensorRT models. + + Returns: + Tuple of (onnx_path, trt_path) + """ + onnx_path = None + trt_path = None + + if config.should_export_onnx: + onnx_settings = config.onnx_settings + onnx_path = osp.join(work_dir, onnx_settings["save_file"]) + + if config.should_export_tensorrt: + # Use existing ONNX if provided, otherwise use the one we'll export + if existing_onnx and not config.should_export_onnx: + onnx_path = existing_onnx + if not osp.exists(onnx_path): + logger.error(f"Provided ONNX file does not exist: {onnx_path}") + return None, None + logger.info(f"Using existing ONNX file: {onnx_path}") + elif not onnx_path: + # Need ONNX for TensorRT but neither export nor existing file specified + logger.error("TensorRT export requires ONNX file. Set mode='both' or provide onnx_file in config.") + return None, None + + # Set TensorRT output path + onnx_settings = config.onnx_settings + trt_file = onnx_settings["save_file"].replace(".onnx", ".engine") + trt_path = osp.join(work_dir, trt_file) + + return onnx_path, trt_path + + +def export_models( + model: torch.nn.Module, + config: DeploymentConfig, + onnx_path: Optional[str], + trt_path: Optional[str], + input_tensor_calibrated: torch.Tensor, + device: torch.device, + logger: logging.Logger, +) -> Tuple[bool, torch.device]: + """Export models to ONNX and/or TensorRT formats. 
+ + Returns: + Tuple of (success, updated_device) + """ + # Export ONNX + if config.should_export_onnx and onnx_path: + export_to_onnx(model, input_tensor_calibrated, onnx_path, config, logger) + + # Export TensorRT + if config.should_export_tensorrt and trt_path and onnx_path: + logger.info("Converting ONNX to TensorRT...") + + # Ensure CUDA device for TensorRT + if device.type != "cuda": + logger.warning("TensorRT requires CUDA device, switching to cuda") + device = torch.device("cuda") + + success = export_to_tensorrt(onnx_path, trt_path, input_tensor_calibrated, config, logger) + if success: + logger.info(f"TensorRT conversion successful: {trt_path}") + else: + logger.error("TensorRT conversion failed") + return False, device + + return True, device + + +def run_model_verification( + model: torch.nn.Module, + config: DeploymentConfig, + onnx_path: Optional[str], + trt_path: Optional[str], + input_tensor_calibrated: torch.Tensor, + input_tensor_miscalibrated: torch.Tensor, + existing_onnx: Optional[str], + logger: logging.Logger, +) -> None: + """Run model verification for exported formats.""" + if not config.should_verify: + return + + logger.info( + "Running verification for miscalibrated and calibrated samples with " + "an output array [SCORE_MISCALIBRATED, SCORE_CALIBRATED]..." + ) + input_tensors = {"0": input_tensor_miscalibrated, "1": input_tensor_calibrated} + + # Only verify formats that were exported or provided + onnx_path_for_verify = onnx_path if (config.should_export_onnx or existing_onnx) else None + trt_path_for_verify = trt_path if config.should_export_tensorrt else None + + run_verification(model, onnx_path_for_verify, trt_path_for_verify, input_tensors, logger) diff --git a/projects/CalibrationStatusClassification/deploy/main.py b/projects/CalibrationStatusClassification/deploy/main.py index 5efcffba..918bafe7 100644 --- a/projects/CalibrationStatusClassification/deploy/main.py +++ b/projects/CalibrationStatusClassification/deploy/main.py @@ -13,940 +13,21 @@ - Confusion matrix and per-class accuracy analysis """ -import argparse -import gc -import logging -import os import os.path as osp -import pickle -import time -from typing import Any, Dict, Optional, Tuple import mmengine -import numpy as np -import onnx -import onnxruntime as ort -import onnxsim -import pycuda.autoinit -import pycuda.driver as cuda -import tensorrt as trt import torch -import torch.nn.functional as F +from config import DeploymentConfig, parse_args, setup_logging +from evaluator import get_models_to_evaluate, run_full_evaluation +from exporters import ( + export_models, + load_sample_data_from_info_pkl, + run_model_verification, + validate_and_prepare_paths, +) from mmengine.config import Config from mmpretrain.apis import get_model -from autoware_ml.calibration_classification.datasets.transforms.calibration_classification_transform import ( - CalibrationClassificationTransform, -) - -# Constants -DEFAULT_VERIFICATION_TOLERANCE = 1e-3 -DEFAULT_WORKSPACE_SIZE = 1 << 30 # 1 GB -EXPECTED_CHANNELS = 5 # RGB + Depth + Intensity -LABELS = {"0": "miscalibrated", "1": "calibrated"} - -# Precision policy mapping -PRECISION_POLICIES = { - "auto": {}, # No special flags, TensorRT decides - "fp16": {"FP16": True}, - "fp32_tf32": {"TF32": True}, # TF32 for FP32 operations - "explicit_int8": {"INT8": True}, - "strongly_typed": {"STRONGLY_TYPED": True}, # Network creation flag -} - - -def load_info_pkl_data(info_pkl_path: str, sample_idx: int = 0) -> Dict[str, Any]: - """ - Load a single sample from info.pkl 
file. - - Args: - info_pkl_path: Path to the info.pkl file - sample_idx: Index of the sample to load (default: 0) - - Returns: - Sample dictionary with the required structure for CalibrationClassificationTransform - - Raises: - FileNotFoundError: If info.pkl file doesn't exist - ValueError: If data format is unexpected or sample index is invalid - """ - if not os.path.exists(info_pkl_path): - raise FileNotFoundError(f"Info.pkl file not found: {info_pkl_path}") - - try: - with open(info_pkl_path, "rb") as f: - info_data = pickle.load(f) - except Exception as e: - raise ValueError(f"Failed to load info.pkl file: {e}") - - # Extract samples from info.pkl - if isinstance(info_data, dict): - if "data_list" in info_data: - samples_list = info_data["data_list"] - else: - raise ValueError(f"Expected 'data_list' key in info_data, found keys: {list(info_data.keys())}") - else: - raise ValueError(f"Expected dict format, got {type(info_data)}") - - if not samples_list: - raise ValueError("No samples found in info.pkl") - - if sample_idx >= len(samples_list): - raise ValueError(f"Sample index {sample_idx} out of range (0-{len(samples_list)-1})") - - sample = samples_list[sample_idx] - - # Validate sample structure - required_keys = ["image", "lidar_points"] - if not all(key in sample for key in required_keys): - raise ValueError(f"Sample {sample_idx} has invalid structure. Required keys: {required_keys}") - - return sample - - -def load_sample_data_from_info_pkl( - info_pkl_path: str, - model_cfg: Config, - miscalibration_probability: float, - sample_idx: int = 0, - device: str = "cpu", -) -> torch.Tensor: - """ - Load and preprocess sample data from info.pkl using CalibrationClassificationTransform. - - Args: - info_pkl_path: Path to the info.pkl file - model_cfg: Model configuration containing data_root setting - miscalibration_probability: Probability of loading a miscalibrated sample - sample_idx: Index of the sample to load (default: 0) - device: Device to load tensor on - - Returns: - Preprocessed tensor ready for model inference - """ - # Load sample data from info.pkl - sample_data = load_info_pkl_data(info_pkl_path, sample_idx) - - # Get data_root from model config - data_root = model_cfg.get("data_root", None) - if data_root is None: - raise ValueError("data_root not found in model configuration") - - # Create transform for deployment - transform_config = model_cfg.get("transform_config", None) - if transform_config is None: - raise ValueError("transform_config not found in model configuration") - - transform = CalibrationClassificationTransform( - transform_config=transform_config, - mode="test", - max_depth=model_cfg.get("max_depth", 128.0), - dilation_size=model_cfg.get("dilation_size", 1), - undistort=True, - miscalibration_probability=miscalibration_probability, - enable_augmentation=False, - data_root=data_root, - projection_vis_dir=model_cfg.get("test_projection_vis_dir", None), - results_vis_dir=model_cfg.get("test_results_vis_dir", None), - binary_save_dir=model_cfg.get("binary_save_dir", None), - ) - - # Apply transform - results = transform.transform(sample_data) - input_data_processed = results["fused_img"] # (H, W, 5) - - # Convert to tensor - input_tensor = torch.from_numpy(input_data_processed).permute(2, 0, 1).float() # (5, H, W) - input_tensor = input_tensor.unsqueeze(0).to(device) # (1, 5, H, W) - - return input_tensor - - -class DeploymentConfig: - """Configuration container for deployment settings.""" - - def __init__(self, deploy_cfg: Config): - self.deploy_cfg = 
deploy_cfg - self._validate_config() - - def _validate_config(self) -> None: - """Validate configuration structure and required fields.""" - if "export" not in self.deploy_cfg: - raise ValueError( - "Missing 'export' section in deploy config. " - "Please update your config to the new format with 'export', 'runtime_io' sections." - ) - - if "runtime_io" not in self.deploy_cfg: - raise ValueError("Missing 'runtime_io' section in deploy config.") - - # Validate export mode - valid_modes = ["onnx", "trt", "both", "none"] - mode = self.export_config.get("mode", "both") - if mode not in valid_modes: - raise ValueError(f"Invalid export mode '{mode}'. Must be one of {valid_modes}") - - # Validate precision policy if present - backend_cfg = self.deploy_cfg.get("backend_config", {}) - common_cfg = backend_cfg.get("common_config", {}) - precision_policy = common_cfg.get("precision_policy", "auto") - if precision_policy not in PRECISION_POLICIES: - raise ValueError( - f"Invalid precision_policy '{precision_policy}'. " f"Must be one of {list(PRECISION_POLICIES.keys())}" - ) - - @property - def export_config(self) -> Dict: - """Get export configuration (mode, verify, device, work_dir).""" - return self.deploy_cfg.get("export", {}) - - @property - def runtime_io_config(self) -> Dict: - """Get runtime I/O configuration (info_pkl, sample_idx, onnx_file).""" - return self.deploy_cfg.get("runtime_io", {}) - - @property - def evaluation_config(self) -> Dict: - """Get evaluation configuration (enabled, num_samples, verbose, model paths).""" - return self.deploy_cfg.get("evaluation", {}) - - @property - def should_export_onnx(self) -> bool: - """Check if ONNX export is requested.""" - mode = self.export_config.get("mode", "both") - return mode in ["onnx", "both"] - - @property - def should_export_tensorrt(self) -> bool: - """Check if TensorRT export is requested.""" - mode = self.export_config.get("mode", "both") - return mode in ["trt", "both"] - - @property - def should_verify(self) -> bool: - """Check if verification is requested.""" - return self.export_config.get("verify", False) - - @property - def device(self) -> str: - """Get device for export.""" - return self.export_config.get("device", "cuda:0") - - @property - def work_dir(self) -> str: - """Get working directory.""" - return self.export_config.get("work_dir", os.getcwd()) - - @property - def onnx_settings(self) -> Dict: - """Get ONNX export settings.""" - onnx_config = self.deploy_cfg.get("onnx_config", {}) - return { - "opset_version": onnx_config.get("opset_version", 16), - "do_constant_folding": onnx_config.get("do_constant_folding", True), - "input_names": onnx_config.get("input_names", ["input"]), - "output_names": onnx_config.get("output_names", ["output"]), - "dynamic_axes": onnx_config.get("dynamic_axes"), - "export_params": onnx_config.get("export_params", True), - "keep_initializers_as_inputs": onnx_config.get("keep_initializers_as_inputs", False), - "save_file": onnx_config.get("save_file", "calibration_classifier.onnx"), - } - - @property - def tensorrt_settings(self) -> Dict: - """Get TensorRT export settings with precision policy support.""" - backend_config = self.deploy_cfg.get("backend_config", {}) - common_config = backend_config.get("common_config", {}) - - # Get precision policy - precision_policy = common_config.get("precision_policy", "auto") - policy_flags = PRECISION_POLICIES.get(precision_policy, {}) - - return { - "max_workspace_size": common_config.get("max_workspace_size", DEFAULT_WORKSPACE_SIZE), - "precision_policy": 
precision_policy, - "policy_flags": policy_flags, - "model_inputs": backend_config.get("model_inputs", []), - } - - -def parse_args() -> argparse.Namespace: - """Parse command line arguments.""" - parser = argparse.ArgumentParser( - description="Export CalibrationStatusClassification model to ONNX/TensorRT.", - formatter_class=argparse.ArgumentDefaultsHelpFormatter, - ) - parser.add_argument("deploy_cfg", help="Deploy config path") - parser.add_argument("model_cfg", help="Model config path") - parser.add_argument( - "checkpoint", nargs="?", default=None, help="Model checkpoint path (optional when mode='none')" - ) - - # Optional overrides - parser.add_argument("--work-dir", help="Override output directory from config") - parser.add_argument("--device", help="Override device from config") - parser.add_argument("--log-level", default="INFO", choices=list(logging._nameToLevel.keys()), help="Logging level") - parser.add_argument("--info-pkl", help="Override info.pkl path from config") - - return parser.parse_args() - - -def setup_logging(level: str) -> logging.Logger: - """Setup logging configuration.""" - logging.basicConfig(level=getattr(logging, level), format="%(levelname)s:%(name)s:%(message)s") - return logging.getLogger("mmdeploy") - - -def export_to_onnx( - model: torch.nn.Module, - input_tensor: torch.Tensor, - output_path: str, - config: DeploymentConfig, - logger: logging.Logger, -) -> None: - """Export model to ONNX format.""" - settings = config.onnx_settings - model.eval() - - logger.info("Exporting model to ONNX format...") - logger.info(f"Input shape: {input_tensor.shape}") - logger.info(f"Output path: {output_path}") - logger.info(f"ONNX opset version: {settings['opset_version']}") - - with torch.no_grad(): - torch.onnx.export( - model, - input_tensor, - output_path, - export_params=settings["export_params"], - keep_initializers_as_inputs=settings["keep_initializers_as_inputs"], - opset_version=settings["opset_version"], - do_constant_folding=settings["do_constant_folding"], - input_names=settings["input_names"], - output_names=settings["output_names"], - dynamic_axes=settings["dynamic_axes"], - verbose=False, - ) - - logger.info(f"ONNX export completed: {output_path}") - - # Optional model simplification - _optimize_onnx_model(output_path, logger) - - -def _optimize_onnx_model(onnx_path: str, logger: logging.Logger) -> None: - """Optimize ONNX model using onnxsim.""" - logger.info("Simplifying ONNX model...") - model_simplified, success = onnxsim.simplify(onnx_path) - if success: - onnx.save(model_simplified, onnx_path) - logger.info(f"ONNX model simplified successfully. 
Saved to {onnx_path}") - else: - logger.warning("ONNX model simplification failed") - - -def export_to_tensorrt( - onnx_path: str, output_path: str, input_tensor: torch.Tensor, config: DeploymentConfig, logger: logging.Logger -) -> bool: - """Export ONNX model to a TensorRT engine with precision policy support.""" - settings = config.tensorrt_settings - precision_policy = settings["precision_policy"] - policy_flags = settings["policy_flags"] - - logger.info(f"Building TensorRT engine with precision policy: {precision_policy}") - - # Initialize TensorRT - trt_logger = trt.Logger(trt.Logger.WARNING) - trt.init_libnvinfer_plugins(trt_logger, "") - - builder = trt.Builder(trt_logger) - builder_config = builder.create_builder_config() - builder_config.set_memory_pool_limit(pool=trt.MemoryPoolType.WORKSPACE, pool_size=settings["max_workspace_size"]) - - # Create network with appropriate flags - flags = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - - # Handle strongly typed flag (network creation flag, not builder flag) - if policy_flags.get("STRONGLY_TYPED"): - flags |= 1 << int(trt.NetworkDefinitionCreationFlag.STRONGLY_TYPED) - logger.info("Using strongly typed TensorRT network creation") - - network = builder.create_network(flags) - - # Apply precision flags to builder config - for flag_name, enabled in policy_flags.items(): - if flag_name == "STRONGLY_TYPED": - continue # Already handled as network creation flag above - if enabled and hasattr(trt.BuilderFlag, flag_name): - builder_config.set_flag(getattr(trt.BuilderFlag, flag_name)) - logger.info(f"BuilderFlag.{flag_name} enabled") - - # Setup optimization profile - profile = builder.create_optimization_profile() - _configure_input_shapes(profile, input_tensor, settings["model_inputs"], logger) - builder_config.add_optimization_profile(profile) - - # Parse ONNX model - parser = trt.OnnxParser(network, trt_logger) - with open(onnx_path, "rb") as f: - if not parser.parse(f.read()): - _log_parser_errors(parser, logger) - return False - logger.info("Successfully parsed the ONNX file") - - # Build engine - logger.info("Building TensorRT engine...") - serialized_engine = builder.build_serialized_network(network, builder_config) - - if serialized_engine is None: - logger.error("Failed to build TensorRT engine") - return False - - # Save engine - with open(output_path, "wb") as f: - f.write(serialized_engine) - - logger.info(f"TensorRT engine saved to {output_path}") - logger.info(f"Engine max workspace size: {settings['max_workspace_size'] / (1024**3):.2f} GB") - - return True - - -def _configure_input_shapes(profile, input_tensor: torch.Tensor, model_inputs: list, logger: logging.Logger) -> None: - """Configure input shapes for TensorRT optimization profile.""" - if model_inputs: - input_shapes = model_inputs[0].get("input_shapes", {}) - for input_name, shapes in input_shapes.items(): - min_shape = shapes.get("min_shape", list(input_tensor.shape)) - opt_shape = shapes.get("opt_shape", list(input_tensor.shape)) - max_shape = shapes.get("max_shape", list(input_tensor.shape)) - - logger.info(f"Setting input shapes - min: {min_shape}, opt: {opt_shape}, max: {max_shape}") - profile.set_shape(input_name, min_shape, opt_shape, max_shape) - else: - # Default shapes based on input tensor - input_shape = list(input_tensor.shape) - logger.info(f"Using default input shape: {input_shape}") - profile.set_shape("input", input_shape, input_shape, input_shape) - - -def _log_parser_errors(parser, logger: logging.Logger) -> None: - """Log TensorRT 
parser errors.""" - logger.error("Failed to parse ONNX model") - for error in range(parser.num_errors): - logger.error(f"Parser error: {parser.get_error(error)}") - - -def run_pytorch_inference( - model: torch.nn.Module, input_tensor: torch.Tensor, logger: logging.Logger -) -> Tuple[torch.Tensor, float]: - """Run PyTorch inference on CPU for verification and return output with latency.""" - # Move to CPU to avoid GPU memory issues - model_cpu = model.cpu() - input_cpu = input_tensor.cpu() - - model_cpu.eval() - with torch.no_grad(): - # Measure inference time - start_time = time.perf_counter() - output = model_cpu(input_cpu) - end_time = time.perf_counter() - - latency = (end_time - start_time) * 1000 # Convert to milliseconds - - # Handle different output formats - if hasattr(output, "output"): - output = output.output - elif isinstance(output, dict) and "output" in output: - output = output["output"] - - if not isinstance(output, torch.Tensor): - raise ValueError(f"Unexpected PyTorch output type: {type(output)}") - - logger.info(f"PyTorch inference latency: {latency:.2f} ms") - logger.info(f"Output verification:") - logger.info(f" Output: {output.cpu().numpy()}") - return output, latency - - -def run_onnx_inference( - onnx_path: str, - input_tensor: torch.Tensor, - ref_output: torch.Tensor, - logger: logging.Logger, -) -> bool: - """Verify ONNX model output against PyTorch model.""" - # Clear GPU cache - if torch.cuda.is_available(): - torch.cuda.empty_cache() - - # ONNX inference with timing - providers = ["CPUExecutionProvider"] - ort_session = ort.InferenceSession(onnx_path, providers=providers) - onnx_input = {ort_session.get_inputs()[0].name: input_tensor.cpu().numpy()} - - start_time = time.perf_counter() - onnx_output = ort_session.run(None, onnx_input)[0] - end_time = time.perf_counter() - onnx_latency = (end_time - start_time) * 1000 - - logger.info(f"ONNX inference latency: {onnx_latency:.2f} ms") - - # Ensure onnx_output is numpy array before comparison - if not isinstance(onnx_output, np.ndarray): - logger.error(f"Unexpected ONNX output type: {type(onnx_output)}") - return False - - # Compare outputs - return _compare_outputs(ref_output.cpu().numpy(), onnx_output, "ONNX", logger) - - -def run_tensorrt_inference( - tensorrt_path: str, - input_tensor: torch.Tensor, - ref_output: torch.Tensor, - logger: logging.Logger, -) -> bool: - """Verify TensorRT model output against PyTorch model.""" - # Clear GPU cache - if torch.cuda.is_available(): - torch.cuda.empty_cache() - - # Load TensorRT engine - trt_logger = trt.Logger(trt.Logger.WARNING) - trt.init_libnvinfer_plugins(trt_logger, "") - runtime = trt.Runtime(trt_logger) - - with open(tensorrt_path, "rb") as f: - engine = runtime.deserialize_cuda_engine(f.read()) - - if engine is None: - logger.error("Failed to deserialize TensorRT engine") - return False - - # Run TensorRT inference with timing - trt_output, latency = _run_tensorrt_inference(engine, input_tensor.cpu(), logger) - logger.info(f"TensorRT inference latency: {latency:.2f} ms") - - # Compare outputs - return _compare_outputs(ref_output.cpu().numpy(), trt_output, "TensorRT", logger) - - -def _run_tensorrt_inference(engine, input_tensor: torch.Tensor, logger: logging.Logger) -> Tuple[np.ndarray, float]: - """Run TensorRT inference and return output with timing.""" - context = engine.create_execution_context() - stream = cuda.Stream() - start = cuda.Event() - end = cuda.Event() - - # Get tensor names and shapes - input_name, output_name = None, None - for i in 
range(engine.num_io_tensors): - tensor_name = engine.get_tensor_name(i) - if engine.get_tensor_mode(tensor_name) == trt.TensorIOMode.INPUT: - input_name = tensor_name - elif engine.get_tensor_mode(tensor_name) == trt.TensorIOMode.OUTPUT: - output_name = tensor_name - - if input_name is None or output_name is None: - raise RuntimeError("Could not find input/output tensor names") - - # Prepare arrays - input_np = input_tensor.numpy().astype(np.float32) - if not input_np.flags["C_CONTIGUOUS"]: - input_np = np.ascontiguousarray(input_np) - - context.set_input_shape(input_name, input_np.shape) - output_shape = context.get_tensor_shape(output_name) - output_np = np.empty(output_shape, dtype=np.float32) - if not output_np.flags["C_CONTIGUOUS"]: - output_np = np.ascontiguousarray(output_np) - - # Allocate GPU memory - d_input = cuda.mem_alloc(input_np.nbytes) - d_output = cuda.mem_alloc(output_np.nbytes) - - try: - # Set tensor addresses - context.set_tensor_address(input_name, int(d_input)) - context.set_tensor_address(output_name, int(d_output)) - - # Run inference with timing - cuda.memcpy_htod_async(d_input, input_np, stream) - start.record(stream) - context.execute_async_v3(stream_handle=stream.handle) - end.record(stream) - cuda.memcpy_dtoh_async(output_np, d_output, stream) - stream.synchronize() - - latency = end.time_since(start) - return output_np, latency - - finally: - # Cleanup - try: - d_input.free() - d_output.free() - except: - pass - - -def _compare_outputs( - pytorch_output: np.ndarray, backend_output: np.ndarray, backend_name: str, logger: logging.Logger -) -> bool: - """Compare outputs between PyTorch and backend.""" - if not isinstance(backend_output, np.ndarray): - logger.error(f"Unexpected {backend_name} output type: {type(backend_output)}") - return False - - max_diff = np.abs(pytorch_output - backend_output).max() - mean_diff = np.abs(pytorch_output - backend_output).mean() - - logger.info(f"Output verification:") - logger.info(f" {backend_name} output: {backend_output}") - logger.info(f" Max difference with PyTorch: {max_diff:.6f}") - logger.info(f" Mean difference with PyTorch: {mean_diff:.6f}") - - success = max_diff < DEFAULT_VERIFICATION_TOLERANCE - if not success: - logger.warning(f"Large difference detected: {max_diff:.6f}") - - return success - - -def run_verification( - model: torch.nn.Module, - onnx_path: str, - trt_path: Optional[str], - input_tensors: Dict[str, torch.Tensor], - logger: logging.Logger, -) -> None: - """Run model verification for available backends.""" - if torch.cuda.is_available(): - torch.cuda.empty_cache() - - for key, input_tensor in input_tensors.items(): - logger.info("=" * 50) - logger.info(f"Verifying {LABELS[key]} sample...") - logger.info("-" * 50) - logger.info("Verifying PyTorch model...") - pytorch_output, pytorch_latency = run_pytorch_inference(model, input_tensor, logger) - logger.info( - f"PyTorch output for {LABELS[key]}: [SCORE_MISCALIBRATED, SCORE_CALIBRATED] = {pytorch_output.cpu().numpy()}" - ) - score_calibrated = pytorch_output.cpu().numpy()[0, 1] - pytorch_output.cpu().numpy()[0, 0] - if key == "0" and score_calibrated < 0: - logger.info(f"Negative calibration score detected for {LABELS[key]} sample: {score_calibrated:.6f}") - elif key == "0" and score_calibrated > 0: - logger.warning(f"Positive calibration score detected for {LABELS[key]} sample: {score_calibrated:.6f}") - elif key == "1" and score_calibrated > 0: - logger.info(f"Positive calibration score detected for {LABELS[key]} sample: {score_calibrated:.6f}") - 
elif key == "1" and score_calibrated < 0: - logger.warning(f"Negative calibration score detected for {LABELS[key]} sample: {score_calibrated:.6f}") - - if onnx_path and osp.exists(onnx_path): - logger.info("-" * 50) - logger.info("Verifying ONNX model...") - if run_onnx_inference(onnx_path, input_tensor, pytorch_output, logger): - logger.info("ONNX model verification passed!") - else: - logger.error("ONNX model verification failed!") - - if trt_path and osp.exists(trt_path): - logger.info("-" * 50) - logger.info("Verifying TensorRT model...") - if run_tensorrt_inference(trt_path, input_tensor, pytorch_output, logger): - logger.info("TensorRT model verification passed!") - else: - logger.error("TensorRT model verification failed!") - logger.info("=" * 50) - - -def evaluate_exported_model( - model_path: str, - model_type: str, - model_cfg: Config, - info_pkl_path: str, - logger: logging.Logger, - device: str = "cpu", - num_samples: int = 10, - verbose: bool = False, -) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: - """ - Evaluate exported model (ONNX or TensorRT) using info.pkl data. - - Args: - model_path: Path to model file (.onnx or .engine) - model_type: Type of model ("onnx" or "tensorrt") - model_cfg: Model configuration - info_pkl_path: Path to info.pkl file - logger: Logger instance - device: Device for preprocessing - num_samples: Number of samples to evaluate - verbose: Enable verbose logging - - Returns: - Tuple of (predictions, ground_truth, probabilities, latencies) - """ - - # Load info.pkl data - try: - with open(info_pkl_path, "rb") as f: - info_data = pickle.load(f) - - if "data_list" not in info_data: - raise ValueError("Expected 'data_list' key in info.pkl") - - samples_list = info_data["data_list"] - logger.info(f"Loaded {len(samples_list)} samples from info.pkl") - - # Limit number of samples for evaluation - num_samples = min(num_samples, len(samples_list)) - logger.info(f"Evaluating {num_samples} samples") - - except Exception as e: - logger.error(f"Failed to load info.pkl: {e}") - raise - - # Initialize transform once and reuse it for all samples - data_root = model_cfg.get("data_root", None) - if data_root is None: - raise ValueError("data_root not found in model configuration") - - transform_config = model_cfg.get("transform_config", None) - if transform_config is None: - raise ValueError("transform_config not found in model configuration") - - transform = CalibrationClassificationTransform( - transform_config=transform_config, - mode="test", - max_depth=model_cfg.get("max_depth", 128.0), - dilation_size=model_cfg.get("dilation_size", 1), - undistort=True, - miscalibration_probability=0.0, # Will be updated per sample - enable_augmentation=False, - data_root=data_root, - projection_vis_dir=None, - results_vis_dir=None, - binary_save_dir=None, - ) - - # Initialize inference engine - if model_type == "onnx": - logger.info(f"Using ONNX model: {model_path}") - logger.info("Creating ONNX InferenceSession...") - - # Configure execution providers - if device.startswith("cuda"): - providers = ["CUDAExecutionProvider", "CPUExecutionProvider"] - logger.info("Attempting to use CUDA acceleration for ONNX inference...") - try: - available_providers = ort.get_available_providers() - if "CUDAExecutionProvider" not in available_providers: - logger.warning( - f"CUDAExecutionProvider not available. Available: {available_providers}. 
" - "Install onnxruntime-gpu for CUDA acceleration" - ) - providers = ["CPUExecutionProvider"] - else: - logger.info("CUDAExecutionProvider is available") - except Exception as e: - logger.warning(f"Error checking CUDA provider: {e}. Falling back to CPU") - providers = ["CPUExecutionProvider"] - else: - providers = ["CPUExecutionProvider"] - logger.info("Using CPU for ONNX inference") - - # Create session once and reuse it - ort_session = ort.InferenceSession(model_path, providers=providers) - logger.info(f"ONNX session using providers: {ort_session.get_providers()}") - - # Define inference function - def inference_func(input_tensor): - input_name = ort_session.get_inputs()[0].name - onnx_input = {input_name: input_tensor.cpu().numpy()} - start_time = time.perf_counter() - onnx_output = ort_session.run(None, onnx_input)[0] - latency = (time.perf_counter() - start_time) * 1000 - return onnx_output, latency - - elif model_type == "tensorrt": - logger.info(f"Using TensorRT model: {model_path}") - - # Load TensorRT engine - trt_logger = trt.Logger(trt.Logger.WARNING) - trt.init_libnvinfer_plugins(trt_logger, "") - runtime = trt.Runtime(trt_logger) - - with open(model_path, "rb") as f: - engine = runtime.deserialize_cuda_engine(f.read()) - - if engine is None: - raise RuntimeError("Failed to deserialize TensorRT engine") - - # Define inference function - def inference_func(input_tensor): - return _run_tensorrt_inference(engine, input_tensor.cpu(), logger) - - else: - raise ValueError(f"Unsupported model type: {model_type}") - - # Lists to store results - all_predictions = [] - all_ground_truth = [] - all_probabilities = [] - all_latencies = [] - - # Evaluate samples - for sample_idx in range(num_samples): - if sample_idx % 100 == 0: - logger.info(f"Processing sample {sample_idx + 1}/{num_samples}") - - try: - # Test both calibrated and miscalibrated versions - for miscalibration_prob, expected_label in [(0.0, 1), (1.0, 0)]: - # Update transform's miscalibration probability - transform.miscalibration_probability = miscalibration_prob - - # Load and preprocess sample - sample_data = load_info_pkl_data(info_pkl_path, sample_idx) - sample_data["sample_idx"] = sample_idx - - results = transform.transform(sample_data) - input_data_processed = results["fused_img"] - gt_label = results["gt_label"] - - # Convert to tensor - input_tensor = torch.from_numpy(input_data_processed).permute(2, 0, 1).float() - input_tensor = input_tensor.unsqueeze(0).to(device) - - # Run inference - output_np, latency = inference_func(input_tensor) - - if output_np is None: - logger.error(f"Failed to get output for sample {sample_idx}") - continue - - # Convert logits to probabilities - logits = torch.from_numpy(output_np) - probabilities = F.softmax(logits, dim=-1) - predicted_class = torch.argmax(probabilities, dim=-1).item() - confidence = probabilities.max().item() - - # Store results - all_predictions.append(predicted_class) - all_ground_truth.append(gt_label) - all_probabilities.append(probabilities.cpu().numpy()) - all_latencies.append(latency) - - # Print sample results only in verbose mode - if verbose: - logger.info( - f"Sample {sample_idx + 1} (GT={gt_label}): " - f"Pred={predicted_class}, Confidence={confidence:.4f}, Latency={latency:.2f}ms" - ) - - # Clear GPU memory periodically - if model_type == "tensorrt" and sample_idx % 10 == 0: - if torch.cuda.is_available(): - torch.cuda.empty_cache() - gc.collect() - - except Exception as e: - logger.error(f"Error processing sample {sample_idx}: {e}") - continue - - 
return ( - np.array(all_predictions), - np.array(all_ground_truth), - np.array(all_probabilities), - np.array(all_latencies), - ) - - -def print_evaluation_results( - all_predictions: np.ndarray, - all_ground_truth: np.ndarray, - all_probabilities: np.ndarray, - all_latencies: np.ndarray, - model_type: str, - logger: logging.Logger, -) -> None: - """Print comprehensive evaluation results with metrics and statistics.""" - - if len(all_predictions) == 0: - logger.error("No samples were processed successfully.") - return - - correct_predictions = (all_predictions == all_ground_truth).sum() - total_samples = len(all_predictions) - accuracy = correct_predictions / total_samples - avg_latency = np.mean(all_latencies) - - # Print header - logger.info(f"\n{'='*60}") - logger.info(f"{model_type.upper()} Model Evaluation Results") - logger.info(f"{'='*60}") - logger.info(f"Total samples: {total_samples}") - logger.info(f"Correct predictions: {correct_predictions}") - logger.info(f"Accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)") - logger.info(f"Average latency: {avg_latency:.2f} ms") - - # Calculate per-class accuracy - unique_classes = np.unique(all_ground_truth) - logger.info(f"\nPer-class accuracy:") - for cls in unique_classes: - cls_mask = all_ground_truth == cls - cls_correct = (all_predictions[cls_mask] == all_ground_truth[cls_mask]).sum() - cls_total = cls_mask.sum() - cls_accuracy = cls_correct / cls_total if cls_total > 0 else 0 - logger.info( - f" Class {cls} ({LABELS[str(cls)]}): " - f"{cls_correct}/{cls_total} = {cls_accuracy:.4f} ({cls_accuracy*100:.2f}%)" - ) - - # Calculate average confidence - avg_confidence = np.mean([prob.max() for prob in all_probabilities]) - logger.info(f"\nAverage confidence: {avg_confidence:.4f}") - - # Calculate latency statistics - min_latency = np.min(all_latencies) - max_latency = np.max(all_latencies) - std_latency = np.std(all_latencies) - p50_latency = np.percentile(all_latencies, 50) - p95_latency = np.percentile(all_latencies, 95) - p99_latency = np.percentile(all_latencies, 99) - - logger.info(f"\nLatency Statistics:") - logger.info(f" Average: {avg_latency:.2f} ms") - logger.info(f" Median (P50): {p50_latency:.2f} ms") - logger.info(f" P95: {p95_latency:.2f} ms") - logger.info(f" P99: {p99_latency:.2f} ms") - logger.info(f" Min: {min_latency:.2f} ms") - logger.info(f" Max: {max_latency:.2f} ms") - logger.info(f" Std: {std_latency:.2f} ms") - - # Show confusion matrix - logger.info(f"\nConfusion Matrix:") - logger.info(f"{'':>10} Predicted") - logger.info(f"{'Actual':>10} {'0 (misc)':>10} {'1 (calib)':>10}") - logger.info(f"{'-'*32}") - for true_cls in unique_classes: - row_label = f"{true_cls} ({LABELS[str(true_cls)][:4]})" - row = [f"{row_label:>10}"] - for pred_cls in unique_classes: - count = ((all_ground_truth == true_cls) & (all_predictions == pred_cls)).sum() - row.append(f"{count:>10}") - logger.info(" ".join(row)) - - logger.info(f"\n{'='*60}") - logger.info(f"Evaluation Summary:") - logger.info(f" Model Type: {model_type.upper()}") - logger.info(f" Accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)") - logger.info(f" Avg Latency: {avg_latency:.2f} ms") - logger.info(f" Throughput: {1000/avg_latency:.2f} samples/sec") - logger.info(f"{'='*60}\n") - def main(): """Main deployment function.""" @@ -961,11 +42,13 @@ def main(): logger.info(f"Loading model config from: {args.model_cfg}") model_cfg = Config.fromfile(args.model_cfg) + # Extract runtime configuration work_dir = config.work_dir device = config.device info_pkl = 
config.runtime_io_config.get("info_pkl") sample_idx = config.runtime_io_config.get("sample_idx", 0) existing_onnx = config.runtime_io_config.get("onnx_file") + export_mode = config.export_config.get("mode", "both") # Validate required parameters if not info_pkl: @@ -976,43 +59,33 @@ def main(): mmengine.mkdir_or_exist(osp.abspath(work_dir)) logger.info(f"Working directory: {work_dir}") logger.info(f"Device: {device}") - export_mode = config.export_config.get("mode", "both") logger.info(f"Export mode: {export_mode}") # Check if eval-only mode is_eval_only = export_mode == "none" + # Validate eval-only mode configuration + if is_eval_only: + eval_enabled = config.evaluation_config.get("enabled", False) + if not eval_enabled: + logger.error( + "Configuration error: export mode is 'none' (evaluation-only mode) " + "but evaluation.enabled is False. " + "Please set evaluation.enabled = True in your config." + ) + return + # Validate checkpoint requirement if not is_eval_only and not args.checkpoint: logger.error("Checkpoint is required when export mode is not 'none'") return - # Determine export paths - onnx_path = None - trt_path = None - + # Export phase if not is_eval_only: - if config.should_export_onnx: - onnx_settings = config.onnx_settings - onnx_path = osp.join(work_dir, onnx_settings["save_file"]) - - if config.should_export_tensorrt: - # Use existing ONNX if provided, otherwise use the one we'll export - if existing_onnx and not config.should_export_onnx: - onnx_path = existing_onnx - if not osp.exists(onnx_path): - logger.error(f"Provided ONNX file does not exist: {onnx_path}") - return - logger.info(f"Using existing ONNX file: {onnx_path}") - elif not onnx_path: - # Need ONNX for TensorRT but neither export nor existing file specified - logger.error("TensorRT export requires ONNX file. 
Set mode='both' or provide onnx_file in config.") - return - - # Set TensorRT output path - onnx_settings = config.onnx_settings - trt_file = onnx_settings["save_file"].replace(".onnx", ".engine") - trt_path = osp.join(work_dir, trt_file) + # Determine export paths + onnx_path, trt_path = validate_and_prepare_paths(config, work_dir, existing_onnx, logger) + if onnx_path is None and trt_path is None: + return # Load model logger.info(f"Loading model from checkpoint: {args.checkpoint}") @@ -1027,135 +100,62 @@ def main(): ) # Export models - if config.should_export_onnx: - export_to_onnx(model, input_tensor_calibrated, onnx_path, config, logger) + success, device = export_models(model, config, onnx_path, trt_path, input_tensor_calibrated, device, logger) + if not success: + logger.error("Export failed") + return + + # Update tensors if device changed + if device.type == "cuda": + input_tensor_calibrated = input_tensor_calibrated.to(device) + input_tensor_miscalibrated = input_tensor_miscalibrated.to(device) + + # Run verification + run_model_verification( + model, + config, + onnx_path, + trt_path, + input_tensor_calibrated, + input_tensor_miscalibrated, + existing_onnx, + logger, + ) + # Log exported formats + exported_formats = [] + if config.should_export_onnx: + exported_formats.append("ONNX") if config.should_export_tensorrt: - logger.info("Converting ONNX to TensorRT...") - - # Ensure CUDA device for TensorRT - if device.type != "cuda": - logger.warning("TensorRT requires CUDA device, switching to cuda") - device = torch.device("cuda") - input_tensor_calibrated = input_tensor_calibrated.to(device) - input_tensor_miscalibrated = input_tensor_miscalibrated.to(device) - - success = export_to_tensorrt(onnx_path, trt_path, input_tensor_calibrated, config, logger) - if success: - logger.info(f"TensorRT conversion successful: {trt_path}") - else: - logger.error("TensorRT conversion failed") - - # Run verification if requested - if config.should_verify: - logger.info( - "Running verification for miscalibrated and calibrated samples with an output array [SCORE_MISCALIBRATED, SCORE_CALIBRATED]..." 
- ) - input_tensors = {"0": input_tensor_miscalibrated, "1": input_tensor_calibrated} - - # Only verify formats that were exported or provided - onnx_path_for_verify = onnx_path if (config.should_export_onnx or existing_onnx) else None - trt_path_for_verify = trt_path if config.should_export_tensorrt else None + exported_formats.append("TensorRT") + if exported_formats: + logger.info(f"Exported formats: {', '.join(exported_formats)}") - run_verification(model, onnx_path_for_verify, trt_path_for_verify, input_tensors, logger) + logger.info("Deployment completed successfully!") else: logger.info("Evaluation-only mode: Skipping model loading and export") - # Get evaluation settings from config and CLI + # Evaluation phase eval_cfg = config.evaluation_config should_evaluate = eval_cfg.get("enabled", False) num_samples = eval_cfg.get("num_samples", 10) verbose_mode = eval_cfg.get("verbose", False) - # Run full evaluation if requested if should_evaluate: logger.info(f"\n{'='*60}") logger.info("Starting full model evaluation...") logger.info(f"{'='*60}") - # Determine which models to evaluate - models_to_evaluate = [] - - # Get model paths from config - eval_onnx_path = eval_cfg.get("onnx_model") - eval_trt_path = eval_cfg.get("tensorrt_model") - - # If both paths are None, skip evaluation (no auto-detection) - if eval_onnx_path is None and eval_trt_path is None: - logger.warning("Both onnx_model and tensorrt_model are None. Skipping evaluation.") - logger.warning("To enable evaluation, specify at least one model path in the config.") - else: - # Only evaluate models that are explicitly specified (not None) - if eval_onnx_path is not None: - # Evaluate ONNX model - if osp.exists(eval_onnx_path): - models_to_evaluate.append(("onnx", eval_onnx_path)) - logger.info(f"Using config-specified ONNX model: {eval_onnx_path}") - else: - logger.warning(f"Config-specified ONNX model not found: {eval_onnx_path}") - - if eval_trt_path is not None: - # Evaluate TensorRT model - if osp.exists(eval_trt_path): - models_to_evaluate.append(("tensorrt", eval_trt_path)) - logger.info(f"Using config-specified TensorRT model: {eval_trt_path}") - else: - logger.warning(f"Config-specified TensorRT model not found: {eval_trt_path}") - - if not models_to_evaluate: - logger.error( - "No models available for evaluation. Please export models first or specify model paths in config." 
- ) - else: - # Evaluate each model - for model_type, model_path in models_to_evaluate: - logger.info(f"\nEvaluating {model_type.upper()} model...") - logger.info(f"Model path: {model_path}") - logger.info(f"Number of samples: {num_samples}") - logger.info(f"Verbose mode: {verbose_mode}") - - try: - # Run evaluation - predictions, ground_truth, probabilities, latencies = evaluate_exported_model( - model_path=model_path, - model_type=model_type, - model_cfg=model_cfg, - info_pkl_path=info_pkl, - logger=logger, - device=device.type if isinstance(device, torch.device) else device, - num_samples=num_samples, - verbose=verbose_mode, - ) - - # Print results - print_evaluation_results(predictions, ground_truth, probabilities, latencies, model_type, logger) - - # Cleanup - if torch.cuda.is_available(): - torch.cuda.empty_cache() - gc.collect() - - except Exception as e: - logger.error(f"Evaluation failed for {model_type.upper()} model: {e}") - import traceback - - logger.error(traceback.format_exc()) - continue - - logger.info("Deployment completed successfully!") - - # Log what was exported - if not is_eval_only: - exported_formats = [] - if config.should_export_onnx: - exported_formats.append("ONNX") - if config.should_export_tensorrt: - exported_formats.append("TensorRT") - - if exported_formats: - logger.info(f"Exported formats: {', '.join(exported_formats)}") - else: - logger.info("Evaluation-only mode: No models were exported") + models_to_evaluate = get_models_to_evaluate(eval_cfg, logger) + run_full_evaluation( + models_to_evaluate, + model_cfg, + info_pkl, + device, + num_samples, + verbose_mode, + logger, + ) if __name__ == "__main__": diff --git a/tools/calibration_classification/README.md b/tools/calibration_classification/README.md index 713238e2..f170e90d 100644 --- a/tools/calibration_classification/README.md +++ b/tools/calibration_classification/README.md @@ -212,9 +212,7 @@ tensorboard --logdir work_dirs --bind_all ``` ## 5. Evaluation -### 5.1. PyTorch Model Evaluation - -- Evaluation +PyTorch Model Evaluation ```sh python tools/calibration_classification/test.py {config_file} {checkpoint_file} @@ -224,6 +222,7 @@ python tools/calibration_classification/test.py {config_file} {checkpoint_file} python tools/calibration_classification/test.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py epoch_25.pth --out {output_file} ``` +For ONNX and TensorRT Evaluation check [Section 6.4](#64-evaluate-onnx-and-tensorrt-models). ## 6. Deployment @@ -425,8 +424,7 @@ backend_config = dict( ```sh python projects/CalibrationStatusClassification/deploy/main.py \ projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \ - projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ - epoch_25.pth + projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py ``` **Output**: `work_dirs/end2end.engine` (INT8 TensorRT engine) diff --git a/tools/calibration_classification/visualize_lidar_camera_projection.py b/tools/calibration_classification/visualize_lidar_camera_projection.py index 2a2bf85d..2beb3a7f 100644 --- a/tools/calibration_classification/visualize_lidar_camera_projection.py +++ b/tools/calibration_classification/visualize_lidar_camera_projection.py @@ -15,9 +15,9 @@ ) -class CalibrationToolkit: +class CalibrationClassifierToolkit: """ - A comprehensive tool for processing LiDAR-camera calibration data. + A tool for processing LiDAR-camera calibration data. 
This class provides functionality to load calibration data from info.pkl files and optionally generate visualizations or save results as NPZ files. Attributes: @@ -30,7 +30,7 @@ class CalibrationToolkit: def __init__(self, model_cfg: Config, data_root: Optional[str] = None, output_dir: Optional[str] = None): """ - Initialize the CalibrationToolkit. + Initialize the CalibrationClassifierToolkit. Args: model_cfg: Model configuration data_root: Root directory for data files. If None, absolute paths are used. @@ -40,7 +40,7 @@ def __init__(self, model_cfg: Config, data_root: Optional[str] = None, output_di self.data_root = data_root self.output_dir = output_dir self.transform = None - self.logger = MMLogger.get_instance(name="calibration_toolkit") + self.logger = MMLogger.get_instance(name="calibration_classifier_toolkit") self.collected_results = [] self._initialize_transform() @@ -50,7 +50,6 @@ def _initialize_transform(self) -> None: if transform_config is None: raise ValueError("transform_config not found in model configuration") - # Only set projection_vis_dir if output_dir is provided (for visualization) projection_vis_dir = self.output_dir if self.output_dir else None self.transform = CalibrationClassificationTransform( @@ -339,7 +338,7 @@ def main() -> None: model_cfg = Config.fromfile(args.model_cfg) # Initialize toolkit - toolkit = CalibrationToolkit(model_cfg=model_cfg, data_root=args.data_root, output_dir=args.output_dir) + toolkit = CalibrationClassifierToolkit(model_cfg=model_cfg, data_root=args.data_root, output_dir=args.output_dir) # Process indices argument processed_indices = None From a29452e533da35a0e9bead43cedc7ce43dc0b6a7 Mon Sep 17 00:00:00 2001 From: vividf Date: Wed, 8 Oct 2025 14:28:18 +0900 Subject: [PATCH 17/19] feat: new design for deployment Signed-off-by: vividf --- autoware_ml/deployment/__init__.py | 20 + autoware_ml/deployment/backends/__init__.py | 13 + .../deployment/backends/base_backend.py | 85 ++ .../deployment/backends/onnx_backend.py | 80 ++ .../deployment/backends/pytorch_backend.py | 79 ++ .../deployment/backends/tensorrt_backend.py | 135 ++++ autoware_ml/deployment/cli/__init__.py | 3 + autoware_ml/deployment/core/__init__.py | 25 + autoware_ml/deployment/core/base_config.py | 213 +++++ .../deployment/core/base_data_loader.py | 118 +++ autoware_ml/deployment/core/base_evaluator.py | 138 ++++ autoware_ml/deployment/core/verification.py | 178 +++++ autoware_ml/deployment/exporters/__init__.py | 11 + .../deployment/exporters/base_exporter.py | 67 ++ .../deployment/exporters/onnx_exporter.py | 121 +++ .../deployment/exporters/tensorrt_exporter.py | 157 ++++ .../configs/deploy/resnet18_5ch.py | 13 +- .../deploy/__init__.py | 26 +- .../deploy/config.py | 161 ---- .../deploy/data_loader.py | 183 +++++ .../deploy/evaluator.py | 741 +++++++++--------- .../deploy/exporters.py | 680 ---------------- .../deploy/main.py | 223 ++++-- tools/calibration_classification/README.md | 6 +- 24 files changed, 2163 insertions(+), 1313 deletions(-) create mode 100644 autoware_ml/deployment/__init__.py create mode 100644 autoware_ml/deployment/backends/__init__.py create mode 100644 autoware_ml/deployment/backends/base_backend.py create mode 100644 autoware_ml/deployment/backends/onnx_backend.py create mode 100644 autoware_ml/deployment/backends/pytorch_backend.py create mode 100644 autoware_ml/deployment/backends/tensorrt_backend.py create mode 100644 autoware_ml/deployment/cli/__init__.py create mode 100644 autoware_ml/deployment/core/__init__.py create mode 100644 
autoware_ml/deployment/core/base_config.py create mode 100644 autoware_ml/deployment/core/base_data_loader.py create mode 100644 autoware_ml/deployment/core/base_evaluator.py create mode 100644 autoware_ml/deployment/core/verification.py create mode 100644 autoware_ml/deployment/exporters/__init__.py create mode 100644 autoware_ml/deployment/exporters/base_exporter.py create mode 100644 autoware_ml/deployment/exporters/onnx_exporter.py create mode 100644 autoware_ml/deployment/exporters/tensorrt_exporter.py delete mode 100644 projects/CalibrationStatusClassification/deploy/config.py create mode 100644 projects/CalibrationStatusClassification/deploy/data_loader.py delete mode 100644 projects/CalibrationStatusClassification/deploy/exporters.py diff --git a/autoware_ml/deployment/__init__.py b/autoware_ml/deployment/__init__.py new file mode 100644 index 00000000..01d77ae9 --- /dev/null +++ b/autoware_ml/deployment/__init__.py @@ -0,0 +1,20 @@ +""" +Autoware ML Unified Deployment Framework + +This package provides a unified, task-agnostic deployment framework for +exporting, verifying, and evaluating machine learning models across different +tasks (classification, detection, segmentation, etc.) and backends (ONNX, +TensorRT, TorchScript, etc.). +""" + +from .core.base_config import BaseDeploymentConfig +from .core.base_data_loader import BaseDataLoader +from .core.base_evaluator import BaseEvaluator + +__all__ = [ + "BaseDeploymentConfig", + "BaseDataLoader", + "BaseEvaluator", +] + +__version__ = "1.0.0" diff --git a/autoware_ml/deployment/backends/__init__.py b/autoware_ml/deployment/backends/__init__.py new file mode 100644 index 00000000..59e790d7 --- /dev/null +++ b/autoware_ml/deployment/backends/__init__.py @@ -0,0 +1,13 @@ +"""Inference backends for different model formats.""" + +from .base_backend import BaseBackend +from .onnx_backend import ONNXBackend +from .pytorch_backend import PyTorchBackend +from .tensorrt_backend import TensorRTBackend + +__all__ = [ + "BaseBackend", + "PyTorchBackend", + "ONNXBackend", + "TensorRTBackend", +] diff --git a/autoware_ml/deployment/backends/base_backend.py b/autoware_ml/deployment/backends/base_backend.py new file mode 100644 index 00000000..b57669c3 --- /dev/null +++ b/autoware_ml/deployment/backends/base_backend.py @@ -0,0 +1,85 @@ +""" +Abstract base class for inference backends. + +Provides a unified interface for running inference across different +backend formats (PyTorch, ONNX, TensorRT, etc.). +""" + +from abc import ABC, abstractmethod +from typing import Any, Dict, Tuple + +import numpy as np +import torch + + +class BaseBackend(ABC): + """ + Abstract base class for inference backends. + + This class defines a unified interface for running inference + across different model formats and runtime engines. + """ + + def __init__(self, model_path: str, device: str = "cpu"): + """ + Initialize backend. + + Args: + model_path: Path to model file + device: Device to run inference on ('cpu', 'cuda', 'cuda:0', etc.) + """ + self.model_path = model_path + self.device = device + self._model = None + + @abstractmethod + def load_model(self) -> None: + """ + Load model from file. + + Raises: + FileNotFoundError: If model file doesn't exist + RuntimeError: If model loading fails + """ + pass + + @abstractmethod + def infer(self, input_tensor: torch.Tensor) -> Tuple[np.ndarray, float]: + """ + Run inference on input tensor. 
+ + Args: + input_tensor: Input tensor for inference + + Returns: + Tuple of (output_array, latency_ms): + - output_array: Model output as numpy array + - latency_ms: Inference time in milliseconds + + Raises: + RuntimeError: If inference fails + ValueError: If input format is invalid + """ + pass + + def __enter__(self): + """Context manager entry.""" + self.load_model() + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + """Context manager exit.""" + self.cleanup() + + def cleanup(self) -> None: + """ + Clean up resources. + + Override this method in subclasses to release backend-specific resources. + """ + pass + + @property + def is_loaded(self) -> bool: + """Check if model is loaded.""" + return self._model is not None diff --git a/autoware_ml/deployment/backends/onnx_backend.py b/autoware_ml/deployment/backends/onnx_backend.py new file mode 100644 index 00000000..3fa51d82 --- /dev/null +++ b/autoware_ml/deployment/backends/onnx_backend.py @@ -0,0 +1,80 @@ +"""ONNX Runtime inference backend.""" + +import os +import time +from typing import Tuple + +import numpy as np +import onnxruntime as ort +import torch + +from .base_backend import BaseBackend + + +class ONNXBackend(BaseBackend): + """ + ONNX Runtime inference backend. + + Runs inference using ONNX Runtime on CPU or CUDA. + """ + + def __init__(self, model_path: str, device: str = "cpu"): + """ + Initialize ONNX backend. + + Args: + model_path: Path to ONNX model file + device: Device to run inference on ('cpu' or 'cuda') + """ + super().__init__(model_path, device) + self._session = None + + def load_model(self) -> None: + """Load ONNX model.""" + if not os.path.exists(self.model_path): + raise FileNotFoundError(f"ONNX model not found: {self.model_path}") + + # Select execution provider based on device + if self.device.startswith("cuda"): + providers = ["CUDAExecutionProvider", "CPUExecutionProvider"] + else: + providers = ["CPUExecutionProvider"] + + try: + self._session = ort.InferenceSession(self.model_path, providers=providers) + self._model = self._session # For is_loaded check + except Exception as e: + raise RuntimeError(f"Failed to load ONNX model: {e}") + + def infer(self, input_tensor: torch.Tensor) -> Tuple[np.ndarray, float]: + """ + Run inference on input tensor. + + Args: + input_tensor: Input tensor for inference + + Returns: + Tuple of (output_array, latency_ms) + """ + if not self.is_loaded: + raise RuntimeError("Model not loaded. Call load_model() first.") + + input_array = input_tensor.cpu().numpy() + + # Prepare input dictionary + input_name = self._session.get_inputs()[0].name + onnx_input = {input_name: input_array} + + # Run inference + start_time = time.perf_counter() + output = self._session.run(None, onnx_input)[0] + end_time = time.perf_counter() + + latency_ms = (end_time - start_time) * 1000 + + return output, latency_ms + + def cleanup(self) -> None: + """Clean up ONNX Runtime resources.""" + self._session = None + self._model = None diff --git a/autoware_ml/deployment/backends/pytorch_backend.py b/autoware_ml/deployment/backends/pytorch_backend.py new file mode 100644 index 00000000..83d9f703 --- /dev/null +++ b/autoware_ml/deployment/backends/pytorch_backend.py @@ -0,0 +1,79 @@ +"""PyTorch inference backend.""" + +import time +from typing import Tuple + +import numpy as np +import torch + +from .base_backend import BaseBackend + + +class PyTorchBackend(BaseBackend): + """ + PyTorch inference backend. + + Runs inference using native PyTorch models. 
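As a quick illustration of the backend interface added above, here is a minimal usage sketch. It assumes the `autoware_ml.deployment.backends` package from this patch is importable; the ONNX file path and the input tensor shape are placeholders, not values taken from the patch.

```python
import torch

from autoware_ml.deployment.backends import ONNXBackend

# Placeholder input shape; a real caller would use the model's expected shape.
dummy_input = torch.randn(1, 5, 224, 224)

# BaseBackend provides context-manager support:
# __enter__ calls load_model(), __exit__ calls cleanup().
with ONNXBackend("work_dirs/end2end.onnx", device="cpu") as backend:
    output, latency_ms = backend.infer(dummy_input)
    print(f"output shape: {output.shape}, latency: {latency_ms:.2f} ms")
```

The same pattern applies to `PyTorchBackend` and `TensorRTBackend`, since all backends share the `load_model()` / `infer()` / `cleanup()` contract.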
+ """ + + def __init__(self, model: torch.nn.Module, device: str = "cpu"): + """ + Initialize PyTorch backend. + + Args: + model: PyTorch model instance (already loaded) + device: Device to run inference on + """ + super().__init__(model_path="", device=device) + self._model = model + self._torch_device = torch.device(device) + self._model.to(self._torch_device) + self._model.eval() + + def load_model(self) -> None: + """Model is already loaded in __init__.""" + if self._model is None: + raise RuntimeError("Model was not provided during initialization") + + def infer(self, input_tensor: torch.Tensor) -> Tuple[np.ndarray, float]: + """ + Run inference on input tensor. + + Args: + input_tensor: Input tensor for inference + + Returns: + Tuple of (output_array, latency_ms) + """ + if not self.is_loaded: + raise RuntimeError("Model not loaded. Call load_model() first.") + + # Move input to correct device + input_tensor = input_tensor.to(self._torch_device) + + # Run inference with timing + with torch.no_grad(): + start_time = time.perf_counter() + output = self._model(input_tensor) + end_time = time.perf_counter() + + latency_ms = (end_time - start_time) * 1000 + + # Handle different output formats + if hasattr(output, "output"): + output = output.output + elif isinstance(output, dict) and "output" in output: + output = output["output"] + + if not isinstance(output, torch.Tensor): + raise ValueError(f"Unexpected PyTorch output type: {type(output)}") + + # Convert to numpy + output_array = output.cpu().numpy() + + return output_array, latency_ms + + def cleanup(self) -> None: + """Clean up PyTorch resources.""" + if torch.cuda.is_available(): + torch.cuda.empty_cache() diff --git a/autoware_ml/deployment/backends/tensorrt_backend.py b/autoware_ml/deployment/backends/tensorrt_backend.py new file mode 100644 index 00000000..366ac2a1 --- /dev/null +++ b/autoware_ml/deployment/backends/tensorrt_backend.py @@ -0,0 +1,135 @@ +"""TensorRT inference backend.""" + +import os +import time +from typing import Tuple + +import numpy as np +import pycuda.autoinit # noqa: F401 +import pycuda.driver as cuda +import tensorrt as trt +import torch + +from .base_backend import BaseBackend + + +class TensorRTBackend(BaseBackend): + """ + TensorRT inference backend. + + Runs inference using NVIDIA TensorRT engine. + """ + + def __init__(self, model_path: str, device: str = "cuda:0"): + """ + Initialize TensorRT backend. + + Args: + model_path: Path to TensorRT engine file + device: CUDA device to use (ignored, TensorRT uses current CUDA context) + """ + super().__init__(model_path, device) + self._engine = None + self._context = None + self._logger = trt.Logger(trt.Logger.WARNING) + + def load_model(self) -> None: + """Load TensorRT engine.""" + if not os.path.exists(self.model_path): + raise FileNotFoundError(f"TensorRT engine not found: {self.model_path}") + + # Initialize TensorRT + trt.init_libnvinfer_plugins(self._logger, "") + runtime = trt.Runtime(self._logger) + + # Load engine + try: + with open(self.model_path, "rb") as f: + self._engine = runtime.deserialize_cuda_engine(f.read()) + + if self._engine is None: + raise RuntimeError("Failed to deserialize TensorRT engine") + + self._context = self._engine.create_execution_context() + self._model = self._engine # For is_loaded check + except Exception as e: + raise RuntimeError(f"Failed to load TensorRT engine: {e}") + + def infer(self, input_tensor: torch.Tensor) -> Tuple[np.ndarray, float]: + """ + Run inference on input tensor. 
+ + Args: + input_tensor: Input tensor for inference + + Returns: + Tuple of (output_array, latency_ms) + """ + if not self.is_loaded: + raise RuntimeError("Model not loaded. Call load_model() first.") + + # Convert to numpy and ensure correct format + input_array = input_tensor.cpu().numpy().astype(np.float32) + if not input_array.flags["C_CONTIGUOUS"]: + input_array = np.ascontiguousarray(input_array) + + # Get tensor names + input_name = None + output_name = None + for i in range(self._engine.num_io_tensors): + tensor_name = self._engine.get_tensor_name(i) + if self._engine.get_tensor_mode(tensor_name) == trt.TensorIOMode.INPUT: + input_name = tensor_name + elif self._engine.get_tensor_mode(tensor_name) == trt.TensorIOMode.OUTPUT: + output_name = tensor_name + + if input_name is None or output_name is None: + raise RuntimeError("Could not find input/output tensor names") + + # Set input shape and get output shape + self._context.set_input_shape(input_name, input_array.shape) + output_shape = self._context.get_tensor_shape(output_name) + output_array = np.empty(output_shape, dtype=np.float32) + if not output_array.flags["C_CONTIGUOUS"]: + output_array = np.ascontiguousarray(output_array) + + # Allocate GPU memory + d_input = cuda.mem_alloc(input_array.nbytes) + d_output = cuda.mem_alloc(output_array.nbytes) + + # Create CUDA stream and events for timing + stream = cuda.Stream() + start = cuda.Event() + end = cuda.Event() + + try: + # Set tensor addresses + self._context.set_tensor_address(input_name, int(d_input)) + self._context.set_tensor_address(output_name, int(d_output)) + + # Run inference with timing + cuda.memcpy_htod_async(d_input, input_array, stream) + start.record(stream) + self._context.execute_async_v3(stream_handle=stream.handle) + end.record(stream) + cuda.memcpy_dtoh_async(output_array, d_output, stream) + stream.synchronize() + + latency_ms = end.time_since(start) + + return output_array, latency_ms + + finally: + # Cleanup GPU memory + try: + d_input.free() + d_output.free() + except Exception: + # Silently ignore cleanup errors + pass + + def cleanup(self) -> None: + """Clean up TensorRT resources.""" + self._context = None + self._engine = None + self._model = None diff --git a/autoware_ml/deployment/cli/__init__.py b/autoware_ml/deployment/cli/__init__.py new file mode 100644 index 00000000..3036280e --- /dev/null +++ b/autoware_ml/deployment/cli/__init__.py @@ -0,0 +1,3 @@ +"""Command-line interface for deployment.""" + +__all__ = [] diff --git a/autoware_ml/deployment/core/__init__.py b/autoware_ml/deployment/core/__init__.py new file mode 100644 index 00000000..f5969ce4 --- /dev/null +++ b/autoware_ml/deployment/core/__init__.py @@ -0,0 +1,25 @@ +"""Core components for deployment framework.""" + +from .base_config import ( + BackendConfig, + BaseDeploymentConfig, + ExportConfig, + RuntimeConfig, + parse_base_args, + setup_logging, +) +from .base_data_loader import BaseDataLoader +from .base_evaluator import BaseEvaluator +from .verification import verify_model_outputs + +__all__ = [ + "BaseDeploymentConfig", + "ExportConfig", + "RuntimeConfig", + "BackendConfig", + "setup_logging", + "parse_base_args", + "BaseDataLoader", + "BaseEvaluator", + "verify_model_outputs", +] diff --git a/autoware_ml/deployment/core/base_config.py b/autoware_ml/deployment/core/base_config.py new file mode 100644 index 00000000..9c88a1be --- /dev/null +++ b/autoware_ml/deployment/core/base_config.py @@ -0,0 +1,213 @@ +""" +Base configuration classes for deployment framework. 
+ +This module provides the foundation for task-agnostic deployment configuration. +Task-specific deployment configs should extend BaseDeploymentConfig. +""" + +import argparse +import logging +from typing import Any, Dict, List, Optional + +from mmengine.config import Config + +# Constants +DEFAULT_VERIFICATION_TOLERANCE = 1e-3 +DEFAULT_WORKSPACE_SIZE = 1 << 30 # 1 GB + +# Precision policy mapping for TensorRT +PRECISION_POLICIES = { + "auto": {}, # No special flags, TensorRT decides + "fp16": {"FP16": True}, + "fp32_tf32": {"TF32": True}, # TF32 for FP32 operations + "explicit_int8": {"INT8": True}, + "strongly_typed": {"STRONGLY_TYPED": True}, # Network creation flag +} + + +class ExportConfig: + """Configuration for model export settings.""" + + def __init__(self, config_dict: Dict[str, Any]): + self.mode = config_dict.get("mode", "both") + self.verify = config_dict.get("verify", False) + self.device = config_dict.get("device", "cuda:0") + self.work_dir = config_dict.get("work_dir", "work_dirs") + + def should_export_onnx(self) -> bool: + """Check if ONNX export is requested.""" + return self.mode in ["onnx", "both"] + + def should_export_tensorrt(self) -> bool: + """Check if TensorRT export is requested.""" + return self.mode in ["trt", "both"] + + +class RuntimeConfig: + """Configuration for runtime I/O settings.""" + + def __init__(self, config_dict: Dict[str, Any]): + self._config = config_dict + + def get(self, key: str, default: Any = None) -> Any: + """Get a runtime configuration value.""" + return self._config.get(key, default) + + def __getitem__(self, key: str) -> Any: + """Dictionary-style access to runtime config.""" + return self._config[key] + + +class BackendConfig: + """Configuration for backend-specific settings.""" + + def __init__(self, config_dict: Dict[str, Any]): + self.common_config = config_dict.get("common_config", {}) + self.model_inputs = config_dict.get("model_inputs", []) + + def get_precision_policy(self) -> str: + """Get precision policy name.""" + return self.common_config.get("precision_policy", "auto") + + def get_precision_flags(self) -> Dict[str, bool]: + """Get TensorRT precision flags for the configured policy.""" + policy = self.get_precision_policy() + return PRECISION_POLICIES.get(policy, {}) + + def get_max_workspace_size(self) -> int: + """Get maximum workspace size for TensorRT.""" + return self.common_config.get("max_workspace_size", DEFAULT_WORKSPACE_SIZE) + + +class BaseDeploymentConfig: + """ + Base configuration container for deployment settings. + + This class provides a task-agnostic interface for deployment configuration. + Task-specific configs should extend this class and add task-specific settings. + """ + + def __init__(self, deploy_cfg: Config): + """ + Initialize deployment configuration. + + Args: + deploy_cfg: MMEngine Config object containing deployment settings + """ + self.deploy_cfg = deploy_cfg + self._validate_config() + + # Initialize config sections + self.export_config = ExportConfig(deploy_cfg.get("export", {})) + self.runtime_config = RuntimeConfig(deploy_cfg.get("runtime_io", {})) + self.backend_config = BackendConfig(deploy_cfg.get("backend_config", {})) + + def _validate_config(self) -> None: + """Validate configuration structure and required fields.""" + # Validate required sections + if "export" not in self.deploy_cfg: + raise ValueError( + "Missing 'export' section in deploy config. " "Please update your config to include 'export' section." 
+ ) + + # Validate export mode + valid_modes = ["onnx", "trt", "both", "none"] + mode = self.deploy_cfg.get("export", {}).get("mode", "both") + if mode not in valid_modes: + raise ValueError(f"Invalid export mode '{mode}'. Must be one of {valid_modes}") + + # Validate precision policy if present + backend_cfg = self.deploy_cfg.get("backend_config", {}) + common_cfg = backend_cfg.get("common_config", {}) + precision_policy = common_cfg.get("precision_policy", "auto") + if precision_policy not in PRECISION_POLICIES: + raise ValueError( + f"Invalid precision_policy '{precision_policy}'. " f"Must be one of {list(PRECISION_POLICIES.keys())}" + ) + + @property + def evaluation_config(self) -> Dict: + """Get evaluation configuration.""" + return self.deploy_cfg.get("evaluation", {}) + + @property + def onnx_config(self) -> Dict: + """Get ONNX configuration.""" + return self.deploy_cfg.get("onnx_config", {}) + + def get_onnx_settings(self) -> Dict[str, Any]: + """ + Get ONNX export settings. + + Returns: + Dictionary containing ONNX export parameters + """ + onnx_config = self.onnx_config + return { + "opset_version": onnx_config.get("opset_version", 16), + "do_constant_folding": onnx_config.get("do_constant_folding", True), + "input_names": onnx_config.get("input_names", ["input"]), + "output_names": onnx_config.get("output_names", ["output"]), + "dynamic_axes": onnx_config.get("dynamic_axes"), + "export_params": onnx_config.get("export_params", True), + "keep_initializers_as_inputs": onnx_config.get("keep_initializers_as_inputs", False), + "save_file": onnx_config.get("save_file", "model.onnx"), + } + + def get_tensorrt_settings(self) -> Dict[str, Any]: + """ + Get TensorRT export settings with precision policy support. + + Returns: + Dictionary containing TensorRT export parameters + """ + return { + "max_workspace_size": self.backend_config.get_max_workspace_size(), + "precision_policy": self.backend_config.get_precision_policy(), + "policy_flags": self.backend_config.get_precision_flags(), + "model_inputs": self.backend_config.model_inputs, + } + + +def setup_logging(level: str = "INFO") -> logging.Logger: + """ + Setup logging configuration. + + Args: + level: Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL) + + Returns: + Configured logger instance + """ + logging.basicConfig(level=getattr(logging, level), format="%(levelname)s:%(name)s:%(message)s") + return logging.getLogger("deployment") + + +def parse_base_args(parser: Optional[argparse.ArgumentParser] = None) -> argparse.ArgumentParser: + """ + Create argument parser with common deployment arguments. 
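For reference, a minimal deploy config that passes `_validate_config()` and exercises the precision-policy lookup might look like the sketch below. The section names follow `base_config.py`, but the concrete values (paths, workspace size, policy) are illustrative assumptions, not the shipped `resnet18_5ch.py` settings.

```python
from mmengine.config import Config

from autoware_ml.deployment import BaseDeploymentConfig

# Illustrative deploy config; values are placeholders.
deploy_cfg = Config(
    dict(
        export=dict(mode="both", verify=True, device="cuda:0", work_dir="work_dirs"),
        runtime_io=dict(info_pkl="data/info.pkl", sample_idx=0),
        backend_config=dict(
            common_config=dict(max_workspace_size=1 << 30, precision_policy="fp16"),
            model_inputs=[],
        ),
        onnx_config=dict(save_file="end2end.onnx", opset_version=16),
    )
)

config = BaseDeploymentConfig(deploy_cfg)
print(config.export_config.should_export_tensorrt())  # True, since mode="both"
print(config.get_tensorrt_settings()["policy_flags"])  # {"FP16": True}
```

An invalid `export.mode` or an unknown `precision_policy` raises `ValueError` at construction time, so configuration mistakes surface before any export work starts.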
+ + Args: + parser: Optional existing ArgumentParser to add arguments to + + Returns: + ArgumentParser with deployment arguments + """ + if parser is None: + parser = argparse.ArgumentParser( + description="Deploy model to ONNX/TensorRT", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + + parser.add_argument("deploy_cfg", help="Deploy config path") + parser.add_argument("model_cfg", help="Model config path") + parser.add_argument( + "checkpoint", nargs="?", default=None, help="Model checkpoint path (optional when mode='none')" + ) + + # Optional overrides + parser.add_argument("--work-dir", help="Override output directory from config") + parser.add_argument("--device", help="Override device from config") + parser.add_argument("--log-level", default="INFO", choices=list(logging._nameToLevel.keys()), help="Logging level") + + return parser diff --git a/autoware_ml/deployment/core/base_data_loader.py b/autoware_ml/deployment/core/base_data_loader.py new file mode 100644 index 00000000..b0ee3313 --- /dev/null +++ b/autoware_ml/deployment/core/base_data_loader.py @@ -0,0 +1,118 @@ +""" +Abstract base class for data loading in deployment. + +Each task (classification, detection, segmentation, etc.) must implement +a concrete DataLoader that extends this base class. +""" + +from abc import ABC, abstractmethod +from typing import Any, Dict + +import torch + + +class BaseDataLoader(ABC): + """ + Abstract base class for task-specific data loaders. + + This class defines the interface that all task-specific data loaders + must implement. It handles loading raw data from disk and preprocessing + it into a format suitable for model inference. + """ + + def __init__(self, config: Dict[str, Any]): + """ + Initialize data loader. + + Args: + config: Configuration dictionary containing task-specific settings + """ + self.config = config + + @abstractmethod + def load_sample(self, index: int) -> Dict[str, Any]: + """ + Load a single sample from the dataset. + + Args: + index: Sample index to load + + Returns: + Dictionary containing raw sample data. Structure is task-specific, + but should typically include: + - Raw input data (image, point cloud, etc.) + - Ground truth labels/annotations (if available) + - Any metadata needed for evaluation + + Raises: + IndexError: If index is out of range + FileNotFoundError: If sample data files don't exist + """ + pass + + @abstractmethod + def preprocess(self, sample: Dict[str, Any]) -> torch.Tensor: + """ + Preprocess raw sample data into model input format. + + Args: + sample: Raw sample data returned by load_sample() + + Returns: + Preprocessed tensor ready for model inference. + Shape and format depend on the specific task. + + Raises: + ValueError: If sample format is invalid + """ + pass + + @abstractmethod + def get_num_samples(self) -> int: + """ + Get total number of samples in the dataset. + + Returns: + Total number of samples available + """ + pass + + def load_and_preprocess(self, index: int) -> torch.Tensor: + """ + Convenience method to load and preprocess a sample in one call. + + Args: + index: Sample index to load + + Returns: + Preprocessed tensor ready for inference + """ + sample = self.load_sample(index) + return self.preprocess(sample) + + def get_batch(self, indices: list) -> torch.Tensor: + """ + Load and preprocess multiple samples into a batch. + + Args: + indices: List of sample indices to load + + Returns: + Batched tensor with shape [batch_size, ...] 
+ """ + tensors = [self.load_and_preprocess(idx) for idx in indices] + return torch.stack(tensors) + + def validate_sample(self, sample: Dict[str, Any]) -> bool: + """ + Validate that a sample has the expected structure. + + Override this method in task-specific loaders to add validation. + + Args: + sample: Sample to validate + + Returns: + True if valid, False otherwise + """ + return True diff --git a/autoware_ml/deployment/core/base_evaluator.py b/autoware_ml/deployment/core/base_evaluator.py new file mode 100644 index 00000000..9d9aa1ae --- /dev/null +++ b/autoware_ml/deployment/core/base_evaluator.py @@ -0,0 +1,138 @@ +""" +Abstract base class for model evaluation in deployment. + +Each task (classification, detection, segmentation, etc.) must implement +a concrete Evaluator that extends this base class to compute task-specific metrics. +""" + +from abc import ABC, abstractmethod +from typing import Any, Dict + +import numpy as np + +from .base_data_loader import BaseDataLoader + + +class BaseEvaluator(ABC): + """ + Abstract base class for task-specific evaluators. + + This class defines the interface that all task-specific evaluators + must implement. It handles running inference on a dataset and computing + evaluation metrics appropriate for the task. + """ + + def __init__(self, config: Dict[str, Any]): + """ + Initialize evaluator. + + Args: + config: Configuration dictionary containing evaluation settings + """ + self.config = config + + @abstractmethod + def evaluate( + self, + model_path: str, + data_loader: BaseDataLoader, + num_samples: int, + backend: str = "pytorch", + device: str = "cpu", + verbose: bool = False, + ) -> Dict[str, Any]: + """ + Run full evaluation on a model. + + Args: + model_path: Path to model checkpoint/weights + data_loader: DataLoader for loading samples + num_samples: Number of samples to evaluate + backend: Backend to use ('pytorch', 'onnx', 'tensorrt') + device: Device to run inference on + verbose: Whether to print detailed progress + + Returns: + Dictionary containing evaluation metrics. The exact metrics + depend on the task, but should include: + - Primary metric(s) for the task + - Per-class metrics (if applicable) + - Inference latency statistics + - Any other relevant metrics + + Example: + For classification: + { + "accuracy": 0.95, + "precision": 0.94, + "recall": 0.96, + "per_class_accuracy": {...}, + "confusion_matrix": [...], + "avg_latency_ms": 5.2, + } + + For detection: + { + "mAP": 0.72, + "mAP_50": 0.85, + "mAP_75": 0.68, + "per_class_ap": {...}, + "avg_latency_ms": 15.3, + } + """ + pass + + @abstractmethod + def print_results(self, results: Dict[str, Any]) -> None: + """ + Pretty print evaluation results. + + Args: + results: Results dictionary returned by evaluate() + """ + pass + + def compute_latency_stats(self, latencies: list) -> Dict[str, float]: + """ + Compute latency statistics from a list of latency measurements. 
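To make the data-loader contract above concrete, here is a toy `BaseDataLoader` subclass. It serves random tensors instead of real samples and exists purely to show which methods a task-specific loader must implement; the channel count and shapes are assumptions for illustration.

```python
from typing import Any, Dict

import torch

from autoware_ml.deployment.core import BaseDataLoader


class RandomDataLoader(BaseDataLoader):
    """Toy loader returning random 5-channel images; illustration only."""

    def load_sample(self, index: int) -> Dict[str, Any]:
        if index >= self.get_num_samples():
            raise IndexError(f"index {index} out of range")
        # A real loader would read image/point-cloud data and labels here.
        return {"img": torch.randn(5, 224, 224), "gt_label": index % 2}

    def preprocess(self, sample: Dict[str, Any]) -> torch.Tensor:
        # A real loader would run the task's transform pipeline here.
        return sample["img"].float()

    def get_num_samples(self) -> int:
        return self.config.get("num_samples", 10)


loader = RandomDataLoader({"num_samples": 4})
batch = loader.get_batch([0, 1])  # tensor of shape [2, 5, 224, 224]
```

The inherited `load_and_preprocess()` and `get_batch()` helpers then work without any extra code in the subclass.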
+ + Args: + latencies: List of latency values in milliseconds + + Returns: + Dictionary with latency statistics + """ + if not latencies: + return { + "mean_ms": 0.0, + "std_ms": 0.0, + "min_ms": 0.0, + "max_ms": 0.0, + "median_ms": 0.0, + } + + latencies_array = np.array(latencies) + + return { + "mean_ms": float(np.mean(latencies_array)), + "std_ms": float(np.std(latencies_array)), + "min_ms": float(np.min(latencies_array)), + "max_ms": float(np.max(latencies_array)), + "median_ms": float(np.median(latencies_array)), + } + + def format_latency_stats(self, stats: Dict[str, float]) -> str: + """ + Format latency statistics as a readable string. + + Args: + stats: Latency statistics dictionary + + Returns: + Formatted string + """ + return ( + f"Latency: {stats['mean_ms']:.2f} ± {stats['std_ms']:.2f} ms " + f"(min: {stats['min_ms']:.2f}, max: {stats['max_ms']:.2f}, " + f"median: {stats['median_ms']:.2f})" + ) diff --git a/autoware_ml/deployment/core/verification.py b/autoware_ml/deployment/core/verification.py new file mode 100644 index 00000000..c1cfe87c --- /dev/null +++ b/autoware_ml/deployment/core/verification.py @@ -0,0 +1,178 @@ +""" +Unified model verification module. + +Provides utilities for verifying exported models against reference PyTorch outputs. +""" + +import logging +from typing import Dict, List, Optional + +import numpy as np +import torch + +from ..backends import BaseBackend, ONNXBackend, PyTorchBackend, TensorRTBackend + +DEFAULT_TOLERANCE = 1e-3 + + +def verify_model_outputs( + pytorch_model: torch.nn.Module, + test_inputs: Dict[str, torch.Tensor], + onnx_path: Optional[str] = None, + tensorrt_path: Optional[str] = None, + device: str = "cpu", + tolerance: float = DEFAULT_TOLERANCE, + logger: logging.Logger = None, +) -> Dict[str, bool]: + """ + Verify exported models against PyTorch reference. 
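+
+    For each test input, the PyTorch model provides the reference output;
+    each requested backend (ONNX and/or TensorRT) is run on the same input
+    and passes when the maximum absolute difference stays below ``tolerance``.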
+ + Args: + pytorch_model: Reference PyTorch model + test_inputs: Dictionary of test inputs (e.g., {'sample1': tensor1, ...}) + onnx_path: Optional path to ONNX model + tensorrt_path: Optional path to TensorRT engine + device: Device for PyTorch inference + tolerance: Maximum allowed difference + logger: Optional logger instance + + Returns: + Dictionary with verification results for each backend + """ + if logger is None: + logger = logging.getLogger(__name__) + + results = {} + + # Run PyTorch inference to get reference outputs + logger.info("=" * 60) + logger.info("Running verification...") + logger.info("=" * 60) + + pytorch_backend = PyTorchBackend(pytorch_model, device=device) + pytorch_backend.load_model() + + # Verify each backend + for sample_name, input_tensor in test_inputs.items(): + logger.info(f"\n{'='*60}") + logger.info(f"Verifying sample: {sample_name}") + logger.info(f"{'='*60}") + + # Get PyTorch reference + logger.info("Running PyTorch inference...") + pytorch_output, pytorch_latency = pytorch_backend.infer(input_tensor) + logger.info(f" PyTorch latency: {pytorch_latency:.2f} ms") + logger.info(f" PyTorch output: {pytorch_output}") + + # Verify ONNX + if onnx_path: + logger.info("\nVerifying ONNX model...") + onnx_success = _verify_backend( + ONNXBackend(onnx_path, device="cpu"), + input_tensor, + pytorch_output, + tolerance, + "ONNX", + logger, + ) + results[f"{sample_name}_onnx"] = onnx_success + + # Verify TensorRT + if tensorrt_path: + logger.info("\nVerifying TensorRT model...") + trt_success = _verify_backend( + TensorRTBackend(tensorrt_path, device="cuda"), + input_tensor, + pytorch_output, + tolerance, + "TensorRT", + logger, + ) + results[f"{sample_name}_tensorrt"] = trt_success + + logger.info(f"\n{'='*60}") + logger.info("Verification Summary") + logger.info(f"{'='*60}") + for key, success in results.items(): + status = "✓ PASSED" if success else "✗ FAILED" + logger.info(f" {key}: {status}") + logger.info(f"{'='*60}") + + return results + + +def _verify_backend( + backend: BaseBackend, + input_tensor: torch.Tensor, + reference_output: np.ndarray, + tolerance: float, + backend_name: str, + logger: logging.Logger, +) -> bool: + """ + Verify a single backend against reference output. + + Args: + backend: Backend instance to verify + input_tensor: Input tensor + reference_output: Reference output from PyTorch + tolerance: Maximum allowed difference + backend_name: Name of backend for logging + logger: Logger instance + + Returns: + True if verification passed + """ + try: + with backend: + output, latency = backend.infer(input_tensor) + + logger.info(f" {backend_name} latency: {latency:.2f} ms") + logger.info(f" {backend_name} output: {output}") + + # Compare outputs + max_diff = np.abs(reference_output - output).max() + mean_diff = np.abs(reference_output - output).mean() + + logger.info(f" Max difference: {max_diff:.6f}") + logger.info(f" Mean difference: {mean_diff:.6f}") + + if max_diff < tolerance: + logger.info(f" {backend_name} verification PASSED ✓") + return True + else: + logger.warning( + f" {backend_name} verification FAILED ✗ " f"(max diff: {max_diff:.6f} > tolerance: {tolerance:.6f})" + ) + return False + + except Exception as e: + logger.error(f" {backend_name} verification failed with error: {e}") + return False + + +def compare_outputs( + output1: np.ndarray, + output2: np.ndarray, + tolerance: float = DEFAULT_TOLERANCE, +) -> Dict[str, float]: + """ + Compare two model outputs and return difference statistics. 
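+
+    The returned dictionary contains ``max_diff``, ``mean_diff``,
+    ``median_diff``, ``std_diff`` and a ``passed`` flag indicating whether
+    ``max_diff`` is below ``tolerance``.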
+ + Args: + output1: First output array + output2: Second output array + tolerance: Tolerance for comparison + + Returns: + Dictionary with comparison statistics + """ + diff = np.abs(output1 - output2) + + return { + "max_diff": float(np.max(diff)), + "mean_diff": float(np.mean(diff)), + "median_diff": float(np.median(diff)), + "std_diff": float(np.std(diff)), + "passed": float(np.max(diff)) < tolerance, + } diff --git a/autoware_ml/deployment/exporters/__init__.py b/autoware_ml/deployment/exporters/__init__.py new file mode 100644 index 00000000..e9e1bbc9 --- /dev/null +++ b/autoware_ml/deployment/exporters/__init__.py @@ -0,0 +1,11 @@ +"""Model exporters for different backends.""" + +from .base_exporter import BaseExporter +from .onnx_exporter import ONNXExporter +from .tensorrt_exporter import TensorRTExporter + +__all__ = [ + "BaseExporter", + "ONNXExporter", + "TensorRTExporter", +] diff --git a/autoware_ml/deployment/exporters/base_exporter.py b/autoware_ml/deployment/exporters/base_exporter.py new file mode 100644 index 00000000..73a89026 --- /dev/null +++ b/autoware_ml/deployment/exporters/base_exporter.py @@ -0,0 +1,67 @@ +""" +Abstract base class for model exporters. + +Provides a unified interface for exporting models to different formats. +""" + +from abc import ABC, abstractmethod +from typing import Any, Dict + +import torch + + +class BaseExporter(ABC): + """ + Abstract base class for model exporters. + + This class defines a unified interface for exporting models + to different backend formats (ONNX, TensorRT, TorchScript, etc.). + """ + + def __init__(self, config: Dict[str, Any]): + """ + Initialize exporter. + + Args: + config: Configuration dictionary for export settings + """ + self.config = config + + @abstractmethod + def export( + self, + model: torch.nn.Module, + sample_input: torch.Tensor, + output_path: str, + ) -> bool: + """ + Export model to target format. + + Args: + model: PyTorch model to export + sample_input: Sample input tensor for tracing/shape inference + output_path: Path to save exported model + + Returns: + True if export succeeded, False otherwise + + Raises: + RuntimeError: If export fails + """ + pass + + def validate_export(self, output_path: str) -> bool: + """ + Validate that the exported model file is valid. + + Override this in subclasses to add format-specific validation. + + Args: + output_path: Path to exported model file + + Returns: + True if valid, False otherwise + """ + import os + + return os.path.exists(output_path) and os.path.getsize(output_path) > 0 diff --git a/autoware_ml/deployment/exporters/onnx_exporter.py b/autoware_ml/deployment/exporters/onnx_exporter.py new file mode 100644 index 00000000..67660c2e --- /dev/null +++ b/autoware_ml/deployment/exporters/onnx_exporter.py @@ -0,0 +1,121 @@ +"""ONNX model exporter.""" + +import logging +from typing import Any, Dict + +import onnx +import onnxsim +import torch + +from .base_exporter import BaseExporter + + +class ONNXExporter(BaseExporter): + """ + ONNX model exporter. + + Exports PyTorch models to ONNX format with optional simplification. + """ + + def __init__(self, config: Dict[str, Any], logger: logging.Logger = None): + """ + Initialize ONNX exporter. + + Args: + config: ONNX export configuration + logger: Optional logger instance + """ + super().__init__(config) + self.logger = logger or logging.getLogger(__name__) + + def export( + self, + model: torch.nn.Module, + sample_input: torch.Tensor, + output_path: str, + ) -> bool: + """ + Export model to ONNX format. 
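+
+        Export settings (opset version, input/output names, dynamic axes,
+        constant folding, optional onnxsim simplification) are read from
+        ``self.config``, with the defaults shown below used as fallbacks.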
+ + Args: + model: PyTorch model to export + sample_input: Sample input tensor + output_path: Path to save ONNX model + + Returns: + True if export succeeded + """ + model.eval() + + self.logger.info("Exporting model to ONNX format...") + self.logger.info(f" Input shape: {sample_input.shape}") + self.logger.info(f" Output path: {output_path}") + self.logger.info(f" Opset version: {self.config.get('opset_version', 16)}") + + try: + with torch.no_grad(): + torch.onnx.export( + model, + sample_input, + output_path, + export_params=self.config.get("export_params", True), + keep_initializers_as_inputs=self.config.get("keep_initializers_as_inputs", False), + opset_version=self.config.get("opset_version", 16), + do_constant_folding=self.config.get("do_constant_folding", True), + input_names=self.config.get("input_names", ["input"]), + output_names=self.config.get("output_names", ["output"]), + dynamic_axes=self.config.get("dynamic_axes"), + verbose=False, + ) + + self.logger.info(f"ONNX export completed: {output_path}") + + # Optional model simplification + if self.config.get("simplify", True): + self._simplify_model(output_path) + + return True + + except Exception as e: + self.logger.error(f"ONNX export failed: {e}") + return False + + def _simplify_model(self, onnx_path: str) -> None: + """ + Simplify ONNX model using onnxsim. + + Args: + onnx_path: Path to ONNX model file + """ + self.logger.info("Simplifying ONNX model...") + try: + model_simplified, success = onnxsim.simplify(onnx_path) + if success: + onnx.save(model_simplified, onnx_path) + self.logger.info(f"ONNX model simplified successfully") + else: + self.logger.warning("ONNX model simplification failed") + except Exception as e: + self.logger.warning(f"ONNX simplification error: {e}") + + def validate_export(self, output_path: str) -> bool: + """ + Validate ONNX model. + + Args: + output_path: Path to ONNX model file + + Returns: + True if valid + """ + if not super().validate_export(output_path): + return False + + try: + model = onnx.load(output_path) + onnx.checker.check_model(model) + self.logger.info("ONNX model validation passed") + return True + except Exception as e: + self.logger.error(f"ONNX model validation failed: {e}") + return False diff --git a/autoware_ml/deployment/exporters/tensorrt_exporter.py b/autoware_ml/deployment/exporters/tensorrt_exporter.py new file mode 100644 index 00000000..fc0c4d04 --- /dev/null +++ b/autoware_ml/deployment/exporters/tensorrt_exporter.py @@ -0,0 +1,157 @@ +"""TensorRT model exporter.""" + +import logging +from typing import Any, Dict + +import tensorrt as trt +import torch + +from .base_exporter import BaseExporter + + +class TensorRTExporter(BaseExporter): + """ + TensorRT model exporter. + + Converts ONNX models to TensorRT engine format with precision policy support. + """ + + def __init__(self, config: Dict[str, Any], logger: logging.Logger = None): + """ + Initialize TensorRT exporter. + + Args: + config: TensorRT export configuration + logger: Optional logger instance + """ + super().__init__(config) + self.logger = logger or logging.getLogger(__name__) + + def export( + self, + model: torch.nn.Module, # Not used for TensorRT, kept for interface compatibility + sample_input: torch.Tensor, + output_path: str, + onnx_path: str = None, + ) -> bool: + """ + Export ONNX model to TensorRT engine. 
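+
+        ``onnx_path`` is required. The precision policy and builder flags
+        are taken from ``self.config`` (``precision_policy`` and
+        ``policy_flags``), and the optimization profile is derived from
+        ``model_inputs`` or the sample input shape.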
+ + Args: + model: Not used (TensorRT converts from ONNX) + sample_input: Sample input for shape configuration + output_path: Path to save TensorRT engine + onnx_path: Path to source ONNX model + + Returns: + True if export succeeded + """ + if onnx_path is None: + self.logger.error("onnx_path is required for TensorRT export") + return False + + precision_policy = self.config.get("precision_policy", "auto") + policy_flags = self.config.get("policy_flags", {}) + + self.logger.info(f"Building TensorRT engine with precision policy: {precision_policy}") + self.logger.info(f" ONNX source: {onnx_path}") + self.logger.info(f" Engine output: {output_path}") + + # Initialize TensorRT + trt_logger = trt.Logger(trt.Logger.WARNING) + trt.init_libnvinfer_plugins(trt_logger, "") + + builder = trt.Builder(trt_logger) + builder_config = builder.create_builder_config() + + max_workspace_size = self.config.get("max_workspace_size", 1 << 30) + builder_config.set_memory_pool_limit(pool=trt.MemoryPoolType.WORKSPACE, pool_size=max_workspace_size) + + # Create network with appropriate flags + flags = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + + # Handle strongly typed flag (network creation flag) + if policy_flags.get("STRONGLY_TYPED"): + flags |= 1 << int(trt.NetworkDefinitionCreationFlag.STRONGLY_TYPED) + self.logger.info("Using strongly typed TensorRT network creation") + + network = builder.create_network(flags) + + # Apply precision flags to builder config + for flag_name, enabled in policy_flags.items(): + if flag_name == "STRONGLY_TYPED": + continue # Already handled + if enabled and hasattr(trt.BuilderFlag, flag_name): + builder_config.set_flag(getattr(trt.BuilderFlag, flag_name)) + self.logger.info(f"BuilderFlag.{flag_name} enabled") + + # Setup optimization profile + profile = builder.create_optimization_profile() + self._configure_input_shapes(profile, sample_input) + builder_config.add_optimization_profile(profile) + + # Parse ONNX model + parser = trt.OnnxParser(network, trt_logger) + + try: + with open(onnx_path, "rb") as f: + if not parser.parse(f.read()): + self._log_parser_errors(parser) + return False + self.logger.info("Successfully parsed ONNX file") + + # Build engine + self.logger.info("Building TensorRT engine (this may take a while)...") + serialized_engine = builder.build_serialized_network(network, builder_config) + + if serialized_engine is None: + self.logger.error("Failed to build TensorRT engine") + return False + + # Save engine + with open(output_path, "wb") as f: + f.write(serialized_engine) + + self.logger.info(f"TensorRT engine saved to {output_path}") + self.logger.info(f"Engine max workspace size: {max_workspace_size / (1024**3):.2f} GB") + + return True + + except Exception as e: + self.logger.error(f"TensorRT export failed: {e}") + return False + + def _configure_input_shapes( + self, + profile: trt.IOptimizationProfile, + sample_input: torch.Tensor, + ) -> None: + """ + Configure input shapes for TensorRT optimization profile. 
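+
+        Shapes are taken from ``model_inputs`` in the config when present;
+        otherwise the sample input's shape is used for min/opt/max.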
+ + Args: + profile: TensorRT optimization profile + sample_input: Sample input tensor + """ + model_inputs = self.config.get("model_inputs", []) + + if model_inputs: + input_shapes = model_inputs[0].get("input_shapes", {}) + for input_name, shapes in input_shapes.items(): + min_shape = shapes.get("min_shape", list(sample_input.shape)) + opt_shape = shapes.get("opt_shape", list(sample_input.shape)) + max_shape = shapes.get("max_shape", list(sample_input.shape)) + + self.logger.info(f"Setting input shapes - min: {min_shape}, " f"opt: {opt_shape}, max: {max_shape}") + profile.set_shape(input_name, min_shape, opt_shape, max_shape) + else: + # Default shapes based on input tensor + input_shape = list(sample_input.shape) + self.logger.info(f"Using default input shape: {input_shape}") + profile.set_shape("input", input_shape, input_shape, input_shape) + + def _log_parser_errors(self, parser: trt.OnnxParser) -> None: + """Log TensorRT parser errors.""" + self.logger.error("Failed to parse ONNX model") + for error in range(parser.num_errors): + self.logger.error(f"Parser error: {parser.get_error(error)}") diff --git a/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py b/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py index 0c4673d8..9aa85cb7 100644 --- a/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py +++ b/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py @@ -8,7 +8,7 @@ # Export Configuration # ============================================================================== export = dict( - mode="both", # Export mode: "onnx", "trt", "both", or "none" + mode="none", # Export mode: "onnx", "trt", "both", or "none" # - "onnx": Export to ONNX only # - "trt": Convert to TensorRT only (requires onnx_file in runtime_io) # - "both": Export to ONNX then convert to TensorRT @@ -38,11 +38,12 @@ enabled=True, # Enable full model evaluation (set to True to run evaluation) num_samples=10, # Number of samples to evaluate from info.pkl verbose=True, # Enable verbose logging showing per-sample results - # Optional: Specify models to evaluate (if None, uses exported models from work_dir) - onnx_model="/workspace/work_dirs/end2end.onnx", # Path to ONNX model file to evaluate (e.g., "/path/to/model.onnx") - tensorrt_model="/workspace/work_dirs/end2end.engine", # Path to TensorRT engine file to evaluate (e.g., "/path/to/model.engine") - # Note: If models are None, will automatically detect and evaluate exported models - # Note: Command line args (--evaluate, --num-samples, --verbose) override these settings + # Specify models to evaluate + models=dict( + onnx="/workspace/work_dirs/end2end.onnx", # Path to ONNX model file + tensorrt="/workspace/work_dirs/end2end.engine", # Path to TensorRT engine file + # pytorch="/workspace/work_dirs/best_accuracy_top1_epoch_28.pth", # Optional: PyTorch checkpoint + ), ) # ============================================================================== diff --git a/projects/CalibrationStatusClassification/deploy/__init__.py b/projects/CalibrationStatusClassification/deploy/__init__.py index 38b7c326..d2c31034 100644 --- a/projects/CalibrationStatusClassification/deploy/__init__.py +++ b/projects/CalibrationStatusClassification/deploy/__init__.py @@ -1,21 +1,21 @@ """ -Calibration Status Classification Model Deployment Package +Calibration Status Classification Model Deployment Package (Refactored) This package provides utilities for exporting, verifying, and evaluating 
-CalibrationStatusClassification models in ONNX and TensorRT formats. +CalibrationStatusClassification models in ONNX and TensorRT formats using +the unified deployment framework. """ -from .config import DeploymentConfig, parse_args, setup_logging -from .evaluator import evaluate_exported_model, print_evaluation_results -from .exporters import export_to_onnx, export_to_tensorrt, run_verification +from .data_loader import CalibrationDataLoader +from .evaluator import ( + ClassificationEvaluator, + get_models_to_evaluate, + run_full_evaluation, +) __all__ = [ - "DeploymentConfig", - "parse_args", - "setup_logging", - "export_to_onnx", - "export_to_tensorrt", - "run_verification", - "evaluate_exported_model", - "print_evaluation_results", + "CalibrationDataLoader", + "ClassificationEvaluator", + "get_models_to_evaluate", + "run_full_evaluation", ] diff --git a/projects/CalibrationStatusClassification/deploy/config.py b/projects/CalibrationStatusClassification/deploy/config.py deleted file mode 100644 index e0071066..00000000 --- a/projects/CalibrationStatusClassification/deploy/config.py +++ /dev/null @@ -1,161 +0,0 @@ -""" -Configuration classes and constants for deployment. -""" - -import argparse -import logging -import os -import os.path as osp -from typing import Dict - -from mmengine.config import Config - -# Constants -DEFAULT_VERIFICATION_TOLERANCE = 1e-3 -DEFAULT_WORKSPACE_SIZE = 1 << 30 # 1 GB -EXPECTED_CHANNELS = 5 # RGB + Depth + Intensity -LABELS = {"0": "miscalibrated", "1": "calibrated"} - -# Precision policy mapping -PRECISION_POLICIES = { - "auto": {}, # No special flags, TensorRT decides - "fp16": {"FP16": True}, - "fp32_tf32": {"TF32": True}, # TF32 for FP32 operations - "explicit_int8": {"INT8": True}, - "strongly_typed": {"STRONGLY_TYPED": True}, # Network creation flag -} - - -class DeploymentConfig: - """Configuration container for deployment settings.""" - - def __init__(self, deploy_cfg: Config): - self.deploy_cfg = deploy_cfg - self._validate_config() - - def _validate_config(self) -> None: - """Validate configuration structure and required fields.""" - if "export" not in self.deploy_cfg: - raise ValueError( - "Missing 'export' section in deploy config. " - "Please update your config to the new format with 'export', 'runtime_io' sections." - ) - - if "runtime_io" not in self.deploy_cfg: - raise ValueError("Missing 'runtime_io' section in deploy config.") - - # Validate export mode - valid_modes = ["onnx", "trt", "both", "none"] - mode = self.export_config.get("mode", "both") - if mode not in valid_modes: - raise ValueError(f"Invalid export mode '{mode}'. Must be one of {valid_modes}") - - # Validate precision policy if present - backend_cfg = self.deploy_cfg.get("backend_config", {}) - common_cfg = backend_cfg.get("common_config", {}) - precision_policy = common_cfg.get("precision_policy", "auto") - if precision_policy not in PRECISION_POLICIES: - raise ValueError( - f"Invalid precision_policy '{precision_policy}'. 
" f"Must be one of {list(PRECISION_POLICIES.keys())}" - ) - - @property - def export_config(self) -> Dict: - """Get export configuration (mode, verify, device, work_dir).""" - return self.deploy_cfg.get("export", {}) - - @property - def runtime_io_config(self) -> Dict: - """Get runtime I/O configuration (info_pkl, sample_idx, onnx_file).""" - return self.deploy_cfg.get("runtime_io", {}) - - @property - def evaluation_config(self) -> Dict: - """Get evaluation configuration (enabled, num_samples, verbose, model paths).""" - return self.deploy_cfg.get("evaluation", {}) - - @property - def should_export_onnx(self) -> bool: - """Check if ONNX export is requested.""" - mode = self.export_config.get("mode", "both") - return mode in ["onnx", "both"] - - @property - def should_export_tensorrt(self) -> bool: - """Check if TensorRT export is requested.""" - mode = self.export_config.get("mode", "both") - return mode in ["trt", "both"] - - @property - def should_verify(self) -> bool: - """Check if verification is requested.""" - return self.export_config.get("verify", False) - - @property - def device(self) -> str: - """Get device for export.""" - return self.export_config.get("device", "cuda:0") - - @property - def work_dir(self) -> str: - """Get working directory.""" - return self.export_config.get("work_dir", os.getcwd()) - - @property - def onnx_settings(self) -> Dict: - """Get ONNX export settings.""" - onnx_config = self.deploy_cfg.get("onnx_config", {}) - return { - "opset_version": onnx_config.get("opset_version", 16), - "do_constant_folding": onnx_config.get("do_constant_folding", True), - "input_names": onnx_config.get("input_names", ["input"]), - "output_names": onnx_config.get("output_names", ["output"]), - "dynamic_axes": onnx_config.get("dynamic_axes"), - "export_params": onnx_config.get("export_params", True), - "keep_initializers_as_inputs": onnx_config.get("keep_initializers_as_inputs", False), - "save_file": onnx_config.get("save_file", "calibration_classifier.onnx"), - } - - @property - def tensorrt_settings(self) -> Dict: - """Get TensorRT export settings with precision policy support.""" - backend_config = self.deploy_cfg.get("backend_config", {}) - common_config = backend_config.get("common_config", {}) - - # Get precision policy - precision_policy = common_config.get("precision_policy", "auto") - policy_flags = PRECISION_POLICIES.get(precision_policy, {}) - - return { - "max_workspace_size": common_config.get("max_workspace_size", DEFAULT_WORKSPACE_SIZE), - "precision_policy": precision_policy, - "policy_flags": policy_flags, - "model_inputs": backend_config.get("model_inputs", []), - } - - -def parse_args() -> argparse.Namespace: - """Parse command line arguments.""" - parser = argparse.ArgumentParser( - description="Export CalibrationStatusClassification model to ONNX/TensorRT.", - formatter_class=argparse.ArgumentDefaultsHelpFormatter, - ) - parser.add_argument("deploy_cfg", help="Deploy config path") - parser.add_argument("model_cfg", help="Model config path") - parser.add_argument( - "checkpoint", nargs="?", default=None, help="Model checkpoint path (optional when mode='none')" - ) - - # Optional overrides - parser.add_argument("--work-dir", help="Override output directory from config") - parser.add_argument("--device", help="Override device from config") - parser.add_argument("--log-level", default="INFO", choices=list(logging._nameToLevel.keys()), help="Logging level") - parser.add_argument("--info-pkl", help="Override info.pkl path from config") - - return 
parser.parse_args() - - -def setup_logging(level: str) -> logging.Logger: - """Setup logging configuration.""" - logging.basicConfig(level=getattr(logging, level), format="%(levelname)s:%(name)s:%(message)s") - return logging.getLogger("mmdeploy") diff --git a/projects/CalibrationStatusClassification/deploy/data_loader.py b/projects/CalibrationStatusClassification/deploy/data_loader.py new file mode 100644 index 00000000..57e344e1 --- /dev/null +++ b/projects/CalibrationStatusClassification/deploy/data_loader.py @@ -0,0 +1,183 @@ +""" +CalibrationStatusClassification DataLoader for deployment. + +This module implements the BaseDataLoader interface for loading and preprocessing +calibration status classification data from info.pkl files. +""" + +import os +import pickle +from typing import Any, Dict, Optional + +import torch +from mmengine.config import Config + +from autoware_ml.calibration_classification.datasets.transforms.calibration_classification_transform import ( + CalibrationClassificationTransform, +) +from autoware_ml.deployment.core import BaseDataLoader + + +class CalibrationDataLoader(BaseDataLoader): + """ + DataLoader for CalibrationStatusClassification task. + + Loads samples from info.pkl files and preprocesses them using + CalibrationClassificationTransform. + """ + + def __init__( + self, + info_pkl_path: str, + model_cfg: Config, + miscalibration_probability: float = 0.0, + device: str = "cpu", + ): + """ + Initialize CalibrationDataLoader. + + Args: + info_pkl_path: Path to info.pkl file containing samples + model_cfg: Model configuration containing transform settings + miscalibration_probability: Probability of loading miscalibrated sample (0.0 or 1.0) + device: Device to load tensors on + """ + super().__init__( + config={ + "info_pkl_path": info_pkl_path, + "miscalibration_probability": miscalibration_probability, + "device": device, + } + ) + + self.info_pkl_path = info_pkl_path + self.model_cfg = model_cfg + self.miscalibration_probability = miscalibration_probability + self.device = device + + # Load samples list + self._samples_list = self._load_info_pkl_file() + + # Create transform + self._transform = self._create_transform() + + def _load_info_pkl_file(self) -> list: + """ + Load and parse info.pkl file. + + Returns: + List of samples from data_list + """ + if not os.path.exists(self.info_pkl_path): + raise FileNotFoundError(f"Info.pkl file not found: {self.info_pkl_path}") + + try: + with open(self.info_pkl_path, "rb") as f: + info_data = pickle.load(f) + except Exception as e: + raise ValueError(f"Failed to load info.pkl file: {e}") + + # Extract samples from info.pkl + if isinstance(info_data, dict): + if "data_list" in info_data: + samples_list = info_data["data_list"] + else: + raise ValueError(f"Expected 'data_list' key in info_data, " f"found keys: {list(info_data.keys())}") + else: + raise ValueError(f"Expected dict format, got {type(info_data)}") + + if not samples_list: + raise ValueError("No samples found in info.pkl") + + return samples_list + + def _create_transform(self) -> CalibrationClassificationTransform: + """ + Create CalibrationClassificationTransform with model configuration. 
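+
+        The transform runs in ``test`` mode with augmentation disabled;
+        ``data_root`` and ``transform_config`` must be present in the model
+        configuration.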
+ + Returns: + Configured transform instance + """ + data_root = self.model_cfg.get("data_root") + if data_root is None: + raise ValueError("data_root not found in model configuration") + + transform_config = self.model_cfg.get("transform_config") + if transform_config is None: + raise ValueError("transform_config not found in model configuration") + + return CalibrationClassificationTransform( + transform_config=transform_config, + mode="test", + max_depth=self.model_cfg.get("max_depth", 128.0), + dilation_size=self.model_cfg.get("dilation_size", 1), + undistort=True, + miscalibration_probability=self.miscalibration_probability, + enable_augmentation=False, + data_root=data_root, + projection_vis_dir=self.model_cfg.get("test_projection_vis_dir", None), + results_vis_dir=self.model_cfg.get("test_results_vis_dir", None), + binary_save_dir=self.model_cfg.get("binary_save_dir", None), + ) + + def load_sample(self, index: int) -> Dict[str, Any]: + """ + Load a single sample from info.pkl. + + Args: + index: Sample index to load + + Returns: + Sample dictionary with 'image', 'lidar_points', etc. + """ + if index >= len(self._samples_list): + raise IndexError(f"Sample index {index} out of range (0-{len(self._samples_list)-1})") + + sample = self._samples_list[index] + + # Validate sample structure + required_keys = ["image", "lidar_points"] + if not all(key in sample for key in required_keys): + raise ValueError(f"Sample {index} has invalid structure. " f"Required keys: {required_keys}") + + return sample + + def preprocess(self, sample: Dict[str, Any]) -> torch.Tensor: + """ + Preprocess sample using CalibrationClassificationTransform. + + Args: + sample: Raw sample data from load_sample() + + Returns: + Preprocessed tensor with shape (1, C, H, W) + """ + # Apply transform + results = self._transform.transform(sample) + input_data_processed = results["fused_img"] # (H, W, 5) + + # Convert numpy array (H, W, C) to tensor (1, C, H, W) + tensor = torch.from_numpy(input_data_processed).permute(2, 0, 1).float() + return tensor.unsqueeze(0).to(self.device) + + def get_num_samples(self) -> int: + """ + Get total number of samples. + + Returns: + Number of samples in info.pkl + """ + return len(self._samples_list) + + def validate_sample(self, sample: Dict[str, Any]) -> bool: + """ + Validate sample structure. + + Args: + sample: Sample to validate + + Returns: + True if valid + """ + required_keys = ["image", "lidar_points"] + return all(key in sample for key in required_keys) diff --git a/projects/CalibrationStatusClassification/deploy/evaluator.py b/projects/CalibrationStatusClassification/deploy/evaluator.py index efc7dd45..951f9aa2 100644 --- a/projects/CalibrationStatusClassification/deploy/evaluator.py +++ b/projects/CalibrationStatusClassification/deploy/evaluator.py @@ -1,386 +1,340 @@ """ -Model evaluation utilities for exported models. +Classification Evaluator for CalibrationStatusClassification. + +This module implements the BaseEvaluator interface for evaluating +calibration status classification models. 
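+
+Example (illustrative sketch; ``model_cfg`` is an mmengine Config,
+``data_loader`` is a CalibrationDataLoader built elsewhere, and the model
+path is a placeholder):
+
+    evaluator = ClassificationEvaluator(model_cfg)
+    results = evaluator.evaluate(
+        model_path="end2end.onnx",
+        data_loader=data_loader,
+        num_samples=10,
+        backend="onnx",
+    )
+    evaluator.print_results(results)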
""" import gc import logging -import os.path as osp -import time -from typing import Any, Tuple +from typing import Any, Dict import numpy as np -import onnxruntime as ort -import pycuda.autoinit -import pycuda.driver as cuda -import tensorrt as trt import torch -import torch.nn.functional as F -from config import LABELS -from exporters import ( - _clear_gpu_memory, - _create_transform_with_config, - _load_info_pkl_file, - _numpy_to_tensor, - _run_tensorrt_inference, - load_info_pkl_data, -) from mmengine.config import Config +from mmpretrain.apis import get_model +from autoware_ml.deployment.backends import ONNXBackend, PyTorchBackend, TensorRTBackend +from autoware_ml.deployment.core import BaseEvaluator -def _load_samples_from_info_pkl(info_pkl_path: str, num_samples: int, logger: logging.Logger) -> list: - """Load and validate samples from info.pkl file.""" - try: - samples_list = _load_info_pkl_file(info_pkl_path) - logger.info(f"Loaded {len(samples_list)} samples from info.pkl") - - # Limit number of samples - num_samples = min(num_samples, len(samples_list)) - logger.info(f"Evaluating {num_samples} samples") - - return samples_list[:num_samples] - - except Exception as e: - logger.error(f"Failed to load info.pkl: {e}") - raise - - -def _create_transform(model_cfg: Config): - """Create and configure CalibrationClassificationTransform for evaluation.""" - return _create_transform_with_config( - model_cfg=model_cfg, - miscalibration_probability=0.0, - projection_vis_dir=None, - results_vis_dir=None, - binary_save_dir=None, - ) - - -def _create_onnx_inference_func(model_path: str, device: str, logger: logging.Logger): - """Create ONNX inference function with configured providers.""" - logger.info(f"Using ONNX model: {model_path}") - logger.info("Creating ONNX InferenceSession...") - - # Configure execution providers - if device.startswith("cuda"): - providers = ["CUDAExecutionProvider", "CPUExecutionProvider"] - logger.info("Attempting to use CUDA acceleration for ONNX inference...") - try: - available_providers = ort.get_available_providers() - if "CUDAExecutionProvider" not in available_providers: - logger.warning( - f"CUDAExecutionProvider not available. Available: {available_providers}. " - "Install onnxruntime-gpu for CUDA acceleration" - ) - providers = ["CPUExecutionProvider"] - else: - logger.info("CUDAExecutionProvider is available") - except Exception as e: - logger.warning(f"Error checking CUDA provider: {e}. 
Falling back to CPU") - providers = ["CPUExecutionProvider"] - else: - providers = ["CPUExecutionProvider"] - logger.info("Using CPU for ONNX inference") - - # Create session - ort_session = ort.InferenceSession(model_path, providers=providers) - logger.info(f"ONNX session using providers: {ort_session.get_providers()}") - - # Return inference function - def inference_func(input_tensor): - input_name = ort_session.get_inputs()[0].name - onnx_input = {input_name: input_tensor.cpu().numpy()} - start_time = time.perf_counter() - onnx_output = ort_session.run(None, onnx_input)[0] - latency = (time.perf_counter() - start_time) * 1000 - return onnx_output, latency - - return inference_func - - -def _create_tensorrt_inference_func(model_path: str, logger: logging.Logger): - """Create TensorRT inference function with loaded engine.""" - logger.info(f"Using TensorRT model: {model_path}") - - # Load TensorRT engine - trt_logger = trt.Logger(trt.Logger.WARNING) - trt.init_libnvinfer_plugins(trt_logger, "") - runtime = trt.Runtime(trt_logger) +from .data_loader import CalibrationDataLoader - with open(model_path, "rb") as f: - engine = runtime.deserialize_cuda_engine(f.read()) +# Label mapping +LABELS = {"0": "miscalibrated", "1": "calibrated"} - if engine is None: - raise RuntimeError("Failed to deserialize TensorRT engine") +# Constants for evaluation +LOG_INTERVAL = 100 # Log progress every N samples +GPU_CLEANUP_INTERVAL = 10 # Clear GPU memory every N samples for TensorRT - # Return inference function - def inference_func(input_tensor): - return _run_tensorrt_inference(engine, input_tensor.cpu(), logger) - return inference_func +class ClassificationEvaluator(BaseEvaluator): + """ + Evaluator for classification tasks. + Computes accuracy, per-class metrics, confusion matrix, and latency statistics. + """ -def _process_single_sample( - sample_idx: int, - info_pkl_path: str, - transform, - inference_func, - device: str, - logger: logging.Logger, - verbose: bool = False, -) -> Tuple[list, list, list, list]: - """Process a single sample with both calibrated and miscalibrated versions. + def __init__(self, model_cfg: Config): + """ + Initialize classification evaluator. + + Args: + model_cfg: Model configuration + """ + super().__init__(config={}) + self.model_cfg = model_cfg + + def evaluate( + self, + model_path: str, + data_loader: CalibrationDataLoader, + num_samples: int, + backend: str = "pytorch", + device: str = "cpu", + verbose: bool = False, + ) -> Dict[str, Any]: + """ + Run full evaluation on a model. 
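+
+        Each sample is evaluated twice: once through the calibrated data
+        loader (ground truth 1) and once through a miscalibrated copy
+        (ground truth 0), so both classes contribute to the metrics.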
+ + Args: + model_path: Path to model checkpoint/weights + data_loader: DataLoader for loading samples (calibrated version) + num_samples: Number of samples to evaluate + backend: Backend to use ('pytorch', 'onnx', 'tensorrt') + device: Device to run inference on + verbose: Whether to print detailed progress + + Returns: + Dictionary containing evaluation metrics + """ + logger = logging.getLogger(__name__) + logger.info(f"\nEvaluating {backend.upper()} model: {model_path}") + logger.info(f"Number of samples: {num_samples}") - Returns: - Tuple of (predictions, ground_truth, probabilities, latencies) for this sample - """ - predictions = [] - ground_truth = [] - probabilities = [] - latencies = [] - - # Test both calibrated and miscalibrated versions - for miscalibration_prob, expected_label in [(0.0, 1), (1.0, 0)]: - # Update transform's miscalibration probability - transform.miscalibration_probability = miscalibration_prob - - # Load and preprocess sample - sample_data = load_info_pkl_data(info_pkl_path, sample_idx) - sample_data["sample_idx"] = sample_idx - - results = transform.transform(sample_data) - input_data_processed = results["fused_img"] - gt_label = results["gt_label"] - - # Convert to tensor - input_tensor = _numpy_to_tensor(input_data_processed, device) - - # Run inference - output_np, latency = inference_func(input_tensor) - - if output_np is None: - logger.error(f"Failed to get output for sample {sample_idx}") - continue - - # Convert logits to probabilities - logits = torch.from_numpy(output_np) - probs = F.softmax(logits, dim=-1) - predicted_class = torch.argmax(probs, dim=-1).item() - confidence = probs.max().item() - - # Store results - predictions.append(predicted_class) - ground_truth.append(gt_label) - probabilities.append(probs.cpu().numpy()) - latencies.append(latency) - - # Print sample results only in verbose mode - if verbose: - logger.info( - f"Sample {sample_idx + 1} (GT={gt_label}): " - f"Pred={predicted_class}, Confidence={confidence:.4f}, Latency={latency:.2f}ms" - ) + # Limit num_samples to available data + total_samples = data_loader.get_num_samples() + num_samples = min(num_samples, total_samples) - return predictions, ground_truth, probabilities, latencies + # Create backend + inference_backend = self._create_backend(backend, model_path, device, logger) + # Create data loaders for both calibrated and miscalibrated versions + # This avoids reinitializing the transform for each sample + data_loader_miscalibrated = CalibrationDataLoader( + info_pkl_path=data_loader.info_pkl_path, + model_cfg=data_loader.model_cfg, + miscalibration_probability=1.0, + device=device, + ) + data_loader_calibrated = data_loader # Already calibrated (prob=0.0) + + # Run inference on all samples + predictions = [] + ground_truths = [] + probabilities = [] + latencies = [] + + with inference_backend: + for idx in range(num_samples): + if idx % LOG_INTERVAL == 0: + logger.info(f"Processing sample {idx + 1}/{num_samples}") + + try: + # Process both calibrated and miscalibrated versions + pred, gt, prob, lat = self._process_sample( + idx, data_loader_calibrated, data_loader_miscalibrated, inference_backend, verbose, logger + ) + + predictions.extend(pred) + ground_truths.extend(gt) + probabilities.extend(prob) + latencies.extend(lat) + + # Clear GPU memory periodically for TensorRT + if backend == "tensorrt" and idx % GPU_CLEANUP_INTERVAL == 0: + self._clear_gpu_memory() + + except Exception as e: + logger.error(f"Error processing sample {idx}: {e}") + continue + + # Convert to numpy 
arrays + predictions = np.array(predictions) + ground_truths = np.array(ground_truths) + probabilities = np.array(probabilities) + latencies = np.array(latencies) + + # Compute metrics + results = self._compute_metrics(predictions, ground_truths, probabilities, latencies) + results["backend"] = backend + results["num_samples"] = len(predictions) + + return results + + def _create_backend( + self, + backend: str, + model_path: str, + device: str, + logger: logging.Logger, + ): + """Create appropriate backend instance.""" + if backend == "pytorch": + # Load PyTorch model + logger.info(f"Loading PyTorch model from {model_path}") + model = get_model(self.model_cfg, model_path, device=device) + return PyTorchBackend(model, device=device) + elif backend == "onnx": + logger.info(f"Loading ONNX model from {model_path}") + return ONNXBackend(model_path, device=device) + elif backend == "tensorrt": + logger.info(f"Loading TensorRT engine from {model_path}") + return TensorRTBackend(model_path, device="cuda") + else: + raise ValueError(f"Unsupported backend: {backend}") + + def _process_sample( + self, + sample_idx: int, + data_loader_calibrated: CalibrationDataLoader, + data_loader_miscalibrated: CalibrationDataLoader, + backend, + verbose: bool, + logger: logging.Logger, + ): + """ + Process a single sample with both calibrated and miscalibrated versions. + + Args: + sample_idx: Index of sample to process + data_loader_calibrated: DataLoader for calibrated samples + data_loader_miscalibrated: DataLoader for miscalibrated samples + backend: Inference backend + verbose: Verbose logging + logger: Logger instance + + Returns: + Tuple of (predictions, ground_truths, probabilities, latencies) + """ + predictions = [] + ground_truths = [] + probabilities = [] + latencies = [] + + # Process both miscalibrated (0) and calibrated (1) versions + for gt_label, loader in [(0, data_loader_miscalibrated), (1, data_loader_calibrated)]: + # Load and preprocess using pre-created data loader + input_tensor = loader.load_and_preprocess(sample_idx) + + # Run inference + output, latency = backend.infer(input_tensor) + + # Get prediction + if output.shape[-1] == 2: # Binary classification + predicted_label = int(np.argmax(output[0])) + prob_scores = output[0] + else: + raise ValueError(f"Unexpected output shape: {output.shape}") + + predictions.append(predicted_label) + ground_truths.append(gt_label) + probabilities.append(prob_scores) + latencies.append(latency) + + if verbose: + logger.info( + f" Sample {sample_idx}, GT: {LABELS[str(gt_label)]}, " + f"Pred: {LABELS[str(predicted_label)]}, " + f"Scores: {prob_scores}, Latency: {latency:.2f}ms" + ) -def evaluate_exported_model( - model_path: str, - model_type: str, - model_cfg: Config, - info_pkl_path: str, - logger: logging.Logger, - device: str = "cpu", - num_samples: int = 10, - verbose: bool = False, -) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: + return predictions, ground_truths, probabilities, latencies + + def _compute_metrics( + self, + predictions: np.ndarray, + ground_truths: np.ndarray, + probabilities: np.ndarray, + latencies: np.ndarray, + ) -> Dict[str, Any]: + """Compute all evaluation metrics.""" + if len(predictions) == 0: + return {"accuracy": 0.0, "error": "No samples were processed successfully"} + + # Overall accuracy + correct = (predictions == ground_truths).sum() + accuracy = correct / len(predictions) + + # Per-class metrics + per_class_acc = {} + per_class_count = {} + for cls in np.unique(ground_truths): + mask = ground_truths 
== cls + cls_correct = (predictions[mask] == ground_truths[mask]).sum() + cls_total = mask.sum() + per_class_acc[int(cls)] = cls_correct / cls_total if cls_total > 0 else 0.0 + per_class_count[int(cls)] = int(cls_total) + + # Confusion matrix + num_classes = len(np.unique(ground_truths)) + confusion_matrix = np.zeros((num_classes, num_classes), dtype=int) + for gt, pred in zip(ground_truths, predictions): + confusion_matrix[int(gt), int(pred)] += 1 + + # Latency statistics + latency_stats = self.compute_latency_stats(latencies.tolist()) + + return { + "accuracy": float(accuracy), + "correct_predictions": int(correct), + "total_samples": len(predictions), + "per_class_accuracy": per_class_acc, + "per_class_count": per_class_count, + "confusion_matrix": confusion_matrix.tolist(), + "latency_stats": latency_stats, + } + + def print_results(self, results: Dict[str, Any]) -> None: + """ + Pretty print evaluation results. + + Args: + results: Results dictionary from evaluate() + """ + logger = logging.getLogger(__name__) + + if "error" in results: + logger.error(f"Evaluation error: {results['error']}") + return + + backend = results.get("backend", "unknown") + + logger.info(f"\n{'='*70}") + logger.info(f"{backend.upper()} Model Evaluation Results") + logger.info(f"{'='*70}") + + # Overall metrics + logger.info(f"Total samples: {results['total_samples']}") + logger.info(f"Correct predictions: {results['correct_predictions']}") + logger.info(f"Accuracy: {results['accuracy']:.4f} ({results['accuracy']*100:.2f}%)") + + # Latency + logger.info(f"\n{self.format_latency_stats(results['latency_stats'])}") + + # Per-class accuracy + logger.info(f"\nPer-class accuracy:") + for cls, acc in results["per_class_accuracy"].items(): + count = results["per_class_count"][cls] + label = LABELS[str(cls)] + logger.info(f" Class {cls} ({label}): {acc:.4f} ({acc*100:.2f}%) - {count} samples") + + # Confusion matrix + logger.info(f"\nConfusion Matrix:") + cm = np.array(results["confusion_matrix"]) + logger.info(f" Predicted →") + logger.info(f" GT ↓ {' '.join([f'{i:>8}' for i in range(len(cm))])}") + for i, row in enumerate(cm): + logger.info(f" {i:>8} {' '.join([f'{val:>8}' for val in row])}") + + logger.info(f"{'='*70}\n") + + def _clear_gpu_memory(self) -> None: + """Clear GPU cache and run garbage collection.""" + if torch.cuda.is_available(): + torch.cuda.empty_cache() + gc.collect() + + +def get_models_to_evaluate(eval_cfg: Dict[str, Any], logger: logging.Logger) -> list: """ - Evaluate exported model (ONNX or TensorRT) using info.pkl data. + Get list of models to evaluate from config. 
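+
+    Only entries under ``evaluation.models`` whose files exist on disk are
+    returned as ``(backend, path)`` tuples; missing files are logged and
+    skipped.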
Args: - model_path: Path to model file (.onnx or .engine) - model_type: Type of model ("onnx" or "tensorrt") - model_cfg: Model configuration - info_pkl_path: Path to info.pkl file + eval_cfg: Evaluation configuration logger: Logger instance - device: Device for preprocessing - num_samples: Number of samples to evaluate - verbose: Enable verbose logging Returns: - Tuple of (predictions, ground_truth, probabilities, latencies) - """ - # Load samples - samples_list = _load_samples_from_info_pkl(info_pkl_path, num_samples, logger) - - # Create transform - transform = _create_transform(model_cfg) - - # Create inference function based on model type - if model_type == "onnx": - inference_func = _create_onnx_inference_func(model_path, device, logger) - elif model_type == "tensorrt": - inference_func = _create_tensorrt_inference_func(model_path, logger) - else: - raise ValueError(f"Unsupported model type: {model_type}") - - # Process all samples - all_predictions = [] - all_ground_truth = [] - all_probabilities = [] - all_latencies = [] - - for sample_idx in range(len(samples_list)): - if sample_idx % 100 == 0: - logger.info(f"Processing sample {sample_idx + 1}/{len(samples_list)}") - - try: - predictions, ground_truth, probabilities, latencies = _process_single_sample( - sample_idx=sample_idx, - info_pkl_path=info_pkl_path, - transform=transform, - inference_func=inference_func, - device=device, - logger=logger, - verbose=verbose, - ) - - all_predictions.extend(predictions) - all_ground_truth.extend(ground_truth) - all_probabilities.extend(probabilities) - all_latencies.extend(latencies) - - # Clear GPU memory periodically - if model_type == "tensorrt" and sample_idx % 10 == 0: - _clear_gpu_memory() - - except Exception as e: - logger.error(f"Error processing sample {sample_idx}: {e}") - continue - - return ( - np.array(all_predictions), - np.array(all_ground_truth), - np.array(all_probabilities), - np.array(all_latencies), - ) - - -def print_evaluation_results( - all_predictions: np.ndarray, - all_ground_truth: np.ndarray, - all_probabilities: np.ndarray, - all_latencies: np.ndarray, - model_type: str, - logger: logging.Logger, -) -> None: - """Print comprehensive evaluation results with metrics and statistics.""" - - if len(all_predictions) == 0: - logger.error("No samples were processed successfully.") - return - - correct_predictions = (all_predictions == all_ground_truth).sum() - total_samples = len(all_predictions) - accuracy = correct_predictions / total_samples - avg_latency = np.mean(all_latencies) - - # Print header - logger.info(f"\n{'='*60}") - logger.info(f"{model_type.upper()} Model Evaluation Results") - logger.info(f"{'='*60}") - logger.info(f"Total samples: {total_samples}") - logger.info(f"Correct predictions: {correct_predictions}") - logger.info(f"Accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)") - logger.info(f"Average latency: {avg_latency:.2f} ms") - - # Calculate per-class accuracy - unique_classes = np.unique(all_ground_truth) - logger.info(f"\nPer-class accuracy:") - for cls in unique_classes: - cls_mask = all_ground_truth == cls - cls_correct = (all_predictions[cls_mask] == all_ground_truth[cls_mask]).sum() - cls_total = cls_mask.sum() - cls_accuracy = cls_correct / cls_total if cls_total > 0 else 0 - logger.info( - f" Class {cls} ({LABELS[str(cls)]}): " - f"{cls_correct}/{cls_total} = {cls_accuracy:.4f} ({cls_accuracy*100:.2f}%)" - ) - - # Calculate average confidence - avg_confidence = np.mean([prob.max() for prob in all_probabilities]) - logger.info(f"\nAverage 
confidence: {avg_confidence:.4f}") - - # Calculate latency statistics - min_latency = np.min(all_latencies) - max_latency = np.max(all_latencies) - std_latency = np.std(all_latencies) - p50_latency = np.percentile(all_latencies, 50) - p95_latency = np.percentile(all_latencies, 95) - p99_latency = np.percentile(all_latencies, 99) - - logger.info(f"\nLatency Statistics:") - logger.info(f" Average: {avg_latency:.2f} ms") - logger.info(f" Median (P50): {p50_latency:.2f} ms") - logger.info(f" P95: {p95_latency:.2f} ms") - logger.info(f" P99: {p99_latency:.2f} ms") - logger.info(f" Min: {min_latency:.2f} ms") - logger.info(f" Max: {max_latency:.2f} ms") - logger.info(f" Std: {std_latency:.2f} ms") - - # Show confusion matrix - logger.info(f"\nConfusion Matrix:") - logger.info(f"{'':>10} Predicted") - logger.info(f"{'Actual':>10} {'0 (misc)':>10} {'1 (calib)':>10}") - logger.info(f"{'-'*32}") - for true_cls in unique_classes: - row_label = f"{true_cls} ({LABELS[str(true_cls)][:4]})" - row = [f"{row_label:>10}"] - for pred_cls in unique_classes: - count = ((all_ground_truth == true_cls) & (all_predictions == pred_cls)).sum() - row.append(f"{count:>10}") - logger.info(" ".join(row)) - - logger.info(f"\n{'='*60}") - logger.info(f"Evaluation Summary:") - logger.info(f" Model Type: {model_type.upper()}") - logger.info(f" Accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)") - logger.info(f" Avg Latency: {avg_latency:.2f} ms") - logger.info(f" Throughput: {1000/avg_latency:.2f} samples/sec") - logger.info(f"{'='*60}\n") - - -def get_models_to_evaluate(eval_cfg: dict, logger: logging.Logger) -> list: - """Determine which models to evaluate based on config. - - Returns: - List of (model_type, model_path) tuples + List of tuples (backend_name, model_path) """ + models_config = eval_cfg.get("models", {}) models_to_evaluate = [] - # Get model paths from config - eval_onnx_path = eval_cfg.get("onnx_model") - eval_trt_path = eval_cfg.get("tensorrt_model") - - # If both paths are None, skip evaluation - if eval_onnx_path is None and eval_trt_path is None: - logger.warning("Both onnx_model and tensorrt_model are None. 
Skipping evaluation.") - logger.warning("To enable evaluation, specify at least one model path in the config.") - return models_to_evaluate - - # Only evaluate models that are explicitly specified (not None) - if eval_onnx_path is not None: - if osp.exists(eval_onnx_path): - models_to_evaluate.append(("onnx", eval_onnx_path)) - logger.info(f"Using config-specified ONNX model: {eval_onnx_path}") - else: - logger.warning(f"Config-specified ONNX model not found: {eval_onnx_path}") + backend_mapping = { + "pytorch": "pytorch", + "onnx": "onnx", + "tensorrt": "tensorrt", + } - if eval_trt_path is not None: - if osp.exists(eval_trt_path): - models_to_evaluate.append(("tensorrt", eval_trt_path)) - logger.info(f"Using config-specified TensorRT model: {eval_trt_path}") - else: - logger.warning(f"Config-specified TensorRT model not found: {eval_trt_path}") + for backend_key, model_path in models_config.items(): + backend_name = backend_mapping.get(backend_key.lower()) + if backend_name and model_path: + import os + + if os.path.exists(model_path): + models_to_evaluate.append((backend_name, model_path)) + logger.info(f" - {backend_name}: {model_path}") + else: + logger.warning(f" - {backend_name}: {model_path} (not found, skipping)") return models_to_evaluate @@ -389,47 +343,72 @@ def run_full_evaluation( models_to_evaluate: list, model_cfg: Config, info_pkl: str, - device: Any, + device: str, num_samples: int, - verbose_mode: bool, + verbose: bool, logger: logging.Logger, ) -> None: - """Run evaluation for all specified models.""" + """ + Run full evaluation on all specified models. + + Args: + models_to_evaluate: List of (backend, model_path) tuples + model_cfg: Model configuration + info_pkl: Path to info.pkl file + device: Device for inference + num_samples: Number of samples to evaluate + verbose: Verbose mode + logger: Logger instance + """ if not models_to_evaluate: - logger.error( - "No models available for evaluation. Please export models first or specify model paths in config." 
- ) + logger.warning("No models specified for evaluation") return - # Evaluate each model - for model_type, model_path in models_to_evaluate: - logger.info(f"\nEvaluating {model_type.upper()} model...") - logger.info(f"Model path: {model_path}") - logger.info(f"Number of samples: {num_samples}") - logger.info(f"Verbose mode: {verbose_mode}") + logger.info(f"\nModels to evaluate:") + for backend, path in models_to_evaluate: + logger.info(f" - {backend}: {path}") + # Create evaluator + evaluator = ClassificationEvaluator(model_cfg) + + # Create data loader (with miscalibration_probability=0.0 as default) + data_loader = CalibrationDataLoader( + info_pkl_path=info_pkl, + model_cfg=model_cfg, + miscalibration_probability=0.0, + device=device, + ) + + # Evaluate each model + all_results = {} + for backend, model_path in models_to_evaluate: try: - # Run evaluation - predictions, ground_truth, probabilities, latencies = evaluate_exported_model( + results = evaluator.evaluate( model_path=model_path, - model_type=model_type, - model_cfg=model_cfg, - info_pkl_path=info_pkl, - logger=logger, - device=device.type if isinstance(device, torch.device) else device, + data_loader=data_loader, num_samples=num_samples, - verbose=verbose_mode, + backend=backend, + device=device, + verbose=verbose, ) - - # Print results - print_evaluation_results(predictions, ground_truth, probabilities, latencies, model_type, logger) - - # Cleanup - _clear_gpu_memory() - + all_results[backend] = results + evaluator.print_results(results) except Exception as e: - logger.error(f"Evaluation failed for {model_type.upper()} model: {e}") + logger.error(f"Failed to evaluate {backend} model: {e}") import traceback logger.error(traceback.format_exc()) - continue + + # Print comparison summary if multiple models + if len(all_results) > 1: + logger.info(f"\n{'='*70}") + logger.info("Comparison Summary") + logger.info(f"{'='*70}") + logger.info(f"{'Backend':<15} {'Accuracy':<12} {'Avg Latency (ms)':<20}") + logger.info(f"{'-'*70}") + for backend, results in all_results.items(): + if "error" not in results: + acc = results["accuracy"] + lat = results["latency_stats"]["mean_ms"] + logger.info(f"{backend:<15} {acc:<12.4f} {lat:<20.2f}") + logger.info(f"{'='*70}\n") diff --git a/projects/CalibrationStatusClassification/deploy/exporters.py b/projects/CalibrationStatusClassification/deploy/exporters.py deleted file mode 100644 index 6d5655ff..00000000 --- a/projects/CalibrationStatusClassification/deploy/exporters.py +++ /dev/null @@ -1,680 +0,0 @@ -""" -Model export and inference verification utilities. -""" - -import gc -import logging -import os -import os.path as osp -import pickle -import time -from typing import Any, Dict, Optional, Tuple - -import numpy as np -import onnx -import onnxruntime as ort -import onnxsim -import pycuda.autoinit -import pycuda.driver as cuda -import tensorrt as trt -import torch -from config import DEFAULT_VERIFICATION_TOLERANCE, LABELS, DeploymentConfig -from mmengine.config import Config - -from autoware_ml.calibration_classification.datasets.transforms.calibration_classification_transform import ( - CalibrationClassificationTransform, -) - -# ============================================================================ -# Data Loading Utilities -# ============================================================================ - - -def _load_info_pkl_file(info_pkl_path: str) -> list: - """ - Load and parse info.pkl file. 
- - Args: - info_pkl_path: Path to the info.pkl file - - Returns: - List of samples from data_list - - Raises: - FileNotFoundError: If info.pkl file doesn't exist - ValueError: If data format is unexpected - """ - if not os.path.exists(info_pkl_path): - raise FileNotFoundError(f"Info.pkl file not found: {info_pkl_path}") - - try: - with open(info_pkl_path, "rb") as f: - info_data = pickle.load(f) - except Exception as e: - raise ValueError(f"Failed to load info.pkl file: {e}") - - # Extract samples from info.pkl - if isinstance(info_data, dict): - if "data_list" in info_data: - samples_list = info_data["data_list"] - else: - raise ValueError(f"Expected 'data_list' key in info_data, found keys: {list(info_data.keys())}") - else: - raise ValueError(f"Expected dict format, got {type(info_data)}") - - if not samples_list: - raise ValueError("No samples found in info.pkl") - - return samples_list - - -def load_info_pkl_data(info_pkl_path: str, sample_idx: int = 0) -> Dict[str, Any]: - """ - Load a single sample from info.pkl file. - - Args: - info_pkl_path: Path to the info.pkl file - sample_idx: Index of the sample to load (default: 0) - - Returns: - Sample dictionary with the required structure for CalibrationClassificationTransform - - Raises: - FileNotFoundError: If info.pkl file doesn't exist - ValueError: If data format is unexpected or sample index is invalid - """ - samples_list = _load_info_pkl_file(info_pkl_path) - - if sample_idx >= len(samples_list): - raise ValueError(f"Sample index {sample_idx} out of range (0-{len(samples_list)-1})") - - sample = samples_list[sample_idx] - - # Validate sample structure - required_keys = ["image", "lidar_points"] - if not all(key in sample for key in required_keys): - raise ValueError(f"Sample {sample_idx} has invalid structure. Required keys: {required_keys}") - - return sample - - -def _create_transform_with_config( - model_cfg: Config, - miscalibration_probability: float = 0.0, - projection_vis_dir: Optional[str] = None, - results_vis_dir: Optional[str] = None, - binary_save_dir: Optional[str] = None, -) -> CalibrationClassificationTransform: - """ - Create CalibrationClassificationTransform with model configuration. - - Args: - model_cfg: Model configuration - miscalibration_probability: Probability of miscalibration - projection_vis_dir: Optional projection visualization directory - results_vis_dir: Optional results visualization directory - binary_save_dir: Optional binary save directory - - Returns: - Configured CalibrationClassificationTransform - """ - data_root = model_cfg.get("data_root") - if data_root is None: - raise ValueError("data_root not found in model configuration") - - transform_config = model_cfg.get("transform_config") - if transform_config is None: - raise ValueError("transform_config not found in model configuration") - - return CalibrationClassificationTransform( - transform_config=transform_config, - mode="test", - max_depth=model_cfg.get("max_depth", 128.0), - dilation_size=model_cfg.get("dilation_size", 1), - undistort=True, - miscalibration_probability=miscalibration_probability, - enable_augmentation=False, - data_root=data_root, - projection_vis_dir=projection_vis_dir, - results_vis_dir=results_vis_dir, - binary_save_dir=binary_save_dir, - ) - - -def _numpy_to_tensor(numpy_array: np.ndarray, device: str = "cpu") -> torch.Tensor: - """ - Convert numpy array (H, W, C) to tensor (1, C, H, W). 
- - Args: - numpy_array: Input numpy array with shape (H, W, C) - device: Device to load tensor on - - Returns: - Tensor with shape (1, C, H, W) - """ - tensor = torch.from_numpy(numpy_array).permute(2, 0, 1).float() # (C, H, W) - return tensor.unsqueeze(0).to(device) # (1, C, H, W) - - -def _clear_gpu_memory() -> None: - """Clear GPU cache and run garbage collection.""" - if torch.cuda.is_available(): - torch.cuda.empty_cache() - gc.collect() - - -def load_sample_data_from_info_pkl( - info_pkl_path: str, - model_cfg: Config, - miscalibration_probability: float, - sample_idx: int = 0, - device: str = "cpu", -) -> torch.Tensor: - """ - Load and preprocess sample data from info.pkl using CalibrationClassificationTransform. - - Args: - info_pkl_path: Path to the info.pkl file - model_cfg: Model configuration containing data_root setting - miscalibration_probability: Probability of loading a miscalibrated sample - sample_idx: Index of the sample to load (default: 0) - device: Device to load tensor on - - Returns: - Preprocessed tensor ready for model inference - """ - # Load sample data from info.pkl - sample_data = load_info_pkl_data(info_pkl_path, sample_idx) - - # Create transform - transform = _create_transform_with_config( - model_cfg=model_cfg, - miscalibration_probability=miscalibration_probability, - projection_vis_dir=model_cfg.get("test_projection_vis_dir", None), - results_vis_dir=model_cfg.get("test_results_vis_dir", None), - binary_save_dir=model_cfg.get("binary_save_dir", None), - ) - - # Apply transform - results = transform.transform(sample_data) - input_data_processed = results["fused_img"] # (H, W, 5) - - # Convert to tensor - return _numpy_to_tensor(input_data_processed, device) - - -# ============================================================================ -# Model Export Functions -# ============================================================================ - - -def export_to_onnx( - model: torch.nn.Module, - input_tensor: torch.Tensor, - output_path: str, - config: DeploymentConfig, - logger: logging.Logger, -) -> None: - """Export model to ONNX format.""" - settings = config.onnx_settings - model.eval() - - logger.info("Exporting model to ONNX format...") - logger.info(f"Input shape: {input_tensor.shape}") - logger.info(f"Output path: {output_path}") - logger.info(f"ONNX opset version: {settings['opset_version']}") - - with torch.no_grad(): - torch.onnx.export( - model, - input_tensor, - output_path, - export_params=settings["export_params"], - keep_initializers_as_inputs=settings["keep_initializers_as_inputs"], - opset_version=settings["opset_version"], - do_constant_folding=settings["do_constant_folding"], - input_names=settings["input_names"], - output_names=settings["output_names"], - dynamic_axes=settings["dynamic_axes"], - verbose=False, - ) - - logger.info(f"ONNX export completed: {output_path}") - - # Optional model simplification - _optimize_onnx_model(output_path, logger) - - -def _optimize_onnx_model(onnx_path: str, logger: logging.Logger) -> None: - """Optimize ONNX model using onnxsim.""" - logger.info("Simplifying ONNX model...") - model_simplified, success = onnxsim.simplify(onnx_path) - if success: - onnx.save(model_simplified, onnx_path) - logger.info(f"ONNX model simplified successfully. 
Saved to {onnx_path}") - else: - logger.warning("ONNX model simplification failed") - - -def export_to_tensorrt( - onnx_path: str, output_path: str, input_tensor: torch.Tensor, config: DeploymentConfig, logger: logging.Logger -) -> bool: - """Export ONNX model to a TensorRT engine with precision policy support.""" - settings = config.tensorrt_settings - precision_policy = settings["precision_policy"] - policy_flags = settings["policy_flags"] - - logger.info(f"Building TensorRT engine with precision policy: {precision_policy}") - - # Initialize TensorRT - trt_logger = trt.Logger(trt.Logger.WARNING) - trt.init_libnvinfer_plugins(trt_logger, "") - - builder = trt.Builder(trt_logger) - builder_config = builder.create_builder_config() - builder_config.set_memory_pool_limit(pool=trt.MemoryPoolType.WORKSPACE, pool_size=settings["max_workspace_size"]) - - # Create network with appropriate flags - flags = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - - # Handle strongly typed flag (network creation flag, not builder flag) - if policy_flags.get("STRONGLY_TYPED"): - flags |= 1 << int(trt.NetworkDefinitionCreationFlag.STRONGLY_TYPED) - logger.info("Using strongly typed TensorRT network creation") - - network = builder.create_network(flags) - - # Apply precision flags to builder config - for flag_name, enabled in policy_flags.items(): - if flag_name == "STRONGLY_TYPED": - continue # Already handled as network creation flag above - if enabled and hasattr(trt.BuilderFlag, flag_name): - builder_config.set_flag(getattr(trt.BuilderFlag, flag_name)) - logger.info(f"BuilderFlag.{flag_name} enabled") - - # Setup optimization profile - profile = builder.create_optimization_profile() - _configure_input_shapes(profile, input_tensor, settings["model_inputs"], logger) - builder_config.add_optimization_profile(profile) - - # Parse ONNX model - parser = trt.OnnxParser(network, trt_logger) - with open(onnx_path, "rb") as f: - if not parser.parse(f.read()): - _log_parser_errors(parser, logger) - return False - logger.info("Successfully parsed the ONNX file") - - # Build engine - logger.info("Building TensorRT engine...") - serialized_engine = builder.build_serialized_network(network, builder_config) - - if serialized_engine is None: - logger.error("Failed to build TensorRT engine") - return False - - # Save engine - with open(output_path, "wb") as f: - f.write(serialized_engine) - - logger.info(f"TensorRT engine saved to {output_path}") - logger.info(f"Engine max workspace size: {settings['max_workspace_size'] / (1024**3):.2f} GB") - - return True - - -def _configure_input_shapes(profile, input_tensor: torch.Tensor, model_inputs: list, logger: logging.Logger) -> None: - """Configure input shapes for TensorRT optimization profile.""" - if model_inputs: - input_shapes = model_inputs[0].get("input_shapes", {}) - for input_name, shapes in input_shapes.items(): - min_shape = shapes.get("min_shape", list(input_tensor.shape)) - opt_shape = shapes.get("opt_shape", list(input_tensor.shape)) - max_shape = shapes.get("max_shape", list(input_tensor.shape)) - - logger.info(f"Setting input shapes - min: {min_shape}, opt: {opt_shape}, max: {max_shape}") - profile.set_shape(input_name, min_shape, opt_shape, max_shape) - else: - # Default shapes based on input tensor - input_shape = list(input_tensor.shape) - logger.info(f"Using default input shape: {input_shape}") - profile.set_shape("input", input_shape, input_shape, input_shape) - - -def _log_parser_errors(parser, logger: logging.Logger) -> None: - """Log TensorRT 
parser errors.""" - logger.error("Failed to parse ONNX model") - for error in range(parser.num_errors): - logger.error(f"Parser error: {parser.get_error(error)}") - - -# ============================================================================ -# Inference and Verification Functions -# ============================================================================ - - -def run_pytorch_inference( - model: torch.nn.Module, input_tensor: torch.Tensor, logger: logging.Logger -) -> Tuple[torch.Tensor, float]: - """Run PyTorch inference on CPU for verification and return output with latency.""" - # Move to CPU to avoid GPU memory issues - model_cpu = model.cpu() - input_cpu = input_tensor.cpu() - - model_cpu.eval() - with torch.no_grad(): - # Measure inference time - start_time = time.perf_counter() - output = model_cpu(input_cpu) - end_time = time.perf_counter() - - latency = (end_time - start_time) * 1000 # Convert to milliseconds - - # Handle different output formats - if hasattr(output, "output"): - output = output.output - elif isinstance(output, dict) and "output" in output: - output = output["output"] - - if not isinstance(output, torch.Tensor): - raise ValueError(f"Unexpected PyTorch output type: {type(output)}") - - logger.info(f"PyTorch inference latency: {latency:.2f} ms") - logger.info(f"Output verification:") - logger.info(f" Output: {output.cpu().numpy()}") - return output, latency - - -def run_onnx_inference( - onnx_path: str, - input_tensor: torch.Tensor, - ref_output: torch.Tensor, - logger: logging.Logger, -) -> bool: - """Verify ONNX model output against PyTorch model.""" - # Clear GPU cache - _clear_gpu_memory() - - # ONNX inference with timing - providers = ["CPUExecutionProvider"] - ort_session = ort.InferenceSession(onnx_path, providers=providers) - onnx_input = {ort_session.get_inputs()[0].name: input_tensor.cpu().numpy()} - - start_time = time.perf_counter() - onnx_output = ort_session.run(None, onnx_input)[0] - end_time = time.perf_counter() - onnx_latency = (end_time - start_time) * 1000 - - logger.info(f"ONNX inference latency: {onnx_latency:.2f} ms") - - # Ensure onnx_output is numpy array before comparison - if not isinstance(onnx_output, np.ndarray): - logger.error(f"Unexpected ONNX output type: {type(onnx_output)}") - return False - - # Compare outputs - return _compare_outputs(ref_output.cpu().numpy(), onnx_output, "ONNX", logger) - - -def run_tensorrt_inference( - tensorrt_path: str, - input_tensor: torch.Tensor, - ref_output: torch.Tensor, - logger: logging.Logger, -) -> bool: - """Verify TensorRT model output against PyTorch model.""" - # Clear GPU cache - _clear_gpu_memory() - - # Load TensorRT engine - trt_logger = trt.Logger(trt.Logger.WARNING) - trt.init_libnvinfer_plugins(trt_logger, "") - runtime = trt.Runtime(trt_logger) - - with open(tensorrt_path, "rb") as f: - engine = runtime.deserialize_cuda_engine(f.read()) - - if engine is None: - logger.error("Failed to deserialize TensorRT engine") - return False - - # Run TensorRT inference with timing - trt_output, latency = _run_tensorrt_inference(engine, input_tensor.cpu(), logger) - logger.info(f"TensorRT inference latency: {latency:.2f} ms") - - # Compare outputs - return _compare_outputs(ref_output.cpu().numpy(), trt_output, "TensorRT", logger) - - -def _run_tensorrt_inference(engine, input_tensor: torch.Tensor, logger: logging.Logger) -> Tuple[np.ndarray, float]: - """Run TensorRT inference and return output with timing.""" - context = engine.create_execution_context() - stream = cuda.Stream() - start = 
cuda.Event() - end = cuda.Event() - - # Get tensor names and shapes - input_name, output_name = None, None - for i in range(engine.num_io_tensors): - tensor_name = engine.get_tensor_name(i) - if engine.get_tensor_mode(tensor_name) == trt.TensorIOMode.INPUT: - input_name = tensor_name - elif engine.get_tensor_mode(tensor_name) == trt.TensorIOMode.OUTPUT: - output_name = tensor_name - - if input_name is None or output_name is None: - raise RuntimeError("Could not find input/output tensor names") - - # Prepare arrays - input_np = input_tensor.numpy().astype(np.float32) - if not input_np.flags["C_CONTIGUOUS"]: - input_np = np.ascontiguousarray(input_np) - - context.set_input_shape(input_name, input_np.shape) - output_shape = context.get_tensor_shape(output_name) - output_np = np.empty(output_shape, dtype=np.float32) - if not output_np.flags["C_CONTIGUOUS"]: - output_np = np.ascontiguousarray(output_np) - - # Allocate GPU memory - d_input = cuda.mem_alloc(input_np.nbytes) - d_output = cuda.mem_alloc(output_np.nbytes) - - try: - # Set tensor addresses - context.set_tensor_address(input_name, int(d_input)) - context.set_tensor_address(output_name, int(d_output)) - - # Run inference with timing - cuda.memcpy_htod_async(d_input, input_np, stream) - start.record(stream) - context.execute_async_v3(stream_handle=stream.handle) - end.record(stream) - cuda.memcpy_dtoh_async(output_np, d_output, stream) - stream.synchronize() - - latency = end.time_since(start) - return output_np, latency - - finally: - # Cleanup - try: - d_input.free() - d_output.free() - except: - pass - - -def _compare_outputs( - pytorch_output: np.ndarray, backend_output: np.ndarray, backend_name: str, logger: logging.Logger -) -> bool: - """Compare outputs between PyTorch and backend.""" - if not isinstance(backend_output, np.ndarray): - logger.error(f"Unexpected {backend_name} output type: {type(backend_output)}") - return False - - max_diff = np.abs(pytorch_output - backend_output).max() - mean_diff = np.abs(pytorch_output - backend_output).mean() - - logger.info(f"Output verification:") - logger.info(f" {backend_name} output: {backend_output}") - logger.info(f" Max difference with PyTorch: {max_diff:.6f}") - logger.info(f" Mean difference with PyTorch: {mean_diff:.6f}") - - success = max_diff < DEFAULT_VERIFICATION_TOLERANCE - if not success: - logger.warning(f"Large difference detected: {max_diff:.6f}") - - return success - - -def run_verification( - model: torch.nn.Module, - onnx_path: str, - trt_path: Optional[str], - input_tensors: Dict[str, torch.Tensor], - logger: logging.Logger, -) -> None: - """Run model verification for available backends.""" - _clear_gpu_memory() - - for key, input_tensor in input_tensors.items(): - logger.info("=" * 50) - logger.info(f"Verifying {LABELS[key]} sample...") - logger.info("-" * 50) - logger.info("Verifying PyTorch model...") - pytorch_output, pytorch_latency = run_pytorch_inference(model, input_tensor, logger) - logger.info( - f"PyTorch output for {LABELS[key]}: [SCORE_MISCALIBRATED, SCORE_CALIBRATED] = {pytorch_output.cpu().numpy()}" - ) - score_calibrated = pytorch_output.cpu().numpy()[0, 1] - pytorch_output.cpu().numpy()[0, 0] - if key == "0" and score_calibrated < 0: - logger.info(f"Negative calibration score detected for {LABELS[key]} sample: {score_calibrated:.6f}") - elif key == "0" and score_calibrated > 0: - logger.warning(f"Positive calibration score detected for {LABELS[key]} sample: {score_calibrated:.6f}") - elif key == "1" and score_calibrated > 0: - logger.info(f"Positive 
calibration score detected for {LABELS[key]} sample: {score_calibrated:.6f}") - elif key == "1" and score_calibrated < 0: - logger.warning(f"Negative calibration score detected for {LABELS[key]} sample: {score_calibrated:.6f}") - - if onnx_path and osp.exists(onnx_path): - logger.info("-" * 50) - logger.info("Verifying ONNX model...") - if run_onnx_inference(onnx_path, input_tensor, pytorch_output, logger): - logger.info("ONNX model verification passed!") - else: - logger.error("ONNX model verification failed!") - - if trt_path and osp.exists(trt_path): - logger.info("-" * 50) - logger.info("Verifying TensorRT model...") - if run_tensorrt_inference(trt_path, input_tensor, pytorch_output, logger): - logger.info("TensorRT model verification passed!") - else: - logger.error("TensorRT model verification failed!") - logger.info("=" * 50) - - -# ============================================================================ -# Export Pipeline Helper Functions -# ============================================================================ - - -def validate_and_prepare_paths( - config: DeploymentConfig, work_dir: str, existing_onnx: Optional[str], logger: logging.Logger -) -> Tuple[Optional[str], Optional[str]]: - """Determine and validate export paths for ONNX and TensorRT models. - - Returns: - Tuple of (onnx_path, trt_path) - """ - onnx_path = None - trt_path = None - - if config.should_export_onnx: - onnx_settings = config.onnx_settings - onnx_path = osp.join(work_dir, onnx_settings["save_file"]) - - if config.should_export_tensorrt: - # Use existing ONNX if provided, otherwise use the one we'll export - if existing_onnx and not config.should_export_onnx: - onnx_path = existing_onnx - if not osp.exists(onnx_path): - logger.error(f"Provided ONNX file does not exist: {onnx_path}") - return None, None - logger.info(f"Using existing ONNX file: {onnx_path}") - elif not onnx_path: - # Need ONNX for TensorRT but neither export nor existing file specified - logger.error("TensorRT export requires ONNX file. Set mode='both' or provide onnx_file in config.") - return None, None - - # Set TensorRT output path - onnx_settings = config.onnx_settings - trt_file = onnx_settings["save_file"].replace(".onnx", ".engine") - trt_path = osp.join(work_dir, trt_file) - - return onnx_path, trt_path - - -def export_models( - model: torch.nn.Module, - config: DeploymentConfig, - onnx_path: Optional[str], - trt_path: Optional[str], - input_tensor_calibrated: torch.Tensor, - device: torch.device, - logger: logging.Logger, -) -> Tuple[bool, torch.device]: - """Export models to ONNX and/or TensorRT formats. 
- - Returns: - Tuple of (success, updated_device) - """ - # Export ONNX - if config.should_export_onnx and onnx_path: - export_to_onnx(model, input_tensor_calibrated, onnx_path, config, logger) - - # Export TensorRT - if config.should_export_tensorrt and trt_path and onnx_path: - logger.info("Converting ONNX to TensorRT...") - - # Ensure CUDA device for TensorRT - if device.type != "cuda": - logger.warning("TensorRT requires CUDA device, switching to cuda") - device = torch.device("cuda") - - success = export_to_tensorrt(onnx_path, trt_path, input_tensor_calibrated, config, logger) - if success: - logger.info(f"TensorRT conversion successful: {trt_path}") - else: - logger.error("TensorRT conversion failed") - return False, device - - return True, device - - -def run_model_verification( - model: torch.nn.Module, - config: DeploymentConfig, - onnx_path: Optional[str], - trt_path: Optional[str], - input_tensor_calibrated: torch.Tensor, - input_tensor_miscalibrated: torch.Tensor, - existing_onnx: Optional[str], - logger: logging.Logger, -) -> None: - """Run model verification for exported formats.""" - if not config.should_verify: - return - - logger.info( - "Running verification for miscalibrated and calibrated samples with " - "an output array [SCORE_MISCALIBRATED, SCORE_CALIBRATED]..." - ) - input_tensors = {"0": input_tensor_miscalibrated, "1": input_tensor_calibrated} - - # Only verify formats that were exported or provided - onnx_path_for_verify = onnx_path if (config.should_export_onnx or existing_onnx) else None - trt_path_for_verify = trt_path if config.should_export_tensorrt else None - - run_verification(model, onnx_path_for_verify, trt_path_for_verify, input_tensors, logger) diff --git a/projects/CalibrationStatusClassification/deploy/main.py b/projects/CalibrationStatusClassification/deploy/main.py index 918bafe7..655d0be5 100644 --- a/projects/CalibrationStatusClassification/deploy/main.py +++ b/projects/CalibrationStatusClassification/deploy/main.py @@ -1,58 +1,70 @@ """ -CalibrationStatusClassification Model Deployment Script +CalibrationStatusClassification Model Deployment Script (Refactored) -This script exports CalibrationStatusClassification models to ONNX and TensorRT formats, -with comprehensive verification and performance benchmarking. +This script exports CalibrationStatusClassification models using the unified +deployment framework, with comprehensive verification and performance benchmarking. 
Features: - ONNX export with optimization - TensorRT conversion with precision policy support -- Dual verification (ONNX + TensorRT) on single samples -- Full model evaluation on multiple samples with metrics +- Unified verification across backends +- Full model evaluation with metrics - Performance benchmarking with latency statistics - Confusion matrix and per-class accuracy analysis """ +import logging import os.path as osp import mmengine import torch -from config import DeploymentConfig, parse_args, setup_logging -from evaluator import get_models_to_evaluate, run_full_evaluation -from exporters import ( - export_models, - load_sample_data_from_info_pkl, - run_model_verification, - validate_and_prepare_paths, -) from mmengine.config import Config from mmpretrain.apis import get_model +from autoware_ml.deployment.core import BaseDeploymentConfig, parse_base_args, setup_logging, verify_model_outputs +from autoware_ml.deployment.exporters import ONNXExporter, TensorRTExporter +from projects.CalibrationStatusClassification.deploy.data_loader import CalibrationDataLoader +from projects.CalibrationStatusClassification.deploy.evaluator import ( + ClassificationEvaluator, + get_models_to_evaluate, + run_full_evaluation, +) + + +class CalibrationDeploymentConfig(BaseDeploymentConfig): + """Extended configuration for CalibrationStatusClassification deployment.""" + + def __init__(self, deploy_cfg: Config): + super().__init__(deploy_cfg) + # CalibrationStatus-specific config can be added here if needed + def main(): """Main deployment function.""" - args = parse_args() + # Parse arguments + parser = parse_base_args() + args = parser.parse_args() logger = setup_logging(args.log_level) # Load configurations logger.info(f"Loading deploy config from: {args.deploy_cfg}") deploy_cfg = Config.fromfile(args.deploy_cfg) - config = DeploymentConfig(deploy_cfg) + config = CalibrationDeploymentConfig(deploy_cfg) logger.info(f"Loading model config from: {args.model_cfg}") model_cfg = Config.fromfile(args.model_cfg) - # Extract runtime configuration - work_dir = config.work_dir - device = config.device - info_pkl = config.runtime_io_config.get("info_pkl") - sample_idx = config.runtime_io_config.get("sample_idx", 0) - existing_onnx = config.runtime_io_config.get("onnx_file") - export_mode = config.export_config.get("mode", "both") + # Get configuration + work_dir = args.work_dir or config.export_config.work_dir + device = args.device or config.export_config.device + info_pkl = config.runtime_config.get("info_pkl") + sample_idx = config.runtime_config.get("sample_idx", 0) + existing_onnx = config.runtime_config.get("onnx_file") + export_mode = config.export_config.mode # Validate required parameters if not info_pkl: - logger.error("info_pkl path must be provided either in config or via --info-pkl") + logger.error("info_pkl path must be provided in config") return # Setup working directory @@ -69,8 +81,7 @@ def main(): eval_enabled = config.evaluation_config.get("enabled", False) if not eval_enabled: logger.error( - "Configuration error: export mode is 'none' (evaluation-only mode) " - "but evaluation.enabled is False. " + "Configuration error: export mode is 'none' but evaluation.enabled is False. " "Please set evaluation.enabled = True in your config." 
) return @@ -82,58 +93,126 @@ def main(): # Export phase if not is_eval_only: + logger.info(f"\n{'='*70}") + logger.info("Starting model export...") + logger.info(f"{'='*70}\n") + # Determine export paths - onnx_path, trt_path = validate_and_prepare_paths(config, work_dir, existing_onnx, logger) - if onnx_path is None and trt_path is None: - return + onnx_path = None + trt_path = None + + onnx_settings = config.get_onnx_settings() + + if config.export_config.should_export_onnx(): + onnx_path = osp.join(work_dir, onnx_settings["save_file"]) + + if config.export_config.should_export_tensorrt(): + if existing_onnx and not config.export_config.should_export_onnx(): + onnx_path = existing_onnx + if not osp.exists(onnx_path): + logger.error(f"Provided ONNX file does not exist: {onnx_path}") + return + elif not onnx_path: + logger.error("TensorRT export requires ONNX file. Set mode='both' or provide onnx_file in config.") + return + + trt_file = onnx_settings["save_file"].replace(".onnx", ".engine") + trt_path = osp.join(work_dir, trt_file) # Load model logger.info(f"Loading model from checkpoint: {args.checkpoint}") - device = torch.device(device) - model = get_model(model_cfg, args.checkpoint, device=device) + torch_device = torch.device(device) + model = get_model(model_cfg, args.checkpoint, device=torch_device) - # Load sample data + # Create data loaders for calibrated and miscalibrated samples logger.info(f"Loading sample data from info.pkl: {info_pkl}") - input_tensor_calibrated = load_sample_data_from_info_pkl(info_pkl, model_cfg, 0.0, sample_idx, device=device) - input_tensor_miscalibrated = load_sample_data_from_info_pkl( - info_pkl, model_cfg, 1.0, sample_idx, device=device + data_loader_calibrated = CalibrationDataLoader( + info_pkl_path=info_pkl, + model_cfg=model_cfg, + miscalibration_probability=0.0, + device=device, + ) + data_loader_miscalibrated = CalibrationDataLoader( + info_pkl_path=info_pkl, + model_cfg=model_cfg, + miscalibration_probability=1.0, + device=device, ) - # Export models - success, device = export_models(model, config, onnx_path, trt_path, input_tensor_calibrated, device, logger) - if not success: - logger.error("Export failed") - return + # Load sample inputs + input_tensor_calibrated = data_loader_calibrated.load_and_preprocess(sample_idx) + input_tensor_miscalibrated = data_loader_miscalibrated.load_and_preprocess(sample_idx) + + # Export ONNX + if config.export_config.should_export_onnx() and onnx_path: + logger.info(f"\nExporting to ONNX...") + onnx_exporter = ONNXExporter(onnx_settings, logger) + success = onnx_exporter.export(model, input_tensor_calibrated, onnx_path) + if success: + logger.info(f"✓ ONNX export successful: {onnx_path}") + onnx_exporter.validate_export(onnx_path) + else: + logger.error("✗ ONNX export failed") + return + + # Export TensorRT + if config.export_config.should_export_tensorrt() and trt_path and onnx_path: + logger.info(f"\nExporting to TensorRT...") + trt_settings = config.get_tensorrt_settings() + trt_exporter = TensorRTExporter(trt_settings, logger) + success = trt_exporter.export(model, input_tensor_calibrated, trt_path, onnx_path=onnx_path) + if success: + logger.info(f"✓ TensorRT export successful: {trt_path}") + else: + logger.error("✗ TensorRT export failed") + return + + # Run verification if requested + if config.export_config.verify: + logger.info(f"\n{'='*70}") + logger.info("Running model verification...") + logger.info(f"{'='*70}\n") + + test_inputs = { + "miscalibrated_sample": input_tensor_miscalibrated, + 
"calibrated_sample": input_tensor_calibrated, + } + + verify_onnx = ( + onnx_path if config.export_config.should_export_onnx() else (existing_onnx if existing_onnx else None) + ) + verify_trt = trt_path if config.export_config.should_export_tensorrt() else None + + verification_results = verify_model_outputs( + pytorch_model=model, + test_inputs=test_inputs, + onnx_path=verify_onnx, + tensorrt_path=verify_trt, + device=device, + logger=logger, + ) - # Update tensors if device changed - if device.type == "cuda": - input_tensor_calibrated = input_tensor_calibrated.to(device) - input_tensor_miscalibrated = input_tensor_miscalibrated.to(device) - - # Run verification - run_model_verification( - model, - config, - onnx_path, - trt_path, - input_tensor_calibrated, - input_tensor_miscalibrated, - existing_onnx, - logger, - ) + # Check if all verifications passed + all_passed = all(verification_results.values()) + if all_passed: + logger.info("✓ All verifications PASSED") + else: + logger.warning("⚠ Some verifications FAILED") # Log exported formats exported_formats = [] - if config.should_export_onnx: + if config.export_config.should_export_onnx(): exported_formats.append("ONNX") - if config.should_export_tensorrt: + if config.export_config.should_export_tensorrt(): exported_formats.append("TensorRT") if exported_formats: - logger.info(f"Exported formats: {', '.join(exported_formats)}") + logger.info(f"\nExported formats: {', '.join(exported_formats)}") + logger.info(f"\n{'='*70}") logger.info("Deployment completed successfully!") + logger.info(f"{'='*70}\n") else: - logger.info("Evaluation-only mode: Skipping model loading and export") + logger.info("Evaluation-only mode: Skipping model loading and export\n") # Evaluation phase eval_cfg = config.evaluation_config @@ -142,20 +221,24 @@ def main(): verbose_mode = eval_cfg.get("verbose", False) if should_evaluate: - logger.info(f"\n{'='*60}") + logger.info(f"\n{'='*70}") logger.info("Starting full model evaluation...") - logger.info(f"{'='*60}") + logger.info(f"{'='*70}\n") models_to_evaluate = get_models_to_evaluate(eval_cfg, logger) - run_full_evaluation( - models_to_evaluate, - model_cfg, - info_pkl, - device, - num_samples, - verbose_mode, - logger, - ) + + if models_to_evaluate: + run_full_evaluation( + models_to_evaluate, + model_cfg, + info_pkl, + device, + num_samples, + verbose_mode, + logger, + ) + else: + logger.warning("No models found for evaluation") if __name__ == "__main__": diff --git a/tools/calibration_classification/README.md b/tools/calibration_classification/README.md index f170e90d..d80f8480 100644 --- a/tools/calibration_classification/README.md +++ b/tools/calibration_classification/README.md @@ -266,8 +266,10 @@ evaluation = dict( enabled=False, # Enable evaluation by default num_samples=100, # Number of samples to evaluate verbose=False, # Enable detailed logging - onnx_path=None, # Optional: path to existing ONNX model - tensorrt_path=None, # Optional: path to existing TensorRT engine + models=dict( + onnx=None, # Optional: path to existing ONNX model + tensorrt=None, # Optional: path to existing TensorRT engine + ), ) ``` From f6f0942e42160c7ebb59fcb48764cee6ca8341d6 Mon Sep 17 00:00:00 2001 From: vividf Date: Wed, 8 Oct 2025 14:35:41 +0900 Subject: [PATCH 18/19] fix: fallback to cpu if cuda fail due to gpu issue Signed-off-by: vividf --- .../deployment/backends/onnx_backend.py | 43 ++++++++++++++++--- 1 file changed, 37 insertions(+), 6 deletions(-) diff --git a/autoware_ml/deployment/backends/onnx_backend.py 
b/autoware_ml/deployment/backends/onnx_backend.py index 3fa51d82..f8e0ed37 100644 --- a/autoware_ml/deployment/backends/onnx_backend.py +++ b/autoware_ml/deployment/backends/onnx_backend.py @@ -1,5 +1,6 @@ """ONNX Runtime inference backend.""" +import logging import os import time from typing import Tuple @@ -28,6 +29,8 @@ def __init__(self, model_path: str, device: str = "cpu"): """ super().__init__(model_path, device) self._session = None + self._fallback_attempted = False + self._logger = logging.getLogger(__name__) def load_model(self) -> None: """Load ONNX model.""" @@ -37,12 +40,15 @@ def load_model(self) -> None: # Select execution provider based on device if self.device.startswith("cuda"): providers = ["CUDAExecutionProvider", "CPUExecutionProvider"] + self._logger.info("Attempting to use CUDA acceleration (will fallback to CPU if needed)...") else: providers = ["CPUExecutionProvider"] + self._logger.info("Using CPU for ONNX inference") try: self._session = ort.InferenceSession(self.model_path, providers=providers) self._model = self._session # For is_loaded check + self._logger.info(f"ONNX session using providers: {self._session.get_providers()}") except Exception as e: raise RuntimeError(f"Failed to load ONNX model: {e}") @@ -65,16 +71,41 @@ def infer(self, input_tensor: torch.Tensor) -> Tuple[np.ndarray, float]: input_name = self._session.get_inputs()[0].name onnx_input = {input_name: input_array} - # Run inference - start_time = time.perf_counter() - output = self._session.run(None, onnx_input)[0] - end_time = time.perf_counter() + try: + # Run inference + start_time = time.perf_counter() + output = self._session.run(None, onnx_input)[0] + end_time = time.perf_counter() + + latency_ms = (end_time - start_time) * 1000 + + return output, latency_ms + except Exception as e: + # Check if this is a CUDA/PTX error and we haven't tried CPU fallback yet + if ("PTX" in str(e) or "CUDA" in str(e)) and not self._fallback_attempted: + self._logger.warning(f"CUDA runtime error detected: {e}") + self._logger.warning("Recreating session with CPU provider...") + self._fallback_attempted = True + + # Recreate session with CPU provider + self._session = ort.InferenceSession(self.model_path, providers=["CPUExecutionProvider"]) + self._logger.info(f"Session recreated with providers: {self._session.get_providers()}") + + # Retry inference with CPU + input_name = self._session.get_inputs()[0].name + onnx_input = {input_name: input_array} + start_time = time.perf_counter() + output = self._session.run(None, onnx_input)[0] + end_time = time.perf_counter() - latency_ms = (end_time - start_time) * 1000 + latency_ms = (end_time - start_time) * 1000 - return output, latency_ms + return output, latency_ms + else: + raise def cleanup(self) -> None: """Clean up ONNX Runtime resources.""" self._session = None self._model = None + self._fallback_attempted = False From c977363f6f0158b156cd8a9fc44d6f1a38914791 Mon Sep 17 00:00:00 2001 From: vividf Date: Wed, 8 Oct 2025 17:21:45 +0900 Subject: [PATCH 19/19] chore: add deployment readme Signed-off-by: vividf --- autoware_ml/deployment/README.md | 352 +++++++++++++++++++++++++++++++ 1 file changed, 352 insertions(+) create mode 100644 autoware_ml/deployment/README.md diff --git a/autoware_ml/deployment/README.md b/autoware_ml/deployment/README.md new file mode 100644 index 00000000..ddfe7936 --- /dev/null +++ b/autoware_ml/deployment/README.md @@ -0,0 +1,352 @@ +# Autoware ML Deployment Framework + +A unified, task-agnostic deployment framework for exporting, 
verifying, and evaluating machine learning models across different backends (ONNX, TensorRT) with comprehensive support for model validation and performance benchmarking. + +## Table of Contents + +- [Overview](#overview) +- [Features](#features) +- [Current Support](#current-support) +- [Architecture](#architecture) +- [Quick Start](#quick-start) +- [Usage Guide](#usage-guide) + - [Basic Export](#basic-export) + - [Export with Verification](#export-with-verification) + - [Export with Full Evaluation](#export-with-full-evaluation) + - [Evaluation Only Mode](#evaluation-only-mode) +- [Configuration Reference](#configuration-reference) + +## Overview + +The Autoware ML Deployment Framework provides a standardized pipeline for deploying trained models to production-ready inference backends. It handles the complete deployment workflow from model export to validation and performance analysis, with a focus on ensuring model quality and correctness across different deployment targets. + +### Key Capabilities + +- **Multi-Backend Export**: Export models to ONNX and TensorRT formats +- **Precision Policy Support**: Flexible precision policies (FP32, FP16, TF32, INT8) +- **Automated Verification**: Cross-backend output validation to ensure correctness +- **Performance Benchmarking**: Comprehensive latency and throughput analysis +- **Full Evaluation**: Complete model evaluation with metrics and confusion matrices +- **Modular Design**: Easy to extend for new tasks and backends + + + +## Current Support + +### Detection 3D +* [ ] BEVFusion +* [ ] CenterPoint +* [ ] TransFusion +* [ ] StreamPETR + +### Detection 2D +* [ ] YOLOX +* [ ] YOLOX_opt (Traffic Light Detection) +* [ ] FRNet +* [ ] GLIP (Grounded Language-Image Pre-training) + +### Classification +* [X] CalibrationStatusClassification +* [ ] MobileNetv2 (Traffic Light Classification) + +### Backbones & Components +* [ ] SwinTransformer +* [ ] ConvNeXt_PC (Point Cloud) +* [ ] SparseConvolution + +### Multimodal +* [ ] BLIP-2 (Vision-Language Model) + +> **Note**: Currently, only **CalibrationStatusClassification** has full deployment framework support. Other models may have custom deployment scripts in their respective project directories but are not yet integrated with the unified deployment framework. + +### Supported Backends + +| Backend | Export | Inference | Verification | +|---------|--------|-----------|--------------| +| **ONNX** | ✅ | ✅ | ✅ | +| **TensorRT** | ✅ | ✅ | ✅ | + +## Architecture + +The deployment framework follows a modular architecture: + +``` +autoware_ml/deployment/ +├── core/ # Core abstractions +│ ├── base_config.py # Configuration management +│ ├── base_data_loader.py # Data loading interface +│ ├── base_evaluator.py # Evaluation interface +│ └── verification.py # Cross-backend verification +├── backends/ # Backend implementations +│ ├── pytorch_backend.py # PyTorch inference +│ ├── onnx_backend.py # ONNX Runtime inference +│ └── tensorrt_backend.py # TensorRT inference +└── exporters/ # Export implementations + ├── onnx_exporter.py # ONNX export + └── tensorrt_exporter.py # TensorRT export +``` + +### Design Principles + +1. **Task-Agnostic Core**: Base classes are independent of specific tasks +2. **Backend Abstraction**: Unified interface across different inference backends +3. **Extensibility**: Easy to add new tasks, backends, or exporters +4. **Configuration-Driven**: All settings managed through Python config files +5. 
**Comprehensive Validation**: Built-in verification at every step + + +## Quick Start + +Here's a minimal example to export and verify a calibration classification model: + +```bash +# Export to both ONNX and TensorRT with verification +python projects/CalibrationStatusClassification/deploy/main.py \ + deploy_config.py \ + model_config.py \ + checkpoint.pth \ + --work-dir work_dirs/deployment +``` + + +## Usage Guide + +### Basic Export + +Export a model to ONNX format: + +**1. Create deployment config** (`deploy_config_onnx.py`): + +```python +export = dict( + mode='onnx', # Export mode: 'onnx', 'trt', 'both', 'none' + verify=False, # Skip verification + device='cuda:0', # Device for export + work_dir='work_dirs/deployment' +) + +# Runtime I/O settings +runtime_io = dict( + info_pkl='path/to/info.pkl', # Dataset info file + sample_idx=0 # Sample index for export +) + +# ONNX configuration +onnx_config = dict( + opset_version=16, + do_constant_folding=True, + input_names=['input'], + output_names=['output'], + save_file='model.onnx', + dynamic_axes={ + 'input': {0: 'batch_size'}, + 'output': {0: 'batch_size'} + } +) + +# Backend configuration +backend_config = dict( + common_config=dict( + precision_policy='auto', # Options: 'auto', 'fp16', 'fp32_tf32', 'int8' + max_workspace_size=1 << 30 # 1 GB for TensorRT + ) +) +``` + +**2. Run export**: + +```bash +python projects/CalibrationStatusClassification/deploy/main.py \ + deploy_config_onnx.py \ + path/to/model_config.py \ + path/to/checkpoint.pth +``` + +### Export with Verification + +Verify that exported models produce correct outputs: + +**Update config**: + +```python +export = dict( + mode='both', # Export to both ONNX and TensorRT + verify=True, # Enable verification + device='cuda:0', + work_dir='work_dirs/deployment' +) + +# ... rest of config ... +``` + +**Run with verification**: + +```bash +python projects/CalibrationStatusClassification/deploy/main.py \ + deploy_config_verify.py \ + path/to/model_config.py \ + path/to/checkpoint.pth +``` + +### Export with Full Evaluation + +Perform complete model evaluation on a validation dataset: + +**Update config** (`deploy_config_eval.py`): + +```python +export = dict( + mode='both', + verify=True, + device='cuda:0', + work_dir='work_dirs/deployment' +) + +# Enable evaluation +evaluation = dict( + enabled=True, + num_samples=1000, # Number of samples to evaluate + verbose=False, # Set True for detailed per-sample output + models_to_evaluate=[ + 'pytorch', # Evaluate PyTorch model + 'onnx', # Evaluate ONNX model + 'tensorrt' # Evaluate TensorRT model + ] +) + +# ... rest of config ... +``` + +**Run with evaluation**: + +```bash +python projects/CalibrationStatusClassification/deploy/main.py \ + deploy_config_eval.py \ + path/to/model_config.py \ + path/to/checkpoint.pth +``` + +**Output includes**: +- Per-model accuracy and performance metrics +- Confusion matrices for each backend +- Latency statistics (min, max, mean, median, p95, p99) +- Per-class accuracy breakdown + +### Evaluation Only Mode + +Run evaluation without exporting (useful for testing existing deployments): + +**Config** (`deploy_config_eval_only.py`): + +```python +export = dict( + mode='none', # Skip export + device='cuda:0', + work_dir='work_dirs/deployment' +) + +evaluation = dict( + enabled=True, + num_samples=1000, + models_to_evaluate=['onnx', 'tensorrt'] # Evaluate existing models +) + +runtime_io = dict( + info_pkl='path/to/info.pkl', + onnx_file='work_dirs/deployment/model.onnx' # Path to existing ONNX +) + +# ... 
rest of config ...
+```
+
+**Run**:
+
+```bash
+# No checkpoint needed in eval-only mode
+python projects/CalibrationStatusClassification/deploy/main.py \
+    deploy_config_eval_only.py \
+    path/to/model_config.py
+```
+
+## Configuration Reference
+
+### Export Configuration
+
+```python
+export = dict(
+    mode='both',          # 'onnx', 'trt', 'both', 'none'
+    verify=True,          # Enable cross-backend verification
+    device='cuda:0',      # Device for export/inference
+    work_dir='work_dirs'  # Output directory
+)
+```
+
+### Runtime I/O Configuration
+
+```python
+runtime_io = dict(
+    info_pkl='path/to/dataset/info.pkl',     # Required: dataset info file
+    sample_idx=0,                            # Sample index for export
+    onnx_file='path/to/existing/model.onnx'  # Optional: use existing ONNX
+)
+```
+
+### ONNX Configuration
+
+```python
+onnx_config = dict(
+    opset_version=16,             # ONNX opset version
+    do_constant_folding=True,     # Enable constant folding optimization
+    input_names=['input'],        # Input tensor names
+    output_names=['output'],      # Output tensor names
+    save_file='model.onnx',       # Output filename
+    export_params=True,           # Export model parameters
+    dynamic_axes={                # Dynamic dimensions
+        'input': {0: 'batch_size'},
+        'output': {0: 'batch_size'}
+    },
+    keep_initializers_as_inputs=False  # Keep weights as initializers, not graph inputs
+)
+```
+
+### Backend Configuration
+
+```python
+backend_config = dict(
+    common_config=dict(
+        precision_policy='fp16',    # Precision policy (see below)
+        max_workspace_size=1 << 30  # TensorRT workspace size in bytes (1 GiB here)
+    ),
+    model_inputs=[                  # Optional: input specifications
+        dict(
+            name='input',
+            shape=(1, 5, 512, 512),
+            dtype='float32'
+        )
+    ]
+)
+```
+
+### Precision Policies
+
+| Policy | Description | Use Case |
+|--------|-------------|----------|
+| `auto` | Let TensorRT decide | Default, balanced performance |
+| `fp16` | Half precision (FP16) | Typically ~2x faster with minimal accuracy loss |
+| `fp32_tf32` | TensorFloat-32 (TF32) | Good balance for Ampere+ GPUs |
+| `strongly_typed` | Strict type enforcement | For debugging |
+
+### Evaluation Configuration
+
+```python
+evaluation = dict(
+    enabled=True,                   # Enable evaluation
+    num_samples=1000,               # Number of samples to evaluate
+    verbose=False,                  # Detailed per-sample output
+    models_to_evaluate=[            # Backends to evaluate
+        'pytorch',
+        'onnx',
+        'tensorrt'
+    ]
+)
+```
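+
+### Putting It Together
+
+The reference sections above can be combined into a single deployment config. The sketch below is illustrative only: the file name (`deploy_config_full.py`) and all paths are placeholders, and the field names follow the examples in this README — adjust them if your project's config schema differs.
+
+```python
+# deploy_config_full.py -- illustrative sketch; replace placeholder paths with your own
+export = dict(
+    mode='both',      # Export ONNX, then build a TensorRT engine from it
+    verify=True,      # Cross-check PyTorch / ONNX / TensorRT outputs
+    device='cuda:0',
+    work_dir='work_dirs/deployment',
+)
+
+runtime_io = dict(
+    info_pkl='path/to/dataset/info.pkl',  # Placeholder: dataset info file
+    sample_idx=0,                         # Sample used for export and verification
+)
+
+onnx_config = dict(
+    opset_version=16,
+    do_constant_folding=True,
+    input_names=['input'],
+    output_names=['output'],
+    save_file='model.onnx',
+    export_params=True,
+    dynamic_axes={'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}},
+    keep_initializers_as_inputs=False,
+)
+
+backend_config = dict(
+    common_config=dict(
+        precision_policy='fp16',     # See the precision policy table above
+        max_workspace_size=1 << 30,  # 1 GiB TensorRT workspace
+    ),
+)
+
+evaluation = dict(
+    enabled=True,
+    num_samples=100,
+    verbose=False,
+    models_to_evaluate=['pytorch', 'onnx', 'tensorrt'],
+)
+```
+
+Run it the same way as the earlier examples:
+
+```bash
+python projects/CalibrationStatusClassification/deploy/main.py \
+    deploy_config_full.py \
+    path/to/model_config.py \
+    path/to/checkpoint.pth
+```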