From 9535bb24a797a1bff0830477a1ba0c4082dff014 Mon Sep 17 00:00:00 2001 From: vividf Date: Fri, 26 Sep 2025 16:54:38 +0900 Subject: [PATCH 01/19] feat: add onnx and tensorrt evaluation and int8 optimization Signed-off-by: vividf --- .../DockerfileOpt | 38 + .../configs/deploy/resnet18_5ch.py | 6 +- .../deploy/main.py | 113 ++- tools/calibration_classification/README.md | 72 +- .../evaluate_inference.py | 667 ++++++++++++++++++ .../visualize_lidar_camera_projection.py | 197 +++++- 6 files changed, 1033 insertions(+), 60 deletions(-) create mode 100644 projects/CalibrationStatusClassification/DockerfileOpt create mode 100644 tools/calibration_classification/evaluate_inference.py diff --git a/projects/CalibrationStatusClassification/DockerfileOpt b/projects/CalibrationStatusClassification/DockerfileOpt new file mode 100644 index 00000000..e0510174 --- /dev/null +++ b/projects/CalibrationStatusClassification/DockerfileOpt @@ -0,0 +1,38 @@ + +# Use an NVIDIA CUDA base image with Python +FROM nvidia/cuda:12.6.0-cudnn-devel-ubuntu22.04 + +# Prevents prompts during apt installs +ENV DEBIAN_FRONTEND=noninteractive \ + PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 + +# Update system and install basic dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + python3 python3-dev python3-pip \ + build-essential git curl ca-certificates \ + && rm -rf /var/lib/apt/lists/* + +# Upgrade pip +RUN python3 -m pip install --upgrade pip setuptools wheel + + +RUN pip3 install --no-cache-dir \ + "numpy<2" \ + "protobuf<5" \ + "onnx==1.16.2" \ + "onnxruntime==1.23.0" \ + "ml_dtypes==0.5.3" + +# Install nvidia-modelopt from NVIDIA PyPI +# RUN pip3 install --no-cache-dir --extra-index-url https://pypi.nvidia.com nvidia-modelopt +RUN pip3 install --no-cache-dir --extra-index-url https://pypi.nvidia.com \ + onnx-graphsurgeon \ + nvidia-modelopt==0.33.1 + + +# Set working directory +WORKDIR /workspace + +# Default command +CMD ["/bin/bash"] diff --git a/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py b/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py index 0318272b..a32b3823 100644 --- a/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py +++ b/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py @@ -2,7 +2,11 @@ backend_config = dict( type="tensorrt", - common_config=dict(max_workspace_size=1 << 30), + common_config=dict( + max_workspace_size=1 << 30, + fp16_mode=True, + strongly_typed=False, + ), model_inputs=[ dict( input_shapes=dict( diff --git a/projects/CalibrationStatusClassification/deploy/main.py b/projects/CalibrationStatusClassification/deploy/main.py index cef136d7..8a19b20a 100644 --- a/projects/CalibrationStatusClassification/deploy/main.py +++ b/projects/CalibrationStatusClassification/deploy/main.py @@ -179,6 +179,7 @@ def tensorrt_settings(self) -> Dict: "max_workspace_size": common_config.get("max_workspace_size", DEFAULT_WORKSPACE_SIZE), "fp16_mode": common_config.get("fp16_mode", False), "model_inputs": self.backend_config.get("model_inputs", []), + "strongly_typed": common_config.get("strongly_typed", False), } @@ -197,6 +198,11 @@ def parse_args() -> argparse.Namespace: parser.add_argument("--device", default="cpu", help="device for conversion") parser.add_argument("--log-level", default="INFO", choices=list(logging._nameToLevel.keys()), help="logging level") parser.add_argument("--verify", action="store_true", help="verify model outputs") + parser.add_argument("--onnx", 
action="store_true", help="export to ONNX format") + parser.add_argument("--tensorrt", action="store_true", help="export to TensorRT format") + parser.add_argument( + "--onnx-file", help="path to existing ONNX file (required when --tensorrt is specified without --onnx)" + ) return parser.parse_args() @@ -267,7 +273,17 @@ def export_to_tensorrt( builder = trt.Builder(TRT_LOGGER) # Create network and config - network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)) + strongly_typed = settings.get("strongly_typed", False) + + if strongly_typed: + flags = (1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)) | ( + 1 << int(trt.NetworkDefinitionCreationFlag.STRONGLY_TYPED) + ) + network = builder.create_network(flags) + logger.info("Using strongly typed TensorRT network creation") + else: + network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)) + logger.info("Using standard TensorRT network creation (EXPLICIT_BATCH only)") config_trt = builder.create_builder_config() config_trt.set_memory_pool_limit(pool=trt.MemoryPoolType.WORKSPACE, pool_size=settings["max_workspace_size"]) @@ -563,6 +579,18 @@ def main(): args = parse_args() logger = setup_logging(args.log_level) + # Check if at least one export format is specified + if not args.onnx and not args.tensorrt: + logger.error("Please specify at least one export format: --onnx or --tensorrt") + return + + # Check if TensorRT is requested but no ONNX file is provided and ONNX export is not requested + if args.tensorrt and not args.onnx and not args.onnx_file: + logger.error( + "TensorRT export requires either --onnx (to export ONNX first) or --onnx-file (to use existing ONNX file)" + ) + return + # Setup mmengine.mkdir_or_exist(osp.abspath(args.work_dir)) @@ -575,12 +603,35 @@ def main(): model_cfg = Config.fromfile(args.model_cfg) # Setup file paths - onnx_settings = config.onnx_settings - onnx_path = osp.join(args.work_dir, onnx_settings["save_file"]) - trt_path: Optional[str] = None - trt_file = onnx_settings["save_file"].replace(".onnx", ".engine") - trt_path = osp.join(args.work_dir, trt_file) - + onnx_path = None + trt_path = None + trt_file = None + + if args.onnx: + onnx_settings = config.onnx_settings + onnx_path = osp.join(args.work_dir, onnx_settings["save_file"]) + + if args.tensorrt: + if args.onnx_file: + # Use provided ONNX file path + onnx_path = args.onnx_file + if not osp.exists(onnx_path): + logger.error(f"Provided ONNX file does not exist: {onnx_path}") + return + logger.info(f"Using existing ONNX file: {onnx_path}") + elif args.onnx: + # ONNX will be exported first, use the path from config + onnx_settings = config.onnx_settings + onnx_path = osp.join(args.work_dir, onnx_settings["save_file"]) + else: + # This should not happen due to validation above, but just in case + logger.error("No ONNX file path available for TensorRT conversion") + return + + # Set TensorRT output path + onnx_settings = config.onnx_settings + trt_file = onnx_settings["save_file"].replace(".onnx", ".engine") + trt_path = osp.join(args.work_dir, trt_file) # Load model logger.info(f"Loading model from checkpoint: {args.checkpoint}") device = torch.device(args.device) @@ -596,22 +647,23 @@ def main(): ) # Export models - export_to_onnx(model, input_tensor_calibrated, onnx_path, config, logger) - - logger.info("Converting ONNX to TensorRT...") - - # Ensure CUDA device for TensorRT - if args.device == "cpu": - logger.warning("TensorRT requires CUDA device, switching to cuda") - 
device = torch.device("cuda") - input_tensor_calibrated = input_tensor_calibrated.to(device) - input_tensor_miscalibrated = input_tensor_miscalibrated.to(device) - - success = export_to_tensorrt(onnx_path, trt_path, input_tensor_calibrated, config, logger) - if success: - logger.info(f"TensorRT conversion successful: {trt_path}") - else: - logger.error("TensorRT conversion failed, keeping ONNX model") + if args.onnx: + export_to_onnx(model, input_tensor_calibrated, onnx_path, config, logger) + if args.tensorrt: + logger.info("Converting ONNX to TensorRT...") + + # Ensure CUDA device for TensorRT + if args.device == "cpu": + logger.warning("TensorRT requires CUDA device, switching to cuda") + device = torch.device("cuda") + input_tensor_calibrated = input_tensor_calibrated.to(device) + input_tensor_miscalibrated = input_tensor_miscalibrated.to(device) + + success = export_to_tensorrt(onnx_path, trt_path, input_tensor_calibrated, config, logger) + if success: + logger.info(f"TensorRT conversion successful: {trt_path}") + else: + logger.error("TensorRT conversion failed") # Run verification if requested if args.verify: @@ -619,10 +671,23 @@ def main(): "Running verification for miscalibrated and calibrated samples with an output array [SCORE_MISCALIBRATED, SCORE_CALIBRATED]..." ) input_tensors = {"0": input_tensor_miscalibrated, "1": input_tensor_calibrated} - run_verification(model, onnx_path, trt_path, input_tensors, logger) + # Only verify formats that were exported + onnx_path_for_verify = onnx_path if (args.onnx or (args.tensorrt and args.onnx_file)) else None + trt_path_for_verify = trt_path if args.tensorrt else None + + run_verification(model, onnx_path_for_verify, trt_path_for_verify, input_tensors, logger) logger.info("Deployment completed successfully!") + # Log what was exported + exported_formats = [] + if args.onnx: + exported_formats.append("ONNX") + if args.tensorrt: + exported_formats.append("TensorRT") + + logger.info(f"Exported formats: {', '.join(exported_formats)}") + # TODO: make deployment script inherit from awml base deploy script or use awml deploy script directly if __name__ == "__main__": diff --git a/tools/calibration_classification/README.md b/tools/calibration_classification/README.md index 33fffda8..11126fdf 100644 --- a/tools/calibration_classification/README.md +++ b/tools/calibration_classification/README.md @@ -223,7 +223,7 @@ tensorboard --logdir work_dirs --bind_all ``` ## 5. Analyze -### 5.1. Evaluation +### 5.1. Evaluation Pytorch - Evaluation @@ -234,3 +234,73 @@ python tools/calibration_classification/test.py {config_file} {checkpoint_file} ```sh python tools/calibration_classification/test.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py epoch_25.pth --out {output_file} ``` + + + +## 6. Deployment +This directory contains scripts for evaluating calibration classification models in different formats. 
+
+### 6.1 Convert to ONNX and TensorRT
+```sh
+python projects/CalibrationStatusClassification/deploy/main.py projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb8-25e_j6gen2.py checkpoint.pth --info_pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl --sample_idx 0 --device cuda:0 --work-dir /workspace/work_dirs/ --verify --onnx --tensorrt
+```
+
+### 6.1.1 Convert to ONNX
+```sh
+python projects/CalibrationStatusClassification/deploy/main.py projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb8-25e_j6gen2.py checkpoint.pth --info_pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl --sample_idx 0 --device cuda:0 --work-dir /workspace/work_dirs/ --verify --onnx
+```
+
+### 6.1.2 Convert to TensorRT
+```sh
+python projects/CalibrationStatusClassification/deploy/main.py projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb8-25e_j6gen2.py checkpoint.pth --info_pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl --sample_idx 0 --device cuda:0 --work-dir /workspace/work_dirs/ --verify --tensorrt --onnx-file model.onnx
+```
+
+### 6.2 Evaluate ONNX and TensorRT
+
+#### ONNX Model Evaluation
+```bash
+python tools/calibration_classification/evaluate_inference.py --onnx work_dirs/end2end.onnx --model-cfg projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb8-25e_j6gen2.py --info-pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl
+```
+
+#### TensorRT Model Evaluation
+```bash
+python tools/calibration_classification/evaluate_inference.py --tensorrt work_dirs/end2end.quant.engine --model-cfg projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb8-25e_j6gen2.py --info-pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl
+```
+
+### 6.3 INT8 Optimization
+
+Choose the number of samples used for calibration with your available memory in mind:
+each sample is 1860 x 2880 x 5 channels x 4 bytes (~107 MB), so 32 GB of RAM holds roughly 300 samples at most.
+
+Limit the calibration data accordingly with the `--indices` parameter:
+
+```sh
+python tools/calibration_classification/visualize_lidar_camera_projection.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb8-25e_j6gen2.py --info_pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_train.pkl --data_root data/t4dataset --output_dir /vis --save_npz --npz_output_path calibration_file.npz --indices 200
+```
+
+```sh
+DOCKER_BUILDKIT=1 docker build -t autoware-ml-calib-opt -f projects/CalibrationStatusClassification/DockerfileOpt .
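+# (The image built above bundles onnx, onnxruntime, onnx-graphsurgeon and nvidia-modelopt; the
+#  next command starts it with the repository and data directories mounted under /workspace.)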
+docker run -it --rm --gpus all --shm-size=32g --name awml-opt -p 6006:6006 -v $PWD/:/workspace -v $PWD/data:/workspace/data autoware-ml-calib-opt +# Access the optimization docker +python3 -m modelopt.onnx.quantization --onnx_path=end2end.onnx --quantize_mode=int8 --calibration_data_path=calibration_file.npz + +# After getting the end2en2.quant.onnx, you can evaluate with quant onnx or engine +``` + + + +#### Command Line Arguments + +- `--onnx`: Path to ONNX model file (mutually exclusive with `--tensorrt`) +- `--tensorrt`: Path to TensorRT engine file (mutually exclusive with `--onnx`) +- `--model-cfg`: Path to model config file (default: `projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb8-25e_j6gen2.py`) +- `--device`: Device to use for inference (`cpu` or `cuda`, default: `cpu`) +- `--log-level`: Logging level (`DEBUG`, `INFO`, `WARNING`, `ERROR`, default: `INFO`) diff --git a/tools/calibration_classification/evaluate_inference.py b/tools/calibration_classification/evaluate_inference.py new file mode 100644 index 00000000..b367e0d6 --- /dev/null +++ b/tools/calibration_classification/evaluate_inference.py @@ -0,0 +1,667 @@ +#!/usr/bin/env python3 +""" +Integrated model evaluation script for calibration classification. +Supports both ONNX and TensorRT inference modes. +Simplified version that directly uses info.pkl files instead of dataset. +""" + +import argparse +import gc +import logging +import os +import pickle +import signal +import sys +import time +from typing import Any, Dict, Optional, Tuple + +import numpy as np +import torch +import torch.nn.functional as F +from mmengine.config import Config + +# Now we can import the class directly +from autoware_ml.calibration_classification.datasets.transforms.calibration_classification_transform import ( + CalibrationClassificationTransform, +) + +# TensorRT imports (only if needed) - match original test_tensorrt.py +try: + import pycuda.autoinit + import pycuda.driver as cuda + import tensorrt as trt + + TENSORRT_AVAILABLE = True +except ImportError: + TENSORRT_AVAILABLE = False + +# Constants +LABELS = {"0": "miscalibrated", "1": "calibrated"} + + +def signal_handler(signum, frame): + """Handle segmentation faults and other signals gracefully.""" + print(f"\nReceived signal {signum}. Cleaning up...") + try: + if torch.cuda.is_available(): + torch.cuda.empty_cache() + gc.collect() + print("Cleanup completed.") + except: + pass + sys.exit(1) + + +# Register signal handlers +signal.signal(signal.SIGSEGV, signal_handler) +signal.signal(signal.SIGINT, signal_handler) + + +def setup_logging(level: str = "INFO") -> logging.Logger: + """Setup logging configuration.""" + logging.basicConfig(level=getattr(logging, level.upper()), format="%(asctime)s - %(levelname)s - %(message)s") + return logging.getLogger(__name__) + + +def load_info_pkl_data(info_pkl_path: str, sample_idx: int = 0) -> Dict[str, Any]: + """ + Load a single sample from info.pkl file. 
+ Args: + info_pkl_path: Path to the info.pkl file + sample_idx: Index of the sample to load (default: 0) + Returns: + Sample dictionary with the required structure for CalibrationClassificationTransform + Raises: + FileNotFoundError: If info.pkl file doesn't exist + ValueError: If data format is unexpected or sample index is invalid + """ + if not os.path.exists(info_pkl_path): + raise FileNotFoundError(f"Info.pkl file not found: {info_pkl_path}") + + try: + with open(info_pkl_path, "rb") as f: + info_data = pickle.load(f) + except Exception as e: + raise ValueError(f"Failed to load info.pkl file: {e}") + + # Extract samples from info.pkl + if isinstance(info_data, dict): + if "data_list" in info_data: + samples_list = info_data["data_list"] + else: + raise ValueError(f"Expected 'data_list' key in info_data, found keys: {list(info_data.keys())}") + else: + raise ValueError(f"Expected dict format, got {type(info_data)}") + + if not samples_list: + raise ValueError("No samples found in info.pkl") + + if sample_idx >= len(samples_list): + raise ValueError(f"Sample index {sample_idx} out of range (0-{len(samples_list)-1})") + + sample = samples_list[sample_idx] + + # Validate sample structure + required_keys = ["image", "lidar_points"] + if not all(key in sample for key in required_keys): + raise ValueError(f"Sample {sample_idx} has invalid structure. Required keys: {required_keys}") + + return sample + + +def load_sample_data_from_info_pkl( + info_pkl_path: str, + model_cfg: Config, + sample_idx: int = 0, + force_generate_miscalibration: bool = False, + device: str = "cpu", + transform_test: Optional[CalibrationClassificationTransform] = None, +) -> torch.Tensor: + """ + Load and preprocess sample data from info.pkl using CalibrationClassificationTransform. 
+ Args: + info_pkl_path: Path to the info.pkl file + model_cfg: Model configuration containing data_root setting + sample_idx: Index of the sample to load (default: 0) + force_generate_miscalibration: Whether to force generation of miscalibration + device: Device to load tensor on + transform_test: Pre-created test transform instance (optional) + Returns: + Preprocessed tensor ready for model inference + """ + # Load sample data from info.pkl + sample_data = load_info_pkl_data(info_pkl_path, sample_idx) + + # Get data_root from model config + data_root = model_cfg.get("data_root", None) + if data_root is None: + raise ValueError("data_root not found in model configuration") + + # Use pre-created transform or create new one (always use test mode) + if transform_test is None: + transform = CalibrationClassificationTransform( + transform_config=model_cfg.get("transform_config", None), + mode="test", + data_root=data_root, + projection_vis_dir=None, + results_vis_dir=None, + enable_augmentation=False, + ) + else: + transform = transform_test + + # Apply transform with miscalibration control + results = transform.transform(sample_data, force_generate_miscalibration=force_generate_miscalibration) + input_data_processed = results["fused_img"] # (H, W, 5) + + # Convert to tensor + input_tensor = torch.from_numpy(input_data_processed).permute(2, 0, 1).float() # (5, H, W) + input_tensor = input_tensor.unsqueeze(0).to(device) # (1, 5, H, W) + + return input_tensor + + +def create_test_transform(model_cfg: Config) -> CalibrationClassificationTransform: + """Create test transform instance once for reuse.""" + data_root = model_cfg.get("data_root", None) + if data_root is None: + raise ValueError("data_root not found in model configuration") + + transform_test = CalibrationClassificationTransform( + transform_config=model_cfg.get("transform_config", None), + mode="test", + data_root=data_root, + projection_vis_dir=None, + results_vis_dir=None, + enable_augmentation=False, + ) + + return transform_test + + +def run_onnx_inference( + onnx_path: str, + input_tensor: torch.Tensor, + logger: logging.Logger, +) -> Tuple[np.ndarray, float]: + """Run ONNX inference directly and return output and latency.""" + try: + import onnxruntime as ort + except ImportError: + raise ImportError( + "onnxruntime is required for ONNX inference. 
Please install it with: pip install onnxruntime" + ) + + # Clear GPU cache if available + if torch.cuda.is_available(): + torch.cuda.empty_cache() + + # Convert input tensor to float32 + input_tensor = input_tensor.float() + + # Debug: Print input tensor info before preprocessing + logger.debug( + f"Input tensor before preprocessing - Shape: {input_tensor.shape}, Dtype: {input_tensor.dtype}, Min: {input_tensor.min():.4f}, Max: {input_tensor.max():.4f}" + ) + + # Add batch dimension if needed (ONNX expects 4D input: batch, channels, height, width) + if input_tensor.dim() == 3: + input_tensor = input_tensor.unsqueeze(0) # Add batch dimension + logger.debug(f"Added batch dimension - Shape: {input_tensor.shape}") + + # ONNX inference with timing + providers = ["CPUExecutionProvider"] + ort_session = ort.InferenceSession(onnx_path, providers=providers) + + # Debug: Print ONNX model input info + input_name = ort_session.get_inputs()[0].name + input_shape = ort_session.get_inputs()[0].shape + input_type = ort_session.get_inputs()[0].type + logger.debug(f"ONNX model expects - Input name: {input_name}, Shape: {input_shape}, Type: {input_type}") + + onnx_input = {input_name: input_tensor.cpu().numpy().astype(np.float32)} + + start_time = time.perf_counter() + onnx_output = ort_session.run(None, onnx_input)[0] + end_time = time.perf_counter() + onnx_latency = (end_time - start_time) * 1000 + + logger.info(f"ONNX inference latency: {onnx_latency:.2f} ms") + + # Ensure onnx_output is numpy array + if not isinstance(onnx_output, np.ndarray): + logger.error(f"Unexpected ONNX output type: {type(onnx_output)}") + return None, 0.0 + + return onnx_output, onnx_latency + + +def run_tensorrt_inference(engine, input_tensor: torch.Tensor, logger: logging.Logger) -> Tuple[np.ndarray, float]: + """Run TensorRT inference and return output with timing.""" + if not TENSORRT_AVAILABLE: + raise ImportError("TensorRT and PyCUDA are required for TensorRT inference. 
Please install them.") + + context = None + stream = None + start = None + end = None + d_input = None + d_output = None + + try: + context = engine.create_execution_context() + stream = cuda.Stream() + start = cuda.Event() + end = cuda.Event() + + # Get tensor names and shapes + input_name, output_name = None, None + for i in range(engine.num_io_tensors): + tensor_name = engine.get_tensor_name(i) + if engine.get_tensor_mode(tensor_name) == trt.TensorIOMode.INPUT: + input_name = tensor_name + elif engine.get_tensor_mode(tensor_name) == trt.TensorIOMode.OUTPUT: + output_name = tensor_name + + if input_name is None or output_name is None: + raise RuntimeError("Could not find input/output tensor names") + + # Prepare arrays + input_np = input_tensor.numpy().astype(np.float32) + if not input_np.flags["C_CONTIGUOUS"]: + input_np = np.ascontiguousarray(input_np) + + # Validate input shape + expected_shape = engine.get_tensor_shape(input_name) + if input_np.shape != tuple(expected_shape): + logger.warning(f"Input shape mismatch: expected {expected_shape}, got {input_np.shape}") + # Try to reshape if possible + if len(input_np.shape) == len(expected_shape): + try: + input_np = input_np.reshape(expected_shape) + logger.info(f"Reshaped input to {input_np.shape}") + except Exception as e: + raise RuntimeError(f"Cannot reshape input from {input_np.shape} to {expected_shape}: {e}") + elif len(input_np.shape) == len(expected_shape) - 1 and expected_shape[0] == 1: + # Add batch dimension if missing + try: + input_np = input_np.reshape(expected_shape) + logger.info(f"Added batch dimension to input: {input_np.shape}") + except Exception as e: + raise RuntimeError( + f"Cannot add batch dimension to input from {input_np.shape} to {expected_shape}: {e}" + ) + else: + raise RuntimeError( + f"Input shape mismatch: expected {expected_shape}, got {input_np.shape}. Please ensure input has correct batch dimension." + ) + + context.set_input_shape(input_name, input_np.shape) + output_shape = context.get_tensor_shape(output_name) + output_np = np.empty(output_shape, dtype=np.float32) + if not output_np.flags["C_CONTIGUOUS"]: + output_np = np.ascontiguousarray(output_np) + + # Allocate GPU memory + d_input = cuda.mem_alloc(input_np.nbytes) + d_output = cuda.mem_alloc(output_np.nbytes) + + # Set tensor addresses + context.set_tensor_address(input_name, int(d_input)) + context.set_tensor_address(output_name, int(d_output)) + + # Run inference with timing + cuda.memcpy_htod_async(d_input, input_np, stream) + start.record(stream) + context.execute_async_v3(stream_handle=stream.handle) + end.record(stream) + cuda.memcpy_dtoh_async(output_np, d_output, stream) + stream.synchronize() + + latency = end.time_since(start) + return output_np, latency + + except Exception as e: + logger.error(f"TensorRT inference failed: {e}") + raise + finally: + # Cleanup with better error handling + try: + if d_input is not None: + d_input.free() + except Exception as e: + logger.warning(f"Failed to free input memory: {e}") + + try: + if d_output is not None: + d_output.free() + except Exception as e: + logger.warning(f"Failed to free output memory: {e}") + + # Note: Don't try to free stream, start, end, or context as they are managed by TensorRT + # and may cause issues if freed manually + + +def load_tensorrt_engine(engine_path: str, logger: logging.Logger): + """Load TensorRT engine from file.""" + if not TENSORRT_AVAILABLE: + raise ImportError("TensorRT is required for TensorRT inference. 
Please install it.") + + trt_logger = trt.Logger(trt.Logger.WARNING) + + try: + with open(engine_path, "rb") as f: + engine_bytes = f.read() + engine = trt.Runtime(trt_logger).deserialize_cuda_engine(engine_bytes) + if engine is None: + raise RuntimeError("Failed to deserialize TensorRT engine") + return engine + except Exception as e: + logger.error(f"Error loading TensorRT engine: {e}") + logger.error("This might be due to TensorRT version incompatibility.") + logger.error("Please rebuild the TensorRT engine with the current TensorRT version.") + raise + + +def evaluate_model( + model_path: str, + model_type: str, + model_cfg_path: str, + info_pkl_path: str, + logger: logging.Logger, + device: str = "cpu", + num_samples: int = 10, + verbose: bool = False, +) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: + """Evaluate model using info.pkl data and return predictions, ground truth, probabilities, and latencies.""" + + # Load model config + model_cfg = Config.fromfile(model_cfg_path) + + # Load info.pkl data + try: + with open(info_pkl_path, "rb") as f: + info_data = pickle.load(f) + + if "data_list" not in info_data: + raise ValueError("Expected 'data_list' key in info.pkl") + + samples_list = info_data["data_list"] + logger.info(f"Loaded {len(samples_list)} samples from info.pkl") + + # Limit number of samples for evaluation + num_samples = min(num_samples, len(samples_list)) + logger.info(f"Evaluating {num_samples} samples") + + except Exception as e: + logger.error(f"Failed to load info.pkl: {e}") + raise + + # Create transform instances once + transform_test = create_test_transform(model_cfg) + + # Initialize inference engine + if model_type == "onnx": + logger.info(f"Using ONNX model: {model_path}") + inference_func = lambda input_tensor: run_onnx_inference(model_path, input_tensor, logger) + elif model_type == "tensorrt": + logger.info(f"Using TensorRT model: {model_path}") + engine = load_tensorrt_engine(model_path, logger) + inference_func = lambda input_tensor: run_tensorrt_inference(engine, input_tensor, logger) + else: + raise ValueError(f"Unsupported model type: {model_type}") + + # Lists to store results + all_predictions = [] + all_ground_truth = [] + all_probabilities = [] + all_latencies = [] + + # Evaluate samples + for sample_idx in range(num_samples): + if sample_idx % 5 == 0: + logger.info(f"Processing sample {sample_idx + 1}/{num_samples}") + + try: + # Load sample data directly from info.pkl + input_tensor_calibrated = load_sample_data_from_info_pkl( + info_pkl_path, + model_cfg, + sample_idx, + force_generate_miscalibration=False, + device=device, + transform_test=transform_test, + ) + input_tensor_miscalibrated = load_sample_data_from_info_pkl( + info_pkl_path, + model_cfg, + sample_idx, + force_generate_miscalibration=True, + device=device, + transform_test=transform_test, + ) + + # Test both calibrated and miscalibrated samples + test_samples = [ + (input_tensor_calibrated, 1), # calibrated sample + (input_tensor_miscalibrated, 0), # miscalibrated sample + ] + + for input_tensor, gt_label in test_samples: + # Debug: Print input tensor info only in verbose mode + if verbose: + logger.info(f"Sample {sample_idx + 1} (GT={gt_label}) input tensor:") + logger.info(f" Dtype: {input_tensor.dtype}") + + # Run inference + output_np, latency = inference_func(input_tensor) + + if output_np is None: + logger.error(f"Failed to get output for sample {sample_idx}") + continue + + # Convert logits to probabilities + logits = torch.from_numpy(output_np) + probabilities 
= F.softmax(logits, dim=-1) + predicted_class = torch.argmax(probabilities, dim=-1).item() + confidence = probabilities.max().item() + + # Store results + all_predictions.append(predicted_class) + all_ground_truth.append(gt_label) + all_probabilities.append(probabilities.cpu().numpy()) + all_latencies.append(latency) + + # Print sample results only in verbose mode + if verbose: + logger.info( + f"Sample {sample_idx + 1} (GT={gt_label}): Pred={predicted_class}, Confidence={confidence:.4f}, Latency={latency:.2f}ms" + ) + + # Clear GPU memory periodically for TensorRT + if model_type == "tensorrt" and sample_idx % 10 == 0: + if torch.cuda.is_available(): + torch.cuda.empty_cache() + gc.collect() + + except Exception as e: + logger.error(f"Error processing sample {sample_idx}: {e}") + continue + + return ( + np.array(all_predictions), + np.array(all_ground_truth), + np.array(all_probabilities), + np.array(all_latencies), + ) + + +def print_results( + all_predictions: np.ndarray, + all_ground_truth: np.ndarray, + all_probabilities: np.ndarray, + all_latencies: np.ndarray, + model_type: str, + logger: logging.Logger, +): + """Print evaluation results.""" + if len(all_predictions) == 0: + logger.error("No samples were processed successfully.") + return + + correct_predictions = (all_predictions == all_ground_truth).sum() + total_samples = len(all_predictions) + accuracy = correct_predictions / total_samples + avg_latency = np.mean(all_latencies) + + # Print results + logger.info(f"\n{'='*50}") + logger.info(f"{model_type.upper()} Model Evaluation Results") + logger.info(f"{'='*50}") + logger.info(f"Total samples: {total_samples}") + logger.info(f"Correct predictions: {correct_predictions}") + logger.info(f"Accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)") + logger.info(f"Average latency: {avg_latency:.2f} ms") + + # Calculate per-class accuracy + unique_classes = np.unique(all_ground_truth) + logger.info(f"\nPer-class accuracy:") + for cls in unique_classes: + cls_mask = all_ground_truth == cls + cls_correct = (all_predictions[cls_mask] == all_ground_truth[cls_mask]).sum() + cls_total = cls_mask.sum() + cls_accuracy = cls_correct / cls_total if cls_total > 0 else 0 + logger.info( + f" Class {cls} ({LABELS[str(cls)]}): {cls_correct}/{cls_total} = {cls_accuracy:.4f} ({cls_accuracy*100:.2f}%)" + ) + + # Calculate average confidence + avg_confidence = np.mean([prob.max() for prob in all_probabilities]) + logger.info(f"\nAverage confidence: {avg_confidence:.4f}") + + # Calculate latency statistics + min_latency = np.min(all_latencies) + max_latency = np.max(all_latencies) + std_latency = np.std(all_latencies) + + logger.info(f"\nLatency Statistics:") + logger.info(f" Average latency: {avg_latency:.2f} ms") + logger.info(f" Min latency: {min_latency:.2f} ms") + logger.info(f" Max latency: {max_latency:.2f} ms") + logger.info(f" Std latency: {std_latency:.2f} ms") + + # Show confusion matrix + logger.info(f"\nConfusion Matrix:") + logger.info(f"Predicted ->") + logger.info(f"Actual 0 1") + for true_cls in unique_classes: + row = [] + for pred_cls in unique_classes: + count = ((all_ground_truth == true_cls) & (all_predictions == pred_cls)).sum() + row.append(f"{count:4d}") + logger.info(f" {true_cls} {' '.join(row)}") + + logger.info(f"\n{model_type.upper()} model evaluation completed successfully!") + logger.info(f"Model accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)") + logger.info(f"Average latency: {avg_latency:.2f} ms") + + +def main(): + """Main function.""" + parser = 
argparse.ArgumentParser(description="Evaluate calibration classification model using info.pkl") + parser.add_argument("--onnx", type=str, help="Path to ONNX model file") + parser.add_argument("--tensorrt", type=str, help="Path to TensorRT engine file") + parser.add_argument( + "--model-cfg", + type=str, + default="projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb8-25e_j6gen2.py", + help="Path to model config file", + ) + parser.add_argument( + "--info-pkl", + type=str, + required=True, + help="Path to info.pkl file containing calibration data", + ) + parser.add_argument( + "--num-samples", + type=int, + default=10, + help="Number of samples to evaluate (default: 10)", + ) + parser.add_argument( + "--device", type=str, default="cpu", choices=["cpu", "cuda"], help="Device to use for inference" + ) + parser.add_argument( + "--log-level", type=str, default="INFO", choices=["DEBUG", "INFO", "WARNING", "ERROR"], help="Logging level" + ) + parser.add_argument( + "--verbose", action="store_true", help="Enable verbose logging for detailed sample information" + ) + + args = parser.parse_args() + + # Validate arguments + if not args.onnx and not args.tensorrt: + parser.error("Either --onnx or --tensorrt must be specified") + + if args.onnx and args.tensorrt: + parser.error("Only one of --onnx or --tensorrt can be specified") + + # Setup logging + logger = setup_logging(args.log_level) + + try: + # Determine model type and path + if args.onnx: + model_type = "onnx" + model_path = args.onnx + else: + model_type = "tensorrt" + model_path = args.tensorrt + + # Check if files exist + if not os.path.exists(model_path): + raise FileNotFoundError(f"Model file not found: {model_path}") + if not os.path.exists(args.model_cfg): + raise FileNotFoundError(f"Model config file not found: {args.model_cfg}") + if not os.path.exists(args.info_pkl): + raise FileNotFoundError(f"Info.pkl file not found: {args.info_pkl}") + + logger.info(f"Starting {model_type.upper()} model evaluation...") + logger.info(f"Model path: {model_path}") + logger.info(f"Model config: {args.model_cfg}") + logger.info(f"Info.pkl: {args.info_pkl}") + logger.info(f"Device: {args.device}") + logger.info(f"Number of samples: {args.num_samples}") + logger.info(f"Verbose logging: {args.verbose}") + + # Evaluate model + all_predictions, all_ground_truth, all_probabilities, all_latencies = evaluate_model( + model_path, model_type, args.model_cfg, args.info_pkl, logger, args.device, args.num_samples, args.verbose + ) + + # Print results + print_results(all_predictions, all_ground_truth, all_probabilities, all_latencies, model_type, logger) + + # Cleanup + if torch.cuda.is_available(): + torch.cuda.empty_cache() + gc.collect() + + logger.info("Script completed successfully!") + + except Exception as e: + logger.error(f"Error in main execution: {e}") + # Cleanup on error + try: + if torch.cuda.is_available(): + torch.cuda.empty_cache() + gc.collect() + except: + pass + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/tools/calibration_classification/visualize_lidar_camera_projection.py b/tools/calibration_classification/visualize_lidar_camera_projection.py index d0739ff8..78e692df 100644 --- a/tools/calibration_classification/visualize_lidar_camera_projection.py +++ b/tools/calibration_classification/visualize_lidar_camera_projection.py @@ -6,6 +6,7 @@ import traceback from typing import Any, Dict, List, Optional, Union +import numpy as np from mmengine.config import Config from mmengine.logging import MMLogger @@ 
-14,22 +15,24 @@ ) -class CalibrationVisualizer: +class CalibrationToolkit: """ - A comprehensive tool for visualizing LiDAR-camera calibration data. + A comprehensive tool for processing LiDAR-camera calibration data. This class provides functionality to load calibration data from info.pkl files - and generate visualizations using the CalibrationClassificationTransform. + and optionally generate visualizations or save results as NPZ files. Attributes: transform: The calibration classification transform instance data_root: Root directory for data files - output_dir: Directory for saving visualizations + output_dir: Directory for saving visualizations (only used if visualize=True) logger: MMLogger instance for logging + collected_results: List to store processed results for NPZ saving """ def __init__(self, model_cfg: Config, data_root: Optional[str] = None, output_dir: Optional[str] = None): """ - Initialize the CalibrationVisualizer. + Initialize the CalibrationToolkit. Args: + model_cfg: Model configuration data_root: Root directory for data files. If None, absolute paths are used. output_dir: Directory for saving visualizations. If None, no visualizations are saved. """ @@ -37,7 +40,8 @@ def __init__(self, model_cfg: Config, data_root: Optional[str] = None, output_di self.data_root = data_root self.output_dir = output_dir self.transform = None - self.logger = MMLogger.get_instance(name="calibration_visualizer") + self.logger = MMLogger.get_instance(name="calibration_toolkit") + self.collected_results = [] self._initialize_transform() def _initialize_transform(self) -> None: @@ -46,12 +50,15 @@ def _initialize_transform(self) -> None: if transform_config is None: raise ValueError("transform_config not found in model configuration") + # Only set projection_vis_dir if output_dir is provided (for visualization) + projection_vis_dir = self.output_dir if self.output_dir else None + self.transform = CalibrationClassificationTransform( transform_config=transform_config, mode="test", undistort=True, data_root=self.data_root, - projection_vis_dir=self.output_dir, + projection_vis_dir=projection_vis_dir, results_vis_dir=None, enable_augmentation=False, ) @@ -110,12 +117,15 @@ def _validate_sample_structure(self, sample: Dict[str, Any]) -> bool: required_keys = ["image", "lidar_points"] return all(key in sample for key in required_keys) - def process_single_sample(self, sample: Dict[str, Any], sample_idx: int) -> Optional[Dict[str, Any]]: + def process_single_sample( + self, sample: Dict[str, Any], sample_idx: int, save_npz: bool = False + ) -> Optional[Dict[str, Any]]: """ Process a single sample using the transform. Args: sample: Sample dictionary to process. sample_idx: Index of the sample for logging purposes. + save_npz: Whether to collect result for NPZ saving. Returns: Transformed sample data if successful, None otherwise. 
""" @@ -125,6 +135,11 @@ def process_single_sample(self, sample: Dict[str, Any], sample_idx: int) -> Opti return None result = self.transform(sample) + + # Store the result for NPZ saving only if requested + if save_npz: + self.collected_results.append(result["fused_img"]) + self.logger.info(f"Successfully processed sample {sample_idx}") return result @@ -133,12 +148,46 @@ def process_single_sample(self, sample: Dict[str, Any], sample_idx: int) -> Opti self.logger.debug(traceback.format_exc()) return None - def visualize_samples(self, info_pkl_path: str, indices: Optional[List[int]] = None) -> None: + def save_npz_file(self, output_path: str) -> None: """ - Visualize multiple samples from info.pkl file. + Save all collected results as an NPZ file with the correct structure. + Args: + output_path: Path where to save the NPZ file. + """ + if not self.collected_results: + self.logger.warning("No results collected to save as NPZ") + return + + try: + # Convert list of arrays to a single array with shape (number_of_samples, 5, 1860, 2880) + # Each result['fused_img'] has shape (1860, 2880, 5), so we need to transpose + input_array = np.array([result.transpose(2, 0, 1) for result in self.collected_results], dtype=np.float32) + + # Save as NPZ file + np.savez(output_path, input=input_array) + + self.logger.info(f"Saved NPZ file with shape {input_array.shape} to {output_path}") + + except Exception as e: + self.logger.error(f"Failed to save NPZ file: {e}") + self.logger.debug(traceback.format_exc()) + + def process_samples( + self, + info_pkl_path: str, + indices: Optional[List[int]] = None, + visualize: bool = False, + save_npz: bool = False, + npz_output_path: Optional[str] = None, + ) -> None: + """ + Process multiple samples from info.pkl file. Args: info_pkl_path: Path to the info.pkl file. indices: Optional list of sample indices to process. If None, all samples are processed. + visualize: Whether to generate visualizations (requires output_dir to be set). + save_npz: Whether to collect results for NPZ saving. + npz_output_path: Path for saving NPZ file (only used if save_npz=True). 
""" try: samples_list = self.load_info_pkl(info_pkl_path) @@ -147,6 +196,9 @@ def visualize_samples(self, info_pkl_path: str, indices: Optional[List[int]] = N self.logger.warning("No samples found in info.pkl") return + # Clear previous results + self.collected_results = [] + # Determine which samples to process if indices is not None: samples_to_process = [samples_list[i] for i in indices if i < len(samples_list)] @@ -154,23 +206,41 @@ def visualize_samples(self, info_pkl_path: str, indices: Optional[List[int]] = N samples_to_process = samples_list self.logger.info(f"Processing {len(samples_to_process)} samples") + if visualize: + self.logger.info("Visualization enabled") + if save_npz: + self.logger.info("NPZ saving enabled") # Process each sample for i, sample in enumerate(samples_to_process): - self.process_single_sample(sample, i + 1) + self.process_single_sample(sample, i + 1, save_npz=save_npz) + + # Save NPZ file if requested + if save_npz and npz_output_path: + self.save_npz_file(npz_output_path) self.logger.info("Finished processing all samples") except Exception as e: - self.logger.error(f"Failed to visualize samples: {e}") + self.logger.error(f"Failed to process samples: {e}") self.logger.debug(traceback.format_exc()) - def visualize_single_sample(self, info_pkl_path: str, sample_idx: int) -> Optional[Dict[str, Any]]: + def process_single_sample_from_file( + self, + info_pkl_path: str, + sample_idx: int, + visualize: bool = False, + save_npz: bool = False, + npz_output_path: Optional[str] = None, + ) -> Optional[Dict[str, Any]]: """ - Visualize a single sample from info.pkl file. + Process a single sample from info.pkl file. Args: info_pkl_path: Path to the info.pkl file. sample_idx: Index of the sample to process (0-based). + visualize: Whether to generate visualizations (requires output_dir to be set). + save_npz: Whether to collect results for NPZ saving. + npz_output_path: Path for saving NPZ file (only used if save_npz=True). Returns: Transformed sample data if successful, None otherwise. 
""" @@ -182,17 +252,27 @@ def visualize_single_sample(self, info_pkl_path: str, sample_idx: int) -> Option return None self.logger.info(f"Processing sample {sample_idx} from {len(samples_list)} total samples") + if visualize: + self.logger.info("Visualization enabled") + if save_npz: + self.logger.info("NPZ saving enabled") + + # Clear previous results for single sample processing + self.collected_results = [] sample = samples_list[sample_idx] - result = self.process_single_sample(sample, sample_idx) + result = self.process_single_sample(sample, sample_idx, save_npz=save_npz) - if result and self.output_dir: + if result and visualize and self.output_dir: self.logger.info(f"Visualizations saved to directory: {self.output_dir}") + # Save NPZ file if requested + if save_npz and npz_output_path and result: + self.save_npz_file(npz_output_path) return result except Exception as e: - self.logger.error(f"Failed to visualize single sample: {e}") + self.logger.error(f"Failed to process single sample: {e}") self.logger.debug(traceback.format_exc()) return None @@ -204,25 +284,42 @@ def create_argument_parser() -> argparse.ArgumentParser: """ examples = """ Examples: - # Process all samples - python visualize_calibration_and_image.py --info_pkl data/info.pkl --data_root data/ --output_dir /vis - # Process specific sample - python visualize_calibration_and_image.py --info_pkl data/info.pkl --data_root data/ --output_dir /vis --sample_idx 0 - # Process specific indices - python visualize_calibration_and_image.py --info_pkl data/info.pkl --data_root data/ --output_dir /vis --indices 0 1 2 + # Process all samples without visualization or NPZ saving + python toolkit.py model_config.py --info_pkl data/info.pkl --data_root data/ + # Process all samples with visualization + python toolkit.py model_config.py --info_pkl data/info.pkl --data_root data/ --output_dir /vis --visualize + # Process all samples and save as NPZ + python toolkit.py model_config.py --info_pkl data/info.pkl --data_root data/ --npz_output_path results.npz --save_npz + # Process all samples with both visualization and NPZ saving + python toolkit.py model_config.py --info_pkl data/info.pkl --data_root data/ --output_dir /vis --visualize --npz_output_path results.npz --save_npz + # Process specific sample with visualization + python toolkit.py model_config.py --info_pkl data/info.pkl --data_root data/ --output_dir /vis --visualize --sample_idx 0 + # Process specific indices with NPZ saving + python toolkit.py model_config.py --info_pkl data/info.pkl --data_root data/ --save_npz --npz_output_path results.npz --indices 0 1 2 + # Process first 5 samples (indices 0, 1, 2, 3, 4) with both features + python toolkit.py model_config.py --info_pkl data/info.pkl --data_root data/ --output_dir /vis --visualize --save_npz --npz_output_path results.npz --indices 5 """ parser = argparse.ArgumentParser( - description="Visualize LiDAR points projected on camera images using info.pkl", + description="Process LiDAR-camera calibration data with optional visualization and NPZ saving", formatter_class=argparse.RawDescriptionHelpFormatter, epilog=examples, ) + parser.add_argument("model_cfg", help="model config path") parser.add_argument("--info_pkl", required=True, help="Path to info.pkl file containing calibration data") - parser.add_argument("--output_dir", help="Output directory for saving visualizations") + parser.add_argument("--output_dir", help="Output directory for saving visualizations (only used with --visualize)") parser.add_argument("--data_root", 
help="Root directory for data files (images, point clouds, etc.)") parser.add_argument("--sample_idx", type=int, help="Specific sample index to process (0-based)") - parser.add_argument("--indices", nargs="+", type=int, help="Specific sample indices to process (0-based)") + parser.add_argument( + "--indices", + nargs="+", + type=int, + help="Specific sample indices to process (0-based), or a single number N to process indices 0 to N-1", + ) + parser.add_argument("--visualize", action="store_true", help="Enable visualization (requires --output_dir)") + parser.add_argument("--save_npz", action="store_true", help="Enable NPZ saving (requires --npz_output_path)") + parser.add_argument("--npz_output_path", help="Path for saving NPZ file (only used with --save_npz)") parser.add_argument( "--show_point_details", action="store_true", help="Show detailed point cloud field information" ) @@ -232,26 +329,58 @@ def create_argument_parser() -> argparse.ArgumentParser: def main() -> None: """ - Main entry point for the calibration visualization script. - Parses command line arguments and runs the appropriate visualization mode. - Supports both single sample and batch processing modes. + Main entry point for the calibration toolkit script. + Parses command line arguments and runs the appropriate processing mode. + Supports both single sample and batch processing modes with optional features. """ parser = create_argument_parser() args = parser.parse_args() + # Validate argument combinations + if args.visualize and not args.output_dir: + parser.error("--visualize requires --output_dir to be specified") + + if args.save_npz and not args.npz_output_path: + parser.error("--save_npz requires --npz_output_path to be specified") + # Load model configuration model_cfg = Config.fromfile(args.model_cfg) - # Initialize visualizer - visualizer = CalibrationVisualizer(model_cfg=model_cfg, data_root=args.data_root, output_dir=args.output_dir) + # Initialize toolkit + toolkit = CalibrationToolkit(model_cfg=model_cfg, data_root=args.data_root, output_dir=args.output_dir) + + # Process indices argument + processed_indices = None + if args.indices is not None: + if len(args.indices) == 1: + # If only one number provided, treat it as range 0 to N-1 + n = args.indices[0] + processed_indices = list(range(n)) + toolkit.logger.info(f"Processing indices 0 to {n-1} (total: {n} samples)") + else: + # If multiple numbers provided, use them as specific indices + processed_indices = args.indices + toolkit.logger.info(f"Processing specific indices: {processed_indices}") - # Run appropriate visualization mode + # Run appropriate processing mode if args.sample_idx is not None: # Process single sample - visualizer.visualize_single_sample(args.info_pkl, args.sample_idx) + toolkit.process_single_sample_from_file( + args.info_pkl, + args.sample_idx, + visualize=args.visualize, + save_npz=args.save_npz, + npz_output_path=args.npz_output_path, + ) else: # Process all samples or specific indices - visualizer.visualize_samples(args.info_pkl, args.indices) + toolkit.process_samples( + args.info_pkl, + processed_indices, + visualize=args.visualize, + save_npz=args.save_npz, + npz_output_path=args.npz_output_path, + ) if __name__ == "__main__": From de3400aabb99b3c9898b81cace6e08c6c4f73629 Mon Sep 17 00:00:00 2001 From: vividf Date: Fri, 26 Sep 2025 17:08:38 +0900 Subject: [PATCH 02/19] chore: remove force miscalibration Signed-off-by: vividf --- tools/calibration_classification/evaluate_inference.py | 6 +----- 1 file changed, 1 insertion(+), 5 
deletions(-) diff --git a/tools/calibration_classification/evaluate_inference.py b/tools/calibration_classification/evaluate_inference.py index b367e0d6..95f7fd1a 100644 --- a/tools/calibration_classification/evaluate_inference.py +++ b/tools/calibration_classification/evaluate_inference.py @@ -113,7 +113,6 @@ def load_sample_data_from_info_pkl( info_pkl_path: str, model_cfg: Config, sample_idx: int = 0, - force_generate_miscalibration: bool = False, device: str = "cpu", transform_test: Optional[CalibrationClassificationTransform] = None, ) -> torch.Tensor: @@ -123,7 +122,6 @@ def load_sample_data_from_info_pkl( info_pkl_path: Path to the info.pkl file model_cfg: Model configuration containing data_root setting sample_idx: Index of the sample to load (default: 0) - force_generate_miscalibration: Whether to force generation of miscalibration device: Device to load tensor on transform_test: Pre-created test transform instance (optional) Returns: @@ -151,7 +149,7 @@ def load_sample_data_from_info_pkl( transform = transform_test # Apply transform with miscalibration control - results = transform.transform(sample_data, force_generate_miscalibration=force_generate_miscalibration) + results = transform.transform(sample_data) input_data_processed = results["fused_img"] # (H, W, 5) # Convert to tensor @@ -428,7 +426,6 @@ def evaluate_model( info_pkl_path, model_cfg, sample_idx, - force_generate_miscalibration=False, device=device, transform_test=transform_test, ) @@ -436,7 +433,6 @@ def evaluate_model( info_pkl_path, model_cfg, sample_idx, - force_generate_miscalibration=True, device=device, transform_test=transform_test, ) From b25012a3e2cfc28e2a7a60a6b42bb66b89532b84 Mon Sep 17 00:00:00 2001 From: vividf Date: Wed, 1 Oct 2025 14:01:21 +0900 Subject: [PATCH 03/19] chore: fix documentation and refactor export to tensorrt Signed-off-by: vividf --- .../deploy/main.py | 72 +++++++++++-------- tools/calibration_classification/README.md | 6 +- 2 files changed, 47 insertions(+), 31 deletions(-) diff --git a/projects/CalibrationStatusClassification/deploy/main.py b/projects/CalibrationStatusClassification/deploy/main.py index 8a19b20a..2c136afd 100644 --- a/projects/CalibrationStatusClassification/deploy/main.py +++ b/projects/CalibrationStatusClassification/deploy/main.py @@ -127,7 +127,7 @@ def load_sample_data_from_info_pkl( transform = CalibrationClassificationTransform( transform_config=transform_config, mode="test", - lidar_range=model_cfg.get("lidar_range", 128.0), + max_depth=model_cfg.get("max_depth", 128.0), dilation_size=model_cfg.get("dilation_size", 1), undistort=True, miscalibration_probability=miscalibration_probability, @@ -263,42 +263,59 @@ def _optimize_onnx_model(onnx_path: str, logger: logging.Logger) -> None: def export_to_tensorrt( onnx_path: str, output_path: str, input_tensor: torch.Tensor, config: DeploymentConfig, logger: logging.Logger ) -> bool: - """Export ONNX model to TensorRT format.""" + """Export ONNX model to a TensorRT engine. + + Notes: + - Always uses EXPLICIT_BATCH. + - If strongly_typed=True, disallows precision-changing builder flags (FP16/BF16/FP8/INT8/INT4/FP4). + Only TF32 is permitted (controls Tensor Cores for FP32, not dtype). 
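+
+    Example (illustrative `common_config` of the deploy config; these are the keys read here,
+    the values are placeholders rather than a recommendation):
+        common_config = dict(
+            max_workspace_size=1 << 30,
+            fp16_mode=False,       # ignored when strongly_typed=True
+            tf32_mode=False,       # allow TF32 Tensor Cores for FP32 layers
+            strongly_typed=False,  # when True, dtypes come from the ONNX graph (Q/DQ, Cast)
+        )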
+ """ settings = config.tensorrt_settings + strongly_typed = settings.get("strongly_typed", False) + fp16_mode = settings.get("fp16_mode", False) + tf32_mode = bool(settings.get("tf32_mode", False)) # Initialize TensorRT - TRT_LOGGER = trt.Logger(trt.Logger.WARNING) - trt.init_libnvinfer_plugins(TRT_LOGGER, "") - runtime = trt.Runtime(TRT_LOGGER) - builder = trt.Builder(TRT_LOGGER) + trt_logger = trt.Logger(trt.Logger.WARNING) + trt.init_libnvinfer_plugins(trt_logger, "") - # Create network and config - strongly_typed = settings.get("strongly_typed", False) + builder = trt.Builder(trt_logger) + builder_config = builder.create_builder_config() + builder_config.set_memory_pool_limit(pool=trt.MemoryPoolType.WORKSPACE, pool_size=settings["max_workspace_size"]) + # Create network and config + flags = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) if strongly_typed: - flags = (1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)) | ( - 1 << int(trt.NetworkDefinitionCreationFlag.STRONGLY_TYPED) - ) - network = builder.create_network(flags) + flags |= 1 << int(trt.NetworkDefinitionCreationFlag.STRONGLY_TYPED) logger.info("Using strongly typed TensorRT network creation") + network = builder.create_network(flags) + + def _warn_if(flag_name: str, enabled: bool): + if enabled: + logger.warning( + f"'{flag_name}' requested but will be IGNORED because strongly_typed=True. " + "Strongly typed mode must derive dtypes from the model/Q-DQ/Cast." + ) + + if strongly_typed: + # Disallow precision-changing flags in strongly typed mode + _warn_if("FP16", fp16_mode) else: - network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)) - logger.info("Using standard TensorRT network creation (EXPLICIT_BATCH only)") - config_trt = builder.create_builder_config() - config_trt.set_memory_pool_limit(pool=trt.MemoryPoolType.WORKSPACE, pool_size=settings["max_workspace_size"]) + if fp16_mode: + builder_config.set_flag(trt.BuilderFlag.FP16) + logger.info("BuilderFlag.FP16 enabled") - # Enable FP16 if specified - if settings["fp16_mode"]: - config_trt.set_flag(trt.BuilderFlag.FP16) - logger.info("FP16 mode enabled") + if tf32_mode and hasattr(trt.BuilderFlag, "TF32"): + builder_config.set_flag(trt.BuilderFlag.TF32) + logger.info("BuilderFlag.TF32 enabled") # Setup optimization profile profile = builder.create_optimization_profile() _configure_input_shapes(profile, input_tensor, settings["model_inputs"], logger) - config_trt.add_optimization_profile(profile) + builder_config.add_optimization_profile(profile) # Parse ONNX model - parser = trt.OnnxParser(network, TRT_LOGGER) + parser = trt.OnnxParser(network, trt_logger) with open(onnx_path, "rb") as f: if not parser.parse(f.read()): _log_parser_errors(parser, logger) @@ -307,7 +324,7 @@ def export_to_tensorrt( # Build engine logger.info("Building TensorRT engine...") - serialized_engine = builder.build_serialized_network(network, config_trt) + serialized_engine = builder.build_serialized_network(network, builder_config) if serialized_engine is None: logger.error("Failed to build TensorRT engine") @@ -317,9 +334,8 @@ def export_to_tensorrt( with open(output_path, "wb") as f: f.write(serialized_engine) - workspace_gb = settings["max_workspace_size"] / (1024**3) logger.info(f"TensorRT engine saved to {output_path}") - logger.info(f"Engine max workspace size: {workspace_gb:.2f} GB") + logger.info(f"Engine max workspace size: {settings['max_workspace_size'] / (1024**3):.2f} GB") return True @@ -425,9 +441,9 @@ def 
run_tensorrt_inference( torch.cuda.empty_cache() # Load TensorRT engine - TRT_LOGGER = trt.Logger(trt.Logger.WARNING) - trt.init_libnvinfer_plugins(TRT_LOGGER, "") - runtime = trt.Runtime(TRT_LOGGER) + trt_logger = trt.Logger(trt.Logger.WARNING) + trt.init_libnvinfer_plugins(trt_logger, "") + runtime = trt.Runtime(trt_logger) with open(tensorrt_path, "rb") as f: engine = runtime.deserialize_cuda_engine(f.read()) diff --git a/tools/calibration_classification/README.md b/tools/calibration_classification/README.md index 11126fdf..6c515f66 100644 --- a/tools/calibration_classification/README.md +++ b/tools/calibration_classification/README.md @@ -242,17 +242,17 @@ This directory contains scripts for evaluating calibration classification models ### 6.1 Convert to ONNX and TensorRT ```sh -python projects/CalibrationStatusClassification/deploy/main.py projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb8-25e_j6gen2.py checkpoint.pth --info_pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl --sample_idx 0 --device cuda:0 --work-dir /workspace/work_dirs/ --verify --onnx --tensorrt +python projects/CalibrationStatusClassification/deploy/main.py projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py checkpoint.pth --info_pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl --sample_idx 0 --device cuda:0 --work-dir /workspace/work_dirs/ --verify --onnx --tensorrt ``` ### 6.1.1 Convert to ONNX ```sh -python projects/CalibrationStatusClassification/deploy/main.py projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb8-25e_j6gen2.py checkpoint.pth --info_pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl --sample_idx 0 --device cuda:0 --work-dir /workspace/work_dirs/ --verify --onnx +python projects/CalibrationStatusClassification/deploy/main.py projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py checkpoint.pth --info_pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl --sample_idx 0 --device cuda:0 --work-dir /workspace/work_dirs/ --verify --onnx ``` ### 6.1.2 Convert to TensorRT ```sh -python projects/CalibrationStatusClassification/deploy/main.py projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb8-25e_j6gen2.py checkpoint.pth --info_pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl --sample_idx 0 --device cuda:0 --work-dir /workspace/work_dirs/ --verify --tensorrt --onnx-file model.onnx +python projects/CalibrationStatusClassification/deploy/main.py projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py checkpoint.pth --info_pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl --sample_idx 0 --device cuda:0 --work-dir /workspace/work_dirs/ --verify --tensorrt --onnx-file model.onnx ``` From dbe27cc0928c571d16a98baa98caed597af92f84 Mon Sep 17 00:00:00 2001 From: vividf Date: Wed, 1 Oct 2025 15:30:22 +0900 Subject: [PATCH 04/19] chore: update readme Signed-off-by: vividf --- 
.../configs/deploy/resnet18_5ch.py | 3 ++- tools/calibration_classification/README.md | 14 ++++++++++---- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py b/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py index a32b3823..b16c2346 100644 --- a/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py +++ b/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py @@ -4,7 +4,8 @@ type="tensorrt", common_config=dict( max_workspace_size=1 << 30, - fp16_mode=True, + fp16_mode=False, + tf32_mode=False, strongly_typed=False, ), model_inputs=[ diff --git a/tools/calibration_classification/README.md b/tools/calibration_classification/README.md index 6c515f66..4ebcd141 100644 --- a/tools/calibration_classification/README.md +++ b/tools/calibration_classification/README.md @@ -240,21 +240,27 @@ python tools/calibration_classification/test.py projects/CalibrationStatusClassi ## 6. Deployment This directory contains scripts for evaluating calibration classification models in different formats. -### 6.1 Convert to ONNX and TensorRT +### 6.1 Convert from Pytorch to ONNX and TensorRT ```sh python projects/CalibrationStatusClassification/deploy/main.py projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py checkpoint.pth --info_pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl --sample_idx 0 --device cuda:0 --work-dir /workspace/work_dirs/ --verify --onnx --tensorrt ``` -### 6.1.1 Convert to ONNX +### 6.1.1 Convert from Pytorch to ONNX ```sh python projects/CalibrationStatusClassification/deploy/main.py projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py checkpoint.pth --info_pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl --sample_idx 0 --device cuda:0 --work-dir /workspace/work_dirs/ --verify --onnx ``` -### 6.1.2 Convert to TensorRT +### 6.1.2 Convert from ONNX to TensorRT ```sh -python projects/CalibrationStatusClassification/deploy/main.py projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py checkpoint.pth --info_pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl --sample_idx 0 --device cuda:0 --work-dir /workspace/work_dirs/ --verify --tensorrt --onnx-file model.onnx +# Without verification (checkpoint not needed) +python projects/CalibrationStatusClassification/deploy/main.py projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py dummy.pth --info_pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl --sample_idx 0 --device cuda:0 --work-dir /workspace/work_dirs/ --tensorrt --onnx-file model.onnx + +# With verification (checkpoint required for reference) +python projects/CalibrationStatusClassification/deploy/main.py projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py checkpoint.pth --info_pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl --sample_idx 0 --device cuda:0 --work-dir /workspace/work_dirs/ --verify --tensorrt 
--onnx-file model.onnx ``` +**Note:** When converting from ONNX to TensorRT without `--verify`, the checkpoint file is not actually used (you can use a dummy filename). However, with `--verify`, a valid checkpoint is required to compare outputs. + From 216d75a5475b39067c7ce71ba552bfc4073479b4 Mon Sep 17 00:00:00 2001 From: vividf Date: Wed, 1 Oct 2025 17:09:15 +0900 Subject: [PATCH 05/19] chore: fix config Signed-off-by: vividf --- .../configs/deploy/resnet18_5ch.py | 112 ++++++- .../deploy/main.py | 275 +++++++++++------- tools/calibration_classification/README.md | 170 +++++++---- 3 files changed, 384 insertions(+), 173 deletions(-) diff --git a/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py b/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py index b16c2346..729b0ed7 100644 --- a/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py +++ b/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py @@ -1,35 +1,117 @@ +# projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py +# Deployment configuration for ResNet18 5-channel calibration classification model +# +# 1. export: Controls export behavior (mode, verification, device, output directory) +# 2. runtime_io: Runtime I/O configuration (data paths, sample selection) +# 3. Backend configs: ONNX and TensorRT specific settings + +# ============================================================================== +# Export Configuration +# ============================================================================== +export = dict( + mode="both", # Export mode: "onnx", "trt", or "both" + # - "onnx": Export to ONNX only + # - "trt": Convert to TensorRT only (requires onnx_file in runtime_io) + # - "both": Export to ONNX then convert to TensorRT + verify=True, # Run verification comparing PyTorch/ONNX/TRT outputs + device="cuda:0", # Device for export (use "cuda:0" or "cpu") + # Note: TensorRT always requires CUDA, will auto-switch if needed + work_dir="/workspace/work_dirs", # Output directory for exported models +) + +# ============================================================================== +# Runtime I/O Configuration +# ============================================================================== +runtime_io = dict( + info_pkl="data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl", + sample_idx=0, # Sample index to use for export and verification + onnx_file=None, # Optional: Path to existing ONNX file + # - If provided with mode="trt", will convert this ONNX to TensorRT + # - If None with mode="both", will export ONNX first then convert +) + +# ============================================================================== +# Codebase Configuration +# ============================================================================== codebase_config = dict(type="mmpretrain", task="Classification", model_type="end2end") +# ============================================================================== +# TensorRT Backend Configuration +# ============================================================================== backend_config = dict( type="tensorrt", common_config=dict( - max_workspace_size=1 << 30, - fp16_mode=False, - tf32_mode=False, - strongly_typed=False, + max_workspace_size=1 << 30, # 1 GiB workspace for TensorRT + # Precision policy controls how TensorRT handles numerical precision: + # - "auto": TensorRT automatically selects precision (default) + # - "fp16": Enable FP16 mode for faster inference with slight accuracy 
trade-off + # - "fp32_tf32": Enable TF32 mode (Tensor Cores for FP32 operations on Ampere+) + # - "strongly_typed": Enforce strict type checking (prevents automatic precision conversion) + precision_policy="fp32_tf32", ), + # Dynamic shape configuration for different input resolutions model_inputs=[ dict( input_shapes=dict( input=dict( - min_shape=[1, 5, 1080, 1920], - opt_shape=[1, 5, 1860, 2880], - max_shape=[1, 5, 2160, 3840], + min_shape=[1, 5, 1080, 1920], # Minimum supported input shape + opt_shape=[1, 5, 1860, 2880], # Optimal shape for performance tuning + max_shape=[1, 5, 2160, 3840], # Maximum supported input shape ), ) ) ], ) +# ============================================================================== +# ONNX Export Configuration +# ============================================================================== onnx_config = dict( type="onnx", - export_params=True, - keep_initializers_as_inputs=False, - opset_version=16, - do_constant_folding=True, - save_file="end2end.onnx", - input_names=["input"], - output_names=["output"], - dynamic_axes={"input": {0: "batch_size", 2: "height", 3: "width"}, "output": {0: "batch_size"}}, + export_params=True, # Include trained parameters in the model + keep_initializers_as_inputs=False, # Don't expose initializers as inputs + opset_version=16, # ONNX opset version (16 recommended for modern features) + do_constant_folding=True, # Optimize by folding constant expressions + save_file="end2end.onnx", # Output filename + input_names=["input"], # Name of input tensor + output_names=["output"], # Name of output tensor + # Dynamic axes for variable input dimensions + dynamic_axes={ + "input": {0: "batch_size", 2: "height", 3: "width"}, # Batch, H, W are dynamic + "output": {0: "batch_size"}, # Batch is dynamic + }, input_shape=None, ) + +# ============================================================================== +# Usage Examples +# ============================================================================== +# +# Basic usage (export to both ONNX and TensorRT): +# python projects/CalibrationStatusClassification/deploy/main.py \ +# projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \ +# projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ +# checkpoint.pth +# +# Export to ONNX only: +# Set export.mode = "onnx" in this config, then run the command above +# +# Convert existing ONNX to TensorRT: +# 1) Set export.mode = "trt" +# 2) Set runtime_io.onnx_file = "/path/to/existing/model.onnx" +# 3) Run the command above +# +# Override config settings via command line: +# python projects/CalibrationStatusClassification/deploy/main.py \ +# \ +# --work-dir ./custom_output \ +# --device cuda:1 \ +# --info-pkl /path/to/custom/info.pkl \ +# --sample-idx 5 +# +# Available precision policies: +# - auto: Let TensorRT decide (default, good balance) +# - fp16: Faster inference, ~2x speedup, small accuracy loss +# - fp32_tf32: Use Tensor Cores on Ampere+ GPUs for FP32 +# - explicit_int8: INT8 quantization (requires calibration dataset) +# - strongly_typed: Strict type enforcement, no automatic conversion diff --git a/projects/CalibrationStatusClassification/deploy/main.py b/projects/CalibrationStatusClassification/deploy/main.py index 2c136afd..bcfadc3b 100644 --- a/projects/CalibrationStatusClassification/deploy/main.py +++ b/projects/CalibrationStatusClassification/deploy/main.py @@ -41,6 +41,15 @@ EXPECTED_CHANNELS = 5 # RGB + Depth + Intensity LABELS = {"0": "miscalibrated", "1": "calibrated"} 
+# Precision policy mapping +PRECISION_POLICIES = { + "auto": {}, # No special flags, TensorRT decides + "fp16": {"FP16": True}, + "fp32_tf32": {"TF32": True}, # TF32 for FP32 operations + "explicit_int8": {"INT8": True}, + "strongly_typed": {"STRONGLY_TYPED": True}, # Network creation flag +} + def load_info_pkl_data(info_pkl_path: str, sample_idx: int = 0) -> Dict[str, Any]: """ @@ -150,36 +159,105 @@ def load_sample_data_from_info_pkl( class DeploymentConfig: - """Configuration container for deployment settings.""" + """Enhanced configuration container for deployment settings with validation.""" def __init__(self, deploy_cfg: Config): self.deploy_cfg = deploy_cfg - self.backend_config = deploy_cfg.get("backend_config", {}) - self.onnx_config = deploy_cfg.get("onnx_config", {}) + self._validate_config() + + def _validate_config(self) -> None: + """Validate configuration structure and required fields.""" + if "export" not in self.deploy_cfg: + raise ValueError( + "Missing 'export' section in deploy config. " + "Please update your config to the new format with 'export', 'runtime_io' sections." + ) + + if "runtime_io" not in self.deploy_cfg: + raise ValueError("Missing 'runtime_io' section in deploy config.") + + # Validate export mode + valid_modes = ["onnx", "trt", "both"] + mode = self.export_config.get("mode", "both") + if mode not in valid_modes: + raise ValueError(f"Invalid export mode '{mode}'. Must be one of {valid_modes}") + + # Validate precision policy if present + backend_cfg = self.deploy_cfg.get("backend_config", {}) + common_cfg = backend_cfg.get("common_config", {}) + precision_policy = common_cfg.get("precision_policy", "auto") + if precision_policy not in PRECISION_POLICIES: + raise ValueError( + f"Invalid precision_policy '{precision_policy}'. 
" f"Must be one of {list(PRECISION_POLICIES.keys())}" + ) + + @property + def export_config(self) -> Dict: + """Get export configuration (mode, verify, device, work_dir).""" + return self.deploy_cfg.get("export", {}) + + @property + def runtime_io_config(self) -> Dict: + """Get runtime I/O configuration (info_pkl, sample_idx, onnx_file).""" + return self.deploy_cfg.get("runtime_io", {}) + + @property + def should_export_onnx(self) -> bool: + """Check if ONNX export is requested.""" + mode = self.export_config.get("mode", "both") + return mode in ["onnx", "both"] + + @property + def should_export_tensorrt(self) -> bool: + """Check if TensorRT export is requested.""" + mode = self.export_config.get("mode", "both") + return mode in ["trt", "both"] + + @property + def should_verify(self) -> bool: + """Check if verification is requested.""" + return self.export_config.get("verify", False) + + @property + def device(self) -> str: + """Get device for export.""" + return self.export_config.get("device", "cuda:0") + + @property + def work_dir(self) -> str: + """Get working directory.""" + return self.export_config.get("work_dir", os.getcwd()) @property def onnx_settings(self) -> Dict: """Get ONNX export settings.""" + onnx_config = self.deploy_cfg.get("onnx_config", {}) return { - "opset_version": self.onnx_config.get("opset_version", 11), - "do_constant_folding": self.onnx_config.get("do_constant_folding", True), - "input_names": self.onnx_config.get("input_names", ["input"]), - "output_names": self.onnx_config.get("output_names", ["output"]), - "dynamic_axes": self.onnx_config.get("dynamic_axes"), - "export_params": self.onnx_config.get("export_params", True), - "keep_initializers_as_inputs": self.onnx_config.get("keep_initializers_as_inputs", False), - "save_file": self.onnx_config.get("save_file", "calibration_classifier.onnx"), + "opset_version": onnx_config.get("opset_version", 16), + "do_constant_folding": onnx_config.get("do_constant_folding", True), + "input_names": onnx_config.get("input_names", ["input"]), + "output_names": onnx_config.get("output_names", ["output"]), + "dynamic_axes": onnx_config.get("dynamic_axes"), + "export_params": onnx_config.get("export_params", True), + "keep_initializers_as_inputs": onnx_config.get("keep_initializers_as_inputs", False), + "save_file": onnx_config.get("save_file", "calibration_classifier.onnx"), } @property def tensorrt_settings(self) -> Dict: - """Get TensorRT export settings.""" - common_config = self.backend_config.get("common_config", {}) + """Get TensorRT export settings with precision policy support.""" + backend_config = self.deploy_cfg.get("backend_config", {}) + common_config = backend_config.get("common_config", {}) + + # Get precision policy + precision_policy = common_config.get("precision_policy", "auto") + policy_flags = PRECISION_POLICIES.get(precision_policy, {}) + return { "max_workspace_size": common_config.get("max_workspace_size", DEFAULT_WORKSPACE_SIZE), - "fp16_mode": common_config.get("fp16_mode", False), - "model_inputs": self.backend_config.get("model_inputs", []), - "strongly_typed": common_config.get("strongly_typed", False), + "precision_policy": precision_policy, + "policy_flags": policy_flags, + "model_inputs": backend_config.get("model_inputs", []), } @@ -189,20 +267,16 @@ def parse_args() -> argparse.Namespace: description="Export CalibrationStatusClassification model to ONNX/TensorRT.", formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) - parser.add_argument("deploy_cfg", help="deploy config path") - 
parser.add_argument("model_cfg", help="model config path") - parser.add_argument("checkpoint", help="model checkpoint path") - parser.add_argument("--info_pkl", required=True, help="info.pkl file path containing calibration data") - parser.add_argument("--sample_idx", type=int, default=0, help="sample index from info.pkl (default: 0)") - parser.add_argument("--work-dir", default=os.getcwd(), help="output directory") - parser.add_argument("--device", default="cpu", help="device for conversion") - parser.add_argument("--log-level", default="INFO", choices=list(logging._nameToLevel.keys()), help="logging level") - parser.add_argument("--verify", action="store_true", help="verify model outputs") - parser.add_argument("--onnx", action="store_true", help="export to ONNX format") - parser.add_argument("--tensorrt", action="store_true", help="export to TensorRT format") - parser.add_argument( - "--onnx-file", help="path to existing ONNX file (required when --tensorrt is specified without --onnx)" - ) + parser.add_argument("deploy_cfg", help="Deploy config path") + parser.add_argument("model_cfg", help="Model config path") + parser.add_argument("checkpoint", help="Model checkpoint path") + + # Optional overrides + parser.add_argument("--work-dir", help="Override output directory from config") + parser.add_argument("--device", help="Override device from config") + parser.add_argument("--log-level", default="INFO", choices=list(logging._nameToLevel.keys()), help="Logging level") + parser.add_argument("--info-pkl", help="Override info.pkl path from config") + parser.add_argument("--sample-idx", type=int, help="Override sample index from config") return parser.parse_args() @@ -263,17 +337,12 @@ def _optimize_onnx_model(onnx_path: str, logger: logging.Logger) -> None: def export_to_tensorrt( onnx_path: str, output_path: str, input_tensor: torch.Tensor, config: DeploymentConfig, logger: logging.Logger ) -> bool: - """Export ONNX model to a TensorRT engine. - - Notes: - - Always uses EXPLICIT_BATCH. - - If strongly_typed=True, disallows precision-changing builder flags (FP16/BF16/FP8/INT8/INT4/FP4). - Only TF32 is permitted (controls Tensor Cores for FP32, not dtype). - """ + """Export ONNX model to a TensorRT engine with precision policy support.""" settings = config.tensorrt_settings - strongly_typed = settings.get("strongly_typed", False) - fp16_mode = settings.get("fp16_mode", False) - tf32_mode = bool(settings.get("tf32_mode", False)) + precision_policy = settings["precision_policy"] + policy_flags = settings["policy_flags"] + + logger.info(f"Building TensorRT engine with precision policy: {precision_policy}") # Initialize TensorRT trt_logger = trt.Logger(trt.Logger.WARNING) @@ -283,31 +352,23 @@ def export_to_tensorrt( builder_config = builder.create_builder_config() builder_config.set_memory_pool_limit(pool=trt.MemoryPoolType.WORKSPACE, pool_size=settings["max_workspace_size"]) - # Create network and config + # Create network with appropriate flags flags = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - if strongly_typed: + + # Handle strongly typed flag (network creation flag, not builder flag) + if policy_flags.get("STRONGLY_TYPED"): flags |= 1 << int(trt.NetworkDefinitionCreationFlag.STRONGLY_TYPED) logger.info("Using strongly typed TensorRT network creation") - network = builder.create_network(flags) - def _warn_if(flag_name: str, enabled: bool): - if enabled: - logger.warning( - f"'{flag_name}' requested but will be IGNORED because strongly_typed=True. 
" - "Strongly typed mode must derive dtypes from the model/Q-DQ/Cast." - ) - - if strongly_typed: - # Disallow precision-changing flags in strongly typed mode - _warn_if("FP16", fp16_mode) - else: - if fp16_mode: - builder_config.set_flag(trt.BuilderFlag.FP16) - logger.info("BuilderFlag.FP16 enabled") + network = builder.create_network(flags) - if tf32_mode and hasattr(trt.BuilderFlag, "TF32"): - builder_config.set_flag(trt.BuilderFlag.TF32) - logger.info("BuilderFlag.TF32 enabled") + # Apply precision flags to builder config + for flag_name, enabled in policy_flags.items(): + if flag_name == "STRONGLY_TYPED": + continue # Already handled as network creation flag above + if enabled and hasattr(trt.BuilderFlag, flag_name): + builder_config.set_flag(getattr(trt.BuilderFlag, flag_name)) + logger.info(f"BuilderFlag.{flag_name} enabled") # Setup optimization profile profile = builder.create_optimization_profile() @@ -595,21 +656,6 @@ def main(): args = parse_args() logger = setup_logging(args.log_level) - # Check if at least one export format is specified - if not args.onnx and not args.tensorrt: - logger.error("Please specify at least one export format: --onnx or --tensorrt") - return - - # Check if TensorRT is requested but no ONNX file is provided and ONNX export is not requested - if args.tensorrt and not args.onnx and not args.onnx_file: - logger.error( - "TensorRT export requires either --onnx (to export ONNX first) or --onnx-file (to use existing ONNX file)" - ) - return - - # Setup - mmengine.mkdir_or_exist(osp.abspath(args.work_dir)) - # Load configurations logger.info(f"Loading deploy config from: {args.deploy_cfg}") deploy_cfg = Config.fromfile(args.deploy_cfg) @@ -618,58 +664,69 @@ def main(): logger.info(f"Loading model config from: {args.model_cfg}") model_cfg = Config.fromfile(args.model_cfg) - # Setup file paths + # Get settings from config with CLI overrides + work_dir = args.work_dir if args.work_dir else config.work_dir + device = args.device if args.device else config.device + info_pkl = args.info_pkl if args.info_pkl else config.runtime_io_config.get("info_pkl") + sample_idx = args.sample_idx if args.sample_idx is not None else config.runtime_io_config.get("sample_idx", 0) + existing_onnx = config.runtime_io_config.get("onnx_file") + + # Validate required parameters + if not info_pkl: + logger.error("info_pkl path must be provided either in config or via --info-pkl") + return + + # Setup working directory + mmengine.mkdir_or_exist(osp.abspath(work_dir)) + logger.info(f"Working directory: {work_dir}") + logger.info(f"Device: {device}") + logger.info(f"Export mode: {config.export_config.get('mode', 'both')}") + + # Determine export paths onnx_path = None trt_path = None - trt_file = None - if args.onnx: + if config.should_export_onnx: onnx_settings = config.onnx_settings - onnx_path = osp.join(args.work_dir, onnx_settings["save_file"]) + onnx_path = osp.join(work_dir, onnx_settings["save_file"]) - if args.tensorrt: - if args.onnx_file: - # Use provided ONNX file path - onnx_path = args.onnx_file + if config.should_export_tensorrt: + # Use existing ONNX if provided, otherwise use the one we'll export + if existing_onnx and not config.should_export_onnx: + onnx_path = existing_onnx if not osp.exists(onnx_path): logger.error(f"Provided ONNX file does not exist: {onnx_path}") return logger.info(f"Using existing ONNX file: {onnx_path}") - elif args.onnx: - # ONNX will be exported first, use the path from config - onnx_settings = config.onnx_settings - onnx_path = 
osp.join(args.work_dir, onnx_settings["save_file"]) - else: - # This should not happen due to validation above, but just in case - logger.error("No ONNX file path available for TensorRT conversion") + elif not onnx_path: + # Need ONNX for TensorRT but neither export nor existing file specified + logger.error("TensorRT export requires ONNX file. Set mode='both' or provide onnx_file in config.") return # Set TensorRT output path onnx_settings = config.onnx_settings trt_file = onnx_settings["save_file"].replace(".onnx", ".engine") - trt_path = osp.join(args.work_dir, trt_file) + trt_path = osp.join(work_dir, trt_file) + # Load model logger.info(f"Loading model from checkpoint: {args.checkpoint}") - device = torch.device(args.device) + device = torch.device(device) model = get_model(model_cfg, args.checkpoint, device=device) # Load sample data - logger.info(f"Loading sample data from info.pkl: {args.info_pkl}") - input_tensor_calibrated = load_sample_data_from_info_pkl( - args.info_pkl, model_cfg, 0.0, args.sample_idx, device=args.device - ) - input_tensor_miscalibrated = load_sample_data_from_info_pkl( - args.info_pkl, model_cfg, 1.0, args.sample_idx, device=args.device - ) + logger.info(f"Loading sample data from info.pkl: {info_pkl}") + input_tensor_calibrated = load_sample_data_from_info_pkl(info_pkl, model_cfg, 0.0, sample_idx, device=device) + input_tensor_miscalibrated = load_sample_data_from_info_pkl(info_pkl, model_cfg, 1.0, sample_idx, device=device) # Export models - if args.onnx: + if config.should_export_onnx: export_to_onnx(model, input_tensor_calibrated, onnx_path, config, logger) - if args.tensorrt: + + if config.should_export_tensorrt: logger.info("Converting ONNX to TensorRT...") # Ensure CUDA device for TensorRT - if args.device == "cpu": + if not device.startswith("cuda"): logger.warning("TensorRT requires CUDA device, switching to cuda") device = torch.device("cuda") input_tensor_calibrated = input_tensor_calibrated.to(device) @@ -682,14 +739,15 @@ def main(): logger.error("TensorRT conversion failed") # Run verification if requested - if args.verify: + if config.should_verify: logger.info( "Running verification for miscalibrated and calibrated samples with an output array [SCORE_MISCALIBRATED, SCORE_CALIBRATED]..." 
) input_tensors = {"0": input_tensor_miscalibrated, "1": input_tensor_calibrated} - # Only verify formats that were exported - onnx_path_for_verify = onnx_path if (args.onnx or (args.tensorrt and args.onnx_file)) else None - trt_path_for_verify = trt_path if args.tensorrt else None + + # Only verify formats that were exported or provided + onnx_path_for_verify = onnx_path if (config.should_export_onnx or existing_onnx) else None + trt_path_for_verify = trt_path if config.should_export_tensorrt else None run_verification(model, onnx_path_for_verify, trt_path_for_verify, input_tensors, logger) @@ -697,14 +755,13 @@ def main(): # Log what was exported exported_formats = [] - if args.onnx: + if config.should_export_onnx: exported_formats.append("ONNX") - if args.tensorrt: + if config.should_export_tensorrt: exported_formats.append("TensorRT") logger.info(f"Exported formats: {', '.join(exported_formats)}") -# TODO: make deployment script inherit from awml base deploy script or use awml deploy script directly if __name__ == "__main__": main() diff --git a/tools/calibration_classification/README.md b/tools/calibration_classification/README.md index 4ebcd141..7e2ce683 100644 --- a/tools/calibration_classification/README.md +++ b/tools/calibration_classification/README.md @@ -5,14 +5,14 @@ It contains training, evaluation, and visualization for Calibration classificati - [Support priority](https://github.com/tier4/AWML/blob/main/docs/design/autoware_ml_design.md#support-priority): Tier B - Supported dataset - - [] NuScenes + - [ ] NuScenes - [x] T4dataset - Other supported feature - [ ] Add unit test ## 1. Setup environment -Please follow the [installation tutorial](/docs/tutorial/tutorial_detection_3d.md)to set up the environment. +Please follow the [installation tutorial](/docs/tutorial/tutorial_detection_3d.md) to set up the environment. ## 2. Prepare dataset @@ -26,7 +26,7 @@ Prepare the dataset you use. 
docker run -it --rm --gpus --shm-size=64g --name awml -p 6006:6006 -v $PWD/:/workspace -v $PWD/data:/workspace/data autoware-ml ``` -- Make info files for T4dataset X2 Gen2 +- Create info files for T4dataset X2 Gen2 ```sh python tools/calibration_classification/create_data_t4dataset.py --config /workspace/autoware_ml/configs/calibration_classification/dataset/t4dataset/gen2_base.py --version gen2_base --root_path ./data/t4dataset -o ./data/t4dataset/calibration_info/ @@ -124,19 +124,19 @@ Each file contains calibration information including: ```sh # Process all samples from info.pkl -python tools/calibration_classification/visualize_lidar_camera_projection.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py --info_pkl data/info.pkl --data_root data/ --output_dir ./work_dirs/calibration_visualization +python tools/calibration_classification/toolkit.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py --info_pkl data/info.pkl --data_root data/ --output_dir ./work_dirs/calibration_visualization # Process specific sample -python tools/calibration_classification/visualize_lidar_camera_projection.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py --info_pkl data/info.pkl --data_root data/ --output_dir ./work_dirs/calibration_visualization --sample_idx 0 +python tools/calibration_classification/toolkit.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py --info_pkl data/info.pkl --data_root data/ --output_dir ./work_dirs/calibration_visualization --sample_idx 0 # Process specific indices -python tools/calibration_classification/visualize_lidar_camera_projection.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py --info_pkl data/info.pkl --data_root data/ --output_dir ./work_dirs/calibration_visualization --indices 0 1 2 +python tools/calibration_classification/toolkit.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py --info_pkl data/info.pkl --data_root data/ --output_dir ./work_dirs/calibration_visualization --indices 0 1 2 ``` - For T4dataset visualization: ```sh -python tools/calibration_classification/visualize_lidar_camera_projection.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py --info_pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl --data_root data/t4dataset --output_dir ./work_dirs/calibration_visualization +python tools/calibration_classification/toolkit.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py --info_pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl --data_root data/t4dataset --output_dir ./work_dirs/calibration_visualization ``` ## 3. Visualization Settings (During training, validation, testing) @@ -174,7 +174,7 @@ dict( ### 3.3. Usage Strategy -**Training/Validataion Phase:** +**Training/Validation Phase:** - Disable visualization for efficiency: `projection_vis_dir=None`, `results_vis_dir=None` - Focus on model training performance @@ -188,8 +188,8 @@ dict( - `results_vis_dir/`: Classification results with predicted labels ## 4. Train -### 4.1. Environment set up -Set `CUBLAS_WORKSPACE_CONFIG` for the deterministic behavior, plese check this [nvidia doc](https://docs.nvidia.com/cuda/cublas/index.html#results-reproducibility) for more info +### 4.1. 
Environment Setup +Set `CUBLAS_WORKSPACE_CONFIG` for deterministic behavior, please check this [nvidia doc](https://docs.nvidia.com/cuda/cublas/index.html#results-reproducibility) for more info ```sh export CUBLAS_WORKSPACE_CONFIG=:4096:8 @@ -214,16 +214,16 @@ python tools/calibration_classification/train.py projects/CalibrationStatusClass docker run -it --rm --gpus --name autoware-ml --shm-size=64g -d -v $PWD/:/workspace -v $PWD/data:/workspace/data autoware-ml bash -c 'python tools/calibration_classification/train.py {config_file}' ``` -### 4.3. Log analysis by Tensorboard +### 4.3. Log Analysis with TensorBoard -- Run the TensorBoard and navigate to http://127.0.0.1:6006/ +- Run TensorBoard and navigate to http://127.0.0.1:6006/ ```sh tensorboard --logdir work_dirs --bind_all ``` -## 5. Analyze -### 5.1. Evaluation Pytorch +## 5. Evaluation +### 5.1. PyTorch Model Evaluation - Evaluation @@ -238,56 +238,138 @@ python tools/calibration_classification/test.py projects/CalibrationStatusClassi ## 6. Deployment -This directory contains scripts for evaluating calibration classification models in different formats. -### 6.1 Convert from Pytorch to ONNX and TensorRT -```sh -python projects/CalibrationStatusClassification/deploy/main.py projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py checkpoint.pth --info_pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl --sample_idx 0 --device cuda:0 --work-dir /workspace/work_dirs/ --verify --onnx --tensorrt +The deployment system supports exporting models to ONNX and TensorRT formats with a unified configuration approach. + +### 6.1. Deployment Configuration + +The deployment uses a **config-first approach** where all settings are defined in the deployment config file. The config has three main sections: + +#### Configuration Structure + +```python +# Export configuration - controls export behavior +export = dict( + mode="both", # "onnx", "trt", or "both" + verify=True, # Run verification comparing outputs + device="cuda:0", # Device for export + work_dir="/workspace/work_dirs", # Output directory +) + +# Runtime I/O configuration - data paths +runtime_io = dict( + info_pkl="data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl", + sample_idx=0, # Sample for export/verification + onnx_file=None, # Optional: existing ONNX path +) + +# TensorRT backend configuration +backend_config = dict( + type="tensorrt", + common_config=dict( + max_workspace_size=1 << 30, # 1 GiB + precision_policy="auto", # Precision policy (see below) + ), + model_inputs=[...], # Dynamic shape configuration +) ``` -### 6.1.1 Convert from Pytorch to ONNX +#### Precision Policies + +Control TensorRT inference precision with the `precision_policy` parameter: + +| Policy | Description | Use Case | +|--------|-------------|----------| +| `auto` | TensorRT decides automatically | Default, FP32 | +| `fp16` | Half-precision floating point | 2x faster, small accuracy loss | +| `fp32_tf32` | Tensor Cores for FP32 | Ampere+ GPUs, FP32 speedup | +| `strongly_typed` | Enforces explicit type constraints, preserves QDQ (Quantize-Dequantize) nodes | INT8 quantized models from QAT or PTQ with explicit Q/DQ ops | + +### 6.2. 
Basic Usage + +#### Export to Both ONNX and TensorRT + +All settings come from the config file: + ```sh -python projects/CalibrationStatusClassification/deploy/main.py projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py checkpoint.pth --info_pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl --sample_idx 0 --device cuda:0 --work-dir /workspace/work_dirs/ --verify --onnx +python projects/CalibrationStatusClassification/deploy/main.py \ + projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \ + projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ + checkpoint.pth ``` -### 6.1.2 Convert from ONNX to TensorRT -```sh -# Without verification (checkpoint not needed) -python projects/CalibrationStatusClassification/deploy/main.py projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py dummy.pth --info_pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl --sample_idx 0 --device cuda:0 --work-dir /workspace/work_dirs/ --tensorrt --onnx-file model.onnx +#### Override Config Settings via CLI -# With verification (checkpoint required for reference) -python projects/CalibrationStatusClassification/deploy/main.py projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py checkpoint.pth --info_pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl --sample_idx 0 --device cuda:0 --work-dir /workspace/work_dirs/ --verify --tensorrt --onnx-file model.onnx +```sh +python projects/CalibrationStatusClassification/deploy/main.py \ + projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \ + projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ + checkpoint.pth \ + --work-dir ./custom_output \ + --device cuda:1 \ + --info-pkl /path/to/custom/info.pkl \ + --sample-idx 5 ``` -**Note:** When converting from ONNX to TensorRT without `--verify`, the checkpoint file is not actually used (you can use a dummy filename). However, with `--verify`, a valid checkpoint is required to compare outputs. +### 6.3. Export Modes + +#### Export to ONNX Only + +In config file, set `export.mode = "onnx"`. + + +#### Convert Existing ONNX to TensorRT + +In config file: +- Set `export.mode = "trt"` +- Set `runtime_io.onnx_file = "/path/to/model.onnx"` + +**Note:** The checkpoint is used for verification. To skip verification, set `export.verify = False` in config. -### 6.2 Evaluate ONNX and tensorrt -##### ONNX Model Evaluation + + +### 6.4. 
Evaluate ONNX and TensorRT Models + +#### ONNX Model Evaluation + ```bash -python tools/calibration_classification/evaluate_inference.py --onnx work_dirs/end2end.onnx --model-cfg projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb8-25e_j6gen2.py --info-pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl +python tools/calibration_classification/evaluate_inference.py \ + --onnx work_dirs/end2end.onnx \ + --model-cfg projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb8-25e_j6gen2.py \ + --info-pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl ``` -##### TensorRT Model Evaluation +#### TensorRT Model Evaluation + ```bash -python tools/calibration_classification/evaluate_inference.py --tensorrt work_dirs/end2end.quant.engine --model-cfg projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb8-25e_j6gen2.py --info-pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl +python tools/calibration_classification/evaluate_inference.py \ + --tensorrt work_dirs/end2end.engine \ + --model-cfg projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb8-25e_j6gen2.py \ + --info-pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl ``` +#### Command Line Arguments +- `--onnx`: Path to ONNX model file (mutually exclusive with `--tensorrt`) +- `--tensorrt`: Path to TensorRT engine file (mutually exclusive with `--onnx`) +- `--model-cfg`: Path to model config file +- `--info-pkl`: Path to dataset info file +- `--device`: Device to use for inference (`cpu` or `cuda`, default: `cpu`) +- `--log-level`: Logging level (`DEBUG`, `INFO`, `WARNING`, `ERROR`, default: `INFO`) +### 6.5. INT8 Quantization -### 6.3 INT8 Optimization - -Set the number of image you want for calibration. Note that you need to consider the size of your memory. -For 32 GB, the most you can do is 1860 x 2880 x 5 x 4 Bytes / 32 GB +Set the number of images you want for calibration. Note that you need to consider the size of your memory. 
+For 32 GB memory, the maximum you can use is approximately: 1860 x 2880 x 5 x 4 Bytes / 32 GB -Thus, please limit the calibration data with the indices parameters +Therefore, please limit the calibration data using the indices parameter ```python -python tools/calibration_classification/visualize_lidar_camera_projection.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb8-25e_j6gen2.py --info_pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_train.pkl --data_root data/t4dataset --output_dir /vis --npz_output_path calibration_file.npz --indices 200 +python tools/calibration_classification/toolkit.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb8-25e_j6gen2.py --info_pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_train.pkl --data_root data/t4dataset --output_dir /vis --npz_output_path calibration_file.npz --indices 200 ``` @@ -298,15 +380,5 @@ docker run -it --rm --gpus all --shm-size=32g --name awml-opt -p 6006:6006 -v $P # Access the optimization docker python3 -m modelopt.onnx.quantization --onnx_path=end2end.onnx --quantize_mode=int8 --calibration_data_path=calibration_file.npz -# After getting the end2en2.quant.onnx, you can evaluate with quant onnx or engine +# After getting the end2end.quant.onnx, you can evaluate with quant onnx or conver to int8 engine by following section 6.3 ``` - - - -#### Command Line Arguments - -- `--onnx`: Path to ONNX model file (mutually exclusive with `--tensorrt`) -- `--tensorrt`: Path to TensorRT engine file (mutually exclusive with `--onnx`) -- `--model-cfg`: Path to model config file (default: `projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb8-25e_j6gen2.py`) -- `--device`: Device to use for inference (`cpu` or `cuda`, default: `cpu`) -- `--log-level`: Logging level (`DEBUG`, `INFO`, `WARNING`, `ERROR`, default: `INFO`) From b11f2a57200006eaf142e1184089ca1c2d2270f6 Mon Sep 17 00:00:00 2001 From: vividf Date: Wed, 1 Oct 2025 17:38:22 +0900 Subject: [PATCH 06/19] chore: fix readme Signed-off-by: vividf --- .../deploy/main.py | 2 +- tools/calibration_classification/README.md | 6 +- .../evaluate_inference.py | 76 +++++++++++-------- 3 files changed, 47 insertions(+), 37 deletions(-) diff --git a/projects/CalibrationStatusClassification/deploy/main.py b/projects/CalibrationStatusClassification/deploy/main.py index bcfadc3b..32084dc4 100644 --- a/projects/CalibrationStatusClassification/deploy/main.py +++ b/projects/CalibrationStatusClassification/deploy/main.py @@ -726,7 +726,7 @@ def main(): logger.info("Converting ONNX to TensorRT...") # Ensure CUDA device for TensorRT - if not device.startswith("cuda"): + if device.type != "cuda": logger.warning("TensorRT requires CUDA device, switching to cuda") device = torch.device("cuda") input_tensor_calibrated = input_tensor_calibrated.to(device) diff --git a/tools/calibration_classification/README.md b/tools/calibration_classification/README.md index 7e2ce683..9de32e1a 100644 --- a/tools/calibration_classification/README.md +++ b/tools/calibration_classification/README.md @@ -339,7 +339,7 @@ In config file: ```bash python tools/calibration_classification/evaluate_inference.py \ --onnx work_dirs/end2end.onnx \ - --model-cfg projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb8-25e_j6gen2.py \ + --model-cfg projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ --info-pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl ``` @@ 
-348,7 +348,7 @@ python tools/calibration_classification/evaluate_inference.py \ ```bash python tools/calibration_classification/evaluate_inference.py \ --tensorrt work_dirs/end2end.engine \ - --model-cfg projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb8-25e_j6gen2.py \ + --model-cfg projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ --info-pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl ``` @@ -369,7 +369,7 @@ For 32 GB memory, the maximum you can use is approximately: 1860 x 2880 x 5 x 4 Therefore, please limit the calibration data using the indices parameter ```python -python tools/calibration_classification/toolkit.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb8-25e_j6gen2.py --info_pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_train.pkl --data_root data/t4dataset --output_dir /vis --npz_output_path calibration_file.npz --indices 200 +python tools/calibration_classification/toolkit.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py --info_pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_train.pkl --data_root data/t4dataset --output_dir /vis --npz_output_path calibration_file.npz --indices 200 ``` diff --git a/tools/calibration_classification/evaluate_inference.py b/tools/calibration_classification/evaluate_inference.py index 95f7fd1a..1842ef01 100644 --- a/tools/calibration_classification/evaluate_inference.py +++ b/tools/calibration_classification/evaluate_inference.py @@ -178,22 +178,11 @@ def create_test_transform(model_cfg: Config) -> CalibrationClassificationTransfo def run_onnx_inference( - onnx_path: str, + ort_session, input_tensor: torch.Tensor, logger: logging.Logger, ) -> Tuple[np.ndarray, float]: """Run ONNX inference directly and return output and latency.""" - try: - import onnxruntime as ort - except ImportError: - raise ImportError( - "onnxruntime is required for ONNX inference. 
Please install it with: pip install onnxruntime" - ) - - # Clear GPU cache if available - if torch.cuda.is_available(): - torch.cuda.empty_cache() - # Convert input tensor to float32 input_tensor = input_tensor.float() @@ -207,16 +196,8 @@ def run_onnx_inference( input_tensor = input_tensor.unsqueeze(0) # Add batch dimension logger.debug(f"Added batch dimension - Shape: {input_tensor.shape}") - # ONNX inference with timing - providers = ["CPUExecutionProvider"] - ort_session = ort.InferenceSession(onnx_path, providers=providers) - - # Debug: Print ONNX model input info + # Get input name from session input_name = ort_session.get_inputs()[0].name - input_shape = ort_session.get_inputs()[0].shape - input_type = ort_session.get_inputs()[0].type - logger.debug(f"ONNX model expects - Input name: {input_name}, Shape: {input_shape}, Type: {input_type}") - onnx_input = {input_name: input_tensor.cpu().numpy().astype(np.float32)} start_time = time.perf_counter() @@ -271,19 +252,23 @@ def run_tensorrt_inference(engine, input_tensor: torch.Tensor, logger: logging.L # Validate input shape expected_shape = engine.get_tensor_shape(input_name) - if input_np.shape != tuple(expected_shape): - logger.warning(f"Input shape mismatch: expected {expected_shape}, got {input_np.shape}") - # Try to reshape if possible - if len(input_np.shape) == len(expected_shape): - try: - input_np = input_np.reshape(expected_shape) - logger.info(f"Reshaped input to {input_np.shape}") - except Exception as e: - raise RuntimeError(f"Cannot reshape input from {input_np.shape} to {expected_shape}: {e}") - elif len(input_np.shape) == len(expected_shape) - 1 and expected_shape[0] == 1: + + # Check if shapes are compatible (considering -1 as dynamic dimension) + def shapes_compatible(actual_shape, expected_shape): + """Check if actual shape is compatible with expected shape (which may contain -1 for dynamic dims).""" + if len(actual_shape) != len(expected_shape): + return False + for actual_dim, expected_dim in zip(actual_shape, expected_shape): + if expected_dim != -1 and actual_dim != expected_dim: + return False + return True + + if not shapes_compatible(input_np.shape, expected_shape): + # Only warn/error if shapes are truly incompatible + if len(input_np.shape) == len(expected_shape) - 1 and expected_shape[0] == 1: # Add batch dimension if missing try: - input_np = input_np.reshape(expected_shape) + input_np = np.expand_dims(input_np, axis=0) logger.info(f"Added batch dimension to input: {input_np.shape}") except Exception as e: raise RuntimeError( @@ -293,6 +278,10 @@ def run_tensorrt_inference(engine, input_tensor: torch.Tensor, logger: logging.L raise RuntimeError( f"Input shape mismatch: expected {expected_shape}, got {input_np.shape}. Please ensure input has correct batch dimension." ) + else: + # Shapes are compatible (dynamic dimensions match), use actual shape + if tuple(expected_shape) != input_np.shape: + logger.debug(f"Using dynamic input shape {input_np.shape} (engine expects {expected_shape})") context.set_input_shape(input_name, input_np.shape) output_shape = context.get_tensor_shape(output_name) @@ -400,8 +389,29 @@ def evaluate_model( # Initialize inference engine if model_type == "onnx": + try: + import onnxruntime as ort + except ImportError: + raise ImportError( + "onnxruntime is required for ONNX inference. 
Please install it with: pip install onnxruntime" + ) + logger.info(f"Using ONNX model: {model_path}") - inference_func = lambda input_tensor: run_onnx_inference(model_path, input_tensor, logger) + logger.info("Creating ONNX InferenceSession (one-time setup)...") + + # Create session once and reuse it + providers = ["CPUExecutionProvider"] + ort_session = ort.InferenceSession(model_path, providers=providers) + + # Debug: Print ONNX model input info + input_name = ort_session.get_inputs()[0].name + input_shape = ort_session.get_inputs()[0].shape + input_type = ort_session.get_inputs()[0].type + logger.info(f"ONNX model expects - Input name: {input_name}, Shape: {input_shape}, Type: {input_type}") + logger.info("ONNX InferenceSession created successfully.") + + # Bind session to closure for reuse + inference_func = lambda input_tensor: run_onnx_inference(ort_session, input_tensor, logger) elif model_type == "tensorrt": logger.info(f"Using TensorRT model: {model_path}") engine = load_tensorrt_engine(model_path, logger) From a8298b4b36051e2dce48dd16f3b2b0e825983c50 Mon Sep 17 00:00:00 2001 From: vividf Date: Wed, 1 Oct 2025 19:35:36 +0900 Subject: [PATCH 07/19] chore: fix for latest update Signed-off-by: vividf --- .../evaluate_inference.py | 137 +++++++++++------- 1 file changed, 82 insertions(+), 55 deletions(-) diff --git a/tools/calibration_classification/evaluate_inference.py b/tools/calibration_classification/evaluate_inference.py index 1842ef01..b4dcf9f8 100644 --- a/tools/calibration_classification/evaluate_inference.py +++ b/tools/calibration_classification/evaluate_inference.py @@ -111,70 +111,41 @@ def load_info_pkl_data(info_pkl_path: str, sample_idx: int = 0) -> Dict[str, Any def load_sample_data_from_info_pkl( info_pkl_path: str, - model_cfg: Config, + transform: CalibrationClassificationTransform, sample_idx: int = 0, device: str = "cpu", - transform_test: Optional[CalibrationClassificationTransform] = None, -) -> torch.Tensor: + miscalibration_probability: float = 0.0, +) -> Tuple[torch.Tensor, int]: """ Load and preprocess sample data from info.pkl using CalibrationClassificationTransform. 
Args: info_pkl_path: Path to the info.pkl file - model_cfg: Model configuration containing data_root setting + transform: Reusable CalibrationClassificationTransform instance sample_idx: Index of the sample to load (default: 0) device: Device to load tensor on - transform_test: Pre-created test transform instance (optional) + miscalibration_probability: Probability to generate miscalibrated samples (0.0=calibrated, 1.0=miscalibrated) Returns: - Preprocessed tensor ready for model inference + Tuple of (preprocessed tensor ready for model inference, ground truth label) """ # Load sample data from info.pkl sample_data = load_info_pkl_data(info_pkl_path, sample_idx) - # Get data_root from model config - data_root = model_cfg.get("data_root", None) - if data_root is None: - raise ValueError("data_root not found in model configuration") + # Ensure sample_idx is in the data + sample_data["sample_idx"] = sample_idx - # Use pre-created transform or create new one (always use test mode) - if transform_test is None: - transform = CalibrationClassificationTransform( - transform_config=model_cfg.get("transform_config", None), - mode="test", - data_root=data_root, - projection_vis_dir=None, - results_vis_dir=None, - enable_augmentation=False, - ) - else: - transform = transform_test + # Update transform's miscalibration probability (reuse existing transform instance) + transform.miscalibration_probability = miscalibration_probability # Apply transform with miscalibration control results = transform.transform(sample_data) input_data_processed = results["fused_img"] # (H, W, 5) + gt_label = results["gt_label"] # 0=miscalibrated, 1=calibrated # Convert to tensor input_tensor = torch.from_numpy(input_data_processed).permute(2, 0, 1).float() # (5, H, W) input_tensor = input_tensor.unsqueeze(0).to(device) # (1, 5, H, W) - return input_tensor - - -def create_test_transform(model_cfg: Config) -> CalibrationClassificationTransform: - """Create test transform instance once for reuse.""" - data_root = model_cfg.get("data_root", None) - if data_root is None: - raise ValueError("data_root not found in model configuration") - - transform_test = CalibrationClassificationTransform( - transform_config=model_cfg.get("transform_config", None), - mode="test", - data_root=data_root, - projection_vis_dir=None, - results_vis_dir=None, - enable_augmentation=False, - ) - - return transform_test + return input_tensor, gt_label def run_onnx_inference( @@ -246,7 +217,12 @@ def run_tensorrt_inference(engine, input_tensor: torch.Tensor, logger: logging.L raise RuntimeError("Could not find input/output tensor names") # Prepare arrays - input_np = input_tensor.numpy().astype(np.float32) + # Convert torch tensor to numpy (handle both CPU and CUDA tensors) + if hasattr(input_tensor, "cpu"): + input_np = input_tensor.cpu().numpy().astype(np.float32) + else: + input_np = input_tensor.astype(np.float32) + if not input_np.flags["C_CONTIGUOUS"]: input_np = np.ascontiguousarray(input_np) @@ -384,8 +360,20 @@ def evaluate_model( logger.error(f"Failed to load info.pkl: {e}") raise - # Create transform instances once - transform_test = create_test_transform(model_cfg) + # Initialize transform once and reuse it for all samples + data_root = model_cfg.get("data_root", None) + if data_root is None: + raise ValueError("data_root not found in model configuration") + + transform = CalibrationClassificationTransform( + transform_config=model_cfg.get("transform_config", None), + mode="test", + data_root=data_root, + projection_vis_dir=None, + 
results_vis_dir=None, + enable_augmentation=False, + miscalibration_probability=0.0, # Will be updated per sample + ) # Initialize inference engine if model_type == "onnx": @@ -399,10 +387,37 @@ def evaluate_model( logger.info(f"Using ONNX model: {model_path}") logger.info("Creating ONNX InferenceSession (one-time setup)...") + # Configure execution providers based on device + if device == "cuda": + # Try to use CUDA provider, fall back to CPU if not available + providers = ["CUDAExecutionProvider", "CPUExecutionProvider"] + logger.info("Attempting to use CUDA acceleration for ONNX inference...") + try: + available_providers = ort.get_available_providers() + if "CUDAExecutionProvider" not in available_providers: + logger.warning( + "CUDAExecutionProvider not available. Available providers: {}. " + "Install onnxruntime-gpu for CUDA acceleration: pip install onnxruntime-gpu".format( + available_providers + ) + ) + logger.warning("Falling back to CPU execution") + providers = ["CPUExecutionProvider"] + else: + logger.info("CUDAExecutionProvider is available") + except Exception as e: + logger.warning(f"Error checking CUDA provider: {e}. Falling back to CPU") + providers = ["CPUExecutionProvider"] + else: + providers = ["CPUExecutionProvider"] + logger.info("Using CPU for ONNX inference") + # Create session once and reuse it - providers = ["CPUExecutionProvider"] ort_session = ort.InferenceSession(model_path, providers=providers) + # Log which provider is actually being used + logger.info(f"ONNX session using providers: {ort_session.get_providers()}") + # Debug: Print ONNX model input info input_name = ort_session.get_inputs()[0].name input_shape = ort_session.get_inputs()[0].shape @@ -414,6 +429,10 @@ def evaluate_model( inference_func = lambda input_tensor: run_onnx_inference(ort_session, input_tensor, logger) elif model_type == "tensorrt": logger.info(f"Using TensorRT model: {model_path}") + # TensorRT has its own CUDA memory management, force CPU device for data preparation + # if device != "cpu": + # logger.warning(f"TensorRT inference requires CPU device for data preparation. 
Overriding device from '{device}' to 'cpu'") + # device = "cpu" engine = load_tensorrt_engine(model_path, logger) inference_func = lambda input_tensor: run_tensorrt_inference(engine, input_tensor, logger) else: @@ -431,26 +450,28 @@ def evaluate_model( logger.info(f"Processing sample {sample_idx + 1}/{num_samples}") try: - # Load sample data directly from info.pkl - input_tensor_calibrated = load_sample_data_from_info_pkl( + # Load sample data directly from info.pkl - generate calibrated sample + input_tensor_calibrated, gt_label_calibrated = load_sample_data_from_info_pkl( info_pkl_path, - model_cfg, + transform, sample_idx, device=device, - transform_test=transform_test, + miscalibration_probability=0.0, # Always calibrated ) - input_tensor_miscalibrated = load_sample_data_from_info_pkl( + + # Load sample data again - generate miscalibrated sample + input_tensor_miscalibrated, gt_label_miscalibrated = load_sample_data_from_info_pkl( info_pkl_path, - model_cfg, + transform, sample_idx, device=device, - transform_test=transform_test, + miscalibration_probability=1.0, # Always miscalibrated ) # Test both calibrated and miscalibrated samples test_samples = [ - (input_tensor_calibrated, 1), # calibrated sample - (input_tensor_miscalibrated, 0), # miscalibrated sample + (input_tensor_calibrated, gt_label_calibrated), + (input_tensor_miscalibrated, gt_label_miscalibrated), ] for input_tensor, gt_label in test_samples: @@ -483,6 +504,8 @@ def evaluate_model( logger.info( f"Sample {sample_idx + 1} (GT={gt_label}): Pred={predicted_class}, Confidence={confidence:.4f}, Latency={latency:.2f}ms" ) + logger.info(f" Raw logits: {logits.squeeze()}") + logger.info(f" Probabilities: {probabilities.squeeze()}") # Clear GPU memory periodically for TensorRT if model_type == "tensorrt" and sample_idx % 10 == 0: @@ -596,7 +619,11 @@ def main(): help="Number of samples to evaluate (default: 10)", ) parser.add_argument( - "--device", type=str, default="cpu", choices=["cpu", "cuda"], help="Device to use for inference" + "--device", + type=str, + default="cpu", + choices=["cpu", "cuda"], + help="Device to use for inference. For ONNX: enables CUDA acceleration (requires onnxruntime-gpu). For TensorRT: always uses CUDA internally regardless of this setting.", ) parser.add_argument( "--log-level", type=str, default="INFO", choices=["DEBUG", "INFO", "WARNING", "ERROR"], help="Logging level" From 3cb94ce7771aa49d7e067d36084d3bfdcb514a9a Mon Sep 17 00:00:00 2001 From: vividf Date: Wed, 1 Oct 2025 20:49:51 +0900 Subject: [PATCH 08/19] chore: combine evaluation script with deploy script Signed-off-by: vividf --- .../CalibrationStatusClassification/README.md | 72 +++- .../configs/deploy/resnet18_5ch.py | 31 +- .../deploy/main.py | 396 +++++++++++++++++- .../evaluate_inference.py | 47 +-- 4 files changed, 511 insertions(+), 35 deletions(-) diff --git a/projects/CalibrationStatusClassification/README.md b/projects/CalibrationStatusClassification/README.md index 3790ea2b..76ed49b1 100644 --- a/projects/CalibrationStatusClassification/README.md +++ b/projects/CalibrationStatusClassification/README.md @@ -66,14 +66,80 @@ python tools/calibration_classification/train.py projects/CalibrationStatusClass ### 5. Deploy -Example commands for deployment (modify paths if needed): +The deployment script supports model export, verification, and evaluation. 
-- Custom script (with verification): +#### 5.1 Basic Export (ONNX and TensorRT) + +Export model to ONNX and TensorRT formats: + +```sh +python projects/CalibrationStatusClassification/deploy/main.py \ + projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \ + projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ + checkpoint.pth \ + --info-pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl \ + --device cuda:0 \ + --work-dir ./work_dirs/ +``` + +#### 5.2 Export with Verification + +Verify exported models against PyTorch reference on single sample: + +```sh +python projects/CalibrationStatusClassification/deploy/main.py \ + projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \ + projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ + checkpoint.pth \ + --info-pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl \ + --sample-idx 0 \ + --device cuda:0 \ + --work-dir ./work_dirs/ \ + --verify +``` + +#### 5.3 Full Model Evaluation + +Evaluate exported models on multiple samples with comprehensive metrics: ```sh -python projects/CalibrationStatusClassification/deploy/main.py projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py checkpoint.pth --info_pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl --sample_idx 0 --device cuda:0 --work-dir /workspace/work_dirs/ --verify +python projects/CalibrationStatusClassification/deploy/main.py \ + projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \ + projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ + checkpoint.pth \ + --info-pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl \ + --device cuda:0 \ + --work-dir ./work_dirs/ \ + --evaluate \ + --num-samples 50 \ + --verbose ``` +**Evaluation output includes:** +- Overall accuracy and per-class accuracy +- Confusion matrix +- Latency statistics (mean, median, P95, P99, min, max, std) +- Throughput (samples/sec) + +#### 5.4 Evaluate Existing Models (without re-export) + +To evaluate previously exported models without re-exporting: + +```sh +# Evaluate ONNX model +python projects/CalibrationStatusClassification/deploy/main.py \ + projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \ + projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ + checkpoint.pth \ + --info-pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl \ + --device cuda:0 \ + --work-dir ./work_dirs/ \ + --evaluate \ + --num-samples 100 +``` + +**Note:** The script will automatically detect and evaluate existing ONNX/TensorRT models in the work directory. 
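+
+#### 5.5 Interpreting the Reported Metrics
+
+The metrics in the evaluation report are plain aggregations over the per-sample results. The following is a minimal sketch of the same computation, assuming the predictions, ground-truth labels, and per-sample latencies have already been collected as NumPy arrays (the variable names below are placeholders, not part of the script's API):
+
+```python
+import numpy as np
+
+# Placeholder per-sample results; replace with the values collected during evaluation
+predictions = np.array([1, 0, 1, 1])
+ground_truth = np.array([1, 0, 0, 1])
+latencies_ms = np.array([4.2, 3.9, 4.1, 4.0])
+
+# Accuracy: fraction of samples where the prediction matches the label
+accuracy = (predictions == ground_truth).mean()
+
+# Latency statistics and throughput, as listed in the evaluation summary
+p50, p95, p99 = np.percentile(latencies_ms, [50, 95, 99])
+throughput = 1000.0 / latencies_ms.mean()  # samples per second
+
+print(f"accuracy={accuracy:.4f}  P50={p50:.2f} ms  P95={p95:.2f} ms  "
+      f"P99={p99:.2f} ms  throughput={throughput:.2f} samples/s")
+```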
+ ## Troubleshooting ## Reference diff --git a/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py b/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py index 729b0ed7..02d66642 100644 --- a/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py +++ b/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py @@ -30,6 +30,20 @@ # - If None with mode="both", will export ONNX first then convert ) +# ============================================================================== +# Evaluation Configuration +# ============================================================================== +evaluation = dict( + enabled=True, # Enable full model evaluation (set to True to run evaluation) + num_samples=100, # Number of samples to evaluate from info.pkl + verbose=True, # Enable verbose logging showing per-sample results + # Optional: Specify models to evaluate (if None, uses exported models from work_dir) + onnx_model="/workspace/work_dirs/end2end.onnx", # Path to ONNX model file to evaluate (e.g., "/path/to/model.onnx") + tensorrt_model="/workspace/work_dirs/end2end.engine", # Path to TensorRT engine file to evaluate (e.g., "/path/to/model.engine") + # Note: If models are None, will automatically detect and evaluate exported models + # Note: Command line args (--evaluate, --num-samples, --verbose) override these settings +) + # ============================================================================== # Codebase Configuration # ============================================================================== @@ -101,13 +115,28 @@ # 2) Set runtime_io.onnx_file = "/path/to/existing/model.onnx" # 3) Run the command above # +# Enable evaluation in config: +# 1) Set evaluation.enabled = True +# 2) Set evaluation.num_samples = 100 # Adjust as needed +# 3) Optionally set evaluation.onnx_model and/or evaluation.tensorrt_model +# 4) Run the command above +# +# Evaluate specific models: +# 1) Set evaluation.enabled = True +# 2) Set evaluation.onnx_model = "/path/to/model.onnx" +# 3) Set evaluation.tensorrt_model = "/path/to/model.engine" +# 4) Run the command above +# # Override config settings via command line: # python projects/CalibrationStatusClassification/deploy/main.py \ # \ # --work-dir ./custom_output \ # --device cuda:1 \ # --info-pkl /path/to/custom/info.pkl \ -# --sample-idx 5 +# --sample-idx 5 \ +# --evaluate \ +# --num-samples 50 \ +# --verbose # # Available precision policies: # - auto: Let TensorRT decide (default, good balance) diff --git a/projects/CalibrationStatusClassification/deploy/main.py b/projects/CalibrationStatusClassification/deploy/main.py index 32084dc4..1700917d 100644 --- a/projects/CalibrationStatusClassification/deploy/main.py +++ b/projects/CalibrationStatusClassification/deploy/main.py @@ -6,12 +6,15 @@ Features: - ONNX export with optimization -- TensorRT conversion -- Dual verification (ONNX + TensorRT) -- Performance benchmarking +- TensorRT conversion with precision policy support +- Dual verification (ONNX + TensorRT) on single samples +- Full model evaluation on multiple samples with metrics +- Performance benchmarking with latency statistics +- Confusion matrix and per-class accuracy analysis """ import argparse +import gc import logging import os import os.path as osp @@ -28,6 +31,7 @@ import pycuda.driver as cuda import tensorrt as trt import torch +import torch.nn.functional as F from mmengine.config import Config from mmpretrain.apis import get_model @@ -201,6 +205,11 @@ def 
runtime_io_config(self) -> Dict: """Get runtime I/O configuration (info_pkl, sample_idx, onnx_file).""" return self.deploy_cfg.get("runtime_io", {}) + @property + def evaluation_config(self) -> Dict: + """Get evaluation configuration (enabled, num_samples, verbose, model paths).""" + return self.deploy_cfg.get("evaluation", {}) + @property def should_export_onnx(self) -> bool: """Check if ONNX export is requested.""" @@ -277,6 +286,14 @@ def parse_args() -> argparse.Namespace: parser.add_argument("--log-level", default="INFO", choices=list(logging._nameToLevel.keys()), help="Logging level") parser.add_argument("--info-pkl", help="Override info.pkl path from config") parser.add_argument("--sample-idx", type=int, help="Override sample index from config") + + # Evaluation mode + parser.add_argument("--evaluate", action="store_true", help="Run full evaluation on multiple samples") + parser.add_argument( + "--num-samples", type=int, default=10, help="Number of samples to evaluate (only with --evaluate)" + ) + parser.add_argument("--verbose", action="store_true", help="Enable verbose logging during evaluation") + return parser.parse_args() @@ -651,6 +668,292 @@ def run_verification( logger.info("=" * 50) +def evaluate_exported_model( + model_path: str, + model_type: str, + model_cfg: Config, + info_pkl_path: str, + logger: logging.Logger, + device: str = "cpu", + num_samples: int = 10, + verbose: bool = False, +) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: + """ + Evaluate exported model (ONNX or TensorRT) using info.pkl data. + + Args: + model_path: Path to model file (.onnx or .engine) + model_type: Type of model ("onnx" or "tensorrt") + model_cfg: Model configuration + info_pkl_path: Path to info.pkl file + logger: Logger instance + device: Device for preprocessing + num_samples: Number of samples to evaluate + verbose: Enable verbose logging + + Returns: + Tuple of (predictions, ground_truth, probabilities, latencies) + """ + + # Load info.pkl data + try: + with open(info_pkl_path, "rb") as f: + info_data = pickle.load(f) + + if "data_list" not in info_data: + raise ValueError("Expected 'data_list' key in info.pkl") + + samples_list = info_data["data_list"] + logger.info(f"Loaded {len(samples_list)} samples from info.pkl") + + # Limit number of samples for evaluation + num_samples = min(num_samples, len(samples_list)) + logger.info(f"Evaluating {num_samples} samples") + + except Exception as e: + logger.error(f"Failed to load info.pkl: {e}") + raise + + # Initialize transform once and reuse it for all samples + data_root = model_cfg.get("data_root", None) + if data_root is None: + raise ValueError("data_root not found in model configuration") + + transform_config = model_cfg.get("transform_config", None) + if transform_config is None: + raise ValueError("transform_config not found in model configuration") + + transform = CalibrationClassificationTransform( + transform_config=transform_config, + mode="test", + max_depth=model_cfg.get("max_depth", 128.0), + dilation_size=model_cfg.get("dilation_size", 1), + undistort=True, + miscalibration_probability=0.0, # Will be updated per sample + enable_augmentation=False, + data_root=data_root, + projection_vis_dir=None, + results_vis_dir=None, + binary_save_dir=None, + ) + + # Initialize inference engine + if model_type == "onnx": + logger.info(f"Using ONNX model: {model_path}") + logger.info("Creating ONNX InferenceSession...") + + # Configure execution providers + if device.startswith("cuda"): + providers = ["CUDAExecutionProvider", 
"CPUExecutionProvider"] + logger.info("Attempting to use CUDA acceleration for ONNX inference...") + try: + available_providers = ort.get_available_providers() + if "CUDAExecutionProvider" not in available_providers: + logger.warning( + f"CUDAExecutionProvider not available. Available: {available_providers}. " + "Install onnxruntime-gpu for CUDA acceleration" + ) + providers = ["CPUExecutionProvider"] + else: + logger.info("CUDAExecutionProvider is available") + except Exception as e: + logger.warning(f"Error checking CUDA provider: {e}. Falling back to CPU") + providers = ["CPUExecutionProvider"] + else: + providers = ["CPUExecutionProvider"] + logger.info("Using CPU for ONNX inference") + + # Create session once and reuse it + ort_session = ort.InferenceSession(model_path, providers=providers) + logger.info(f"ONNX session using providers: {ort_session.get_providers()}") + + # Define inference function + def inference_func(input_tensor): + input_name = ort_session.get_inputs()[0].name + onnx_input = {input_name: input_tensor.cpu().numpy()} + start_time = time.perf_counter() + onnx_output = ort_session.run(None, onnx_input)[0] + latency = (time.perf_counter() - start_time) * 1000 + return onnx_output, latency + + elif model_type == "tensorrt": + logger.info(f"Using TensorRT model: {model_path}") + + # Load TensorRT engine + trt_logger = trt.Logger(trt.Logger.WARNING) + trt.init_libnvinfer_plugins(trt_logger, "") + runtime = trt.Runtime(trt_logger) + + with open(model_path, "rb") as f: + engine = runtime.deserialize_cuda_engine(f.read()) + + if engine is None: + raise RuntimeError("Failed to deserialize TensorRT engine") + + # Define inference function + def inference_func(input_tensor): + return _run_tensorrt_inference(engine, input_tensor.cpu(), logger) + + else: + raise ValueError(f"Unsupported model type: {model_type}") + + # Lists to store results + all_predictions = [] + all_ground_truth = [] + all_probabilities = [] + all_latencies = [] + + # Evaluate samples + for sample_idx in range(num_samples): + if sample_idx % 5 == 0: + logger.info(f"Processing sample {sample_idx + 1}/{num_samples}") + + try: + # Test both calibrated and miscalibrated versions + for miscalibration_prob, expected_label in [(0.0, 1), (1.0, 0)]: + # Update transform's miscalibration probability + transform.miscalibration_probability = miscalibration_prob + + # Load and preprocess sample + sample_data = load_info_pkl_data(info_pkl_path, sample_idx) + sample_data["sample_idx"] = sample_idx + + results = transform.transform(sample_data) + input_data_processed = results["fused_img"] + gt_label = results["gt_label"] + + # Convert to tensor + input_tensor = torch.from_numpy(input_data_processed).permute(2, 0, 1).float() + input_tensor = input_tensor.unsqueeze(0).to(device) + + # Run inference + output_np, latency = inference_func(input_tensor) + + if output_np is None: + logger.error(f"Failed to get output for sample {sample_idx}") + continue + + # Convert logits to probabilities + logits = torch.from_numpy(output_np) + probabilities = F.softmax(logits, dim=-1) + predicted_class = torch.argmax(probabilities, dim=-1).item() + confidence = probabilities.max().item() + + # Store results + all_predictions.append(predicted_class) + all_ground_truth.append(gt_label) + all_probabilities.append(probabilities.cpu().numpy()) + all_latencies.append(latency) + + # Print sample results only in verbose mode + if verbose: + logger.info( + f"Sample {sample_idx + 1} (GT={gt_label}): " + f"Pred={predicted_class}, 
Confidence={confidence:.4f}, Latency={latency:.2f}ms" + ) + + # Clear GPU memory periodically + if model_type == "tensorrt" and sample_idx % 10 == 0: + if torch.cuda.is_available(): + torch.cuda.empty_cache() + gc.collect() + + except Exception as e: + logger.error(f"Error processing sample {sample_idx}: {e}") + continue + + return ( + np.array(all_predictions), + np.array(all_ground_truth), + np.array(all_probabilities), + np.array(all_latencies), + ) + + +def print_evaluation_results( + all_predictions: np.ndarray, + all_ground_truth: np.ndarray, + all_probabilities: np.ndarray, + all_latencies: np.ndarray, + model_type: str, + logger: logging.Logger, +) -> None: + """Print comprehensive evaluation results with metrics and statistics.""" + + if len(all_predictions) == 0: + logger.error("No samples were processed successfully.") + return + + correct_predictions = (all_predictions == all_ground_truth).sum() + total_samples = len(all_predictions) + accuracy = correct_predictions / total_samples + avg_latency = np.mean(all_latencies) + + # Print header + logger.info(f"\n{'='*60}") + logger.info(f"{model_type.upper()} Model Evaluation Results") + logger.info(f"{'='*60}") + logger.info(f"Total samples: {total_samples}") + logger.info(f"Correct predictions: {correct_predictions}") + logger.info(f"Accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)") + logger.info(f"Average latency: {avg_latency:.2f} ms") + + # Calculate per-class accuracy + unique_classes = np.unique(all_ground_truth) + logger.info(f"\nPer-class accuracy:") + for cls in unique_classes: + cls_mask = all_ground_truth == cls + cls_correct = (all_predictions[cls_mask] == all_ground_truth[cls_mask]).sum() + cls_total = cls_mask.sum() + cls_accuracy = cls_correct / cls_total if cls_total > 0 else 0 + logger.info( + f" Class {cls} ({LABELS[str(cls)]}): " + f"{cls_correct}/{cls_total} = {cls_accuracy:.4f} ({cls_accuracy*100:.2f}%)" + ) + + # Calculate average confidence + avg_confidence = np.mean([prob.max() for prob in all_probabilities]) + logger.info(f"\nAverage confidence: {avg_confidence:.4f}") + + # Calculate latency statistics + min_latency = np.min(all_latencies) + max_latency = np.max(all_latencies) + std_latency = np.std(all_latencies) + p50_latency = np.percentile(all_latencies, 50) + p95_latency = np.percentile(all_latencies, 95) + p99_latency = np.percentile(all_latencies, 99) + + logger.info(f"\nLatency Statistics:") + logger.info(f" Average: {avg_latency:.2f} ms") + logger.info(f" Median (P50): {p50_latency:.2f} ms") + logger.info(f" P95: {p95_latency:.2f} ms") + logger.info(f" P99: {p99_latency:.2f} ms") + logger.info(f" Min: {min_latency:.2f} ms") + logger.info(f" Max: {max_latency:.2f} ms") + logger.info(f" Std: {std_latency:.2f} ms") + + # Show confusion matrix + logger.info(f"\nConfusion Matrix:") + logger.info(f"{'':>10} Predicted") + logger.info(f"{'Actual':>10} {'0 (misc)':>10} {'1 (calib)':>10}") + logger.info(f"{'-'*32}") + for true_cls in unique_classes: + row_label = f"{true_cls} ({LABELS[str(true_cls)][:4]})" + row = [f"{row_label:>10}"] + for pred_cls in unique_classes: + count = ((all_ground_truth == true_cls) & (all_predictions == pred_cls)).sum() + row.append(f"{count:>10}") + logger.info(" ".join(row)) + + logger.info(f"\n{'='*60}") + logger.info(f"Evaluation Summary:") + logger.info(f" Model Type: {model_type.upper()}") + logger.info(f" Accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)") + logger.info(f" Avg Latency: {avg_latency:.2f} ms") + logger.info(f" Throughput: {1000/avg_latency:.2f} samples/sec") + 
logger.info(f"{'='*60}\n") + + def main(): """Main deployment function.""" args = parse_args() @@ -751,6 +1054,93 @@ def main(): run_verification(model, onnx_path_for_verify, trt_path_for_verify, input_tensors, logger) + # Get evaluation settings from config and CLI + eval_cfg = config.evaluation_config + should_evaluate = args.evaluate or eval_cfg.get("enabled", False) + num_samples = args.num_samples if args.num_samples != 10 else eval_cfg.get("num_samples", 10) + verbose_mode = args.verbose or eval_cfg.get("verbose", False) + + # Run full evaluation if requested + if should_evaluate: + logger.info(f"\n{'='*60}") + logger.info("Starting full model evaluation...") + logger.info(f"{'='*60}") + + # Determine which models to evaluate + models_to_evaluate = [] + + # Check for ONNX model (config-specified or auto-detected) + eval_onnx_path = eval_cfg.get("onnx_model") + if eval_onnx_path: + # Use config-specified ONNX model + if osp.exists(eval_onnx_path): + models_to_evaluate.append(("onnx", eval_onnx_path)) + logger.info(f"Using config-specified ONNX model: {eval_onnx_path}") + else: + logger.warning(f"Config-specified ONNX model not found: {eval_onnx_path}") + elif config.should_export_onnx or existing_onnx: + # Auto-detect exported ONNX model + if onnx_path and osp.exists(onnx_path): + models_to_evaluate.append(("onnx", onnx_path)) + else: + logger.warning(f"ONNX model not found at {onnx_path}, skipping ONNX evaluation") + + # Check for TensorRT model (config-specified or auto-detected) + eval_trt_path = eval_cfg.get("tensorrt_model") + if eval_trt_path: + # Use config-specified TensorRT model + if osp.exists(eval_trt_path): + models_to_evaluate.append(("tensorrt", eval_trt_path)) + logger.info(f"Using config-specified TensorRT model: {eval_trt_path}") + else: + logger.warning(f"Config-specified TensorRT model not found: {eval_trt_path}") + elif config.should_export_tensorrt: + # Auto-detect exported TensorRT model + if trt_path and osp.exists(trt_path): + models_to_evaluate.append(("tensorrt", trt_path)) + else: + logger.warning(f"TensorRT model not found at {trt_path}, skipping TensorRT evaluation") + + if not models_to_evaluate: + logger.error( + "No models available for evaluation. Please export models first or specify model paths in config." 
+ ) + else: + # Evaluate each model + for model_type, model_path in models_to_evaluate: + logger.info(f"\nEvaluating {model_type.upper()} model...") + logger.info(f"Model path: {model_path}") + logger.info(f"Number of samples: {num_samples}") + logger.info(f"Verbose mode: {verbose_mode}") + + try: + # Run evaluation + predictions, ground_truth, probabilities, latencies = evaluate_exported_model( + model_path=model_path, + model_type=model_type, + model_cfg=model_cfg, + info_pkl_path=info_pkl, + logger=logger, + device=device.type if isinstance(device, torch.device) else device, + num_samples=num_samples, + verbose=verbose_mode, + ) + + # Print results + print_evaluation_results(predictions, ground_truth, probabilities, latencies, model_type, logger) + + # Cleanup + if torch.cuda.is_available(): + torch.cuda.empty_cache() + gc.collect() + + except Exception as e: + logger.error(f"Evaluation failed for {model_type.upper()} model: {e}") + import traceback + + logger.error(traceback.format_exc()) + continue + logger.info("Deployment completed successfully!") # Log what was exported diff --git a/tools/calibration_classification/evaluate_inference.py b/tools/calibration_classification/evaluate_inference.py index b4dcf9f8..a32b6127 100644 --- a/tools/calibration_classification/evaluate_inference.py +++ b/tools/calibration_classification/evaluate_inference.py @@ -154,30 +154,17 @@ def run_onnx_inference( logger: logging.Logger, ) -> Tuple[np.ndarray, float]: """Run ONNX inference directly and return output and latency.""" - # Convert input tensor to float32 - input_tensor = input_tensor.float() - - # Debug: Print input tensor info before preprocessing - logger.debug( - f"Input tensor before preprocessing - Shape: {input_tensor.shape}, Dtype: {input_tensor.dtype}, Min: {input_tensor.min():.4f}, Max: {input_tensor.max():.4f}" - ) - - # Add batch dimension if needed (ONNX expects 4D input: batch, channels, height, width) - if input_tensor.dim() == 3: - input_tensor = input_tensor.unsqueeze(0) # Add batch dimension - logger.debug(f"Added batch dimension - Shape: {input_tensor.shape}") - # Get input name from session input_name = ort_session.get_inputs()[0].name - onnx_input = {input_name: input_tensor.cpu().numpy().astype(np.float32)} + + # Convert to numpy (input_tensor is already in correct shape from load_sample_data_from_info_pkl) + onnx_input = {input_name: input_tensor.cpu().numpy()} start_time = time.perf_counter() onnx_output = ort_session.run(None, onnx_input)[0] end_time = time.perf_counter() onnx_latency = (end_time - start_time) * 1000 - logger.info(f"ONNX inference latency: {onnx_latency:.2f} ms") - # Ensure onnx_output is numpy array if not isinstance(onnx_output, np.ndarray): logger.error(f"Unexpected ONNX output type: {type(onnx_output)}") @@ -217,11 +204,18 @@ def run_tensorrt_inference(engine, input_tensor: torch.Tensor, logger: logging.L raise RuntimeError("Could not find input/output tensor names") # Prepare arrays - # Convert torch tensor to numpy (handle both CPU and CUDA tensors) - if hasattr(input_tensor, "cpu"): - input_np = input_tensor.cpu().numpy().astype(np.float32) - else: - input_np = input_tensor.astype(np.float32) + # Convert torch tensor to numpy (input_tensor is guaranteed to be on CPU) + input_np = input_tensor.numpy().astype(np.float32) + + # Validate input data quality + if np.isnan(input_np).any(): + raise ValueError(f"Input contains NaN values") + if np.isinf(input_np).any(): + raise ValueError(f"Input contains Inf values") + + logger.debug( + f"Input data 
stats: min={input_np.min():.4f}, max={input_np.max():.4f}, mean={input_np.mean():.4f}" + ) if not input_np.flags["C_CONTIGUOUS"]: input_np = np.ascontiguousarray(input_np) @@ -429,12 +423,9 @@ def evaluate_model( inference_func = lambda input_tensor: run_onnx_inference(ort_session, input_tensor, logger) elif model_type == "tensorrt": logger.info(f"Using TensorRT model: {model_path}") - # TensorRT has its own CUDA memory management, force CPU device for data preparation - # if device != "cpu": - # logger.warning(f"TensorRT inference requires CPU device for data preparation. Overriding device from '{device}' to 'cpu'") - # device = "cpu" engine = load_tensorrt_engine(model_path, logger) - inference_func = lambda input_tensor: run_tensorrt_inference(engine, input_tensor, logger) + # TensorRT requires CPU tensors - it manages GPU memory internally + inference_func = lambda input_tensor: run_tensorrt_inference(engine, input_tensor.cpu(), logger) else: raise ValueError(f"Unsupported model type: {model_type}") @@ -622,8 +613,8 @@ def main(): "--device", type=str, default="cpu", - choices=["cpu", "cuda"], - help="Device to use for inference. For ONNX: enables CUDA acceleration (requires onnxruntime-gpu). For TensorRT: always uses CUDA internally regardless of this setting.", + choices=["cpu", "cuda:0"], + help="Device to use for data preprocessing. For ONNX: also enables CUDA acceleration if onnxruntime-gpu is installed. For TensorRT: data is always transferred to CPU before inference (TensorRT manages GPU memory internally).", ) parser.add_argument( "--log-level", type=str, default="INFO", choices=["DEBUG", "INFO", "WARNING", "ERROR"], help="Logging level" From befb3dffcfdaf3b4f76fab4f8ef2456d83a36805 Mon Sep 17 00:00:00 2001 From: vividf Date: Wed, 1 Oct 2025 20:56:21 +0900 Subject: [PATCH 09/19] chore: add onnxruntime with cuda Signed-off-by: vividf --- projects/CalibrationStatusClassification/Dockerfile | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/projects/CalibrationStatusClassification/Dockerfile b/projects/CalibrationStatusClassification/Dockerfile index 77153382..e4c5578a 100644 --- a/projects/CalibrationStatusClassification/Dockerfile +++ b/projects/CalibrationStatusClassification/Dockerfile @@ -2,6 +2,13 @@ ARG AWML_BASE_IMAGE="autoware-ml:latest" FROM ${AWML_BASE_IMAGE} ARG TRT_VERSION=10.8.0.43 +# Install system dependencies +RUN apt-get update && apt-get install -y \ + libcudnn9-cuda-12 \ + libcudnn9-dev-cuda-12 \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + # Install pip dependencies RUN python3 -m pip --no-cache-dir install \ onnxruntime-gpu --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/ \ From 878bc8b2ffe8caf18933cea14bcdf8832de62087 Mon Sep 17 00:00:00 2001 From: vividf Date: Fri, 3 Oct 2025 10:53:11 +0900 Subject: [PATCH 10/19] chore: fix procedure Signed-off-by: vividf --- README.md | 1 + ...orial_calibration_status_classification.md | 77 ++++++ docs/tutorial/tutorial_detection_3d.md | 1 + .../CalibrationStatusClassification/README.md | 72 +----- .../configs/deploy/resnet18_5ch.py | 20 +- .../deploy/main.py | 168 +++++++------ tools/calibration_classification/README.md | 233 ++++++++++++++---- .../visualize_lidar_camera_projection.py | 44 ++-- 8 files changed, 397 insertions(+), 219 deletions(-) create mode 100644 docs/tutorial/tutorial_calibration_status_classification.md diff --git a/README.md b/README.md index a4c280d9..9e720af4 100644 --- a/README.md +++ b/README.md @@ -62,6 +62,7 @@ 
We hope that `AWML` promotes the community between Autoware and ML researchers a ## Get started - [Start training for 3D object detection](/docs/tutorial/tutorial_detection_3d.md) +- [Start training for Classification Status Classifier](/docs/tutorial/tutorial_calibration_status_classification.md) ## Docs ### Design documents diff --git a/docs/tutorial/tutorial_calibration_status_classification.md b/docs/tutorial/tutorial_calibration_status_classification.md new file mode 100644 index 00000000..8aae7511 --- /dev/null +++ b/docs/tutorial/tutorial_calibration_status_classification.md @@ -0,0 +1,77 @@ +# Tutorial: Calibration Status Classification + +## 1. Setup environment + +See [tutorial_installation](/docs/tutorial/tutorial_installation.md) to set up the environment. + +### Build Docker for CalibrationStatusClassification + +```sh +DOCKER_BUILDKIT=1 docker build -t autoware-ml-calib projects/CalibrationStatusClassification/ +``` + +## 2. Train and evaluation + +In this tutorial, we use [CalibrationStatusClassification](/projects/CalibrationStatusClassification/). +If you want to know the tools in detail, please see [calibration_classification](/tools/calibration_classification/) and [CalibrationStatusClassification](/projects/CalibrationStatusClassification/). + +### 2.1. Prepare T4dataset + +- Run docker + +```sh +docker run -it --rm --gpus all --shm-size=64g --name awml-calib -p 6006:6006 -v $PWD/:/workspace -v $PWD/data:/workspace/data autoware-ml-calib +``` + +- Create info files for T4dataset + +```sh +python tools/calibration_classification/create_data_t4dataset.py --config /workspace/autoware_ml/configs/calibration_classification/dataset/t4dataset/gen2_base.py --version gen2_base --root_path ./data/t4dataset -o ./data/t4dataset/calibration_info/ +``` + +- (Optional) Process only specific cameras + +```sh +python tools/calibration_classification/create_data_t4dataset.py --config /workspace/autoware_ml/configs/calibration_classification/dataset/t4dataset/gen2_base.py --version gen2_base --root_path ./data/t4dataset -o ./data/t4dataset/calibration_info/ --target_cameras CAM_FRONT CAM_LEFT CAM_RIGHT +``` + +### 2.2. Visualization (Optional) + +Visualize calibration data before training to verify sensor alignment: + +```sh +python tools/calibration_classification/toolkit.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py --info_pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl --data_root data/t4dataset --output_dir ./work_dirs/calibration_visualization +``` + +### 2.3. Train + +- Single GPU: + +```sh +python tools/calibration_classification/train.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py +``` + +- Multi GPU (example with 2 GPUs): + +```sh +./tools/calibration_classification/dist_train.sh projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py 2 +``` + +### 2.4. Evaluation + +```sh +python tools/calibration_classification/test.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py epoch_25.pth --out results.pkl +``` + +### 2.5. 
Deploy the ONNX file + +Export model to ONNX and TensorRT: + +```sh +python projects/CalibrationStatusClassification/deploy/main.py \ + projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \ + projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ + checkpoint.pth +``` + +For INT8 quantization and advanced deployment options, see [Deployment guide](/tools/calibration_classification/README.md#6-deployment). diff --git a/docs/tutorial/tutorial_detection_3d.md b/docs/tutorial/tutorial_detection_3d.md index 4f2728e0..ef4cdfba 100644 --- a/docs/tutorial/tutorial_detection_3d.md +++ b/docs/tutorial/tutorial_detection_3d.md @@ -1,3 +1,4 @@ +# Tutorial: Detection 3D ## 1. Setup environment diff --git a/projects/CalibrationStatusClassification/README.md b/projects/CalibrationStatusClassification/README.md index 76ed49b1..0940e7ad 100644 --- a/projects/CalibrationStatusClassification/README.md +++ b/projects/CalibrationStatusClassification/README.md @@ -64,81 +64,27 @@ python tools/calibration_classification/train.py projects/CalibrationStatusClass ./tools/calibration_classification/dist_train.sh projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py 2 ``` -### 5. Deploy - -The deployment script supports model export, verification, and evaluation. - -#### 5.1 Basic Export (ONNX and TensorRT) - -Export model to ONNX and TensorRT formats: +### 5. Test +Run testing ```sh -python projects/CalibrationStatusClassification/deploy/main.py \ - projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \ - projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ - checkpoint.pth \ - --info-pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl \ - --device cuda:0 \ - --work-dir ./work_dirs/ +python tools/calibration_classification/test.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py epoch_25.pth --out {output_file} ``` -#### 5.2 Export with Verification -Verify exported models against PyTorch reference on single sample: - -```sh -python projects/CalibrationStatusClassification/deploy/main.py \ - projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \ - projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ - checkpoint.pth \ - --info-pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl \ - --sample-idx 0 \ - --device cuda:0 \ - --work-dir ./work_dirs/ \ - --verify -``` +### 6. 
Deploy -#### 5.3 Full Model Evaluation - -Evaluate exported models on multiple samples with comprehensive metrics: +The deployment script provides model export, verification, and evaluation support: ```sh python projects/CalibrationStatusClassification/deploy/main.py \ - projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \ - projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ - checkpoint.pth \ - --info-pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl \ - --device cuda:0 \ - --work-dir ./work_dirs/ \ - --evaluate \ - --num-samples 50 \ - --verbose + projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \ + projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ + checkpoint.pth ``` -**Evaluation output includes:** -- Overall accuracy and per-class accuracy -- Confusion matrix -- Latency statistics (mean, median, P95, P99, min, max, std) -- Throughput (samples/sec) - -#### 5.4 Evaluate Existing Models (without re-export) - -To evaluate previously exported models without re-exporting: - -```sh -# Evaluate ONNX model -python projects/CalibrationStatusClassification/deploy/main.py \ - projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \ - projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ - checkpoint.pth \ - --info-pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl \ - --device cuda:0 \ - --work-dir ./work_dirs/ \ - --evaluate \ - --num-samples 100 -``` +For more details on configuration options such as INT8 quantization and custom settings, please refer to the [Deployment guide](../../tools/calibration_classification/README.md#6-deployment) -**Note:** The script will automatically detect and evaluate existing ONNX/TensorRT models in the work directory. 
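+
+For example, to also evaluate the exported models on a number of test samples, the same entry point accepts evaluation flags (paths as in the command above; adjust them to your setup):
+
+```sh
+python projects/CalibrationStatusClassification/deploy/main.py \
+    projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \
+    projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \
+    checkpoint.pth \
+    --evaluate \
+    --num-samples 100
+```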
## Troubleshooting diff --git a/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py b/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py index 02d66642..4888538e 100644 --- a/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py +++ b/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py @@ -9,10 +9,12 @@ # Export Configuration # ============================================================================== export = dict( - mode="both", # Export mode: "onnx", "trt", or "both" + mode="none", # Export mode: "onnx", "trt", "both", or "none" # - "onnx": Export to ONNX only # - "trt": Convert to TensorRT only (requires onnx_file in runtime_io) # - "both": Export to ONNX then convert to TensorRT + # - "none": Skip export, only run evaluation on existing models + # (requires evaluation.onnx_model and/or evaluation.tensorrt_model) verify=True, # Run verification comparing PyTorch/ONNX/TRT outputs device="cuda:0", # Device for export (use "cuda:0" or "cpu") # Note: TensorRT always requires CUDA, will auto-switch if needed @@ -36,7 +38,7 @@ evaluation = dict( enabled=True, # Enable full model evaluation (set to True to run evaluation) num_samples=100, # Number of samples to evaluate from info.pkl - verbose=True, # Enable verbose logging showing per-sample results + verbose=False, # Enable verbose logging showing per-sample results # Optional: Specify models to evaluate (if None, uses exported models from work_dir) onnx_model="/workspace/work_dirs/end2end.onnx", # Path to ONNX model file to evaluate (e.g., "/path/to/model.onnx") tensorrt_model="/workspace/work_dirs/end2end.engine", # Path to TensorRT engine file to evaluate (e.g., "/path/to/model.engine") @@ -121,11 +123,15 @@ # 3) Optionally set evaluation.onnx_model and/or evaluation.tensorrt_model # 4) Run the command above # -# Evaluate specific models: -# 1) Set evaluation.enabled = True -# 2) Set evaluation.onnx_model = "/path/to/model.onnx" -# 3) Set evaluation.tensorrt_model = "/path/to/model.engine" -# 4) Run the command above +# Evaluate specific models (without export): +# 1) Set export.mode = "none" +# 2) Set evaluation.enabled = True +# 3) Set evaluation.onnx_model = "/path/to/model.onnx" +# 4) Set evaluation.tensorrt_model = "/path/to/model.engine" +# 5) Run the command WITHOUT checkpoint: +# python projects/CalibrationStatusClassification/deploy/main.py \ +# projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \ +# projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py # # Override config settings via command line: # python projects/CalibrationStatusClassification/deploy/main.py \ diff --git a/projects/CalibrationStatusClassification/deploy/main.py b/projects/CalibrationStatusClassification/deploy/main.py index 1700917d..3989b3b9 100644 --- a/projects/CalibrationStatusClassification/deploy/main.py +++ b/projects/CalibrationStatusClassification/deploy/main.py @@ -181,7 +181,7 @@ def _validate_config(self) -> None: raise ValueError("Missing 'runtime_io' section in deploy config.") # Validate export mode - valid_modes = ["onnx", "trt", "both"] + valid_modes = ["onnx", "trt", "both", "none"] mode = self.export_config.get("mode", "both") if mode not in valid_modes: raise ValueError(f"Invalid export mode '{mode}'. 
Must be one of {valid_modes}") @@ -278,7 +278,9 @@ def parse_args() -> argparse.Namespace: ) parser.add_argument("deploy_cfg", help="Deploy config path") parser.add_argument("model_cfg", help="Model config path") - parser.add_argument("checkpoint", help="Model checkpoint path") + parser.add_argument( + "checkpoint", nargs="?", default=None, help="Model checkpoint path (optional when mode='none')" + ) # Optional overrides parser.add_argument("--work-dir", help="Override output directory from config") @@ -805,7 +807,7 @@ def inference_func(input_tensor): # Evaluate samples for sample_idx in range(num_samples): - if sample_idx % 5 == 0: + if sample_idx % 100 == 0: logger.info(f"Processing sample {sample_idx + 1}/{num_samples}") try: @@ -983,76 +985,90 @@ def main(): mmengine.mkdir_or_exist(osp.abspath(work_dir)) logger.info(f"Working directory: {work_dir}") logger.info(f"Device: {device}") - logger.info(f"Export mode: {config.export_config.get('mode', 'both')}") + export_mode = config.export_config.get("mode", "both") + logger.info(f"Export mode: {export_mode}") + + # Check if eval-only mode + is_eval_only = export_mode == "none" + + # Validate checkpoint requirement + if not is_eval_only and not args.checkpoint: + logger.error("Checkpoint is required when export mode is not 'none'") + return # Determine export paths onnx_path = None trt_path = None - if config.should_export_onnx: - onnx_settings = config.onnx_settings - onnx_path = osp.join(work_dir, onnx_settings["save_file"]) - - if config.should_export_tensorrt: - # Use existing ONNX if provided, otherwise use the one we'll export - if existing_onnx and not config.should_export_onnx: - onnx_path = existing_onnx - if not osp.exists(onnx_path): - logger.error(f"Provided ONNX file does not exist: {onnx_path}") + if not is_eval_only: + if config.should_export_onnx: + onnx_settings = config.onnx_settings + onnx_path = osp.join(work_dir, onnx_settings["save_file"]) + + if config.should_export_tensorrt: + # Use existing ONNX if provided, otherwise use the one we'll export + if existing_onnx and not config.should_export_onnx: + onnx_path = existing_onnx + if not osp.exists(onnx_path): + logger.error(f"Provided ONNX file does not exist: {onnx_path}") + return + logger.info(f"Using existing ONNX file: {onnx_path}") + elif not onnx_path: + # Need ONNX for TensorRT but neither export nor existing file specified + logger.error("TensorRT export requires ONNX file. Set mode='both' or provide onnx_file in config.") return - logger.info(f"Using existing ONNX file: {onnx_path}") - elif not onnx_path: - # Need ONNX for TensorRT but neither export nor existing file specified - logger.error("TensorRT export requires ONNX file. 
Set mode='both' or provide onnx_file in config.") - return - - # Set TensorRT output path - onnx_settings = config.onnx_settings - trt_file = onnx_settings["save_file"].replace(".onnx", ".engine") - trt_path = osp.join(work_dir, trt_file) - - # Load model - logger.info(f"Loading model from checkpoint: {args.checkpoint}") - device = torch.device(device) - model = get_model(model_cfg, args.checkpoint, device=device) - - # Load sample data - logger.info(f"Loading sample data from info.pkl: {info_pkl}") - input_tensor_calibrated = load_sample_data_from_info_pkl(info_pkl, model_cfg, 0.0, sample_idx, device=device) - input_tensor_miscalibrated = load_sample_data_from_info_pkl(info_pkl, model_cfg, 1.0, sample_idx, device=device) - - # Export models - if config.should_export_onnx: - export_to_onnx(model, input_tensor_calibrated, onnx_path, config, logger) - - if config.should_export_tensorrt: - logger.info("Converting ONNX to TensorRT...") - - # Ensure CUDA device for TensorRT - if device.type != "cuda": - logger.warning("TensorRT requires CUDA device, switching to cuda") - device = torch.device("cuda") - input_tensor_calibrated = input_tensor_calibrated.to(device) - input_tensor_miscalibrated = input_tensor_miscalibrated.to(device) - - success = export_to_tensorrt(onnx_path, trt_path, input_tensor_calibrated, config, logger) - if success: - logger.info(f"TensorRT conversion successful: {trt_path}") - else: - logger.error("TensorRT conversion failed") - # Run verification if requested - if config.should_verify: - logger.info( - "Running verification for miscalibrated and calibrated samples with an output array [SCORE_MISCALIBRATED, SCORE_CALIBRATED]..." + # Set TensorRT output path + onnx_settings = config.onnx_settings + trt_file = onnx_settings["save_file"].replace(".onnx", ".engine") + trt_path = osp.join(work_dir, trt_file) + + # Load model + logger.info(f"Loading model from checkpoint: {args.checkpoint}") + device = torch.device(device) + model = get_model(model_cfg, args.checkpoint, device=device) + + # Load sample data + logger.info(f"Loading sample data from info.pkl: {info_pkl}") + input_tensor_calibrated = load_sample_data_from_info_pkl(info_pkl, model_cfg, 0.0, sample_idx, device=device) + input_tensor_miscalibrated = load_sample_data_from_info_pkl( + info_pkl, model_cfg, 1.0, sample_idx, device=device ) - input_tensors = {"0": input_tensor_miscalibrated, "1": input_tensor_calibrated} - # Only verify formats that were exported or provided - onnx_path_for_verify = onnx_path if (config.should_export_onnx or existing_onnx) else None - trt_path_for_verify = trt_path if config.should_export_tensorrt else None + # Export models + if config.should_export_onnx: + export_to_onnx(model, input_tensor_calibrated, onnx_path, config, logger) + + if config.should_export_tensorrt: + logger.info("Converting ONNX to TensorRT...") - run_verification(model, onnx_path_for_verify, trt_path_for_verify, input_tensors, logger) + # Ensure CUDA device for TensorRT + if device.type != "cuda": + logger.warning("TensorRT requires CUDA device, switching to cuda") + device = torch.device("cuda") + input_tensor_calibrated = input_tensor_calibrated.to(device) + input_tensor_miscalibrated = input_tensor_miscalibrated.to(device) + + success = export_to_tensorrt(onnx_path, trt_path, input_tensor_calibrated, config, logger) + if success: + logger.info(f"TensorRT conversion successful: {trt_path}") + else: + logger.error("TensorRT conversion failed") + + # Run verification if requested + if config.should_verify: + 
logger.info( + "Running verification for miscalibrated and calibrated samples with an output array [SCORE_MISCALIBRATED, SCORE_CALIBRATED]..." + ) + input_tensors = {"0": input_tensor_miscalibrated, "1": input_tensor_calibrated} + + # Only verify formats that were exported or provided + onnx_path_for_verify = onnx_path if (config.should_export_onnx or existing_onnx) else None + trt_path_for_verify = trt_path if config.should_export_tensorrt else None + + run_verification(model, onnx_path_for_verify, trt_path_for_verify, input_tensors, logger) + else: + logger.info("Evaluation-only mode: Skipping model loading and export") # Get evaluation settings from config and CLI eval_cfg = config.evaluation_config @@ -1078,8 +1094,8 @@ def main(): logger.info(f"Using config-specified ONNX model: {eval_onnx_path}") else: logger.warning(f"Config-specified ONNX model not found: {eval_onnx_path}") - elif config.should_export_onnx or existing_onnx: - # Auto-detect exported ONNX model + elif not is_eval_only and (config.should_export_onnx or existing_onnx): + # Auto-detect exported ONNX model (only if we just exported) if onnx_path and osp.exists(onnx_path): models_to_evaluate.append(("onnx", onnx_path)) else: @@ -1094,8 +1110,8 @@ def main(): logger.info(f"Using config-specified TensorRT model: {eval_trt_path}") else: logger.warning(f"Config-specified TensorRT model not found: {eval_trt_path}") - elif config.should_export_tensorrt: - # Auto-detect exported TensorRT model + elif not is_eval_only and config.should_export_tensorrt: + # Auto-detect exported TensorRT model (only if we just exported) if trt_path and osp.exists(trt_path): models_to_evaluate.append(("tensorrt", trt_path)) else: @@ -1144,13 +1160,17 @@ def main(): logger.info("Deployment completed successfully!") # Log what was exported - exported_formats = [] - if config.should_export_onnx: - exported_formats.append("ONNX") - if config.should_export_tensorrt: - exported_formats.append("TensorRT") - - logger.info(f"Exported formats: {', '.join(exported_formats)}") + if not is_eval_only: + exported_formats = [] + if config.should_export_onnx: + exported_formats.append("ONNX") + if config.should_export_tensorrt: + exported_formats.append("TensorRT") + + if exported_formats: + logger.info(f"Exported formats: {', '.join(exported_formats)}") + else: + logger.info("Evaluation-only mode: No models were exported") if __name__ == "__main__": diff --git a/tools/calibration_classification/README.md b/tools/calibration_classification/README.md index 9de32e1a..f7188dbf 100644 --- a/tools/calibration_classification/README.md +++ b/tools/calibration_classification/README.md @@ -1,6 +1,6 @@ # tools/calibration_classification -The pipeline to make the model. +The pipeline to develop the calibration classification model. It contains training, evaluation, and visualization for Calibration classification. - [Support priority](https://github.com/tier4/AWML/blob/main/docs/design/autoware_ml_design.md#support-priority): Tier B @@ -23,7 +23,7 @@ Prepare the dataset you use. 
- Run docker ```sh -docker run -it --rm --gpus --shm-size=64g --name awml -p 6006:6006 -v $PWD/:/workspace -v $PWD/data:/workspace/data autoware-ml +docker run -it --rm --gpus all --shm-size=64g --name awml-calib -p 6006:6006 -v $PWD/:/workspace -v $PWD/data:/workspace/data autoware-ml-calib ``` - Create info files for T4dataset X2 Gen2 @@ -159,17 +159,6 @@ Understanding visualization configuration is crucial for calibration classificat # In config file test_projection_vis_dir = "./test_projection_vis_t4dataset/" test_results_vis_dir = "./test_results_vis_t4dataset/" - -# In transform pipeline -dict( - type="CalibrationClassificationTransform", - mode="test", - undistort=True, - enable_augmentation=False, - data_root=data_root, - projection_vis_dir=test_projection_vis_dir, # LiDAR projection visualization - results_vis_dir=test_results_vis_dir, # Model prediction visualization -), ``` ### 3.3. Usage Strategy @@ -187,7 +176,7 @@ dict( - `projection_vis_dir/`: LiDAR points overlaid on camera images - `results_vis_dir/`: Classification results with predicted labels -## 4. Train +## 4. Training ### 4.1. Environment Setup Set `CUBLAS_WORKSPACE_CONFIG` for deterministic behavior, please check this [nvidia doc](https://docs.nvidia.com/cuda/cublas/index.html#results-reproducibility) for more info @@ -195,9 +184,9 @@ Set `CUBLAS_WORKSPACE_CONFIG` for deterministic behavior, please check this [nvi export CUBLAS_WORKSPACE_CONFIG=:4096:8 ``` -### 4.2. Train +### 4.2. Run Training -- Train in general by below command. +- Run training with the following command: ```sh python tools/calibration_classification/train.py {config_file} @@ -208,7 +197,7 @@ python tools/calibration_classification/train.py {config_file} python tools/calibration_classification/train.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py ``` -- You can use docker command for training as below. +- Alternatively, you can use Docker: ```sh docker run -it --rm --gpus --name autoware-ml --shm-size=64g -d -v $PWD/:/workspace -v $PWD/data:/workspace/data autoware-ml bash -c 'python tools/calibration_classification/train.py {config_file}' @@ -243,9 +232,9 @@ The deployment system supports exporting models to ONNX and TensorRT formats wit ### 6.1. Deployment Configuration -The deployment uses a **config-first approach** where all settings are defined in the deployment config file. The config has three main sections: #### Configuration Structure +The config has four main sections: ```python # Export configuration - controls export behavior @@ -272,6 +261,15 @@ backend_config = dict( ), model_inputs=[...], # Dynamic shape configuration ) + +# Evaluation configuration - for model evaluation +evaluation = dict( + enabled=False, # Enable evaluation by default + num_samples=100, # Number of samples to evaluate + verbose=False, # Enable detailed logging + onnx_path=None, # Optional: path to existing ONNX model + tensorrt_path=None, # Optional: path to existing TensorRT engine +) ``` #### Precision Policies @@ -328,57 +326,196 @@ In config file: **Note:** The checkpoint is used for verification. To skip verification, set `export.verify = False` in config. +### 6.4. 
Evaluate ONNX and TensorRT Models +#### Evaluate Exported Models +```sh +python projects/CalibrationStatusClassification/deploy/main.py \ + projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \ + projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ + checkpoint.pth \ + --evaluate \ + --num-samples 100 \ + --verbose +``` +#### Command Line Arguments -### 6.4. Evaluate ONNX and TensorRT Models +- `--evaluate`: Enable full model evaluation on multiple samples +- `--num-samples`: Number of samples to evaluate (default: 10) +- `--verbose`: Enable verbose logging during evaluation + +## 7. INT8 Quantization Guide + +INT8 quantization reduces model size and improves inference speed by converting 32-bit floating-point weights to 8-bit integers. This guide covers Post-Training Quantization (PTQ) using NVIDIA's ModelOpt tool. + +### 7.1. Prerequisites + +Before starting quantization, ensure you have: -#### ONNX Model Evaluation +1. **Trained model checkpoint** (e.g., `epoch_25.pth`) +2. **ONNX model** exported using Section 6 +3. **Dataset info files** (from Section 2.1) -```bash -python tools/calibration_classification/evaluate_inference.py \ - --onnx work_dirs/end2end.onnx \ - --model-cfg projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ - --info-pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl +--- + +### 7.2. Generate Calibration Data + +Calibration data is used to determine optimal quantization parameters. Use representative samples from your training dataset. + +#### 7.2.1. Basic Usage + +```sh +python tools/calibration_classification/toolkit.py \ + projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ + --info_pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_train.pkl \ + --data_root data/t4dataset \ + --npz_output_path calibration_file.npz \ + --indices 200 ``` -#### TensorRT Model Evaluation +#### 7.2.2. Memory Requirements + +Calibration data is loaded entirely into GPU memory during quantization. Calculate required memory: +For 32 GB memory, the maximum you can use is approximately: 32 GB / 1860 x 2880 x 5 x 4 Bytes, around 321 images. +Use `--indices 200` to limit to first 200 samples. + + +### 7.3. Build Optimization Docker Environment + +The quantization requires NVIDIA ModelOpt, which is provided in a separate Docker image. -```bash -python tools/calibration_classification/evaluate_inference.py \ - --tensorrt work_dirs/end2end.engine \ - --model-cfg projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ - --info-pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl +```sh +DOCKER_BUILDKIT=1 docker build -t autoware-ml-calib-opt \ + -f projects/CalibrationStatusClassification/DockerfileOpt . ``` -#### Command Line Arguments +**What's included:** +- CUDA 12.6 + cuDNN +- ONNX 1.16.2 + ONNXRuntime 1.23.0 +- NVIDIA ModelOpt 0.33.1 + +### 7.4. Launch Optimization Container + +```sh +docker run -it --rm --gpus all --shm-size=32g \ + --name awml-opt -p 6006:6006 \ + -v $PWD:/workspace \ + -v $PWD/data:/workspace/data \ + autoware-ml-calib-opt +``` + +Remember to put your calibration.npz to the workspace/data + +### 7.5. 
Run INT8 Quantization + +Inside the Docker container, run the quantization: + +```sh +python3 -m modelopt.onnx.quantization \ + --onnx_path=work_dirs/end2end.onnx \ + --quantize_mode=int8 \ + --calibration_data_path=calibration_file.npz +``` -- `--onnx`: Path to ONNX model file (mutually exclusive with `--tensorrt`) -- `--tensorrt`: Path to TensorRT engine file (mutually exclusive with `--onnx`) -- `--model-cfg`: Path to model config file -- `--info-pkl`: Path to dataset info file -- `--device`: Device to use for inference (`cpu` or `cuda`, default: `cpu`) -- `--log-level`: Logging level (`DEBUG`, `INFO`, `WARNING`, `ERROR`, default: `INFO`) +**Output**: `work_dirs/end2end.quant.onnx` -### 6.5. INT8 Quantization -Set the number of images you want for calibration. Note that you need to consider the size of your memory. -For 32 GB memory, the maximum you can use is approximately: 1860 x 2880 x 5 x 4 Bytes / 32 GB +### 7.6. Evaluate Quantized Model -Therefore, please limit the calibration data using the indices parameter +#### 7.6.1. Evaluate ONNX INT8 Model +```sh +python projects/CalibrationStatusClassification/deploy/main.py \ + projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \ + projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ + epoch_25.pth \ + --evaluate \ + --num-samples 100 +``` + +**Note**: Configure the deployment config to use the quantized ONNX model: +```python +runtime_io = dict( + onnx_file="work_dirs/end2end.quant.onnx", # Point to quantized ONNX + # ... +) +``` + +#### 7.6.2. Convert to TensorRT INT8 Engine + +For optimal performance, convert the quantized ONNX to TensorRT: + +**In deployment config** (`projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py`): ```python -python tools/calibration_classification/toolkit.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py --info_pkl data/t4dataset/calibration_info/t4dataset_gen2_base_infos_train.pkl --data_root data/t4dataset --output_dir /vis --npz_output_path calibration_file.npz --indices 200 +export = dict( + mode="trt", # Convert ONNX to TensorRT + verify=True, + device="cuda:0", + work_dir="/workspace/work_dirs", +) + +runtime_io = dict( + onnx_file="work_dirs/end2end.quant.onnx", # Use quantized ONNX + info_pkl="data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl", + sample_idx=0, +) + +backend_config = dict( + type="tensorrt", + common_config=dict( + max_workspace_size=1 << 30, + precision_policy="strongly_typed", # Preserve INT8 quantization + ), + # ... model_inputs configuration +) +``` + +**Run conversion:** +```sh +python projects/CalibrationStatusClassification/deploy/main.py \ + projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \ + projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ + epoch_25.pth # For verification only ``` +**Output**: `work_dirs/end2end.engine` (INT8 TensorRT engine) +#### 7.6.3. 
Evaluate TensorRT INT8 Engine + +```sh +python projects/CalibrationStatusClassification/deploy/main.py \ + projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \ + projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ + epoch_25.pth \ + --evaluate \ + --num-samples 100 +``` +**Note**: The deployment config should point to the TensorRT engine: ```python -DOCKER_BUILDKIT=1 docker build -t autoware-ml-calib-opt -f projects/CalibrationStatusClassification/DockerfileOpt . -docker run -it --rm --gpus all --shm-size=32g --name awml-opt -p 6006:6006 -v $PWD/:/workspace -v $PWD/data:/workspace/data autoware-ml-calib-opt -# Access the optimization docker -python3 -m modelopt.onnx.quantization --onnx_path=end2end.onnx --quantize_mode=int8 --calibration_data_path=calibration_file.npz +runtime_io = dict( + onnx_file="work_dirs/end2end.quant.onnx", + # ... +) -# After getting the end2end.quant.onnx, you can evaluate with quant onnx or conver to int8 engine by following section 6.3 +# TensorRT engine will be automatically loaded from work_dirs/end2end.engine ``` + +### 7.7. Expected Results + +**Typical performance improvements:** + +| Metric | FP32 ONNX | INT8 ONNX | INT8 TensorRT | +|-----------------|-----------|-----------|---------------| +| Model Size | 100% | ~25% | ~25% | +| Inference Speed | 1.0× | 1.5-2.0× | 2.5-4.0× | +| Accuracy Loss | baseline | <1% | <1% | +| Memory Usage | 100% | ~40% | ~30% | + +**Note**: Actual results depend on: +- Model architecture (ResNet18, etc.) +- Input resolution +- Hardware (GPU generation) +- Calibration data quality diff --git a/tools/calibration_classification/visualize_lidar_camera_projection.py b/tools/calibration_classification/visualize_lidar_camera_projection.py index 78e692df..2a2bf85d 100644 --- a/tools/calibration_classification/visualize_lidar_camera_projection.py +++ b/tools/calibration_classification/visualize_lidar_camera_projection.py @@ -118,14 +118,14 @@ def _validate_sample_structure(self, sample: Dict[str, Any]) -> bool: return all(key in sample for key in required_keys) def process_single_sample( - self, sample: Dict[str, Any], sample_idx: int, save_npz: bool = False + self, sample: Dict[str, Any], sample_idx: int, npz_output_path: Optional[str] = None ) -> Optional[Dict[str, Any]]: """ Process a single sample using the transform. Args: sample: Sample dictionary to process. sample_idx: Index of the sample for logging purposes. - save_npz: Whether to collect result for NPZ saving. + npz_output_path: Path for NPZ output. If provided, result will be collected for later saving. Returns: Transformed sample data if successful, None otherwise. """ @@ -136,8 +136,8 @@ def process_single_sample( result = self.transform(sample) - # Store the result for NPZ saving only if requested - if save_npz: + # Store the result for NPZ saving if output path is provided + if npz_output_path: self.collected_results.append(result["fused_img"]) self.logger.info(f"Successfully processed sample {sample_idx}") @@ -177,7 +177,6 @@ def process_samples( info_pkl_path: str, indices: Optional[List[int]] = None, visualize: bool = False, - save_npz: bool = False, npz_output_path: Optional[str] = None, ) -> None: """ @@ -186,8 +185,7 @@ def process_samples( info_pkl_path: Path to the info.pkl file. indices: Optional list of sample indices to process. If None, all samples are processed. visualize: Whether to generate visualizations (requires output_dir to be set). - save_npz: Whether to collect results for NPZ saving. 
- npz_output_path: Path for saving NPZ file (only used if save_npz=True). + npz_output_path: Path for saving NPZ file. If provided, NPZ saving is automatically enabled. """ try: samples_list = self.load_info_pkl(info_pkl_path) @@ -208,15 +206,15 @@ def process_samples( self.logger.info(f"Processing {len(samples_to_process)} samples") if visualize: self.logger.info("Visualization enabled") - if save_npz: + if npz_output_path: self.logger.info("NPZ saving enabled") # Process each sample for i, sample in enumerate(samples_to_process): - self.process_single_sample(sample, i + 1, save_npz=save_npz) + self.process_single_sample(sample, i + 1, npz_output_path=npz_output_path) # Save NPZ file if requested - if save_npz and npz_output_path: + if npz_output_path: self.save_npz_file(npz_output_path) self.logger.info("Finished processing all samples") @@ -230,7 +228,6 @@ def process_single_sample_from_file( info_pkl_path: str, sample_idx: int, visualize: bool = False, - save_npz: bool = False, npz_output_path: Optional[str] = None, ) -> Optional[Dict[str, Any]]: """ @@ -239,8 +236,7 @@ def process_single_sample_from_file( info_pkl_path: Path to the info.pkl file. sample_idx: Index of the sample to process (0-based). visualize: Whether to generate visualizations (requires output_dir to be set). - save_npz: Whether to collect results for NPZ saving. - npz_output_path: Path for saving NPZ file (only used if save_npz=True). + npz_output_path: Path for saving NPZ file. If provided, NPZ saving is automatically enabled. Returns: Transformed sample data if successful, None otherwise. """ @@ -254,20 +250,20 @@ def process_single_sample_from_file( self.logger.info(f"Processing sample {sample_idx} from {len(samples_list)} total samples") if visualize: self.logger.info("Visualization enabled") - if save_npz: + if npz_output_path: self.logger.info("NPZ saving enabled") # Clear previous results for single sample processing self.collected_results = [] sample = samples_list[sample_idx] - result = self.process_single_sample(sample, sample_idx, save_npz=save_npz) + result = self.process_single_sample(sample, sample_idx, npz_output_path=npz_output_path) if result and visualize and self.output_dir: self.logger.info(f"Visualizations saved to directory: {self.output_dir}") # Save NPZ file if requested - if save_npz and npz_output_path and result: + if npz_output_path and result: self.save_npz_file(npz_output_path) return result @@ -289,15 +285,15 @@ def create_argument_parser() -> argparse.ArgumentParser: # Process all samples with visualization python toolkit.py model_config.py --info_pkl data/info.pkl --data_root data/ --output_dir /vis --visualize # Process all samples and save as NPZ - python toolkit.py model_config.py --info_pkl data/info.pkl --data_root data/ --npz_output_path results.npz --save_npz + python toolkit.py model_config.py --info_pkl data/info.pkl --data_root data/ --npz_output_path results.npz # Process all samples with both visualization and NPZ saving - python toolkit.py model_config.py --info_pkl data/info.pkl --data_root data/ --output_dir /vis --visualize --npz_output_path results.npz --save_npz + python toolkit.py model_config.py --info_pkl data/info.pkl --data_root data/ --output_dir /vis --visualize --npz_output_path results.npz # Process specific sample with visualization python toolkit.py model_config.py --info_pkl data/info.pkl --data_root data/ --output_dir /vis --visualize --sample_idx 0 # Process specific indices with NPZ saving - python toolkit.py model_config.py --info_pkl data/info.pkl 
--data_root data/ --save_npz --npz_output_path results.npz --indices 0 1 2 + python toolkit.py model_config.py --info_pkl data/info.pkl --data_root data/ --npz_output_path results.npz --indices 0 1 2 # Process first 5 samples (indices 0, 1, 2, 3, 4) with both features - python toolkit.py model_config.py --info_pkl data/info.pkl --data_root data/ --output_dir /vis --visualize --save_npz --npz_output_path results.npz --indices 5 + python toolkit.py model_config.py --info_pkl data/info.pkl --data_root data/ --output_dir /vis --visualize --npz_output_path results.npz --indices 5 """ parser = argparse.ArgumentParser( @@ -318,8 +314,7 @@ def create_argument_parser() -> argparse.ArgumentParser: help="Specific sample indices to process (0-based), or a single number N to process indices 0 to N-1", ) parser.add_argument("--visualize", action="store_true", help="Enable visualization (requires --output_dir)") - parser.add_argument("--save_npz", action="store_true", help="Enable NPZ saving (requires --npz_output_path)") - parser.add_argument("--npz_output_path", help="Path for saving NPZ file (only used with --save_npz)") + parser.add_argument("--npz_output_path", help="Path for saving NPZ file (automatically enables NPZ saving)") parser.add_argument( "--show_point_details", action="store_true", help="Show detailed point cloud field information" ) @@ -340,9 +335,6 @@ def main() -> None: if args.visualize and not args.output_dir: parser.error("--visualize requires --output_dir to be specified") - if args.save_npz and not args.npz_output_path: - parser.error("--save_npz requires --npz_output_path to be specified") - # Load model configuration model_cfg = Config.fromfile(args.model_cfg) @@ -369,7 +361,6 @@ def main() -> None: args.info_pkl, args.sample_idx, visualize=args.visualize, - save_npz=args.save_npz, npz_output_path=args.npz_output_path, ) else: @@ -378,7 +369,6 @@ def main() -> None: args.info_pkl, processed_indices, visualize=args.visualize, - save_npz=args.save_npz, npz_output_path=args.npz_output_path, ) From 76df38473d57708e839526227d406e6d4616e7b3 Mon Sep 17 00:00:00 2001 From: vividf Date: Fri, 3 Oct 2025 11:04:17 +0900 Subject: [PATCH 11/19] chore: remove file Signed-off-by: vividf --- .../evaluate_inference.py | 691 ------------------ 1 file changed, 691 deletions(-) delete mode 100644 tools/calibration_classification/evaluate_inference.py diff --git a/tools/calibration_classification/evaluate_inference.py b/tools/calibration_classification/evaluate_inference.py deleted file mode 100644 index a32b6127..00000000 --- a/tools/calibration_classification/evaluate_inference.py +++ /dev/null @@ -1,691 +0,0 @@ -#!/usr/bin/env python3 -""" -Integrated model evaluation script for calibration classification. -Supports both ONNX and TensorRT inference modes. -Simplified version that directly uses info.pkl files instead of dataset. 
-""" - -import argparse -import gc -import logging -import os -import pickle -import signal -import sys -import time -from typing import Any, Dict, Optional, Tuple - -import numpy as np -import torch -import torch.nn.functional as F -from mmengine.config import Config - -# Now we can import the class directly -from autoware_ml.calibration_classification.datasets.transforms.calibration_classification_transform import ( - CalibrationClassificationTransform, -) - -# TensorRT imports (only if needed) - match original test_tensorrt.py -try: - import pycuda.autoinit - import pycuda.driver as cuda - import tensorrt as trt - - TENSORRT_AVAILABLE = True -except ImportError: - TENSORRT_AVAILABLE = False - -# Constants -LABELS = {"0": "miscalibrated", "1": "calibrated"} - - -def signal_handler(signum, frame): - """Handle segmentation faults and other signals gracefully.""" - print(f"\nReceived signal {signum}. Cleaning up...") - try: - if torch.cuda.is_available(): - torch.cuda.empty_cache() - gc.collect() - print("Cleanup completed.") - except: - pass - sys.exit(1) - - -# Register signal handlers -signal.signal(signal.SIGSEGV, signal_handler) -signal.signal(signal.SIGINT, signal_handler) - - -def setup_logging(level: str = "INFO") -> logging.Logger: - """Setup logging configuration.""" - logging.basicConfig(level=getattr(logging, level.upper()), format="%(asctime)s - %(levelname)s - %(message)s") - return logging.getLogger(__name__) - - -def load_info_pkl_data(info_pkl_path: str, sample_idx: int = 0) -> Dict[str, Any]: - """ - Load a single sample from info.pkl file. - Args: - info_pkl_path: Path to the info.pkl file - sample_idx: Index of the sample to load (default: 0) - Returns: - Sample dictionary with the required structure for CalibrationClassificationTransform - Raises: - FileNotFoundError: If info.pkl file doesn't exist - ValueError: If data format is unexpected or sample index is invalid - """ - if not os.path.exists(info_pkl_path): - raise FileNotFoundError(f"Info.pkl file not found: {info_pkl_path}") - - try: - with open(info_pkl_path, "rb") as f: - info_data = pickle.load(f) - except Exception as e: - raise ValueError(f"Failed to load info.pkl file: {e}") - - # Extract samples from info.pkl - if isinstance(info_data, dict): - if "data_list" in info_data: - samples_list = info_data["data_list"] - else: - raise ValueError(f"Expected 'data_list' key in info_data, found keys: {list(info_data.keys())}") - else: - raise ValueError(f"Expected dict format, got {type(info_data)}") - - if not samples_list: - raise ValueError("No samples found in info.pkl") - - if sample_idx >= len(samples_list): - raise ValueError(f"Sample index {sample_idx} out of range (0-{len(samples_list)-1})") - - sample = samples_list[sample_idx] - - # Validate sample structure - required_keys = ["image", "lidar_points"] - if not all(key in sample for key in required_keys): - raise ValueError(f"Sample {sample_idx} has invalid structure. Required keys: {required_keys}") - - return sample - - -def load_sample_data_from_info_pkl( - info_pkl_path: str, - transform: CalibrationClassificationTransform, - sample_idx: int = 0, - device: str = "cpu", - miscalibration_probability: float = 0.0, -) -> Tuple[torch.Tensor, int]: - """ - Load and preprocess sample data from info.pkl using CalibrationClassificationTransform. 
- Args: - info_pkl_path: Path to the info.pkl file - transform: Reusable CalibrationClassificationTransform instance - sample_idx: Index of the sample to load (default: 0) - device: Device to load tensor on - miscalibration_probability: Probability to generate miscalibrated samples (0.0=calibrated, 1.0=miscalibrated) - Returns: - Tuple of (preprocessed tensor ready for model inference, ground truth label) - """ - # Load sample data from info.pkl - sample_data = load_info_pkl_data(info_pkl_path, sample_idx) - - # Ensure sample_idx is in the data - sample_data["sample_idx"] = sample_idx - - # Update transform's miscalibration probability (reuse existing transform instance) - transform.miscalibration_probability = miscalibration_probability - - # Apply transform with miscalibration control - results = transform.transform(sample_data) - input_data_processed = results["fused_img"] # (H, W, 5) - gt_label = results["gt_label"] # 0=miscalibrated, 1=calibrated - - # Convert to tensor - input_tensor = torch.from_numpy(input_data_processed).permute(2, 0, 1).float() # (5, H, W) - input_tensor = input_tensor.unsqueeze(0).to(device) # (1, 5, H, W) - - return input_tensor, gt_label - - -def run_onnx_inference( - ort_session, - input_tensor: torch.Tensor, - logger: logging.Logger, -) -> Tuple[np.ndarray, float]: - """Run ONNX inference directly and return output and latency.""" - # Get input name from session - input_name = ort_session.get_inputs()[0].name - - # Convert to numpy (input_tensor is already in correct shape from load_sample_data_from_info_pkl) - onnx_input = {input_name: input_tensor.cpu().numpy()} - - start_time = time.perf_counter() - onnx_output = ort_session.run(None, onnx_input)[0] - end_time = time.perf_counter() - onnx_latency = (end_time - start_time) * 1000 - - # Ensure onnx_output is numpy array - if not isinstance(onnx_output, np.ndarray): - logger.error(f"Unexpected ONNX output type: {type(onnx_output)}") - return None, 0.0 - - return onnx_output, onnx_latency - - -def run_tensorrt_inference(engine, input_tensor: torch.Tensor, logger: logging.Logger) -> Tuple[np.ndarray, float]: - """Run TensorRT inference and return output with timing.""" - if not TENSORRT_AVAILABLE: - raise ImportError("TensorRT and PyCUDA are required for TensorRT inference. 
Please install them.") - - context = None - stream = None - start = None - end = None - d_input = None - d_output = None - - try: - context = engine.create_execution_context() - stream = cuda.Stream() - start = cuda.Event() - end = cuda.Event() - - # Get tensor names and shapes - input_name, output_name = None, None - for i in range(engine.num_io_tensors): - tensor_name = engine.get_tensor_name(i) - if engine.get_tensor_mode(tensor_name) == trt.TensorIOMode.INPUT: - input_name = tensor_name - elif engine.get_tensor_mode(tensor_name) == trt.TensorIOMode.OUTPUT: - output_name = tensor_name - - if input_name is None or output_name is None: - raise RuntimeError("Could not find input/output tensor names") - - # Prepare arrays - # Convert torch tensor to numpy (input_tensor is guaranteed to be on CPU) - input_np = input_tensor.numpy().astype(np.float32) - - # Validate input data quality - if np.isnan(input_np).any(): - raise ValueError(f"Input contains NaN values") - if np.isinf(input_np).any(): - raise ValueError(f"Input contains Inf values") - - logger.debug( - f"Input data stats: min={input_np.min():.4f}, max={input_np.max():.4f}, mean={input_np.mean():.4f}" - ) - - if not input_np.flags["C_CONTIGUOUS"]: - input_np = np.ascontiguousarray(input_np) - - # Validate input shape - expected_shape = engine.get_tensor_shape(input_name) - - # Check if shapes are compatible (considering -1 as dynamic dimension) - def shapes_compatible(actual_shape, expected_shape): - """Check if actual shape is compatible with expected shape (which may contain -1 for dynamic dims).""" - if len(actual_shape) != len(expected_shape): - return False - for actual_dim, expected_dim in zip(actual_shape, expected_shape): - if expected_dim != -1 and actual_dim != expected_dim: - return False - return True - - if not shapes_compatible(input_np.shape, expected_shape): - # Only warn/error if shapes are truly incompatible - if len(input_np.shape) == len(expected_shape) - 1 and expected_shape[0] == 1: - # Add batch dimension if missing - try: - input_np = np.expand_dims(input_np, axis=0) - logger.info(f"Added batch dimension to input: {input_np.shape}") - except Exception as e: - raise RuntimeError( - f"Cannot add batch dimension to input from {input_np.shape} to {expected_shape}: {e}" - ) - else: - raise RuntimeError( - f"Input shape mismatch: expected {expected_shape}, got {input_np.shape}. Please ensure input has correct batch dimension." 
- ) - else: - # Shapes are compatible (dynamic dimensions match), use actual shape - if tuple(expected_shape) != input_np.shape: - logger.debug(f"Using dynamic input shape {input_np.shape} (engine expects {expected_shape})") - - context.set_input_shape(input_name, input_np.shape) - output_shape = context.get_tensor_shape(output_name) - output_np = np.empty(output_shape, dtype=np.float32) - if not output_np.flags["C_CONTIGUOUS"]: - output_np = np.ascontiguousarray(output_np) - - # Allocate GPU memory - d_input = cuda.mem_alloc(input_np.nbytes) - d_output = cuda.mem_alloc(output_np.nbytes) - - # Set tensor addresses - context.set_tensor_address(input_name, int(d_input)) - context.set_tensor_address(output_name, int(d_output)) - - # Run inference with timing - cuda.memcpy_htod_async(d_input, input_np, stream) - start.record(stream) - context.execute_async_v3(stream_handle=stream.handle) - end.record(stream) - cuda.memcpy_dtoh_async(output_np, d_output, stream) - stream.synchronize() - - latency = end.time_since(start) - return output_np, latency - - except Exception as e: - logger.error(f"TensorRT inference failed: {e}") - raise - finally: - # Cleanup with better error handling - try: - if d_input is not None: - d_input.free() - except Exception as e: - logger.warning(f"Failed to free input memory: {e}") - - try: - if d_output is not None: - d_output.free() - except Exception as e: - logger.warning(f"Failed to free output memory: {e}") - - # Note: Don't try to free stream, start, end, or context as they are managed by TensorRT - # and may cause issues if freed manually - - -def load_tensorrt_engine(engine_path: str, logger: logging.Logger): - """Load TensorRT engine from file.""" - if not TENSORRT_AVAILABLE: - raise ImportError("TensorRT is required for TensorRT inference. 
Please install it.") - - trt_logger = trt.Logger(trt.Logger.WARNING) - - try: - with open(engine_path, "rb") as f: - engine_bytes = f.read() - engine = trt.Runtime(trt_logger).deserialize_cuda_engine(engine_bytes) - if engine is None: - raise RuntimeError("Failed to deserialize TensorRT engine") - return engine - except Exception as e: - logger.error(f"Error loading TensorRT engine: {e}") - logger.error("This might be due to TensorRT version incompatibility.") - logger.error("Please rebuild the TensorRT engine with the current TensorRT version.") - raise - - -def evaluate_model( - model_path: str, - model_type: str, - model_cfg_path: str, - info_pkl_path: str, - logger: logging.Logger, - device: str = "cpu", - num_samples: int = 10, - verbose: bool = False, -) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: - """Evaluate model using info.pkl data and return predictions, ground truth, probabilities, and latencies.""" - - # Load model config - model_cfg = Config.fromfile(model_cfg_path) - - # Load info.pkl data - try: - with open(info_pkl_path, "rb") as f: - info_data = pickle.load(f) - - if "data_list" not in info_data: - raise ValueError("Expected 'data_list' key in info.pkl") - - samples_list = info_data["data_list"] - logger.info(f"Loaded {len(samples_list)} samples from info.pkl") - - # Limit number of samples for evaluation - num_samples = min(num_samples, len(samples_list)) - logger.info(f"Evaluating {num_samples} samples") - - except Exception as e: - logger.error(f"Failed to load info.pkl: {e}") - raise - - # Initialize transform once and reuse it for all samples - data_root = model_cfg.get("data_root", None) - if data_root is None: - raise ValueError("data_root not found in model configuration") - - transform = CalibrationClassificationTransform( - transform_config=model_cfg.get("transform_config", None), - mode="test", - data_root=data_root, - projection_vis_dir=None, - results_vis_dir=None, - enable_augmentation=False, - miscalibration_probability=0.0, # Will be updated per sample - ) - - # Initialize inference engine - if model_type == "onnx": - try: - import onnxruntime as ort - except ImportError: - raise ImportError( - "onnxruntime is required for ONNX inference. Please install it with: pip install onnxruntime" - ) - - logger.info(f"Using ONNX model: {model_path}") - logger.info("Creating ONNX InferenceSession (one-time setup)...") - - # Configure execution providers based on device - if device == "cuda": - # Try to use CUDA provider, fall back to CPU if not available - providers = ["CUDAExecutionProvider", "CPUExecutionProvider"] - logger.info("Attempting to use CUDA acceleration for ONNX inference...") - try: - available_providers = ort.get_available_providers() - if "CUDAExecutionProvider" not in available_providers: - logger.warning( - "CUDAExecutionProvider not available. Available providers: {}. " - "Install onnxruntime-gpu for CUDA acceleration: pip install onnxruntime-gpu".format( - available_providers - ) - ) - logger.warning("Falling back to CPU execution") - providers = ["CPUExecutionProvider"] - else: - logger.info("CUDAExecutionProvider is available") - except Exception as e: - logger.warning(f"Error checking CUDA provider: {e}. 
Falling back to CPU") - providers = ["CPUExecutionProvider"] - else: - providers = ["CPUExecutionProvider"] - logger.info("Using CPU for ONNX inference") - - # Create session once and reuse it - ort_session = ort.InferenceSession(model_path, providers=providers) - - # Log which provider is actually being used - logger.info(f"ONNX session using providers: {ort_session.get_providers()}") - - # Debug: Print ONNX model input info - input_name = ort_session.get_inputs()[0].name - input_shape = ort_session.get_inputs()[0].shape - input_type = ort_session.get_inputs()[0].type - logger.info(f"ONNX model expects - Input name: {input_name}, Shape: {input_shape}, Type: {input_type}") - logger.info("ONNX InferenceSession created successfully.") - - # Bind session to closure for reuse - inference_func = lambda input_tensor: run_onnx_inference(ort_session, input_tensor, logger) - elif model_type == "tensorrt": - logger.info(f"Using TensorRT model: {model_path}") - engine = load_tensorrt_engine(model_path, logger) - # TensorRT requires CPU tensors - it manages GPU memory internally - inference_func = lambda input_tensor: run_tensorrt_inference(engine, input_tensor.cpu(), logger) - else: - raise ValueError(f"Unsupported model type: {model_type}") - - # Lists to store results - all_predictions = [] - all_ground_truth = [] - all_probabilities = [] - all_latencies = [] - - # Evaluate samples - for sample_idx in range(num_samples): - if sample_idx % 5 == 0: - logger.info(f"Processing sample {sample_idx + 1}/{num_samples}") - - try: - # Load sample data directly from info.pkl - generate calibrated sample - input_tensor_calibrated, gt_label_calibrated = load_sample_data_from_info_pkl( - info_pkl_path, - transform, - sample_idx, - device=device, - miscalibration_probability=0.0, # Always calibrated - ) - - # Load sample data again - generate miscalibrated sample - input_tensor_miscalibrated, gt_label_miscalibrated = load_sample_data_from_info_pkl( - info_pkl_path, - transform, - sample_idx, - device=device, - miscalibration_probability=1.0, # Always miscalibrated - ) - - # Test both calibrated and miscalibrated samples - test_samples = [ - (input_tensor_calibrated, gt_label_calibrated), - (input_tensor_miscalibrated, gt_label_miscalibrated), - ] - - for input_tensor, gt_label in test_samples: - # Debug: Print input tensor info only in verbose mode - if verbose: - logger.info(f"Sample {sample_idx + 1} (GT={gt_label}) input tensor:") - logger.info(f" Dtype: {input_tensor.dtype}") - - # Run inference - output_np, latency = inference_func(input_tensor) - - if output_np is None: - logger.error(f"Failed to get output for sample {sample_idx}") - continue - - # Convert logits to probabilities - logits = torch.from_numpy(output_np) - probabilities = F.softmax(logits, dim=-1) - predicted_class = torch.argmax(probabilities, dim=-1).item() - confidence = probabilities.max().item() - - # Store results - all_predictions.append(predicted_class) - all_ground_truth.append(gt_label) - all_probabilities.append(probabilities.cpu().numpy()) - all_latencies.append(latency) - - # Print sample results only in verbose mode - if verbose: - logger.info( - f"Sample {sample_idx + 1} (GT={gt_label}): Pred={predicted_class}, Confidence={confidence:.4f}, Latency={latency:.2f}ms" - ) - logger.info(f" Raw logits: {logits.squeeze()}") - logger.info(f" Probabilities: {probabilities.squeeze()}") - - # Clear GPU memory periodically for TensorRT - if model_type == "tensorrt" and sample_idx % 10 == 0: - if torch.cuda.is_available(): - 
torch.cuda.empty_cache() - gc.collect() - - except Exception as e: - logger.error(f"Error processing sample {sample_idx}: {e}") - continue - - return ( - np.array(all_predictions), - np.array(all_ground_truth), - np.array(all_probabilities), - np.array(all_latencies), - ) - - -def print_results( - all_predictions: np.ndarray, - all_ground_truth: np.ndarray, - all_probabilities: np.ndarray, - all_latencies: np.ndarray, - model_type: str, - logger: logging.Logger, -): - """Print evaluation results.""" - if len(all_predictions) == 0: - logger.error("No samples were processed successfully.") - return - - correct_predictions = (all_predictions == all_ground_truth).sum() - total_samples = len(all_predictions) - accuracy = correct_predictions / total_samples - avg_latency = np.mean(all_latencies) - - # Print results - logger.info(f"\n{'='*50}") - logger.info(f"{model_type.upper()} Model Evaluation Results") - logger.info(f"{'='*50}") - logger.info(f"Total samples: {total_samples}") - logger.info(f"Correct predictions: {correct_predictions}") - logger.info(f"Accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)") - logger.info(f"Average latency: {avg_latency:.2f} ms") - - # Calculate per-class accuracy - unique_classes = np.unique(all_ground_truth) - logger.info(f"\nPer-class accuracy:") - for cls in unique_classes: - cls_mask = all_ground_truth == cls - cls_correct = (all_predictions[cls_mask] == all_ground_truth[cls_mask]).sum() - cls_total = cls_mask.sum() - cls_accuracy = cls_correct / cls_total if cls_total > 0 else 0 - logger.info( - f" Class {cls} ({LABELS[str(cls)]}): {cls_correct}/{cls_total} = {cls_accuracy:.4f} ({cls_accuracy*100:.2f}%)" - ) - - # Calculate average confidence - avg_confidence = np.mean([prob.max() for prob in all_probabilities]) - logger.info(f"\nAverage confidence: {avg_confidence:.4f}") - - # Calculate latency statistics - min_latency = np.min(all_latencies) - max_latency = np.max(all_latencies) - std_latency = np.std(all_latencies) - - logger.info(f"\nLatency Statistics:") - logger.info(f" Average latency: {avg_latency:.2f} ms") - logger.info(f" Min latency: {min_latency:.2f} ms") - logger.info(f" Max latency: {max_latency:.2f} ms") - logger.info(f" Std latency: {std_latency:.2f} ms") - - # Show confusion matrix - logger.info(f"\nConfusion Matrix:") - logger.info(f"Predicted ->") - logger.info(f"Actual 0 1") - for true_cls in unique_classes: - row = [] - for pred_cls in unique_classes: - count = ((all_ground_truth == true_cls) & (all_predictions == pred_cls)).sum() - row.append(f"{count:4d}") - logger.info(f" {true_cls} {' '.join(row)}") - - logger.info(f"\n{model_type.upper()} model evaluation completed successfully!") - logger.info(f"Model accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)") - logger.info(f"Average latency: {avg_latency:.2f} ms") - - -def main(): - """Main function.""" - parser = argparse.ArgumentParser(description="Evaluate calibration classification model using info.pkl") - parser.add_argument("--onnx", type=str, help="Path to ONNX model file") - parser.add_argument("--tensorrt", type=str, help="Path to TensorRT engine file") - parser.add_argument( - "--model-cfg", - type=str, - default="projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb8-25e_j6gen2.py", - help="Path to model config file", - ) - parser.add_argument( - "--info-pkl", - type=str, - required=True, - help="Path to info.pkl file containing calibration data", - ) - parser.add_argument( - "--num-samples", - type=int, - default=10, - help="Number of samples to evaluate 
(default: 10)", - ) - parser.add_argument( - "--device", - type=str, - default="cpu", - choices=["cpu", "cuda:0"], - help="Device to use for data preprocessing. For ONNX: also enables CUDA acceleration if onnxruntime-gpu is installed. For TensorRT: data is always transferred to CPU before inference (TensorRT manages GPU memory internally).", - ) - parser.add_argument( - "--log-level", type=str, default="INFO", choices=["DEBUG", "INFO", "WARNING", "ERROR"], help="Logging level" - ) - parser.add_argument( - "--verbose", action="store_true", help="Enable verbose logging for detailed sample information" - ) - - args = parser.parse_args() - - # Validate arguments - if not args.onnx and not args.tensorrt: - parser.error("Either --onnx or --tensorrt must be specified") - - if args.onnx and args.tensorrt: - parser.error("Only one of --onnx or --tensorrt can be specified") - - # Setup logging - logger = setup_logging(args.log_level) - - try: - # Determine model type and path - if args.onnx: - model_type = "onnx" - model_path = args.onnx - else: - model_type = "tensorrt" - model_path = args.tensorrt - - # Check if files exist - if not os.path.exists(model_path): - raise FileNotFoundError(f"Model file not found: {model_path}") - if not os.path.exists(args.model_cfg): - raise FileNotFoundError(f"Model config file not found: {args.model_cfg}") - if not os.path.exists(args.info_pkl): - raise FileNotFoundError(f"Info.pkl file not found: {args.info_pkl}") - - logger.info(f"Starting {model_type.upper()} model evaluation...") - logger.info(f"Model path: {model_path}") - logger.info(f"Model config: {args.model_cfg}") - logger.info(f"Info.pkl: {args.info_pkl}") - logger.info(f"Device: {args.device}") - logger.info(f"Number of samples: {args.num_samples}") - logger.info(f"Verbose logging: {args.verbose}") - - # Evaluate model - all_predictions, all_ground_truth, all_probabilities, all_latencies = evaluate_model( - model_path, model_type, args.model_cfg, args.info_pkl, logger, args.device, args.num_samples, args.verbose - ) - - # Print results - print_results(all_predictions, all_ground_truth, all_probabilities, all_latencies, model_type, logger) - - # Cleanup - if torch.cuda.is_available(): - torch.cuda.empty_cache() - gc.collect() - - logger.info("Script completed successfully!") - - except Exception as e: - logger.error(f"Error in main execution: {e}") - # Cleanup on error - try: - if torch.cuda.is_available(): - torch.cuda.empty_cache() - gc.collect() - except: - pass - sys.exit(1) - - -if __name__ == "__main__": - main() From 524ada7cf3c9dbc51c9b2d627dfa10933e222b90 Mon Sep 17 00:00:00 2001 From: vividf Date: Mon, 6 Oct 2025 15:48:38 +0900 Subject: [PATCH 12/19] chore: update evaluation Signed-off-by: vividf --- .../deploy/main.py | 52 ++++++++----------- tools/calibration_classification/README.md | 5 -- 2 files changed, 23 insertions(+), 34 deletions(-) diff --git a/projects/CalibrationStatusClassification/deploy/main.py b/projects/CalibrationStatusClassification/deploy/main.py index 3989b3b9..b030d0b2 100644 --- a/projects/CalibrationStatusClassification/deploy/main.py +++ b/projects/CalibrationStatusClassification/deploy/main.py @@ -1085,37 +1085,31 @@ def main(): # Determine which models to evaluate models_to_evaluate = [] - # Check for ONNX model (config-specified or auto-detected) + # Get model paths from config eval_onnx_path = eval_cfg.get("onnx_model") - if eval_onnx_path: - # Use config-specified ONNX model - if osp.exists(eval_onnx_path): - models_to_evaluate.append(("onnx", eval_onnx_path)) - 
logger.info(f"Using config-specified ONNX model: {eval_onnx_path}") - else: - logger.warning(f"Config-specified ONNX model not found: {eval_onnx_path}") - elif not is_eval_only and (config.should_export_onnx or existing_onnx): - # Auto-detect exported ONNX model (only if we just exported) - if onnx_path and osp.exists(onnx_path): - models_to_evaluate.append(("onnx", onnx_path)) - else: - logger.warning(f"ONNX model not found at {onnx_path}, skipping ONNX evaluation") - - # Check for TensorRT model (config-specified or auto-detected) eval_trt_path = eval_cfg.get("tensorrt_model") - if eval_trt_path: - # Use config-specified TensorRT model - if osp.exists(eval_trt_path): - models_to_evaluate.append(("tensorrt", eval_trt_path)) - logger.info(f"Using config-specified TensorRT model: {eval_trt_path}") - else: - logger.warning(f"Config-specified TensorRT model not found: {eval_trt_path}") - elif not is_eval_only and config.should_export_tensorrt: - # Auto-detect exported TensorRT model (only if we just exported) - if trt_path and osp.exists(trt_path): - models_to_evaluate.append(("tensorrt", trt_path)) - else: - logger.warning(f"TensorRT model not found at {trt_path}, skipping TensorRT evaluation") + + # If both paths are None, skip evaluation (no auto-detection) + if eval_onnx_path is None and eval_trt_path is None: + logger.warning("Both onnx_model and tensorrt_model are None. Skipping evaluation.") + logger.warning("To enable evaluation, specify at least one model path in the config.") + else: + # Only evaluate models that are explicitly specified (not None) + if eval_onnx_path is not None: + # Evaluate ONNX model + if osp.exists(eval_onnx_path): + models_to_evaluate.append(("onnx", eval_onnx_path)) + logger.info(f"Using config-specified ONNX model: {eval_onnx_path}") + else: + logger.warning(f"Config-specified ONNX model not found: {eval_onnx_path}") + + if eval_trt_path is not None: + # Evaluate TensorRT model + if osp.exists(eval_trt_path): + models_to_evaluate.append(("tensorrt", eval_trt_path)) + logger.info(f"Using config-specified TensorRT model: {eval_trt_path}") + else: + logger.warning(f"Config-specified TensorRT model not found: {eval_trt_path}") if not models_to_evaluate: logger.error( diff --git a/tools/calibration_classification/README.md b/tools/calibration_classification/README.md index f7188dbf..45d35d76 100644 --- a/tools/calibration_classification/README.md +++ b/tools/calibration_classification/README.md @@ -335,15 +335,10 @@ python projects/CalibrationStatusClassification/deploy/main.py \ projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \ projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ checkpoint.pth \ - --evaluate \ - --num-samples 100 \ --verbose ``` #### Command Line Arguments - -- `--evaluate`: Enable full model evaluation on multiple samples -- `--num-samples`: Number of samples to evaluate (default: 10) - `--verbose`: Enable verbose logging during evaluation ## 7. 
INT8 Quantization Guide From 7cde05a87c9aba114ffa989075e8e1e8cb14a3ea Mon Sep 17 00:00:00 2001 From: vividf Date: Mon, 6 Oct 2025 17:28:58 +0900 Subject: [PATCH 13/19] chore: update readme Signed-off-by: vividf --- tools/calibration_classification/README.md | 130 ++++++--------------- 1 file changed, 35 insertions(+), 95 deletions(-) diff --git a/tools/calibration_classification/README.md b/tools/calibration_classification/README.md index 45d35d76..713238e2 100644 --- a/tools/calibration_classification/README.md +++ b/tools/calibration_classification/README.md @@ -283,38 +283,14 @@ Control TensorRT inference precision with the `precision_policy` parameter: | `fp32_tf32` | Tensor Cores for FP32 | Ampere+ GPUs, FP32 speedup | | `strongly_typed` | Enforces explicit type constraints, preserves QDQ (Quantize-Dequantize) nodes | INT8 quantized models from QAT or PTQ with explicit Q/DQ ops | -### 6.2. Basic Usage - -#### Export to Both ONNX and TensorRT - -All settings come from the config file: - -```sh -python projects/CalibrationStatusClassification/deploy/main.py \ - projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \ - projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ - checkpoint.pth -``` - -#### Override Config Settings via CLI - -```sh -python projects/CalibrationStatusClassification/deploy/main.py \ - projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \ - projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ - checkpoint.pth \ - --work-dir ./custom_output \ - --device cuda:1 \ - --info-pkl /path/to/custom/info.pkl \ - --sample-idx 5 -``` - ### 6.3. Export Modes -#### Export to ONNX Only +#### Export from checkpoint to Both ONNX and TensorRT +In config file, set `export.mode = "both"`. -In config file, set `export.mode = "onnx"`. +#### Export from checkpoint to ONNX Only +In config file, set `export.mode = "onnx"`. #### Convert Existing ONNX to TensorRT @@ -322,9 +298,16 @@ In config file: - Set `export.mode = "trt"` - Set `runtime_io.onnx_file = "/path/to/model.onnx"` - **Note:** The checkpoint is used for verification. To skip verification, set `export.verify = False` in config. +#### Export +```sh +python projects/CalibrationStatusClassification/deploy/main.py \ + projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \ + projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ + checkpoint.pth +``` + ### 6.4. Evaluate ONNX and TensorRT Models @@ -334,32 +317,20 @@ In config file: python projects/CalibrationStatusClassification/deploy/main.py \ projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \ projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ - checkpoint.pth \ - --verbose + checkpoint.pth ``` -#### Command Line Arguments -- `--verbose`: Enable verbose logging during evaluation ## 7. INT8 Quantization Guide INT8 quantization reduces model size and improves inference speed by converting 32-bit floating-point weights to 8-bit integers. This guide covers Post-Training Quantization (PTQ) using NVIDIA's ModelOpt tool. -### 7.1. Prerequisites - -Before starting quantization, ensure you have: - -1. **Trained model checkpoint** (e.g., `epoch_25.pth`) -2. **ONNX model** exported using Section 6 -3. **Dataset info files** (from Section 2.1) - ---- -### 7.2. Generate Calibration Data +### 7.1. 
Generate Calibration Data Calibration data is used to determine optimal quantization parameters. Use representative samples from your training dataset. -#### 7.2.1. Basic Usage +#### 7.1.1. Basic Usage ```sh python tools/calibration_classification/toolkit.py \ @@ -376,6 +347,8 @@ Calibration data is loaded entirely into GPU memory during quantization. Calcula For 32 GB memory, the maximum you can use is approximately: 32 GB / 1860 x 2880 x 5 x 4 Bytes, around 321 images. Use `--indices 200` to limit to first 200 samples. +Additionally, ensure sufficient disk storage is available on the target device for calibration file output. + ### 7.3. Build Optimization Docker Environment @@ -419,26 +392,7 @@ python3 -m modelopt.onnx.quantization \ ### 7.6. Evaluate Quantized Model -#### 7.6.1. Evaluate ONNX INT8 Model - -```sh -python projects/CalibrationStatusClassification/deploy/main.py \ - projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \ - projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ - epoch_25.pth \ - --evaluate \ - --num-samples 100 -``` - -**Note**: Configure the deployment config to use the quantized ONNX model: -```python -runtime_io = dict( - onnx_file="work_dirs/end2end.quant.onnx", # Point to quantized ONNX - # ... -) -``` - -#### 7.6.2. Convert to TensorRT INT8 Engine +#### 7.6.1. Convert to TensorRT INT8 Engine For optimal performance, convert the quantized ONNX to TensorRT: @@ -472,45 +426,31 @@ backend_config = dict( python projects/CalibrationStatusClassification/deploy/main.py \ projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \ projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ - epoch_25.pth # For verification only + epoch_25.pth ``` **Output**: `work_dirs/end2end.engine` (INT8 TensorRT engine) #### 7.6.3. Evaluate TensorRT INT8 Engine -```sh -python projects/CalibrationStatusClassification/deploy/main.py \ - projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \ - projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ - epoch_25.pth \ - --evaluate \ - --num-samples 100 -``` - -**Note**: The deployment config should point to the TensorRT engine: +In config file, set `export.mode = "none"` and `"evaluation.enabled=True"`. ```python -runtime_io = dict( - onnx_file="work_dirs/end2end.quant.onnx", - # ... +export = dict( + mode="none", # Export mode: "onnx", "trt", "both", or "none" + ... ) -# TensorRT engine will be automatically loaded from work_dirs/end2end.engine +evaluation = dict( + enabled=True, + ... + onnx_model=None, + tensorrt_model="/workspace/work_dirs/end2end.engine", +) ``` -### 7.7. Expected Results - -**Typical performance improvements:** - -| Metric | FP32 ONNX | INT8 ONNX | INT8 TensorRT | -|-----------------|-----------|-----------|---------------| -| Model Size | 100% | ~25% | ~25% | -| Inference Speed | 1.0× | 1.5-2.0× | 2.5-4.0× | -| Accuracy Loss | baseline | <1% | <1% | -| Memory Usage | 100% | ~40% | ~30% | - -**Note**: Actual results depend on: -- Model architecture (ResNet18, etc.) 
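
For reference, the memory bound quoted in the Memory Requirements note above ("32 GB / 1860 x 2880 x 5 x 4 Bytes, around 321 images") can be reproduced with a short helper. This is a minimal sketch, assuming the 1860 x 2880 x 5 float32 fused input described in this README and reading "32 GB" as GiB; the helper name is illustrative and not part of the tooling:

```python
# Minimal sizing sketch for INT8 calibration data (assumptions noted above).
BYTES_PER_SAMPLE = 1860 * 2880 * 5 * 4  # H x W x channels x sizeof(float32), ~107 MB per fused image


def max_calibration_samples(gpu_mem_gib: float = 32.0) -> int:
    """Rough upper bound on how many calibration samples fit in GPU memory."""
    return int(gpu_mem_gib * (1 << 30)) // BYTES_PER_SAMPLE


print(max_calibration_samples(32.0))  # -> 320, consistent with the ~321 figure quoted above
```

The same per-sample size also roughly approximates the on-disk footprint of the saved `calibration_file.npz`, so limiting the sample count with `--indices` keeps both GPU memory and disk usage bounded.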
-- Input resolution -- Hardware (GPU generation) -- Calibration data quality +**Evaluate INT8 engine** +```sh +python projects/CalibrationStatusClassification/deploy/main.py \ + projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \ + projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py +``` From 19b0093a0e25a2519b4a39851d0eecec47a5b495 Mon Sep 17 00:00:00 2001 From: vividf Date: Mon, 6 Oct 2025 17:42:43 +0900 Subject: [PATCH 14/19] chore: change naming Signed-off-by: vividf --- .../deploy/main.py | 25 ++++++------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/projects/CalibrationStatusClassification/deploy/main.py b/projects/CalibrationStatusClassification/deploy/main.py index b030d0b2..5efcffba 100644 --- a/projects/CalibrationStatusClassification/deploy/main.py +++ b/projects/CalibrationStatusClassification/deploy/main.py @@ -163,7 +163,7 @@ def load_sample_data_from_info_pkl( class DeploymentConfig: - """Enhanced configuration container for deployment settings with validation.""" + """Configuration container for deployment settings.""" def __init__(self, deploy_cfg: Config): self.deploy_cfg = deploy_cfg @@ -287,14 +287,6 @@ def parse_args() -> argparse.Namespace: parser.add_argument("--device", help="Override device from config") parser.add_argument("--log-level", default="INFO", choices=list(logging._nameToLevel.keys()), help="Logging level") parser.add_argument("--info-pkl", help="Override info.pkl path from config") - parser.add_argument("--sample-idx", type=int, help="Override sample index from config") - - # Evaluation mode - parser.add_argument("--evaluate", action="store_true", help="Run full evaluation on multiple samples") - parser.add_argument( - "--num-samples", type=int, default=10, help="Number of samples to evaluate (only with --evaluate)" - ) - parser.add_argument("--verbose", action="store_true", help="Enable verbose logging during evaluation") return parser.parse_args() @@ -969,11 +961,10 @@ def main(): logger.info(f"Loading model config from: {args.model_cfg}") model_cfg = Config.fromfile(args.model_cfg) - # Get settings from config with CLI overrides - work_dir = args.work_dir if args.work_dir else config.work_dir - device = args.device if args.device else config.device - info_pkl = args.info_pkl if args.info_pkl else config.runtime_io_config.get("info_pkl") - sample_idx = args.sample_idx if args.sample_idx is not None else config.runtime_io_config.get("sample_idx", 0) + work_dir = config.work_dir + device = config.device + info_pkl = config.runtime_io_config.get("info_pkl") + sample_idx = config.runtime_io_config.get("sample_idx", 0) existing_onnx = config.runtime_io_config.get("onnx_file") # Validate required parameters @@ -1072,9 +1063,9 @@ def main(): # Get evaluation settings from config and CLI eval_cfg = config.evaluation_config - should_evaluate = args.evaluate or eval_cfg.get("enabled", False) - num_samples = args.num_samples if args.num_samples != 10 else eval_cfg.get("num_samples", 10) - verbose_mode = args.verbose or eval_cfg.get("verbose", False) + should_evaluate = eval_cfg.get("enabled", False) + num_samples = eval_cfg.get("num_samples", 10) + verbose_mode = eval_cfg.get("verbose", False) # Run full evaluation if requested if should_evaluate: From 2a816e427911319ea6ae66f92c3327b31e291a46 Mon Sep 17 00:00:00 2001 From: vividf Date: Tue, 7 Oct 2025 11:53:05 +0900 Subject: [PATCH 15/19] chore: update deploy config Signed-off-by: vividf --- .../configs/deploy/resnet18_5ch.py | 
80 +++---------------- 1 file changed, 13 insertions(+), 67 deletions(-) diff --git a/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py b/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py index 4888538e..54496f6b 100644 --- a/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py +++ b/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py @@ -1,4 +1,3 @@ -# projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py # Deployment configuration for ResNet18 5-channel calibration classification model # # 1. export: Controls export behavior (mode, verification, device, output directory) @@ -27,7 +26,7 @@ runtime_io = dict( info_pkl="data/t4dataset/calibration_info/t4dataset_gen2_base_infos_test.pkl", sample_idx=0, # Sample index to use for export and verification - onnx_file=None, # Optional: Path to existing ONNX file + onnx_file="/workspace/work_dirs/end2end.onnx", # Optional: Path to existing ONNX file # - If provided with mode="trt", will convert this ONNX to TensorRT # - If None with mode="both", will export ONNX first then convert ) @@ -37,8 +36,8 @@ # ============================================================================== evaluation = dict( enabled=True, # Enable full model evaluation (set to True to run evaluation) - num_samples=100, # Number of samples to evaluate from info.pkl - verbose=False, # Enable verbose logging showing per-sample results + num_samples=10, # Number of samples to evaluate from info.pkl + verbose=True, # Enable verbose logging showing per-sample results # Optional: Specify models to evaluate (if None, uses exported models from work_dir) onnx_model="/workspace/work_dirs/end2end.onnx", # Path to ONNX model file to evaluate (e.g., "/path/to/model.onnx") tensorrt_model="/workspace/work_dirs/end2end.engine", # Path to TensorRT engine file to evaluate (e.g., "/path/to/model.engine") @@ -63,7 +62,7 @@ # - "fp16": Enable FP16 mode for faster inference with slight accuracy trade-off # - "fp32_tf32": Enable TF32 mode (Tensor Cores for FP32 operations on Ampere+) # - "strongly_typed": Enforce strict type checking (prevents automatic precision conversion) - precision_policy="fp32_tf32", + precision_policy="fp16", ), # Dynamic shape configuration for different input resolutions model_inputs=[ @@ -84,69 +83,16 @@ # ============================================================================== onnx_config = dict( type="onnx", - export_params=True, # Include trained parameters in the model - keep_initializers_as_inputs=False, # Don't expose initializers as inputs - opset_version=16, # ONNX opset version (16 recommended for modern features) - do_constant_folding=True, # Optimize by folding constant expressions - save_file="end2end.onnx", # Output filename - input_names=["input"], # Name of input tensor - output_names=["output"], # Name of output tensor - # Dynamic axes for variable input dimensions + export_params=True, + keep_initializers_as_inputs=False, + opset_version=16, + do_constant_folding=True, + save_file="end2end.onnx", + input_names=["input"], + output_names=["output"], dynamic_axes={ - "input": {0: "batch_size", 2: "height", 3: "width"}, # Batch, H, W are dynamic - "output": {0: "batch_size"}, # Batch is dynamic + "input": {0: "batch_size", 2: "height", 3: "width"}, + "output": {0: "batch_size"}, }, input_shape=None, ) - -# ============================================================================== -# Usage Examples -# 
============================================================================== -# -# Basic usage (export to both ONNX and TensorRT): -# python projects/CalibrationStatusClassification/deploy/main.py \ -# projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \ -# projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ -# checkpoint.pth -# -# Export to ONNX only: -# Set export.mode = "onnx" in this config, then run the command above -# -# Convert existing ONNX to TensorRT: -# 1) Set export.mode = "trt" -# 2) Set runtime_io.onnx_file = "/path/to/existing/model.onnx" -# 3) Run the command above -# -# Enable evaluation in config: -# 1) Set evaluation.enabled = True -# 2) Set evaluation.num_samples = 100 # Adjust as needed -# 3) Optionally set evaluation.onnx_model and/or evaluation.tensorrt_model -# 4) Run the command above -# -# Evaluate specific models (without export): -# 1) Set export.mode = "none" -# 2) Set evaluation.enabled = True -# 3) Set evaluation.onnx_model = "/path/to/model.onnx" -# 4) Set evaluation.tensorrt_model = "/path/to/model.engine" -# 5) Run the command WITHOUT checkpoint: -# python projects/CalibrationStatusClassification/deploy/main.py \ -# projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \ -# projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py -# -# Override config settings via command line: -# python projects/CalibrationStatusClassification/deploy/main.py \ -# \ -# --work-dir ./custom_output \ -# --device cuda:1 \ -# --info-pkl /path/to/custom/info.pkl \ -# --sample-idx 5 \ -# --evaluate \ -# --num-samples 50 \ -# --verbose -# -# Available precision policies: -# - auto: Let TensorRT decide (default, good balance) -# - fp16: Faster inference, ~2x speedup, small accuracy loss -# - fp32_tf32: Use Tensor Cores on Ampere+ GPUs for FP32 -# - explicit_int8: INT8 quantization (requires calibration dataset) -# - strongly_typed: Strict type enforcement, no automatic conversion From 10daf63f60a6887fd6b0bb58b4871b8ab3bf545a Mon Sep 17 00:00:00 2001 From: vividf Date: Tue, 7 Oct 2025 11:55:59 +0900 Subject: [PATCH 16/19] chore: refactor code Signed-off-by: vividf --- .../configs/deploy/resnet18_5ch.py | 2 +- .../deploy/__init__.py | 21 + .../deploy/config.py | 161 +++ .../deploy/evaluator.py | 435 +++++++ .../deploy/exporters.py | 680 ++++++++++ .../deploy/main.py | 1132 +---------------- tools/calibration_classification/README.md | 8 +- .../visualize_lidar_camera_projection.py | 11 +- 8 files changed, 1372 insertions(+), 1078 deletions(-) create mode 100644 projects/CalibrationStatusClassification/deploy/__init__.py create mode 100644 projects/CalibrationStatusClassification/deploy/config.py create mode 100644 projects/CalibrationStatusClassification/deploy/evaluator.py create mode 100644 projects/CalibrationStatusClassification/deploy/exporters.py diff --git a/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py b/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py index 54496f6b..0c4673d8 100644 --- a/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py +++ b/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py @@ -8,7 +8,7 @@ # Export Configuration # ============================================================================== export = dict( - mode="none", # Export mode: "onnx", "trt", "both", or "none" + mode="both", # Export mode: "onnx", "trt", "both", or "none" # - "onnx": Export 
to ONNX only # - "trt": Convert to TensorRT only (requires onnx_file in runtime_io) # - "both": Export to ONNX then convert to TensorRT diff --git a/projects/CalibrationStatusClassification/deploy/__init__.py b/projects/CalibrationStatusClassification/deploy/__init__.py new file mode 100644 index 00000000..38b7c326 --- /dev/null +++ b/projects/CalibrationStatusClassification/deploy/__init__.py @@ -0,0 +1,21 @@ +""" +Calibration Status Classification Model Deployment Package + +This package provides utilities for exporting, verifying, and evaluating +CalibrationStatusClassification models in ONNX and TensorRT formats. +""" + +from .config import DeploymentConfig, parse_args, setup_logging +from .evaluator import evaluate_exported_model, print_evaluation_results +from .exporters import export_to_onnx, export_to_tensorrt, run_verification + +__all__ = [ + "DeploymentConfig", + "parse_args", + "setup_logging", + "export_to_onnx", + "export_to_tensorrt", + "run_verification", + "evaluate_exported_model", + "print_evaluation_results", +] diff --git a/projects/CalibrationStatusClassification/deploy/config.py b/projects/CalibrationStatusClassification/deploy/config.py new file mode 100644 index 00000000..e0071066 --- /dev/null +++ b/projects/CalibrationStatusClassification/deploy/config.py @@ -0,0 +1,161 @@ +""" +Configuration classes and constants for deployment. +""" + +import argparse +import logging +import os +import os.path as osp +from typing import Dict + +from mmengine.config import Config + +# Constants +DEFAULT_VERIFICATION_TOLERANCE = 1e-3 +DEFAULT_WORKSPACE_SIZE = 1 << 30 # 1 GB +EXPECTED_CHANNELS = 5 # RGB + Depth + Intensity +LABELS = {"0": "miscalibrated", "1": "calibrated"} + +# Precision policy mapping +PRECISION_POLICIES = { + "auto": {}, # No special flags, TensorRT decides + "fp16": {"FP16": True}, + "fp32_tf32": {"TF32": True}, # TF32 for FP32 operations + "explicit_int8": {"INT8": True}, + "strongly_typed": {"STRONGLY_TYPED": True}, # Network creation flag +} + + +class DeploymentConfig: + """Configuration container for deployment settings.""" + + def __init__(self, deploy_cfg: Config): + self.deploy_cfg = deploy_cfg + self._validate_config() + + def _validate_config(self) -> None: + """Validate configuration structure and required fields.""" + if "export" not in self.deploy_cfg: + raise ValueError( + "Missing 'export' section in deploy config. " + "Please update your config to the new format with 'export', 'runtime_io' sections." + ) + + if "runtime_io" not in self.deploy_cfg: + raise ValueError("Missing 'runtime_io' section in deploy config.") + + # Validate export mode + valid_modes = ["onnx", "trt", "both", "none"] + mode = self.export_config.get("mode", "both") + if mode not in valid_modes: + raise ValueError(f"Invalid export mode '{mode}'. Must be one of {valid_modes}") + + # Validate precision policy if present + backend_cfg = self.deploy_cfg.get("backend_config", {}) + common_cfg = backend_cfg.get("common_config", {}) + precision_policy = common_cfg.get("precision_policy", "auto") + if precision_policy not in PRECISION_POLICIES: + raise ValueError( + f"Invalid precision_policy '{precision_policy}'. 
" f"Must be one of {list(PRECISION_POLICIES.keys())}" + ) + + @property + def export_config(self) -> Dict: + """Get export configuration (mode, verify, device, work_dir).""" + return self.deploy_cfg.get("export", {}) + + @property + def runtime_io_config(self) -> Dict: + """Get runtime I/O configuration (info_pkl, sample_idx, onnx_file).""" + return self.deploy_cfg.get("runtime_io", {}) + + @property + def evaluation_config(self) -> Dict: + """Get evaluation configuration (enabled, num_samples, verbose, model paths).""" + return self.deploy_cfg.get("evaluation", {}) + + @property + def should_export_onnx(self) -> bool: + """Check if ONNX export is requested.""" + mode = self.export_config.get("mode", "both") + return mode in ["onnx", "both"] + + @property + def should_export_tensorrt(self) -> bool: + """Check if TensorRT export is requested.""" + mode = self.export_config.get("mode", "both") + return mode in ["trt", "both"] + + @property + def should_verify(self) -> bool: + """Check if verification is requested.""" + return self.export_config.get("verify", False) + + @property + def device(self) -> str: + """Get device for export.""" + return self.export_config.get("device", "cuda:0") + + @property + def work_dir(self) -> str: + """Get working directory.""" + return self.export_config.get("work_dir", os.getcwd()) + + @property + def onnx_settings(self) -> Dict: + """Get ONNX export settings.""" + onnx_config = self.deploy_cfg.get("onnx_config", {}) + return { + "opset_version": onnx_config.get("opset_version", 16), + "do_constant_folding": onnx_config.get("do_constant_folding", True), + "input_names": onnx_config.get("input_names", ["input"]), + "output_names": onnx_config.get("output_names", ["output"]), + "dynamic_axes": onnx_config.get("dynamic_axes"), + "export_params": onnx_config.get("export_params", True), + "keep_initializers_as_inputs": onnx_config.get("keep_initializers_as_inputs", False), + "save_file": onnx_config.get("save_file", "calibration_classifier.onnx"), + } + + @property + def tensorrt_settings(self) -> Dict: + """Get TensorRT export settings with precision policy support.""" + backend_config = self.deploy_cfg.get("backend_config", {}) + common_config = backend_config.get("common_config", {}) + + # Get precision policy + precision_policy = common_config.get("precision_policy", "auto") + policy_flags = PRECISION_POLICIES.get(precision_policy, {}) + + return { + "max_workspace_size": common_config.get("max_workspace_size", DEFAULT_WORKSPACE_SIZE), + "precision_policy": precision_policy, + "policy_flags": policy_flags, + "model_inputs": backend_config.get("model_inputs", []), + } + + +def parse_args() -> argparse.Namespace: + """Parse command line arguments.""" + parser = argparse.ArgumentParser( + description="Export CalibrationStatusClassification model to ONNX/TensorRT.", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument("deploy_cfg", help="Deploy config path") + parser.add_argument("model_cfg", help="Model config path") + parser.add_argument( + "checkpoint", nargs="?", default=None, help="Model checkpoint path (optional when mode='none')" + ) + + # Optional overrides + parser.add_argument("--work-dir", help="Override output directory from config") + parser.add_argument("--device", help="Override device from config") + parser.add_argument("--log-level", default="INFO", choices=list(logging._nameToLevel.keys()), help="Logging level") + parser.add_argument("--info-pkl", help="Override info.pkl path from config") + + return 
parser.parse_args() + + +def setup_logging(level: str) -> logging.Logger: + """Setup logging configuration.""" + logging.basicConfig(level=getattr(logging, level), format="%(levelname)s:%(name)s:%(message)s") + return logging.getLogger("mmdeploy") diff --git a/projects/CalibrationStatusClassification/deploy/evaluator.py b/projects/CalibrationStatusClassification/deploy/evaluator.py new file mode 100644 index 00000000..efc7dd45 --- /dev/null +++ b/projects/CalibrationStatusClassification/deploy/evaluator.py @@ -0,0 +1,435 @@ +""" +Model evaluation utilities for exported models. +""" + +import gc +import logging +import os.path as osp +import time +from typing import Any, Tuple + +import numpy as np +import onnxruntime as ort +import pycuda.autoinit +import pycuda.driver as cuda +import tensorrt as trt +import torch +import torch.nn.functional as F +from config import LABELS +from exporters import ( + _clear_gpu_memory, + _create_transform_with_config, + _load_info_pkl_file, + _numpy_to_tensor, + _run_tensorrt_inference, + load_info_pkl_data, +) +from mmengine.config import Config + + +def _load_samples_from_info_pkl(info_pkl_path: str, num_samples: int, logger: logging.Logger) -> list: + """Load and validate samples from info.pkl file.""" + try: + samples_list = _load_info_pkl_file(info_pkl_path) + logger.info(f"Loaded {len(samples_list)} samples from info.pkl") + + # Limit number of samples + num_samples = min(num_samples, len(samples_list)) + logger.info(f"Evaluating {num_samples} samples") + + return samples_list[:num_samples] + + except Exception as e: + logger.error(f"Failed to load info.pkl: {e}") + raise + + +def _create_transform(model_cfg: Config): + """Create and configure CalibrationClassificationTransform for evaluation.""" + return _create_transform_with_config( + model_cfg=model_cfg, + miscalibration_probability=0.0, + projection_vis_dir=None, + results_vis_dir=None, + binary_save_dir=None, + ) + + +def _create_onnx_inference_func(model_path: str, device: str, logger: logging.Logger): + """Create ONNX inference function with configured providers.""" + logger.info(f"Using ONNX model: {model_path}") + logger.info("Creating ONNX InferenceSession...") + + # Configure execution providers + if device.startswith("cuda"): + providers = ["CUDAExecutionProvider", "CPUExecutionProvider"] + logger.info("Attempting to use CUDA acceleration for ONNX inference...") + try: + available_providers = ort.get_available_providers() + if "CUDAExecutionProvider" not in available_providers: + logger.warning( + f"CUDAExecutionProvider not available. Available: {available_providers}. " + "Install onnxruntime-gpu for CUDA acceleration" + ) + providers = ["CPUExecutionProvider"] + else: + logger.info("CUDAExecutionProvider is available") + except Exception as e: + logger.warning(f"Error checking CUDA provider: {e}. 
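# Example invocation of the refactored entrypoint using the arguments defined
# by parse_args() above; paths other than the deploy config are placeholders,
# and the positional checkpoint can be dropped when export.mode == "none".
#
#   python projects/CalibrationStatusClassification/deploy/main.py \
#       projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \
#       /path/to/model_cfg.py /path/to/checkpoint.pth \
#       --work-dir ./work_dirs/deploy --device cuda:0 \
#       --info-pkl /path/to/info.pkl --log-level INFO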
Falling back to CPU") + providers = ["CPUExecutionProvider"] + else: + providers = ["CPUExecutionProvider"] + logger.info("Using CPU for ONNX inference") + + # Create session + ort_session = ort.InferenceSession(model_path, providers=providers) + logger.info(f"ONNX session using providers: {ort_session.get_providers()}") + + # Return inference function + def inference_func(input_tensor): + input_name = ort_session.get_inputs()[0].name + onnx_input = {input_name: input_tensor.cpu().numpy()} + start_time = time.perf_counter() + onnx_output = ort_session.run(None, onnx_input)[0] + latency = (time.perf_counter() - start_time) * 1000 + return onnx_output, latency + + return inference_func + + +def _create_tensorrt_inference_func(model_path: str, logger: logging.Logger): + """Create TensorRT inference function with loaded engine.""" + logger.info(f"Using TensorRT model: {model_path}") + + # Load TensorRT engine + trt_logger = trt.Logger(trt.Logger.WARNING) + trt.init_libnvinfer_plugins(trt_logger, "") + runtime = trt.Runtime(trt_logger) + + with open(model_path, "rb") as f: + engine = runtime.deserialize_cuda_engine(f.read()) + + if engine is None: + raise RuntimeError("Failed to deserialize TensorRT engine") + + # Return inference function + def inference_func(input_tensor): + return _run_tensorrt_inference(engine, input_tensor.cpu(), logger) + + return inference_func + + +def _process_single_sample( + sample_idx: int, + info_pkl_path: str, + transform, + inference_func, + device: str, + logger: logging.Logger, + verbose: bool = False, +) -> Tuple[list, list, list, list]: + """Process a single sample with both calibrated and miscalibrated versions. + + Returns: + Tuple of (predictions, ground_truth, probabilities, latencies) for this sample + """ + predictions = [] + ground_truth = [] + probabilities = [] + latencies = [] + + # Test both calibrated and miscalibrated versions + for miscalibration_prob, expected_label in [(0.0, 1), (1.0, 0)]: + # Update transform's miscalibration probability + transform.miscalibration_probability = miscalibration_prob + + # Load and preprocess sample + sample_data = load_info_pkl_data(info_pkl_path, sample_idx) + sample_data["sample_idx"] = sample_idx + + results = transform.transform(sample_data) + input_data_processed = results["fused_img"] + gt_label = results["gt_label"] + + # Convert to tensor + input_tensor = _numpy_to_tensor(input_data_processed, device) + + # Run inference + output_np, latency = inference_func(input_tensor) + + if output_np is None: + logger.error(f"Failed to get output for sample {sample_idx}") + continue + + # Convert logits to probabilities + logits = torch.from_numpy(output_np) + probs = F.softmax(logits, dim=-1) + predicted_class = torch.argmax(probs, dim=-1).item() + confidence = probs.max().item() + + # Store results + predictions.append(predicted_class) + ground_truth.append(gt_label) + probabilities.append(probs.cpu().numpy()) + latencies.append(latency) + + # Print sample results only in verbose mode + if verbose: + logger.info( + f"Sample {sample_idx + 1} (GT={gt_label}): " + f"Pred={predicted_class}, Confidence={confidence:.4f}, Latency={latency:.2f}ms" + ) + + return predictions, ground_truth, probabilities, latencies + + +def evaluate_exported_model( + model_path: str, + model_type: str, + model_cfg: Config, + info_pkl_path: str, + logger: logging.Logger, + device: str = "cpu", + num_samples: int = 10, + verbose: bool = False, +) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: + """ + Evaluate exported model (ONNX 
or TensorRT) using info.pkl data. + + Args: + model_path: Path to model file (.onnx or .engine) + model_type: Type of model ("onnx" or "tensorrt") + model_cfg: Model configuration + info_pkl_path: Path to info.pkl file + logger: Logger instance + device: Device for preprocessing + num_samples: Number of samples to evaluate + verbose: Enable verbose logging + + Returns: + Tuple of (predictions, ground_truth, probabilities, latencies) + """ + # Load samples + samples_list = _load_samples_from_info_pkl(info_pkl_path, num_samples, logger) + + # Create transform + transform = _create_transform(model_cfg) + + # Create inference function based on model type + if model_type == "onnx": + inference_func = _create_onnx_inference_func(model_path, device, logger) + elif model_type == "tensorrt": + inference_func = _create_tensorrt_inference_func(model_path, logger) + else: + raise ValueError(f"Unsupported model type: {model_type}") + + # Process all samples + all_predictions = [] + all_ground_truth = [] + all_probabilities = [] + all_latencies = [] + + for sample_idx in range(len(samples_list)): + if sample_idx % 100 == 0: + logger.info(f"Processing sample {sample_idx + 1}/{len(samples_list)}") + + try: + predictions, ground_truth, probabilities, latencies = _process_single_sample( + sample_idx=sample_idx, + info_pkl_path=info_pkl_path, + transform=transform, + inference_func=inference_func, + device=device, + logger=logger, + verbose=verbose, + ) + + all_predictions.extend(predictions) + all_ground_truth.extend(ground_truth) + all_probabilities.extend(probabilities) + all_latencies.extend(latencies) + + # Clear GPU memory periodically + if model_type == "tensorrt" and sample_idx % 10 == 0: + _clear_gpu_memory() + + except Exception as e: + logger.error(f"Error processing sample {sample_idx}: {e}") + continue + + return ( + np.array(all_predictions), + np.array(all_ground_truth), + np.array(all_probabilities), + np.array(all_latencies), + ) + + +def print_evaluation_results( + all_predictions: np.ndarray, + all_ground_truth: np.ndarray, + all_probabilities: np.ndarray, + all_latencies: np.ndarray, + model_type: str, + logger: logging.Logger, +) -> None: + """Print comprehensive evaluation results with metrics and statistics.""" + + if len(all_predictions) == 0: + logger.error("No samples were processed successfully.") + return + + correct_predictions = (all_predictions == all_ground_truth).sum() + total_samples = len(all_predictions) + accuracy = correct_predictions / total_samples + avg_latency = np.mean(all_latencies) + + # Print header + logger.info(f"\n{'='*60}") + logger.info(f"{model_type.upper()} Model Evaluation Results") + logger.info(f"{'='*60}") + logger.info(f"Total samples: {total_samples}") + logger.info(f"Correct predictions: {correct_predictions}") + logger.info(f"Accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)") + logger.info(f"Average latency: {avg_latency:.2f} ms") + + # Calculate per-class accuracy + unique_classes = np.unique(all_ground_truth) + logger.info(f"\nPer-class accuracy:") + for cls in unique_classes: + cls_mask = all_ground_truth == cls + cls_correct = (all_predictions[cls_mask] == all_ground_truth[cls_mask]).sum() + cls_total = cls_mask.sum() + cls_accuracy = cls_correct / cls_total if cls_total > 0 else 0 + logger.info( + f" Class {cls} ({LABELS[str(cls)]}): " + f"{cls_correct}/{cls_total} = {cls_accuracy:.4f} ({cls_accuracy*100:.2f}%)" + ) + + # Calculate average confidence + avg_confidence = np.mean([prob.max() for prob in all_probabilities]) + logger.info(f"\nAverage 
confidence: {avg_confidence:.4f}") + + # Calculate latency statistics + min_latency = np.min(all_latencies) + max_latency = np.max(all_latencies) + std_latency = np.std(all_latencies) + p50_latency = np.percentile(all_latencies, 50) + p95_latency = np.percentile(all_latencies, 95) + p99_latency = np.percentile(all_latencies, 99) + + logger.info(f"\nLatency Statistics:") + logger.info(f" Average: {avg_latency:.2f} ms") + logger.info(f" Median (P50): {p50_latency:.2f} ms") + logger.info(f" P95: {p95_latency:.2f} ms") + logger.info(f" P99: {p99_latency:.2f} ms") + logger.info(f" Min: {min_latency:.2f} ms") + logger.info(f" Max: {max_latency:.2f} ms") + logger.info(f" Std: {std_latency:.2f} ms") + + # Show confusion matrix + logger.info(f"\nConfusion Matrix:") + logger.info(f"{'':>10} Predicted") + logger.info(f"{'Actual':>10} {'0 (misc)':>10} {'1 (calib)':>10}") + logger.info(f"{'-'*32}") + for true_cls in unique_classes: + row_label = f"{true_cls} ({LABELS[str(true_cls)][:4]})" + row = [f"{row_label:>10}"] + for pred_cls in unique_classes: + count = ((all_ground_truth == true_cls) & (all_predictions == pred_cls)).sum() + row.append(f"{count:>10}") + logger.info(" ".join(row)) + + logger.info(f"\n{'='*60}") + logger.info(f"Evaluation Summary:") + logger.info(f" Model Type: {model_type.upper()}") + logger.info(f" Accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)") + logger.info(f" Avg Latency: {avg_latency:.2f} ms") + logger.info(f" Throughput: {1000/avg_latency:.2f} samples/sec") + logger.info(f"{'='*60}\n") + + +def get_models_to_evaluate(eval_cfg: dict, logger: logging.Logger) -> list: + """Determine which models to evaluate based on config. + + Returns: + List of (model_type, model_path) tuples + """ + models_to_evaluate = [] + + # Get model paths from config + eval_onnx_path = eval_cfg.get("onnx_model") + eval_trt_path = eval_cfg.get("tensorrt_model") + + # If both paths are None, skip evaluation + if eval_onnx_path is None and eval_trt_path is None: + logger.warning("Both onnx_model and tensorrt_model are None. Skipping evaluation.") + logger.warning("To enable evaluation, specify at least one model path in the config.") + return models_to_evaluate + + # Only evaluate models that are explicitly specified (not None) + if eval_onnx_path is not None: + if osp.exists(eval_onnx_path): + models_to_evaluate.append(("onnx", eval_onnx_path)) + logger.info(f"Using config-specified ONNX model: {eval_onnx_path}") + else: + logger.warning(f"Config-specified ONNX model not found: {eval_onnx_path}") + + if eval_trt_path is not None: + if osp.exists(eval_trt_path): + models_to_evaluate.append(("tensorrt", eval_trt_path)) + logger.info(f"Using config-specified TensorRT model: {eval_trt_path}") + else: + logger.warning(f"Config-specified TensorRT model not found: {eval_trt_path}") + + return models_to_evaluate + + +def run_full_evaluation( + models_to_evaluate: list, + model_cfg: Config, + info_pkl: str, + device: Any, + num_samples: int, + verbose_mode: bool, + logger: logging.Logger, +) -> None: + """Run evaluation for all specified models.""" + if not models_to_evaluate: + logger.error( + "No models available for evaluation. Please export models first or specify model paths in config." 
+ ) + return + + # Evaluate each model + for model_type, model_path in models_to_evaluate: + logger.info(f"\nEvaluating {model_type.upper()} model...") + logger.info(f"Model path: {model_path}") + logger.info(f"Number of samples: {num_samples}") + logger.info(f"Verbose mode: {verbose_mode}") + + try: + # Run evaluation + predictions, ground_truth, probabilities, latencies = evaluate_exported_model( + model_path=model_path, + model_type=model_type, + model_cfg=model_cfg, + info_pkl_path=info_pkl, + logger=logger, + device=device.type if isinstance(device, torch.device) else device, + num_samples=num_samples, + verbose=verbose_mode, + ) + + # Print results + print_evaluation_results(predictions, ground_truth, probabilities, latencies, model_type, logger) + + # Cleanup + _clear_gpu_memory() + + except Exception as e: + logger.error(f"Evaluation failed for {model_type.upper()} model: {e}") + import traceback + + logger.error(traceback.format_exc()) + continue diff --git a/projects/CalibrationStatusClassification/deploy/exporters.py b/projects/CalibrationStatusClassification/deploy/exporters.py new file mode 100644 index 00000000..6d5655ff --- /dev/null +++ b/projects/CalibrationStatusClassification/deploy/exporters.py @@ -0,0 +1,680 @@ +""" +Model export and inference verification utilities. +""" + +import gc +import logging +import os +import os.path as osp +import pickle +import time +from typing import Any, Dict, Optional, Tuple + +import numpy as np +import onnx +import onnxruntime as ort +import onnxsim +import pycuda.autoinit +import pycuda.driver as cuda +import tensorrt as trt +import torch +from config import DEFAULT_VERIFICATION_TOLERANCE, LABELS, DeploymentConfig +from mmengine.config import Config + +from autoware_ml.calibration_classification.datasets.transforms.calibration_classification_transform import ( + CalibrationClassificationTransform, +) + +# ============================================================================ +# Data Loading Utilities +# ============================================================================ + + +def _load_info_pkl_file(info_pkl_path: str) -> list: + """ + Load and parse info.pkl file. + + Args: + info_pkl_path: Path to the info.pkl file + + Returns: + List of samples from data_list + + Raises: + FileNotFoundError: If info.pkl file doesn't exist + ValueError: If data format is unexpected + """ + if not os.path.exists(info_pkl_path): + raise FileNotFoundError(f"Info.pkl file not found: {info_pkl_path}") + + try: + with open(info_pkl_path, "rb") as f: + info_data = pickle.load(f) + except Exception as e: + raise ValueError(f"Failed to load info.pkl file: {e}") + + # Extract samples from info.pkl + if isinstance(info_data, dict): + if "data_list" in info_data: + samples_list = info_data["data_list"] + else: + raise ValueError(f"Expected 'data_list' key in info_data, found keys: {list(info_data.keys())}") + else: + raise ValueError(f"Expected dict format, got {type(info_data)}") + + if not samples_list: + raise ValueError("No samples found in info.pkl") + + return samples_list + + +def load_info_pkl_data(info_pkl_path: str, sample_idx: int = 0) -> Dict[str, Any]: + """ + Load a single sample from info.pkl file. 
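# Minimal sketch of the info.pkl layout these loaders expect: a dict with a
# non-empty "data_list", where each sample carries at least the "image" and
# "lidar_points" keys. The nested fields are dataset-specific and are left as
# empty placeholders here.
import pickle

info = {
    "data_list": [
        {
            "image": {},         # camera record (contents not reproduced here)
            "lidar_points": {},  # point-cloud record (contents not reproduced here)
        },
    ],
}

with open("/tmp/info_example.pkl", "wb") as f:
    pickle.dump(info, f)

sample = info["data_list"][0]
assert {"image", "lidar_points"} <= sample.keys()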
+ + Args: + info_pkl_path: Path to the info.pkl file + sample_idx: Index of the sample to load (default: 0) + + Returns: + Sample dictionary with the required structure for CalibrationClassificationTransform + + Raises: + FileNotFoundError: If info.pkl file doesn't exist + ValueError: If data format is unexpected or sample index is invalid + """ + samples_list = _load_info_pkl_file(info_pkl_path) + + if sample_idx >= len(samples_list): + raise ValueError(f"Sample index {sample_idx} out of range (0-{len(samples_list)-1})") + + sample = samples_list[sample_idx] + + # Validate sample structure + required_keys = ["image", "lidar_points"] + if not all(key in sample for key in required_keys): + raise ValueError(f"Sample {sample_idx} has invalid structure. Required keys: {required_keys}") + + return sample + + +def _create_transform_with_config( + model_cfg: Config, + miscalibration_probability: float = 0.0, + projection_vis_dir: Optional[str] = None, + results_vis_dir: Optional[str] = None, + binary_save_dir: Optional[str] = None, +) -> CalibrationClassificationTransform: + """ + Create CalibrationClassificationTransform with model configuration. + + Args: + model_cfg: Model configuration + miscalibration_probability: Probability of miscalibration + projection_vis_dir: Optional projection visualization directory + results_vis_dir: Optional results visualization directory + binary_save_dir: Optional binary save directory + + Returns: + Configured CalibrationClassificationTransform + """ + data_root = model_cfg.get("data_root") + if data_root is None: + raise ValueError("data_root not found in model configuration") + + transform_config = model_cfg.get("transform_config") + if transform_config is None: + raise ValueError("transform_config not found in model configuration") + + return CalibrationClassificationTransform( + transform_config=transform_config, + mode="test", + max_depth=model_cfg.get("max_depth", 128.0), + dilation_size=model_cfg.get("dilation_size", 1), + undistort=True, + miscalibration_probability=miscalibration_probability, + enable_augmentation=False, + data_root=data_root, + projection_vis_dir=projection_vis_dir, + results_vis_dir=results_vis_dir, + binary_save_dir=binary_save_dir, + ) + + +def _numpy_to_tensor(numpy_array: np.ndarray, device: str = "cpu") -> torch.Tensor: + """ + Convert numpy array (H, W, C) to tensor (1, C, H, W). + + Args: + numpy_array: Input numpy array with shape (H, W, C) + device: Device to load tensor on + + Returns: + Tensor with shape (1, C, H, W) + """ + tensor = torch.from_numpy(numpy_array).permute(2, 0, 1).float() # (C, H, W) + return tensor.unsqueeze(0).to(device) # (1, C, H, W) + + +def _clear_gpu_memory() -> None: + """Clear GPU cache and run garbage collection.""" + if torch.cuda.is_available(): + torch.cuda.empty_cache() + gc.collect() + + +def load_sample_data_from_info_pkl( + info_pkl_path: str, + model_cfg: Config, + miscalibration_probability: float, + sample_idx: int = 0, + device: str = "cpu", +) -> torch.Tensor: + """ + Load and preprocess sample data from info.pkl using CalibrationClassificationTransform. 
+ + Args: + info_pkl_path: Path to the info.pkl file + model_cfg: Model configuration containing data_root setting + miscalibration_probability: Probability of loading a miscalibrated sample + sample_idx: Index of the sample to load (default: 0) + device: Device to load tensor on + + Returns: + Preprocessed tensor ready for model inference + """ + # Load sample data from info.pkl + sample_data = load_info_pkl_data(info_pkl_path, sample_idx) + + # Create transform + transform = _create_transform_with_config( + model_cfg=model_cfg, + miscalibration_probability=miscalibration_probability, + projection_vis_dir=model_cfg.get("test_projection_vis_dir", None), + results_vis_dir=model_cfg.get("test_results_vis_dir", None), + binary_save_dir=model_cfg.get("binary_save_dir", None), + ) + + # Apply transform + results = transform.transform(sample_data) + input_data_processed = results["fused_img"] # (H, W, 5) + + # Convert to tensor + return _numpy_to_tensor(input_data_processed, device) + + +# ============================================================================ +# Model Export Functions +# ============================================================================ + + +def export_to_onnx( + model: torch.nn.Module, + input_tensor: torch.Tensor, + output_path: str, + config: DeploymentConfig, + logger: logging.Logger, +) -> None: + """Export model to ONNX format.""" + settings = config.onnx_settings + model.eval() + + logger.info("Exporting model to ONNX format...") + logger.info(f"Input shape: {input_tensor.shape}") + logger.info(f"Output path: {output_path}") + logger.info(f"ONNX opset version: {settings['opset_version']}") + + with torch.no_grad(): + torch.onnx.export( + model, + input_tensor, + output_path, + export_params=settings["export_params"], + keep_initializers_as_inputs=settings["keep_initializers_as_inputs"], + opset_version=settings["opset_version"], + do_constant_folding=settings["do_constant_folding"], + input_names=settings["input_names"], + output_names=settings["output_names"], + dynamic_axes=settings["dynamic_axes"], + verbose=False, + ) + + logger.info(f"ONNX export completed: {output_path}") + + # Optional model simplification + _optimize_onnx_model(output_path, logger) + + +def _optimize_onnx_model(onnx_path: str, logger: logging.Logger) -> None: + """Optimize ONNX model using onnxsim.""" + logger.info("Simplifying ONNX model...") + model_simplified, success = onnxsim.simplify(onnx_path) + if success: + onnx.save(model_simplified, onnx_path) + logger.info(f"ONNX model simplified successfully. 
Saved to {onnx_path}") + else: + logger.warning("ONNX model simplification failed") + + +def export_to_tensorrt( + onnx_path: str, output_path: str, input_tensor: torch.Tensor, config: DeploymentConfig, logger: logging.Logger +) -> bool: + """Export ONNX model to a TensorRT engine with precision policy support.""" + settings = config.tensorrt_settings + precision_policy = settings["precision_policy"] + policy_flags = settings["policy_flags"] + + logger.info(f"Building TensorRT engine with precision policy: {precision_policy}") + + # Initialize TensorRT + trt_logger = trt.Logger(trt.Logger.WARNING) + trt.init_libnvinfer_plugins(trt_logger, "") + + builder = trt.Builder(trt_logger) + builder_config = builder.create_builder_config() + builder_config.set_memory_pool_limit(pool=trt.MemoryPoolType.WORKSPACE, pool_size=settings["max_workspace_size"]) + + # Create network with appropriate flags + flags = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + + # Handle strongly typed flag (network creation flag, not builder flag) + if policy_flags.get("STRONGLY_TYPED"): + flags |= 1 << int(trt.NetworkDefinitionCreationFlag.STRONGLY_TYPED) + logger.info("Using strongly typed TensorRT network creation") + + network = builder.create_network(flags) + + # Apply precision flags to builder config + for flag_name, enabled in policy_flags.items(): + if flag_name == "STRONGLY_TYPED": + continue # Already handled as network creation flag above + if enabled and hasattr(trt.BuilderFlag, flag_name): + builder_config.set_flag(getattr(trt.BuilderFlag, flag_name)) + logger.info(f"BuilderFlag.{flag_name} enabled") + + # Setup optimization profile + profile = builder.create_optimization_profile() + _configure_input_shapes(profile, input_tensor, settings["model_inputs"], logger) + builder_config.add_optimization_profile(profile) + + # Parse ONNX model + parser = trt.OnnxParser(network, trt_logger) + with open(onnx_path, "rb") as f: + if not parser.parse(f.read()): + _log_parser_errors(parser, logger) + return False + logger.info("Successfully parsed the ONNX file") + + # Build engine + logger.info("Building TensorRT engine...") + serialized_engine = builder.build_serialized_network(network, builder_config) + + if serialized_engine is None: + logger.error("Failed to build TensorRT engine") + return False + + # Save engine + with open(output_path, "wb") as f: + f.write(serialized_engine) + + logger.info(f"TensorRT engine saved to {output_path}") + logger.info(f"Engine max workspace size: {settings['max_workspace_size'] / (1024**3):.2f} GB") + + return True + + +def _configure_input_shapes(profile, input_tensor: torch.Tensor, model_inputs: list, logger: logging.Logger) -> None: + """Configure input shapes for TensorRT optimization profile.""" + if model_inputs: + input_shapes = model_inputs[0].get("input_shapes", {}) + for input_name, shapes in input_shapes.items(): + min_shape = shapes.get("min_shape", list(input_tensor.shape)) + opt_shape = shapes.get("opt_shape", list(input_tensor.shape)) + max_shape = shapes.get("max_shape", list(input_tensor.shape)) + + logger.info(f"Setting input shapes - min: {min_shape}, opt: {opt_shape}, max: {max_shape}") + profile.set_shape(input_name, min_shape, opt_shape, max_shape) + else: + # Default shapes based on input tensor + input_shape = list(input_tensor.shape) + logger.info(f"Using default input shape: {input_shape}") + profile.set_shape("input", input_shape, input_shape, input_shape) + + +def _log_parser_errors(parser, logger: logging.Logger) -> None: + """Log TensorRT 
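# Sketch of the backend_config["model_inputs"] entry consumed by
# _configure_input_shapes above. The key layout mirrors what that helper reads
# (input name -> min/opt/max shapes); the 5-channel dimension matches
# EXPECTED_CHANNELS, while batch size and spatial resolution are placeholders.
model_inputs = [
    dict(
        input_shapes=dict(
            input=dict(  # name must match onnx_config["input_names"]
                min_shape=[1, 5, 544, 960],
                opt_shape=[1, 5, 544, 960],
                max_shape=[1, 5, 544, 960],
            ),
        ),
    ),
]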
parser errors.""" + logger.error("Failed to parse ONNX model") + for error in range(parser.num_errors): + logger.error(f"Parser error: {parser.get_error(error)}") + + +# ============================================================================ +# Inference and Verification Functions +# ============================================================================ + + +def run_pytorch_inference( + model: torch.nn.Module, input_tensor: torch.Tensor, logger: logging.Logger +) -> Tuple[torch.Tensor, float]: + """Run PyTorch inference on CPU for verification and return output with latency.""" + # Move to CPU to avoid GPU memory issues + model_cpu = model.cpu() + input_cpu = input_tensor.cpu() + + model_cpu.eval() + with torch.no_grad(): + # Measure inference time + start_time = time.perf_counter() + output = model_cpu(input_cpu) + end_time = time.perf_counter() + + latency = (end_time - start_time) * 1000 # Convert to milliseconds + + # Handle different output formats + if hasattr(output, "output"): + output = output.output + elif isinstance(output, dict) and "output" in output: + output = output["output"] + + if not isinstance(output, torch.Tensor): + raise ValueError(f"Unexpected PyTorch output type: {type(output)}") + + logger.info(f"PyTorch inference latency: {latency:.2f} ms") + logger.info(f"Output verification:") + logger.info(f" Output: {output.cpu().numpy()}") + return output, latency + + +def run_onnx_inference( + onnx_path: str, + input_tensor: torch.Tensor, + ref_output: torch.Tensor, + logger: logging.Logger, +) -> bool: + """Verify ONNX model output against PyTorch model.""" + # Clear GPU cache + _clear_gpu_memory() + + # ONNX inference with timing + providers = ["CPUExecutionProvider"] + ort_session = ort.InferenceSession(onnx_path, providers=providers) + onnx_input = {ort_session.get_inputs()[0].name: input_tensor.cpu().numpy()} + + start_time = time.perf_counter() + onnx_output = ort_session.run(None, onnx_input)[0] + end_time = time.perf_counter() + onnx_latency = (end_time - start_time) * 1000 + + logger.info(f"ONNX inference latency: {onnx_latency:.2f} ms") + + # Ensure onnx_output is numpy array before comparison + if not isinstance(onnx_output, np.ndarray): + logger.error(f"Unexpected ONNX output type: {type(onnx_output)}") + return False + + # Compare outputs + return _compare_outputs(ref_output.cpu().numpy(), onnx_output, "ONNX", logger) + + +def run_tensorrt_inference( + tensorrt_path: str, + input_tensor: torch.Tensor, + ref_output: torch.Tensor, + logger: logging.Logger, +) -> bool: + """Verify TensorRT model output against PyTorch model.""" + # Clear GPU cache + _clear_gpu_memory() + + # Load TensorRT engine + trt_logger = trt.Logger(trt.Logger.WARNING) + trt.init_libnvinfer_plugins(trt_logger, "") + runtime = trt.Runtime(trt_logger) + + with open(tensorrt_path, "rb") as f: + engine = runtime.deserialize_cuda_engine(f.read()) + + if engine is None: + logger.error("Failed to deserialize TensorRT engine") + return False + + # Run TensorRT inference with timing + trt_output, latency = _run_tensorrt_inference(engine, input_tensor.cpu(), logger) + logger.info(f"TensorRT inference latency: {latency:.2f} ms") + + # Compare outputs + return _compare_outputs(ref_output.cpu().numpy(), trt_output, "TensorRT", logger) + + +def _run_tensorrt_inference(engine, input_tensor: torch.Tensor, logger: logging.Logger) -> Tuple[np.ndarray, float]: + """Run TensorRT inference and return output with timing.""" + context = engine.create_execution_context() + stream = cuda.Stream() + start = 
cuda.Event() + end = cuda.Event() + + # Get tensor names and shapes + input_name, output_name = None, None + for i in range(engine.num_io_tensors): + tensor_name = engine.get_tensor_name(i) + if engine.get_tensor_mode(tensor_name) == trt.TensorIOMode.INPUT: + input_name = tensor_name + elif engine.get_tensor_mode(tensor_name) == trt.TensorIOMode.OUTPUT: + output_name = tensor_name + + if input_name is None or output_name is None: + raise RuntimeError("Could not find input/output tensor names") + + # Prepare arrays + input_np = input_tensor.numpy().astype(np.float32) + if not input_np.flags["C_CONTIGUOUS"]: + input_np = np.ascontiguousarray(input_np) + + context.set_input_shape(input_name, input_np.shape) + output_shape = context.get_tensor_shape(output_name) + output_np = np.empty(output_shape, dtype=np.float32) + if not output_np.flags["C_CONTIGUOUS"]: + output_np = np.ascontiguousarray(output_np) + + # Allocate GPU memory + d_input = cuda.mem_alloc(input_np.nbytes) + d_output = cuda.mem_alloc(output_np.nbytes) + + try: + # Set tensor addresses + context.set_tensor_address(input_name, int(d_input)) + context.set_tensor_address(output_name, int(d_output)) + + # Run inference with timing + cuda.memcpy_htod_async(d_input, input_np, stream) + start.record(stream) + context.execute_async_v3(stream_handle=stream.handle) + end.record(stream) + cuda.memcpy_dtoh_async(output_np, d_output, stream) + stream.synchronize() + + latency = end.time_since(start) + return output_np, latency + + finally: + # Cleanup + try: + d_input.free() + d_output.free() + except: + pass + + +def _compare_outputs( + pytorch_output: np.ndarray, backend_output: np.ndarray, backend_name: str, logger: logging.Logger +) -> bool: + """Compare outputs between PyTorch and backend.""" + if not isinstance(backend_output, np.ndarray): + logger.error(f"Unexpected {backend_name} output type: {type(backend_output)}") + return False + + max_diff = np.abs(pytorch_output - backend_output).max() + mean_diff = np.abs(pytorch_output - backend_output).mean() + + logger.info(f"Output verification:") + logger.info(f" {backend_name} output: {backend_output}") + logger.info(f" Max difference with PyTorch: {max_diff:.6f}") + logger.info(f" Mean difference with PyTorch: {mean_diff:.6f}") + + success = max_diff < DEFAULT_VERIFICATION_TOLERANCE + if not success: + logger.warning(f"Large difference detected: {max_diff:.6f}") + + return success + + +def run_verification( + model: torch.nn.Module, + onnx_path: str, + trt_path: Optional[str], + input_tensors: Dict[str, torch.Tensor], + logger: logging.Logger, +) -> None: + """Run model verification for available backends.""" + _clear_gpu_memory() + + for key, input_tensor in input_tensors.items(): + logger.info("=" * 50) + logger.info(f"Verifying {LABELS[key]} sample...") + logger.info("-" * 50) + logger.info("Verifying PyTorch model...") + pytorch_output, pytorch_latency = run_pytorch_inference(model, input_tensor, logger) + logger.info( + f"PyTorch output for {LABELS[key]}: [SCORE_MISCALIBRATED, SCORE_CALIBRATED] = {pytorch_output.cpu().numpy()}" + ) + score_calibrated = pytorch_output.cpu().numpy()[0, 1] - pytorch_output.cpu().numpy()[0, 0] + if key == "0" and score_calibrated < 0: + logger.info(f"Negative calibration score detected for {LABELS[key]} sample: {score_calibrated:.6f}") + elif key == "0" and score_calibrated > 0: + logger.warning(f"Positive calibration score detected for {LABELS[key]} sample: {score_calibrated:.6f}") + elif key == "1" and score_calibrated > 0: + logger.info(f"Positive 
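# Tiny numeric sketch of how the raw [SCORE_MISCALIBRATED, SCORE_CALIBRATED]
# logits are interpreted, mirroring the softmax/argmax used in evaluator.py and
# the score difference checked below; the logit values are invented.
import numpy as np

logits = np.array([[-1.2, 3.4]])                       # shape (1, 2)
probs = np.exp(logits) / np.exp(logits).sum(axis=-1, keepdims=True)
predicted = int(np.argmax(probs, axis=-1)[0])          # 1 -> "calibrated", 0 -> "miscalibrated"
confidence = float(probs.max())
score_calibrated = float(logits[0, 1] - logits[0, 0])  # positive favours "calibrated"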
calibration score detected for {LABELS[key]} sample: {score_calibrated:.6f}") + elif key == "1" and score_calibrated < 0: + logger.warning(f"Negative calibration score detected for {LABELS[key]} sample: {score_calibrated:.6f}") + + if onnx_path and osp.exists(onnx_path): + logger.info("-" * 50) + logger.info("Verifying ONNX model...") + if run_onnx_inference(onnx_path, input_tensor, pytorch_output, logger): + logger.info("ONNX model verification passed!") + else: + logger.error("ONNX model verification failed!") + + if trt_path and osp.exists(trt_path): + logger.info("-" * 50) + logger.info("Verifying TensorRT model...") + if run_tensorrt_inference(trt_path, input_tensor, pytorch_output, logger): + logger.info("TensorRT model verification passed!") + else: + logger.error("TensorRT model verification failed!") + logger.info("=" * 50) + + +# ============================================================================ +# Export Pipeline Helper Functions +# ============================================================================ + + +def validate_and_prepare_paths( + config: DeploymentConfig, work_dir: str, existing_onnx: Optional[str], logger: logging.Logger +) -> Tuple[Optional[str], Optional[str]]: + """Determine and validate export paths for ONNX and TensorRT models. + + Returns: + Tuple of (onnx_path, trt_path) + """ + onnx_path = None + trt_path = None + + if config.should_export_onnx: + onnx_settings = config.onnx_settings + onnx_path = osp.join(work_dir, onnx_settings["save_file"]) + + if config.should_export_tensorrt: + # Use existing ONNX if provided, otherwise use the one we'll export + if existing_onnx and not config.should_export_onnx: + onnx_path = existing_onnx + if not osp.exists(onnx_path): + logger.error(f"Provided ONNX file does not exist: {onnx_path}") + return None, None + logger.info(f"Using existing ONNX file: {onnx_path}") + elif not onnx_path: + # Need ONNX for TensorRT but neither export nor existing file specified + logger.error("TensorRT export requires ONNX file. Set mode='both' or provide onnx_file in config.") + return None, None + + # Set TensorRT output path + onnx_settings = config.onnx_settings + trt_file = onnx_settings["save_file"].replace(".onnx", ".engine") + trt_path = osp.join(work_dir, trt_file) + + return onnx_path, trt_path + + +def export_models( + model: torch.nn.Module, + config: DeploymentConfig, + onnx_path: Optional[str], + trt_path: Optional[str], + input_tensor_calibrated: torch.Tensor, + device: torch.device, + logger: logging.Logger, +) -> Tuple[bool, torch.device]: + """Export models to ONNX and/or TensorRT formats. 
+ + Returns: + Tuple of (success, updated_device) + """ + # Export ONNX + if config.should_export_onnx and onnx_path: + export_to_onnx(model, input_tensor_calibrated, onnx_path, config, logger) + + # Export TensorRT + if config.should_export_tensorrt and trt_path and onnx_path: + logger.info("Converting ONNX to TensorRT...") + + # Ensure CUDA device for TensorRT + if device.type != "cuda": + logger.warning("TensorRT requires CUDA device, switching to cuda") + device = torch.device("cuda") + + success = export_to_tensorrt(onnx_path, trt_path, input_tensor_calibrated, config, logger) + if success: + logger.info(f"TensorRT conversion successful: {trt_path}") + else: + logger.error("TensorRT conversion failed") + return False, device + + return True, device + + +def run_model_verification( + model: torch.nn.Module, + config: DeploymentConfig, + onnx_path: Optional[str], + trt_path: Optional[str], + input_tensor_calibrated: torch.Tensor, + input_tensor_miscalibrated: torch.Tensor, + existing_onnx: Optional[str], + logger: logging.Logger, +) -> None: + """Run model verification for exported formats.""" + if not config.should_verify: + return + + logger.info( + "Running verification for miscalibrated and calibrated samples with " + "an output array [SCORE_MISCALIBRATED, SCORE_CALIBRATED]..." + ) + input_tensors = {"0": input_tensor_miscalibrated, "1": input_tensor_calibrated} + + # Only verify formats that were exported or provided + onnx_path_for_verify = onnx_path if (config.should_export_onnx or existing_onnx) else None + trt_path_for_verify = trt_path if config.should_export_tensorrt else None + + run_verification(model, onnx_path_for_verify, trt_path_for_verify, input_tensors, logger) diff --git a/projects/CalibrationStatusClassification/deploy/main.py b/projects/CalibrationStatusClassification/deploy/main.py index 5efcffba..918bafe7 100644 --- a/projects/CalibrationStatusClassification/deploy/main.py +++ b/projects/CalibrationStatusClassification/deploy/main.py @@ -13,940 +13,21 @@ - Confusion matrix and per-class accuracy analysis """ -import argparse -import gc -import logging -import os import os.path as osp -import pickle -import time -from typing import Any, Dict, Optional, Tuple import mmengine -import numpy as np -import onnx -import onnxruntime as ort -import onnxsim -import pycuda.autoinit -import pycuda.driver as cuda -import tensorrt as trt import torch -import torch.nn.functional as F +from config import DeploymentConfig, parse_args, setup_logging +from evaluator import get_models_to_evaluate, run_full_evaluation +from exporters import ( + export_models, + load_sample_data_from_info_pkl, + run_model_verification, + validate_and_prepare_paths, +) from mmengine.config import Config from mmpretrain.apis import get_model -from autoware_ml.calibration_classification.datasets.transforms.calibration_classification_transform import ( - CalibrationClassificationTransform, -) - -# Constants -DEFAULT_VERIFICATION_TOLERANCE = 1e-3 -DEFAULT_WORKSPACE_SIZE = 1 << 30 # 1 GB -EXPECTED_CHANNELS = 5 # RGB + Depth + Intensity -LABELS = {"0": "miscalibrated", "1": "calibrated"} - -# Precision policy mapping -PRECISION_POLICIES = { - "auto": {}, # No special flags, TensorRT decides - "fp16": {"FP16": True}, - "fp32_tf32": {"TF32": True}, # TF32 for FP32 operations - "explicit_int8": {"INT8": True}, - "strongly_typed": {"STRONGLY_TYPED": True}, # Network creation flag -} - - -def load_info_pkl_data(info_pkl_path: str, sample_idx: int = 0) -> Dict[str, Any]: - """ - Load a single sample from info.pkl 
file. - - Args: - info_pkl_path: Path to the info.pkl file - sample_idx: Index of the sample to load (default: 0) - - Returns: - Sample dictionary with the required structure for CalibrationClassificationTransform - - Raises: - FileNotFoundError: If info.pkl file doesn't exist - ValueError: If data format is unexpected or sample index is invalid - """ - if not os.path.exists(info_pkl_path): - raise FileNotFoundError(f"Info.pkl file not found: {info_pkl_path}") - - try: - with open(info_pkl_path, "rb") as f: - info_data = pickle.load(f) - except Exception as e: - raise ValueError(f"Failed to load info.pkl file: {e}") - - # Extract samples from info.pkl - if isinstance(info_data, dict): - if "data_list" in info_data: - samples_list = info_data["data_list"] - else: - raise ValueError(f"Expected 'data_list' key in info_data, found keys: {list(info_data.keys())}") - else: - raise ValueError(f"Expected dict format, got {type(info_data)}") - - if not samples_list: - raise ValueError("No samples found in info.pkl") - - if sample_idx >= len(samples_list): - raise ValueError(f"Sample index {sample_idx} out of range (0-{len(samples_list)-1})") - - sample = samples_list[sample_idx] - - # Validate sample structure - required_keys = ["image", "lidar_points"] - if not all(key in sample for key in required_keys): - raise ValueError(f"Sample {sample_idx} has invalid structure. Required keys: {required_keys}") - - return sample - - -def load_sample_data_from_info_pkl( - info_pkl_path: str, - model_cfg: Config, - miscalibration_probability: float, - sample_idx: int = 0, - device: str = "cpu", -) -> torch.Tensor: - """ - Load and preprocess sample data from info.pkl using CalibrationClassificationTransform. - - Args: - info_pkl_path: Path to the info.pkl file - model_cfg: Model configuration containing data_root setting - miscalibration_probability: Probability of loading a miscalibrated sample - sample_idx: Index of the sample to load (default: 0) - device: Device to load tensor on - - Returns: - Preprocessed tensor ready for model inference - """ - # Load sample data from info.pkl - sample_data = load_info_pkl_data(info_pkl_path, sample_idx) - - # Get data_root from model config - data_root = model_cfg.get("data_root", None) - if data_root is None: - raise ValueError("data_root not found in model configuration") - - # Create transform for deployment - transform_config = model_cfg.get("transform_config", None) - if transform_config is None: - raise ValueError("transform_config not found in model configuration") - - transform = CalibrationClassificationTransform( - transform_config=transform_config, - mode="test", - max_depth=model_cfg.get("max_depth", 128.0), - dilation_size=model_cfg.get("dilation_size", 1), - undistort=True, - miscalibration_probability=miscalibration_probability, - enable_augmentation=False, - data_root=data_root, - projection_vis_dir=model_cfg.get("test_projection_vis_dir", None), - results_vis_dir=model_cfg.get("test_results_vis_dir", None), - binary_save_dir=model_cfg.get("binary_save_dir", None), - ) - - # Apply transform - results = transform.transform(sample_data) - input_data_processed = results["fused_img"] # (H, W, 5) - - # Convert to tensor - input_tensor = torch.from_numpy(input_data_processed).permute(2, 0, 1).float() # (5, H, W) - input_tensor = input_tensor.unsqueeze(0).to(device) # (1, 5, H, W) - - return input_tensor - - -class DeploymentConfig: - """Configuration container for deployment settings.""" - - def __init__(self, deploy_cfg: Config): - self.deploy_cfg = 
deploy_cfg - self._validate_config() - - def _validate_config(self) -> None: - """Validate configuration structure and required fields.""" - if "export" not in self.deploy_cfg: - raise ValueError( - "Missing 'export' section in deploy config. " - "Please update your config to the new format with 'export', 'runtime_io' sections." - ) - - if "runtime_io" not in self.deploy_cfg: - raise ValueError("Missing 'runtime_io' section in deploy config.") - - # Validate export mode - valid_modes = ["onnx", "trt", "both", "none"] - mode = self.export_config.get("mode", "both") - if mode not in valid_modes: - raise ValueError(f"Invalid export mode '{mode}'. Must be one of {valid_modes}") - - # Validate precision policy if present - backend_cfg = self.deploy_cfg.get("backend_config", {}) - common_cfg = backend_cfg.get("common_config", {}) - precision_policy = common_cfg.get("precision_policy", "auto") - if precision_policy not in PRECISION_POLICIES: - raise ValueError( - f"Invalid precision_policy '{precision_policy}'. " f"Must be one of {list(PRECISION_POLICIES.keys())}" - ) - - @property - def export_config(self) -> Dict: - """Get export configuration (mode, verify, device, work_dir).""" - return self.deploy_cfg.get("export", {}) - - @property - def runtime_io_config(self) -> Dict: - """Get runtime I/O configuration (info_pkl, sample_idx, onnx_file).""" - return self.deploy_cfg.get("runtime_io", {}) - - @property - def evaluation_config(self) -> Dict: - """Get evaluation configuration (enabled, num_samples, verbose, model paths).""" - return self.deploy_cfg.get("evaluation", {}) - - @property - def should_export_onnx(self) -> bool: - """Check if ONNX export is requested.""" - mode = self.export_config.get("mode", "both") - return mode in ["onnx", "both"] - - @property - def should_export_tensorrt(self) -> bool: - """Check if TensorRT export is requested.""" - mode = self.export_config.get("mode", "both") - return mode in ["trt", "both"] - - @property - def should_verify(self) -> bool: - """Check if verification is requested.""" - return self.export_config.get("verify", False) - - @property - def device(self) -> str: - """Get device for export.""" - return self.export_config.get("device", "cuda:0") - - @property - def work_dir(self) -> str: - """Get working directory.""" - return self.export_config.get("work_dir", os.getcwd()) - - @property - def onnx_settings(self) -> Dict: - """Get ONNX export settings.""" - onnx_config = self.deploy_cfg.get("onnx_config", {}) - return { - "opset_version": onnx_config.get("opset_version", 16), - "do_constant_folding": onnx_config.get("do_constant_folding", True), - "input_names": onnx_config.get("input_names", ["input"]), - "output_names": onnx_config.get("output_names", ["output"]), - "dynamic_axes": onnx_config.get("dynamic_axes"), - "export_params": onnx_config.get("export_params", True), - "keep_initializers_as_inputs": onnx_config.get("keep_initializers_as_inputs", False), - "save_file": onnx_config.get("save_file", "calibration_classifier.onnx"), - } - - @property - def tensorrt_settings(self) -> Dict: - """Get TensorRT export settings with precision policy support.""" - backend_config = self.deploy_cfg.get("backend_config", {}) - common_config = backend_config.get("common_config", {}) - - # Get precision policy - precision_policy = common_config.get("precision_policy", "auto") - policy_flags = PRECISION_POLICIES.get(precision_policy, {}) - - return { - "max_workspace_size": common_config.get("max_workspace_size", DEFAULT_WORKSPACE_SIZE), - "precision_policy": 
precision_policy, - "policy_flags": policy_flags, - "model_inputs": backend_config.get("model_inputs", []), - } - - -def parse_args() -> argparse.Namespace: - """Parse command line arguments.""" - parser = argparse.ArgumentParser( - description="Export CalibrationStatusClassification model to ONNX/TensorRT.", - formatter_class=argparse.ArgumentDefaultsHelpFormatter, - ) - parser.add_argument("deploy_cfg", help="Deploy config path") - parser.add_argument("model_cfg", help="Model config path") - parser.add_argument( - "checkpoint", nargs="?", default=None, help="Model checkpoint path (optional when mode='none')" - ) - - # Optional overrides - parser.add_argument("--work-dir", help="Override output directory from config") - parser.add_argument("--device", help="Override device from config") - parser.add_argument("--log-level", default="INFO", choices=list(logging._nameToLevel.keys()), help="Logging level") - parser.add_argument("--info-pkl", help="Override info.pkl path from config") - - return parser.parse_args() - - -def setup_logging(level: str) -> logging.Logger: - """Setup logging configuration.""" - logging.basicConfig(level=getattr(logging, level), format="%(levelname)s:%(name)s:%(message)s") - return logging.getLogger("mmdeploy") - - -def export_to_onnx( - model: torch.nn.Module, - input_tensor: torch.Tensor, - output_path: str, - config: DeploymentConfig, - logger: logging.Logger, -) -> None: - """Export model to ONNX format.""" - settings = config.onnx_settings - model.eval() - - logger.info("Exporting model to ONNX format...") - logger.info(f"Input shape: {input_tensor.shape}") - logger.info(f"Output path: {output_path}") - logger.info(f"ONNX opset version: {settings['opset_version']}") - - with torch.no_grad(): - torch.onnx.export( - model, - input_tensor, - output_path, - export_params=settings["export_params"], - keep_initializers_as_inputs=settings["keep_initializers_as_inputs"], - opset_version=settings["opset_version"], - do_constant_folding=settings["do_constant_folding"], - input_names=settings["input_names"], - output_names=settings["output_names"], - dynamic_axes=settings["dynamic_axes"], - verbose=False, - ) - - logger.info(f"ONNX export completed: {output_path}") - - # Optional model simplification - _optimize_onnx_model(output_path, logger) - - -def _optimize_onnx_model(onnx_path: str, logger: logging.Logger) -> None: - """Optimize ONNX model using onnxsim.""" - logger.info("Simplifying ONNX model...") - model_simplified, success = onnxsim.simplify(onnx_path) - if success: - onnx.save(model_simplified, onnx_path) - logger.info(f"ONNX model simplified successfully. 
Saved to {onnx_path}") - else: - logger.warning("ONNX model simplification failed") - - -def export_to_tensorrt( - onnx_path: str, output_path: str, input_tensor: torch.Tensor, config: DeploymentConfig, logger: logging.Logger -) -> bool: - """Export ONNX model to a TensorRT engine with precision policy support.""" - settings = config.tensorrt_settings - precision_policy = settings["precision_policy"] - policy_flags = settings["policy_flags"] - - logger.info(f"Building TensorRT engine with precision policy: {precision_policy}") - - # Initialize TensorRT - trt_logger = trt.Logger(trt.Logger.WARNING) - trt.init_libnvinfer_plugins(trt_logger, "") - - builder = trt.Builder(trt_logger) - builder_config = builder.create_builder_config() - builder_config.set_memory_pool_limit(pool=trt.MemoryPoolType.WORKSPACE, pool_size=settings["max_workspace_size"]) - - # Create network with appropriate flags - flags = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - - # Handle strongly typed flag (network creation flag, not builder flag) - if policy_flags.get("STRONGLY_TYPED"): - flags |= 1 << int(trt.NetworkDefinitionCreationFlag.STRONGLY_TYPED) - logger.info("Using strongly typed TensorRT network creation") - - network = builder.create_network(flags) - - # Apply precision flags to builder config - for flag_name, enabled in policy_flags.items(): - if flag_name == "STRONGLY_TYPED": - continue # Already handled as network creation flag above - if enabled and hasattr(trt.BuilderFlag, flag_name): - builder_config.set_flag(getattr(trt.BuilderFlag, flag_name)) - logger.info(f"BuilderFlag.{flag_name} enabled") - - # Setup optimization profile - profile = builder.create_optimization_profile() - _configure_input_shapes(profile, input_tensor, settings["model_inputs"], logger) - builder_config.add_optimization_profile(profile) - - # Parse ONNX model - parser = trt.OnnxParser(network, trt_logger) - with open(onnx_path, "rb") as f: - if not parser.parse(f.read()): - _log_parser_errors(parser, logger) - return False - logger.info("Successfully parsed the ONNX file") - - # Build engine - logger.info("Building TensorRT engine...") - serialized_engine = builder.build_serialized_network(network, builder_config) - - if serialized_engine is None: - logger.error("Failed to build TensorRT engine") - return False - - # Save engine - with open(output_path, "wb") as f: - f.write(serialized_engine) - - logger.info(f"TensorRT engine saved to {output_path}") - logger.info(f"Engine max workspace size: {settings['max_workspace_size'] / (1024**3):.2f} GB") - - return True - - -def _configure_input_shapes(profile, input_tensor: torch.Tensor, model_inputs: list, logger: logging.Logger) -> None: - """Configure input shapes for TensorRT optimization profile.""" - if model_inputs: - input_shapes = model_inputs[0].get("input_shapes", {}) - for input_name, shapes in input_shapes.items(): - min_shape = shapes.get("min_shape", list(input_tensor.shape)) - opt_shape = shapes.get("opt_shape", list(input_tensor.shape)) - max_shape = shapes.get("max_shape", list(input_tensor.shape)) - - logger.info(f"Setting input shapes - min: {min_shape}, opt: {opt_shape}, max: {max_shape}") - profile.set_shape(input_name, min_shape, opt_shape, max_shape) - else: - # Default shapes based on input tensor - input_shape = list(input_tensor.shape) - logger.info(f"Using default input shape: {input_shape}") - profile.set_shape("input", input_shape, input_shape, input_shape) - - -def _log_parser_errors(parser, logger: logging.Logger) -> None: - """Log TensorRT 
parser errors.""" - logger.error("Failed to parse ONNX model") - for error in range(parser.num_errors): - logger.error(f"Parser error: {parser.get_error(error)}") - - -def run_pytorch_inference( - model: torch.nn.Module, input_tensor: torch.Tensor, logger: logging.Logger -) -> Tuple[torch.Tensor, float]: - """Run PyTorch inference on CPU for verification and return output with latency.""" - # Move to CPU to avoid GPU memory issues - model_cpu = model.cpu() - input_cpu = input_tensor.cpu() - - model_cpu.eval() - with torch.no_grad(): - # Measure inference time - start_time = time.perf_counter() - output = model_cpu(input_cpu) - end_time = time.perf_counter() - - latency = (end_time - start_time) * 1000 # Convert to milliseconds - - # Handle different output formats - if hasattr(output, "output"): - output = output.output - elif isinstance(output, dict) and "output" in output: - output = output["output"] - - if not isinstance(output, torch.Tensor): - raise ValueError(f"Unexpected PyTorch output type: {type(output)}") - - logger.info(f"PyTorch inference latency: {latency:.2f} ms") - logger.info(f"Output verification:") - logger.info(f" Output: {output.cpu().numpy()}") - return output, latency - - -def run_onnx_inference( - onnx_path: str, - input_tensor: torch.Tensor, - ref_output: torch.Tensor, - logger: logging.Logger, -) -> bool: - """Verify ONNX model output against PyTorch model.""" - # Clear GPU cache - if torch.cuda.is_available(): - torch.cuda.empty_cache() - - # ONNX inference with timing - providers = ["CPUExecutionProvider"] - ort_session = ort.InferenceSession(onnx_path, providers=providers) - onnx_input = {ort_session.get_inputs()[0].name: input_tensor.cpu().numpy()} - - start_time = time.perf_counter() - onnx_output = ort_session.run(None, onnx_input)[0] - end_time = time.perf_counter() - onnx_latency = (end_time - start_time) * 1000 - - logger.info(f"ONNX inference latency: {onnx_latency:.2f} ms") - - # Ensure onnx_output is numpy array before comparison - if not isinstance(onnx_output, np.ndarray): - logger.error(f"Unexpected ONNX output type: {type(onnx_output)}") - return False - - # Compare outputs - return _compare_outputs(ref_output.cpu().numpy(), onnx_output, "ONNX", logger) - - -def run_tensorrt_inference( - tensorrt_path: str, - input_tensor: torch.Tensor, - ref_output: torch.Tensor, - logger: logging.Logger, -) -> bool: - """Verify TensorRT model output against PyTorch model.""" - # Clear GPU cache - if torch.cuda.is_available(): - torch.cuda.empty_cache() - - # Load TensorRT engine - trt_logger = trt.Logger(trt.Logger.WARNING) - trt.init_libnvinfer_plugins(trt_logger, "") - runtime = trt.Runtime(trt_logger) - - with open(tensorrt_path, "rb") as f: - engine = runtime.deserialize_cuda_engine(f.read()) - - if engine is None: - logger.error("Failed to deserialize TensorRT engine") - return False - - # Run TensorRT inference with timing - trt_output, latency = _run_tensorrt_inference(engine, input_tensor.cpu(), logger) - logger.info(f"TensorRT inference latency: {latency:.2f} ms") - - # Compare outputs - return _compare_outputs(ref_output.cpu().numpy(), trt_output, "TensorRT", logger) - - -def _run_tensorrt_inference(engine, input_tensor: torch.Tensor, logger: logging.Logger) -> Tuple[np.ndarray, float]: - """Run TensorRT inference and return output with timing.""" - context = engine.create_execution_context() - stream = cuda.Stream() - start = cuda.Event() - end = cuda.Event() - - # Get tensor names and shapes - input_name, output_name = None, None - for i in 
range(engine.num_io_tensors): - tensor_name = engine.get_tensor_name(i) - if engine.get_tensor_mode(tensor_name) == trt.TensorIOMode.INPUT: - input_name = tensor_name - elif engine.get_tensor_mode(tensor_name) == trt.TensorIOMode.OUTPUT: - output_name = tensor_name - - if input_name is None or output_name is None: - raise RuntimeError("Could not find input/output tensor names") - - # Prepare arrays - input_np = input_tensor.numpy().astype(np.float32) - if not input_np.flags["C_CONTIGUOUS"]: - input_np = np.ascontiguousarray(input_np) - - context.set_input_shape(input_name, input_np.shape) - output_shape = context.get_tensor_shape(output_name) - output_np = np.empty(output_shape, dtype=np.float32) - if not output_np.flags["C_CONTIGUOUS"]: - output_np = np.ascontiguousarray(output_np) - - # Allocate GPU memory - d_input = cuda.mem_alloc(input_np.nbytes) - d_output = cuda.mem_alloc(output_np.nbytes) - - try: - # Set tensor addresses - context.set_tensor_address(input_name, int(d_input)) - context.set_tensor_address(output_name, int(d_output)) - - # Run inference with timing - cuda.memcpy_htod_async(d_input, input_np, stream) - start.record(stream) - context.execute_async_v3(stream_handle=stream.handle) - end.record(stream) - cuda.memcpy_dtoh_async(output_np, d_output, stream) - stream.synchronize() - - latency = end.time_since(start) - return output_np, latency - - finally: - # Cleanup - try: - d_input.free() - d_output.free() - except: - pass - - -def _compare_outputs( - pytorch_output: np.ndarray, backend_output: np.ndarray, backend_name: str, logger: logging.Logger -) -> bool: - """Compare outputs between PyTorch and backend.""" - if not isinstance(backend_output, np.ndarray): - logger.error(f"Unexpected {backend_name} output type: {type(backend_output)}") - return False - - max_diff = np.abs(pytorch_output - backend_output).max() - mean_diff = np.abs(pytorch_output - backend_output).mean() - - logger.info(f"Output verification:") - logger.info(f" {backend_name} output: {backend_output}") - logger.info(f" Max difference with PyTorch: {max_diff:.6f}") - logger.info(f" Mean difference with PyTorch: {mean_diff:.6f}") - - success = max_diff < DEFAULT_VERIFICATION_TOLERANCE - if not success: - logger.warning(f"Large difference detected: {max_diff:.6f}") - - return success - - -def run_verification( - model: torch.nn.Module, - onnx_path: str, - trt_path: Optional[str], - input_tensors: Dict[str, torch.Tensor], - logger: logging.Logger, -) -> None: - """Run model verification for available backends.""" - if torch.cuda.is_available(): - torch.cuda.empty_cache() - - for key, input_tensor in input_tensors.items(): - logger.info("=" * 50) - logger.info(f"Verifying {LABELS[key]} sample...") - logger.info("-" * 50) - logger.info("Verifying PyTorch model...") - pytorch_output, pytorch_latency = run_pytorch_inference(model, input_tensor, logger) - logger.info( - f"PyTorch output for {LABELS[key]}: [SCORE_MISCALIBRATED, SCORE_CALIBRATED] = {pytorch_output.cpu().numpy()}" - ) - score_calibrated = pytorch_output.cpu().numpy()[0, 1] - pytorch_output.cpu().numpy()[0, 0] - if key == "0" and score_calibrated < 0: - logger.info(f"Negative calibration score detected for {LABELS[key]} sample: {score_calibrated:.6f}") - elif key == "0" and score_calibrated > 0: - logger.warning(f"Positive calibration score detected for {LABELS[key]} sample: {score_calibrated:.6f}") - elif key == "1" and score_calibrated > 0: - logger.info(f"Positive calibration score detected for {LABELS[key]} sample: {score_calibrated:.6f}") - 
elif key == "1" and score_calibrated < 0: - logger.warning(f"Negative calibration score detected for {LABELS[key]} sample: {score_calibrated:.6f}") - - if onnx_path and osp.exists(onnx_path): - logger.info("-" * 50) - logger.info("Verifying ONNX model...") - if run_onnx_inference(onnx_path, input_tensor, pytorch_output, logger): - logger.info("ONNX model verification passed!") - else: - logger.error("ONNX model verification failed!") - - if trt_path and osp.exists(trt_path): - logger.info("-" * 50) - logger.info("Verifying TensorRT model...") - if run_tensorrt_inference(trt_path, input_tensor, pytorch_output, logger): - logger.info("TensorRT model verification passed!") - else: - logger.error("TensorRT model verification failed!") - logger.info("=" * 50) - - -def evaluate_exported_model( - model_path: str, - model_type: str, - model_cfg: Config, - info_pkl_path: str, - logger: logging.Logger, - device: str = "cpu", - num_samples: int = 10, - verbose: bool = False, -) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: - """ - Evaluate exported model (ONNX or TensorRT) using info.pkl data. - - Args: - model_path: Path to model file (.onnx or .engine) - model_type: Type of model ("onnx" or "tensorrt") - model_cfg: Model configuration - info_pkl_path: Path to info.pkl file - logger: Logger instance - device: Device for preprocessing - num_samples: Number of samples to evaluate - verbose: Enable verbose logging - - Returns: - Tuple of (predictions, ground_truth, probabilities, latencies) - """ - - # Load info.pkl data - try: - with open(info_pkl_path, "rb") as f: - info_data = pickle.load(f) - - if "data_list" not in info_data: - raise ValueError("Expected 'data_list' key in info.pkl") - - samples_list = info_data["data_list"] - logger.info(f"Loaded {len(samples_list)} samples from info.pkl") - - # Limit number of samples for evaluation - num_samples = min(num_samples, len(samples_list)) - logger.info(f"Evaluating {num_samples} samples") - - except Exception as e: - logger.error(f"Failed to load info.pkl: {e}") - raise - - # Initialize transform once and reuse it for all samples - data_root = model_cfg.get("data_root", None) - if data_root is None: - raise ValueError("data_root not found in model configuration") - - transform_config = model_cfg.get("transform_config", None) - if transform_config is None: - raise ValueError("transform_config not found in model configuration") - - transform = CalibrationClassificationTransform( - transform_config=transform_config, - mode="test", - max_depth=model_cfg.get("max_depth", 128.0), - dilation_size=model_cfg.get("dilation_size", 1), - undistort=True, - miscalibration_probability=0.0, # Will be updated per sample - enable_augmentation=False, - data_root=data_root, - projection_vis_dir=None, - results_vis_dir=None, - binary_save_dir=None, - ) - - # Initialize inference engine - if model_type == "onnx": - logger.info(f"Using ONNX model: {model_path}") - logger.info("Creating ONNX InferenceSession...") - - # Configure execution providers - if device.startswith("cuda"): - providers = ["CUDAExecutionProvider", "CPUExecutionProvider"] - logger.info("Attempting to use CUDA acceleration for ONNX inference...") - try: - available_providers = ort.get_available_providers() - if "CUDAExecutionProvider" not in available_providers: - logger.warning( - f"CUDAExecutionProvider not available. Available: {available_providers}. 
" - "Install onnxruntime-gpu for CUDA acceleration" - ) - providers = ["CPUExecutionProvider"] - else: - logger.info("CUDAExecutionProvider is available") - except Exception as e: - logger.warning(f"Error checking CUDA provider: {e}. Falling back to CPU") - providers = ["CPUExecutionProvider"] - else: - providers = ["CPUExecutionProvider"] - logger.info("Using CPU for ONNX inference") - - # Create session once and reuse it - ort_session = ort.InferenceSession(model_path, providers=providers) - logger.info(f"ONNX session using providers: {ort_session.get_providers()}") - - # Define inference function - def inference_func(input_tensor): - input_name = ort_session.get_inputs()[0].name - onnx_input = {input_name: input_tensor.cpu().numpy()} - start_time = time.perf_counter() - onnx_output = ort_session.run(None, onnx_input)[0] - latency = (time.perf_counter() - start_time) * 1000 - return onnx_output, latency - - elif model_type == "tensorrt": - logger.info(f"Using TensorRT model: {model_path}") - - # Load TensorRT engine - trt_logger = trt.Logger(trt.Logger.WARNING) - trt.init_libnvinfer_plugins(trt_logger, "") - runtime = trt.Runtime(trt_logger) - - with open(model_path, "rb") as f: - engine = runtime.deserialize_cuda_engine(f.read()) - - if engine is None: - raise RuntimeError("Failed to deserialize TensorRT engine") - - # Define inference function - def inference_func(input_tensor): - return _run_tensorrt_inference(engine, input_tensor.cpu(), logger) - - else: - raise ValueError(f"Unsupported model type: {model_type}") - - # Lists to store results - all_predictions = [] - all_ground_truth = [] - all_probabilities = [] - all_latencies = [] - - # Evaluate samples - for sample_idx in range(num_samples): - if sample_idx % 100 == 0: - logger.info(f"Processing sample {sample_idx + 1}/{num_samples}") - - try: - # Test both calibrated and miscalibrated versions - for miscalibration_prob, expected_label in [(0.0, 1), (1.0, 0)]: - # Update transform's miscalibration probability - transform.miscalibration_probability = miscalibration_prob - - # Load and preprocess sample - sample_data = load_info_pkl_data(info_pkl_path, sample_idx) - sample_data["sample_idx"] = sample_idx - - results = transform.transform(sample_data) - input_data_processed = results["fused_img"] - gt_label = results["gt_label"] - - # Convert to tensor - input_tensor = torch.from_numpy(input_data_processed).permute(2, 0, 1).float() - input_tensor = input_tensor.unsqueeze(0).to(device) - - # Run inference - output_np, latency = inference_func(input_tensor) - - if output_np is None: - logger.error(f"Failed to get output for sample {sample_idx}") - continue - - # Convert logits to probabilities - logits = torch.from_numpy(output_np) - probabilities = F.softmax(logits, dim=-1) - predicted_class = torch.argmax(probabilities, dim=-1).item() - confidence = probabilities.max().item() - - # Store results - all_predictions.append(predicted_class) - all_ground_truth.append(gt_label) - all_probabilities.append(probabilities.cpu().numpy()) - all_latencies.append(latency) - - # Print sample results only in verbose mode - if verbose: - logger.info( - f"Sample {sample_idx + 1} (GT={gt_label}): " - f"Pred={predicted_class}, Confidence={confidence:.4f}, Latency={latency:.2f}ms" - ) - - # Clear GPU memory periodically - if model_type == "tensorrt" and sample_idx % 10 == 0: - if torch.cuda.is_available(): - torch.cuda.empty_cache() - gc.collect() - - except Exception as e: - logger.error(f"Error processing sample {sample_idx}: {e}") - continue - - 
return ( - np.array(all_predictions), - np.array(all_ground_truth), - np.array(all_probabilities), - np.array(all_latencies), - ) - - -def print_evaluation_results( - all_predictions: np.ndarray, - all_ground_truth: np.ndarray, - all_probabilities: np.ndarray, - all_latencies: np.ndarray, - model_type: str, - logger: logging.Logger, -) -> None: - """Print comprehensive evaluation results with metrics and statistics.""" - - if len(all_predictions) == 0: - logger.error("No samples were processed successfully.") - return - - correct_predictions = (all_predictions == all_ground_truth).sum() - total_samples = len(all_predictions) - accuracy = correct_predictions / total_samples - avg_latency = np.mean(all_latencies) - - # Print header - logger.info(f"\n{'='*60}") - logger.info(f"{model_type.upper()} Model Evaluation Results") - logger.info(f"{'='*60}") - logger.info(f"Total samples: {total_samples}") - logger.info(f"Correct predictions: {correct_predictions}") - logger.info(f"Accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)") - logger.info(f"Average latency: {avg_latency:.2f} ms") - - # Calculate per-class accuracy - unique_classes = np.unique(all_ground_truth) - logger.info(f"\nPer-class accuracy:") - for cls in unique_classes: - cls_mask = all_ground_truth == cls - cls_correct = (all_predictions[cls_mask] == all_ground_truth[cls_mask]).sum() - cls_total = cls_mask.sum() - cls_accuracy = cls_correct / cls_total if cls_total > 0 else 0 - logger.info( - f" Class {cls} ({LABELS[str(cls)]}): " - f"{cls_correct}/{cls_total} = {cls_accuracy:.4f} ({cls_accuracy*100:.2f}%)" - ) - - # Calculate average confidence - avg_confidence = np.mean([prob.max() for prob in all_probabilities]) - logger.info(f"\nAverage confidence: {avg_confidence:.4f}") - - # Calculate latency statistics - min_latency = np.min(all_latencies) - max_latency = np.max(all_latencies) - std_latency = np.std(all_latencies) - p50_latency = np.percentile(all_latencies, 50) - p95_latency = np.percentile(all_latencies, 95) - p99_latency = np.percentile(all_latencies, 99) - - logger.info(f"\nLatency Statistics:") - logger.info(f" Average: {avg_latency:.2f} ms") - logger.info(f" Median (P50): {p50_latency:.2f} ms") - logger.info(f" P95: {p95_latency:.2f} ms") - logger.info(f" P99: {p99_latency:.2f} ms") - logger.info(f" Min: {min_latency:.2f} ms") - logger.info(f" Max: {max_latency:.2f} ms") - logger.info(f" Std: {std_latency:.2f} ms") - - # Show confusion matrix - logger.info(f"\nConfusion Matrix:") - logger.info(f"{'':>10} Predicted") - logger.info(f"{'Actual':>10} {'0 (misc)':>10} {'1 (calib)':>10}") - logger.info(f"{'-'*32}") - for true_cls in unique_classes: - row_label = f"{true_cls} ({LABELS[str(true_cls)][:4]})" - row = [f"{row_label:>10}"] - for pred_cls in unique_classes: - count = ((all_ground_truth == true_cls) & (all_predictions == pred_cls)).sum() - row.append(f"{count:>10}") - logger.info(" ".join(row)) - - logger.info(f"\n{'='*60}") - logger.info(f"Evaluation Summary:") - logger.info(f" Model Type: {model_type.upper()}") - logger.info(f" Accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)") - logger.info(f" Avg Latency: {avg_latency:.2f} ms") - logger.info(f" Throughput: {1000/avg_latency:.2f} samples/sec") - logger.info(f"{'='*60}\n") - def main(): """Main deployment function.""" @@ -961,11 +42,13 @@ def main(): logger.info(f"Loading model config from: {args.model_cfg}") model_cfg = Config.fromfile(args.model_cfg) + # Extract runtime configuration work_dir = config.work_dir device = config.device info_pkl = 
config.runtime_io_config.get("info_pkl") sample_idx = config.runtime_io_config.get("sample_idx", 0) existing_onnx = config.runtime_io_config.get("onnx_file") + export_mode = config.export_config.get("mode", "both") # Validate required parameters if not info_pkl: @@ -976,43 +59,33 @@ def main(): mmengine.mkdir_or_exist(osp.abspath(work_dir)) logger.info(f"Working directory: {work_dir}") logger.info(f"Device: {device}") - export_mode = config.export_config.get("mode", "both") logger.info(f"Export mode: {export_mode}") # Check if eval-only mode is_eval_only = export_mode == "none" + # Validate eval-only mode configuration + if is_eval_only: + eval_enabled = config.evaluation_config.get("enabled", False) + if not eval_enabled: + logger.error( + "Configuration error: export mode is 'none' (evaluation-only mode) " + "but evaluation.enabled is False. " + "Please set evaluation.enabled = True in your config." + ) + return + # Validate checkpoint requirement if not is_eval_only and not args.checkpoint: logger.error("Checkpoint is required when export mode is not 'none'") return - # Determine export paths - onnx_path = None - trt_path = None - + # Export phase if not is_eval_only: - if config.should_export_onnx: - onnx_settings = config.onnx_settings - onnx_path = osp.join(work_dir, onnx_settings["save_file"]) - - if config.should_export_tensorrt: - # Use existing ONNX if provided, otherwise use the one we'll export - if existing_onnx and not config.should_export_onnx: - onnx_path = existing_onnx - if not osp.exists(onnx_path): - logger.error(f"Provided ONNX file does not exist: {onnx_path}") - return - logger.info(f"Using existing ONNX file: {onnx_path}") - elif not onnx_path: - # Need ONNX for TensorRT but neither export nor existing file specified - logger.error("TensorRT export requires ONNX file. 
Set mode='both' or provide onnx_file in config.") - return - - # Set TensorRT output path - onnx_settings = config.onnx_settings - trt_file = onnx_settings["save_file"].replace(".onnx", ".engine") - trt_path = osp.join(work_dir, trt_file) + # Determine export paths + onnx_path, trt_path = validate_and_prepare_paths(config, work_dir, existing_onnx, logger) + if onnx_path is None and trt_path is None: + return # Load model logger.info(f"Loading model from checkpoint: {args.checkpoint}") @@ -1027,135 +100,62 @@ def main(): ) # Export models - if config.should_export_onnx: - export_to_onnx(model, input_tensor_calibrated, onnx_path, config, logger) + success, device = export_models(model, config, onnx_path, trt_path, input_tensor_calibrated, device, logger) + if not success: + logger.error("Export failed") + return + + # Update tensors if device changed + if device.type == "cuda": + input_tensor_calibrated = input_tensor_calibrated.to(device) + input_tensor_miscalibrated = input_tensor_miscalibrated.to(device) + + # Run verification + run_model_verification( + model, + config, + onnx_path, + trt_path, + input_tensor_calibrated, + input_tensor_miscalibrated, + existing_onnx, + logger, + ) + # Log exported formats + exported_formats = [] + if config.should_export_onnx: + exported_formats.append("ONNX") if config.should_export_tensorrt: - logger.info("Converting ONNX to TensorRT...") - - # Ensure CUDA device for TensorRT - if device.type != "cuda": - logger.warning("TensorRT requires CUDA device, switching to cuda") - device = torch.device("cuda") - input_tensor_calibrated = input_tensor_calibrated.to(device) - input_tensor_miscalibrated = input_tensor_miscalibrated.to(device) - - success = export_to_tensorrt(onnx_path, trt_path, input_tensor_calibrated, config, logger) - if success: - logger.info(f"TensorRT conversion successful: {trt_path}") - else: - logger.error("TensorRT conversion failed") - - # Run verification if requested - if config.should_verify: - logger.info( - "Running verification for miscalibrated and calibrated samples with an output array [SCORE_MISCALIBRATED, SCORE_CALIBRATED]..." 
- ) - input_tensors = {"0": input_tensor_miscalibrated, "1": input_tensor_calibrated} - - # Only verify formats that were exported or provided - onnx_path_for_verify = onnx_path if (config.should_export_onnx or existing_onnx) else None - trt_path_for_verify = trt_path if config.should_export_tensorrt else None + exported_formats.append("TensorRT") + if exported_formats: + logger.info(f"Exported formats: {', '.join(exported_formats)}") - run_verification(model, onnx_path_for_verify, trt_path_for_verify, input_tensors, logger) + logger.info("Deployment completed successfully!") else: logger.info("Evaluation-only mode: Skipping model loading and export") - # Get evaluation settings from config and CLI + # Evaluation phase eval_cfg = config.evaluation_config should_evaluate = eval_cfg.get("enabled", False) num_samples = eval_cfg.get("num_samples", 10) verbose_mode = eval_cfg.get("verbose", False) - # Run full evaluation if requested if should_evaluate: logger.info(f"\n{'='*60}") logger.info("Starting full model evaluation...") logger.info(f"{'='*60}") - # Determine which models to evaluate - models_to_evaluate = [] - - # Get model paths from config - eval_onnx_path = eval_cfg.get("onnx_model") - eval_trt_path = eval_cfg.get("tensorrt_model") - - # If both paths are None, skip evaluation (no auto-detection) - if eval_onnx_path is None and eval_trt_path is None: - logger.warning("Both onnx_model and tensorrt_model are None. Skipping evaluation.") - logger.warning("To enable evaluation, specify at least one model path in the config.") - else: - # Only evaluate models that are explicitly specified (not None) - if eval_onnx_path is not None: - # Evaluate ONNX model - if osp.exists(eval_onnx_path): - models_to_evaluate.append(("onnx", eval_onnx_path)) - logger.info(f"Using config-specified ONNX model: {eval_onnx_path}") - else: - logger.warning(f"Config-specified ONNX model not found: {eval_onnx_path}") - - if eval_trt_path is not None: - # Evaluate TensorRT model - if osp.exists(eval_trt_path): - models_to_evaluate.append(("tensorrt", eval_trt_path)) - logger.info(f"Using config-specified TensorRT model: {eval_trt_path}") - else: - logger.warning(f"Config-specified TensorRT model not found: {eval_trt_path}") - - if not models_to_evaluate: - logger.error( - "No models available for evaluation. Please export models first or specify model paths in config." 
- ) - else: - # Evaluate each model - for model_type, model_path in models_to_evaluate: - logger.info(f"\nEvaluating {model_type.upper()} model...") - logger.info(f"Model path: {model_path}") - logger.info(f"Number of samples: {num_samples}") - logger.info(f"Verbose mode: {verbose_mode}") - - try: - # Run evaluation - predictions, ground_truth, probabilities, latencies = evaluate_exported_model( - model_path=model_path, - model_type=model_type, - model_cfg=model_cfg, - info_pkl_path=info_pkl, - logger=logger, - device=device.type if isinstance(device, torch.device) else device, - num_samples=num_samples, - verbose=verbose_mode, - ) - - # Print results - print_evaluation_results(predictions, ground_truth, probabilities, latencies, model_type, logger) - - # Cleanup - if torch.cuda.is_available(): - torch.cuda.empty_cache() - gc.collect() - - except Exception as e: - logger.error(f"Evaluation failed for {model_type.upper()} model: {e}") - import traceback - - logger.error(traceback.format_exc()) - continue - - logger.info("Deployment completed successfully!") - - # Log what was exported - if not is_eval_only: - exported_formats = [] - if config.should_export_onnx: - exported_formats.append("ONNX") - if config.should_export_tensorrt: - exported_formats.append("TensorRT") - - if exported_formats: - logger.info(f"Exported formats: {', '.join(exported_formats)}") - else: - logger.info("Evaluation-only mode: No models were exported") + models_to_evaluate = get_models_to_evaluate(eval_cfg, logger) + run_full_evaluation( + models_to_evaluate, + model_cfg, + info_pkl, + device, + num_samples, + verbose_mode, + logger, + ) if __name__ == "__main__": diff --git a/tools/calibration_classification/README.md b/tools/calibration_classification/README.md index 713238e2..f170e90d 100644 --- a/tools/calibration_classification/README.md +++ b/tools/calibration_classification/README.md @@ -212,9 +212,7 @@ tensorboard --logdir work_dirs --bind_all ``` ## 5. Evaluation -### 5.1. PyTorch Model Evaluation - -- Evaluation +PyTorch Model Evaluation ```sh python tools/calibration_classification/test.py {config_file} {checkpoint_file} @@ -224,6 +222,7 @@ python tools/calibration_classification/test.py {config_file} {checkpoint_file} python tools/calibration_classification/test.py projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py epoch_25.pth --out {output_file} ``` +For ONNX and TensorRT Evaluation check [Section 6.4](#64-evaluate-onnx-and-tensorrt-models). ## 6. Deployment @@ -425,8 +424,7 @@ backend_config = dict( ```sh python projects/CalibrationStatusClassification/deploy/main.py \ projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py \ - projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py \ - epoch_25.pth + projects/CalibrationStatusClassification/configs/t4dataset/resnet18_5ch_1xb16-50e_j6gen2.py ``` **Output**: `work_dirs/end2end.engine` (INT8 TensorRT engine) diff --git a/tools/calibration_classification/visualize_lidar_camera_projection.py b/tools/calibration_classification/visualize_lidar_camera_projection.py index 2a2bf85d..2beb3a7f 100644 --- a/tools/calibration_classification/visualize_lidar_camera_projection.py +++ b/tools/calibration_classification/visualize_lidar_camera_projection.py @@ -15,9 +15,9 @@ ) -class CalibrationToolkit: +class CalibrationClassifierToolkit: """ - A comprehensive tool for processing LiDAR-camera calibration data. + A tool for processing LiDAR-camera calibration data. 
This class provides functionality to load calibration data from info.pkl files and optionally generate visualizations or save results as NPZ files. Attributes: @@ -30,7 +30,7 @@ class CalibrationToolkit: def __init__(self, model_cfg: Config, data_root: Optional[str] = None, output_dir: Optional[str] = None): """ - Initialize the CalibrationToolkit. + Initialize the CalibrationClassifierToolkit. Args: model_cfg: Model configuration data_root: Root directory for data files. If None, absolute paths are used. @@ -40,7 +40,7 @@ def __init__(self, model_cfg: Config, data_root: Optional[str] = None, output_di self.data_root = data_root self.output_dir = output_dir self.transform = None - self.logger = MMLogger.get_instance(name="calibration_toolkit") + self.logger = MMLogger.get_instance(name="calibration_classifier_toolkit") self.collected_results = [] self._initialize_transform() @@ -50,7 +50,6 @@ def _initialize_transform(self) -> None: if transform_config is None: raise ValueError("transform_config not found in model configuration") - # Only set projection_vis_dir if output_dir is provided (for visualization) projection_vis_dir = self.output_dir if self.output_dir else None self.transform = CalibrationClassificationTransform( @@ -339,7 +338,7 @@ def main() -> None: model_cfg = Config.fromfile(args.model_cfg) # Initialize toolkit - toolkit = CalibrationToolkit(model_cfg=model_cfg, data_root=args.data_root, output_dir=args.output_dir) + toolkit = CalibrationClassifierToolkit(model_cfg=model_cfg, data_root=args.data_root, output_dir=args.output_dir) # Process indices argument processed_indices = None From a29452e533da35a0e9bead43cedc7ce43dc0b6a7 Mon Sep 17 00:00:00 2001 From: vividf Date: Wed, 8 Oct 2025 14:28:18 +0900 Subject: [PATCH 17/19] feat: new design for deployment Signed-off-by: vividf --- autoware_ml/deployment/__init__.py | 20 + autoware_ml/deployment/backends/__init__.py | 13 + .../deployment/backends/base_backend.py | 85 ++ .../deployment/backends/onnx_backend.py | 80 ++ .../deployment/backends/pytorch_backend.py | 79 ++ .../deployment/backends/tensorrt_backend.py | 135 ++++ autoware_ml/deployment/cli/__init__.py | 3 + autoware_ml/deployment/core/__init__.py | 25 + autoware_ml/deployment/core/base_config.py | 213 +++++ .../deployment/core/base_data_loader.py | 118 +++ autoware_ml/deployment/core/base_evaluator.py | 138 ++++ autoware_ml/deployment/core/verification.py | 178 +++++ autoware_ml/deployment/exporters/__init__.py | 11 + .../deployment/exporters/base_exporter.py | 67 ++ .../deployment/exporters/onnx_exporter.py | 121 +++ .../deployment/exporters/tensorrt_exporter.py | 157 ++++ .../configs/deploy/resnet18_5ch.py | 13 +- .../deploy/__init__.py | 26 +- .../deploy/config.py | 161 ---- .../deploy/data_loader.py | 183 +++++ .../deploy/evaluator.py | 741 +++++++++--------- .../deploy/exporters.py | 680 ---------------- .../deploy/main.py | 223 ++++-- tools/calibration_classification/README.md | 6 +- 24 files changed, 2163 insertions(+), 1313 deletions(-) create mode 100644 autoware_ml/deployment/__init__.py create mode 100644 autoware_ml/deployment/backends/__init__.py create mode 100644 autoware_ml/deployment/backends/base_backend.py create mode 100644 autoware_ml/deployment/backends/onnx_backend.py create mode 100644 autoware_ml/deployment/backends/pytorch_backend.py create mode 100644 autoware_ml/deployment/backends/tensorrt_backend.py create mode 100644 autoware_ml/deployment/cli/__init__.py create mode 100644 autoware_ml/deployment/core/__init__.py create mode 100644 
autoware_ml/deployment/core/base_config.py create mode 100644 autoware_ml/deployment/core/base_data_loader.py create mode 100644 autoware_ml/deployment/core/base_evaluator.py create mode 100644 autoware_ml/deployment/core/verification.py create mode 100644 autoware_ml/deployment/exporters/__init__.py create mode 100644 autoware_ml/deployment/exporters/base_exporter.py create mode 100644 autoware_ml/deployment/exporters/onnx_exporter.py create mode 100644 autoware_ml/deployment/exporters/tensorrt_exporter.py delete mode 100644 projects/CalibrationStatusClassification/deploy/config.py create mode 100644 projects/CalibrationStatusClassification/deploy/data_loader.py delete mode 100644 projects/CalibrationStatusClassification/deploy/exporters.py diff --git a/autoware_ml/deployment/__init__.py b/autoware_ml/deployment/__init__.py new file mode 100644 index 00000000..01d77ae9 --- /dev/null +++ b/autoware_ml/deployment/__init__.py @@ -0,0 +1,20 @@ +""" +Autoware ML Unified Deployment Framework + +This package provides a unified, task-agnostic deployment framework for +exporting, verifying, and evaluating machine learning models across different +tasks (classification, detection, segmentation, etc.) and backends (ONNX, +TensorRT, TorchScript, etc.). +""" + +from .core.base_config import BaseDeploymentConfig +from .core.base_data_loader import BaseDataLoader +from .core.base_evaluator import BaseEvaluator + +__all__ = [ + "BaseDeploymentConfig", + "BaseDataLoader", + "BaseEvaluator", +] + +__version__ = "1.0.0" diff --git a/autoware_ml/deployment/backends/__init__.py b/autoware_ml/deployment/backends/__init__.py new file mode 100644 index 00000000..59e790d7 --- /dev/null +++ b/autoware_ml/deployment/backends/__init__.py @@ -0,0 +1,13 @@ +"""Inference backends for different model formats.""" + +from .base_backend import BaseBackend +from .onnx_backend import ONNXBackend +from .pytorch_backend import PyTorchBackend +from .tensorrt_backend import TensorRTBackend + +__all__ = [ + "BaseBackend", + "PyTorchBackend", + "ONNXBackend", + "TensorRTBackend", +] diff --git a/autoware_ml/deployment/backends/base_backend.py b/autoware_ml/deployment/backends/base_backend.py new file mode 100644 index 00000000..b57669c3 --- /dev/null +++ b/autoware_ml/deployment/backends/base_backend.py @@ -0,0 +1,85 @@ +""" +Abstract base class for inference backends. + +Provides a unified interface for running inference across different +backend formats (PyTorch, ONNX, TensorRT, etc.). +""" + +from abc import ABC, abstractmethod +from typing import Any, Dict, Tuple + +import numpy as np +import torch + + +class BaseBackend(ABC): + """ + Abstract base class for inference backends. + + This class defines a unified interface for running inference + across different model formats and runtime engines. + """ + + def __init__(self, model_path: str, device: str = "cpu"): + """ + Initialize backend. + + Args: + model_path: Path to model file + device: Device to run inference on ('cpu', 'cuda', 'cuda:0', etc.) + """ + self.model_path = model_path + self.device = device + self._model = None + + @abstractmethod + def load_model(self) -> None: + """ + Load model from file. + + Raises: + FileNotFoundError: If model file doesn't exist + RuntimeError: If model loading fails + """ + pass + + @abstractmethod + def infer(self, input_tensor: torch.Tensor) -> Tuple[np.ndarray, float]: + """ + Run inference on input tensor. 
+ + Args: + input_tensor: Input tensor for inference + + Returns: + Tuple of (output_array, latency_ms): + - output_array: Model output as numpy array + - latency_ms: Inference time in milliseconds + + Raises: + RuntimeError: If inference fails + ValueError: If input format is invalid + """ + pass + + def __enter__(self): + """Context manager entry.""" + self.load_model() + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + """Context manager exit.""" + self.cleanup() + + def cleanup(self) -> None: + """ + Clean up resources. + + Override this method in subclasses to release backend-specific resources. + """ + pass + + @property + def is_loaded(self) -> bool: + """Check if model is loaded.""" + return self._model is not None diff --git a/autoware_ml/deployment/backends/onnx_backend.py b/autoware_ml/deployment/backends/onnx_backend.py new file mode 100644 index 00000000..3fa51d82 --- /dev/null +++ b/autoware_ml/deployment/backends/onnx_backend.py @@ -0,0 +1,80 @@ +"""ONNX Runtime inference backend.""" + +import os +import time +from typing import Tuple + +import numpy as np +import onnxruntime as ort +import torch + +from .base_backend import BaseBackend + + +class ONNXBackend(BaseBackend): + """ + ONNX Runtime inference backend. + + Runs inference using ONNX Runtime on CPU or CUDA. + """ + + def __init__(self, model_path: str, device: str = "cpu"): + """ + Initialize ONNX backend. + + Args: + model_path: Path to ONNX model file + device: Device to run inference on ('cpu' or 'cuda') + """ + super().__init__(model_path, device) + self._session = None + + def load_model(self) -> None: + """Load ONNX model.""" + if not os.path.exists(self.model_path): + raise FileNotFoundError(f"ONNX model not found: {self.model_path}") + + # Select execution provider based on device + if self.device.startswith("cuda"): + providers = ["CUDAExecutionProvider", "CPUExecutionProvider"] + else: + providers = ["CPUExecutionProvider"] + + try: + self._session = ort.InferenceSession(self.model_path, providers=providers) + self._model = self._session # For is_loaded check + except Exception as e: + raise RuntimeError(f"Failed to load ONNX model: {e}") + + def infer(self, input_tensor: torch.Tensor) -> Tuple[np.ndarray, float]: + """ + Run inference on input tensor. + + Args: + input_tensor: Input tensor for inference + + Returns: + Tuple of (output_array, latency_ms) + """ + if not self.is_loaded: + raise RuntimeError("Model not loaded. Call load_model() first.") + + input_array = input_tensor.cpu().numpy() + + # Prepare input dictionary + input_name = self._session.get_inputs()[0].name + onnx_input = {input_name: input_array} + + # Run inference + start_time = time.perf_counter() + output = self._session.run(None, onnx_input)[0] + end_time = time.perf_counter() + + latency_ms = (end_time - start_time) * 1000 + + return output, latency_ms + + def cleanup(self) -> None: + """Clean up ONNX Runtime resources.""" + self._session = None + self._model = None diff --git a/autoware_ml/deployment/backends/pytorch_backend.py b/autoware_ml/deployment/backends/pytorch_backend.py new file mode 100644 index 00000000..83d9f703 --- /dev/null +++ b/autoware_ml/deployment/backends/pytorch_backend.py @@ -0,0 +1,79 @@ +"""PyTorch inference backend.""" + +import time +from typing import Tuple + +import numpy as np +import torch + +from .base_backend import BaseBackend + + +class PyTorchBackend(BaseBackend): + """ + PyTorch inference backend. + + Runs inference using native PyTorch models. 
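As a quick illustration of the backend interface added above, here is a minimal usage sketch. It assumes the `autoware_ml.deployment.backends` package from this patch is importable; the ONNX file path and the input tensor shape are placeholders, not values taken from the patch.

```python
import torch

from autoware_ml.deployment.backends import ONNXBackend

# Placeholder input shape; a real caller would use the model's expected shape.
dummy_input = torch.randn(1, 5, 224, 224)

# BaseBackend provides context-manager support:
# __enter__ calls load_model(), __exit__ calls cleanup().
with ONNXBackend("work_dirs/end2end.onnx", device="cpu") as backend:
    output, latency_ms = backend.infer(dummy_input)
    print(f"output shape: {output.shape}, latency: {latency_ms:.2f} ms")
```

The same pattern applies to `PyTorchBackend` and `TensorRTBackend`, since all backends share the `load_model()` / `infer()` / `cleanup()` contract.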
+ """ + + def __init__(self, model: torch.nn.Module, device: str = "cpu"): + """ + Initialize PyTorch backend. + + Args: + model: PyTorch model instance (already loaded) + device: Device to run inference on + """ + super().__init__(model_path="", device=device) + self._model = model + self._torch_device = torch.device(device) + self._model.to(self._torch_device) + self._model.eval() + + def load_model(self) -> None: + """Model is already loaded in __init__.""" + if self._model is None: + raise RuntimeError("Model was not provided during initialization") + + def infer(self, input_tensor: torch.Tensor) -> Tuple[np.ndarray, float]: + """ + Run inference on input tensor. + + Args: + input_tensor: Input tensor for inference + + Returns: + Tuple of (output_array, latency_ms) + """ + if not self.is_loaded: + raise RuntimeError("Model not loaded. Call load_model() first.") + + # Move input to correct device + input_tensor = input_tensor.to(self._torch_device) + + # Run inference with timing + with torch.no_grad(): + start_time = time.perf_counter() + output = self._model(input_tensor) + end_time = time.perf_counter() + + latency_ms = (end_time - start_time) * 1000 + + # Handle different output formats + if hasattr(output, "output"): + output = output.output + elif isinstance(output, dict) and "output" in output: + output = output["output"] + + if not isinstance(output, torch.Tensor): + raise ValueError(f"Unexpected PyTorch output type: {type(output)}") + + # Convert to numpy + output_array = output.cpu().numpy() + + return output_array, latency_ms + + def cleanup(self) -> None: + """Clean up PyTorch resources.""" + if torch.cuda.is_available(): + torch.cuda.empty_cache() diff --git a/autoware_ml/deployment/backends/tensorrt_backend.py b/autoware_ml/deployment/backends/tensorrt_backend.py new file mode 100644 index 00000000..366ac2a1 --- /dev/null +++ b/autoware_ml/deployment/backends/tensorrt_backend.py @@ -0,0 +1,135 @@ +"""TensorRT inference backend.""" + +import os +import time +from typing import Tuple + +import numpy as np +import pycuda.autoinit # noqa: F401 +import pycuda.driver as cuda +import tensorrt as trt +import torch + +from .base_backend import BaseBackend + + +class TensorRTBackend(BaseBackend): + """ + TensorRT inference backend. + + Runs inference using NVIDIA TensorRT engine. + """ + + def __init__(self, model_path: str, device: str = "cuda:0"): + """ + Initialize TensorRT backend. + + Args: + model_path: Path to TensorRT engine file + device: CUDA device to use (ignored, TensorRT uses current CUDA context) + """ + super().__init__(model_path, device) + self._engine = None + self._context = None + self._logger = trt.Logger(trt.Logger.WARNING) + + def load_model(self) -> None: + """Load TensorRT engine.""" + if not os.path.exists(self.model_path): + raise FileNotFoundError(f"TensorRT engine not found: {self.model_path}") + + # Initialize TensorRT + trt.init_libnvinfer_plugins(self._logger, "") + runtime = trt.Runtime(self._logger) + + # Load engine + try: + with open(self.model_path, "rb") as f: + self._engine = runtime.deserialize_cuda_engine(f.read()) + + if self._engine is None: + raise RuntimeError("Failed to deserialize TensorRT engine") + + self._context = self._engine.create_execution_context() + self._model = self._engine # For is_loaded check + except Exception as e: + raise RuntimeError(f"Failed to load TensorRT engine: {e}") + + def infer(self, input_tensor: torch.Tensor) -> Tuple[np.ndarray, float]: + """ + Run inference on input tensor. 
+ + Args: + input_tensor: Input tensor for inference + + Returns: + Tuple of (output_array, latency_ms) + """ + if not self.is_loaded: + raise RuntimeError("Model not loaded. Call load_model() first.") + + # Convert to numpy and ensure correct format + input_array = input_tensor.cpu().numpy().astype(np.float32) + if not input_array.flags["C_CONTIGUOUS"]: + input_array = np.ascontiguousarray(input_array) + + # Get tensor names + input_name = None + output_name = None + for i in range(self._engine.num_io_tensors): + tensor_name = self._engine.get_tensor_name(i) + if self._engine.get_tensor_mode(tensor_name) == trt.TensorIOMode.INPUT: + input_name = tensor_name + elif self._engine.get_tensor_mode(tensor_name) == trt.TensorIOMode.OUTPUT: + output_name = tensor_name + + if input_name is None or output_name is None: + raise RuntimeError("Could not find input/output tensor names") + + # Set input shape and get output shape + self._context.set_input_shape(input_name, input_array.shape) + output_shape = self._context.get_tensor_shape(output_name) + output_array = np.empty(output_shape, dtype=np.float32) + if not output_array.flags["C_CONTIGUOUS"]: + output_array = np.ascontiguousarray(output_array) + + # Allocate GPU memory + d_input = cuda.mem_alloc(input_array.nbytes) + d_output = cuda.mem_alloc(output_array.nbytes) + + # Create CUDA stream and events for timing + stream = cuda.Stream() + start = cuda.Event() + end = cuda.Event() + + try: + # Set tensor addresses + self._context.set_tensor_address(input_name, int(d_input)) + self._context.set_tensor_address(output_name, int(d_output)) + + # Run inference with timing + cuda.memcpy_htod_async(d_input, input_array, stream) + start.record(stream) + self._context.execute_async_v3(stream_handle=stream.handle) + end.record(stream) + cuda.memcpy_dtoh_async(output_array, d_output, stream) + stream.synchronize() + + latency_ms = end.time_since(start) + + return output_array, latency_ms + + finally: + # Cleanup GPU memory + try: + d_input.free() + d_output.free() + except Exception: + # Silently ignore cleanup errors + pass + + def cleanup(self) -> None: + """Clean up TensorRT resources.""" + self._context = None + self._engine = None + self._model = None diff --git a/autoware_ml/deployment/cli/__init__.py b/autoware_ml/deployment/cli/__init__.py new file mode 100644 index 00000000..3036280e --- /dev/null +++ b/autoware_ml/deployment/cli/__init__.py @@ -0,0 +1,3 @@ +"""Command-line interface for deployment.""" + +__all__ = [] diff --git a/autoware_ml/deployment/core/__init__.py b/autoware_ml/deployment/core/__init__.py new file mode 100644 index 00000000..f5969ce4 --- /dev/null +++ b/autoware_ml/deployment/core/__init__.py @@ -0,0 +1,25 @@ +"""Core components for deployment framework.""" + +from .base_config import ( + BackendConfig, + BaseDeploymentConfig, + ExportConfig, + RuntimeConfig, + parse_base_args, + setup_logging, +) +from .base_data_loader import BaseDataLoader +from .base_evaluator import BaseEvaluator +from .verification import verify_model_outputs + +__all__ = [ + "BaseDeploymentConfig", + "ExportConfig", + "RuntimeConfig", + "BackendConfig", + "setup_logging", + "parse_base_args", + "BaseDataLoader", + "BaseEvaluator", + "verify_model_outputs", +] diff --git a/autoware_ml/deployment/core/base_config.py b/autoware_ml/deployment/core/base_config.py new file mode 100644 index 00000000..9c88a1be --- /dev/null +++ b/autoware_ml/deployment/core/base_config.py @@ -0,0 +1,213 @@ +""" +Base configuration classes for deployment framework. 
+ +This module provides the foundation for task-agnostic deployment configuration. +Task-specific deployment configs should extend BaseDeploymentConfig. +""" + +import argparse +import logging +from typing import Any, Dict, List, Optional + +from mmengine.config import Config + +# Constants +DEFAULT_VERIFICATION_TOLERANCE = 1e-3 +DEFAULT_WORKSPACE_SIZE = 1 << 30 # 1 GB + +# Precision policy mapping for TensorRT +PRECISION_POLICIES = { + "auto": {}, # No special flags, TensorRT decides + "fp16": {"FP16": True}, + "fp32_tf32": {"TF32": True}, # TF32 for FP32 operations + "explicit_int8": {"INT8": True}, + "strongly_typed": {"STRONGLY_TYPED": True}, # Network creation flag +} + + +class ExportConfig: + """Configuration for model export settings.""" + + def __init__(self, config_dict: Dict[str, Any]): + self.mode = config_dict.get("mode", "both") + self.verify = config_dict.get("verify", False) + self.device = config_dict.get("device", "cuda:0") + self.work_dir = config_dict.get("work_dir", "work_dirs") + + def should_export_onnx(self) -> bool: + """Check if ONNX export is requested.""" + return self.mode in ["onnx", "both"] + + def should_export_tensorrt(self) -> bool: + """Check if TensorRT export is requested.""" + return self.mode in ["trt", "both"] + + +class RuntimeConfig: + """Configuration for runtime I/O settings.""" + + def __init__(self, config_dict: Dict[str, Any]): + self._config = config_dict + + def get(self, key: str, default: Any = None) -> Any: + """Get a runtime configuration value.""" + return self._config.get(key, default) + + def __getitem__(self, key: str) -> Any: + """Dictionary-style access to runtime config.""" + return self._config[key] + + +class BackendConfig: + """Configuration for backend-specific settings.""" + + def __init__(self, config_dict: Dict[str, Any]): + self.common_config = config_dict.get("common_config", {}) + self.model_inputs = config_dict.get("model_inputs", []) + + def get_precision_policy(self) -> str: + """Get precision policy name.""" + return self.common_config.get("precision_policy", "auto") + + def get_precision_flags(self) -> Dict[str, bool]: + """Get TensorRT precision flags for the configured policy.""" + policy = self.get_precision_policy() + return PRECISION_POLICIES.get(policy, {}) + + def get_max_workspace_size(self) -> int: + """Get maximum workspace size for TensorRT.""" + return self.common_config.get("max_workspace_size", DEFAULT_WORKSPACE_SIZE) + + +class BaseDeploymentConfig: + """ + Base configuration container for deployment settings. + + This class provides a task-agnostic interface for deployment configuration. + Task-specific configs should extend this class and add task-specific settings. + """ + + def __init__(self, deploy_cfg: Config): + """ + Initialize deployment configuration. + + Args: + deploy_cfg: MMEngine Config object containing deployment settings + """ + self.deploy_cfg = deploy_cfg + self._validate_config() + + # Initialize config sections + self.export_config = ExportConfig(deploy_cfg.get("export", {})) + self.runtime_config = RuntimeConfig(deploy_cfg.get("runtime_io", {})) + self.backend_config = BackendConfig(deploy_cfg.get("backend_config", {})) + + def _validate_config(self) -> None: + """Validate configuration structure and required fields.""" + # Validate required sections + if "export" not in self.deploy_cfg: + raise ValueError( + "Missing 'export' section in deploy config. " "Please update your config to include 'export' section." 
+ ) + + # Validate export mode + valid_modes = ["onnx", "trt", "both", "none"] + mode = self.deploy_cfg.get("export", {}).get("mode", "both") + if mode not in valid_modes: + raise ValueError(f"Invalid export mode '{mode}'. Must be one of {valid_modes}") + + # Validate precision policy if present + backend_cfg = self.deploy_cfg.get("backend_config", {}) + common_cfg = backend_cfg.get("common_config", {}) + precision_policy = common_cfg.get("precision_policy", "auto") + if precision_policy not in PRECISION_POLICIES: + raise ValueError( + f"Invalid precision_policy '{precision_policy}'. " f"Must be one of {list(PRECISION_POLICIES.keys())}" + ) + + @property + def evaluation_config(self) -> Dict: + """Get evaluation configuration.""" + return self.deploy_cfg.get("evaluation", {}) + + @property + def onnx_config(self) -> Dict: + """Get ONNX configuration.""" + return self.deploy_cfg.get("onnx_config", {}) + + def get_onnx_settings(self) -> Dict[str, Any]: + """ + Get ONNX export settings. + + Returns: + Dictionary containing ONNX export parameters + """ + onnx_config = self.onnx_config + return { + "opset_version": onnx_config.get("opset_version", 16), + "do_constant_folding": onnx_config.get("do_constant_folding", True), + "input_names": onnx_config.get("input_names", ["input"]), + "output_names": onnx_config.get("output_names", ["output"]), + "dynamic_axes": onnx_config.get("dynamic_axes"), + "export_params": onnx_config.get("export_params", True), + "keep_initializers_as_inputs": onnx_config.get("keep_initializers_as_inputs", False), + "save_file": onnx_config.get("save_file", "model.onnx"), + } + + def get_tensorrt_settings(self) -> Dict[str, Any]: + """ + Get TensorRT export settings with precision policy support. + + Returns: + Dictionary containing TensorRT export parameters + """ + return { + "max_workspace_size": self.backend_config.get_max_workspace_size(), + "precision_policy": self.backend_config.get_precision_policy(), + "policy_flags": self.backend_config.get_precision_flags(), + "model_inputs": self.backend_config.model_inputs, + } + + +def setup_logging(level: str = "INFO") -> logging.Logger: + """ + Setup logging configuration. + + Args: + level: Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL) + + Returns: + Configured logger instance + """ + logging.basicConfig(level=getattr(logging, level), format="%(levelname)s:%(name)s:%(message)s") + return logging.getLogger("deployment") + + +def parse_base_args(parser: Optional[argparse.ArgumentParser] = None) -> argparse.ArgumentParser: + """ + Create argument parser with common deployment arguments. 
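For reference, a minimal deploy config that passes `_validate_config()` and exercises the precision-policy lookup might look like the sketch below. The section names follow `base_config.py`, but the concrete values (paths, workspace size, policy) are illustrative assumptions, not the shipped `resnet18_5ch.py` settings.

```python
from mmengine.config import Config

from autoware_ml.deployment import BaseDeploymentConfig

# Illustrative deploy config; values are placeholders.
deploy_cfg = Config(
    dict(
        export=dict(mode="both", verify=True, device="cuda:0", work_dir="work_dirs"),
        runtime_io=dict(info_pkl="data/info.pkl", sample_idx=0),
        backend_config=dict(
            common_config=dict(max_workspace_size=1 << 30, precision_policy="fp16"),
            model_inputs=[],
        ),
        onnx_config=dict(save_file="end2end.onnx", opset_version=16),
    )
)

config = BaseDeploymentConfig(deploy_cfg)
print(config.export_config.should_export_tensorrt())  # True, since mode="both"
print(config.get_tensorrt_settings()["policy_flags"])  # {"FP16": True}
```

An invalid `export.mode` or an unknown `precision_policy` raises `ValueError` at construction time, so configuration mistakes surface before any export work starts.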
+ + Args: + parser: Optional existing ArgumentParser to add arguments to + + Returns: + ArgumentParser with deployment arguments + """ + if parser is None: + parser = argparse.ArgumentParser( + description="Deploy model to ONNX/TensorRT", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + + parser.add_argument("deploy_cfg", help="Deploy config path") + parser.add_argument("model_cfg", help="Model config path") + parser.add_argument( + "checkpoint", nargs="?", default=None, help="Model checkpoint path (optional when mode='none')" + ) + + # Optional overrides + parser.add_argument("--work-dir", help="Override output directory from config") + parser.add_argument("--device", help="Override device from config") + parser.add_argument("--log-level", default="INFO", choices=list(logging._nameToLevel.keys()), help="Logging level") + + return parser diff --git a/autoware_ml/deployment/core/base_data_loader.py b/autoware_ml/deployment/core/base_data_loader.py new file mode 100644 index 00000000..b0ee3313 --- /dev/null +++ b/autoware_ml/deployment/core/base_data_loader.py @@ -0,0 +1,118 @@ +""" +Abstract base class for data loading in deployment. + +Each task (classification, detection, segmentation, etc.) must implement +a concrete DataLoader that extends this base class. +""" + +from abc import ABC, abstractmethod +from typing import Any, Dict + +import torch + + +class BaseDataLoader(ABC): + """ + Abstract base class for task-specific data loaders. + + This class defines the interface that all task-specific data loaders + must implement. It handles loading raw data from disk and preprocessing + it into a format suitable for model inference. + """ + + def __init__(self, config: Dict[str, Any]): + """ + Initialize data loader. + + Args: + config: Configuration dictionary containing task-specific settings + """ + self.config = config + + @abstractmethod + def load_sample(self, index: int) -> Dict[str, Any]: + """ + Load a single sample from the dataset. + + Args: + index: Sample index to load + + Returns: + Dictionary containing raw sample data. Structure is task-specific, + but should typically include: + - Raw input data (image, point cloud, etc.) + - Ground truth labels/annotations (if available) + - Any metadata needed for evaluation + + Raises: + IndexError: If index is out of range + FileNotFoundError: If sample data files don't exist + """ + pass + + @abstractmethod + def preprocess(self, sample: Dict[str, Any]) -> torch.Tensor: + """ + Preprocess raw sample data into model input format. + + Args: + sample: Raw sample data returned by load_sample() + + Returns: + Preprocessed tensor ready for model inference. + Shape and format depend on the specific task. + + Raises: + ValueError: If sample format is invalid + """ + pass + + @abstractmethod + def get_num_samples(self) -> int: + """ + Get total number of samples in the dataset. + + Returns: + Total number of samples available + """ + pass + + def load_and_preprocess(self, index: int) -> torch.Tensor: + """ + Convenience method to load and preprocess a sample in one call. + + Args: + index: Sample index to load + + Returns: + Preprocessed tensor ready for inference + """ + sample = self.load_sample(index) + return self.preprocess(sample) + + def get_batch(self, indices: list) -> torch.Tensor: + """ + Load and preprocess multiple samples into a batch. + + Args: + indices: List of sample indices to load + + Returns: + Batched tensor with shape [batch_size, ...] 
+ """ + tensors = [self.load_and_preprocess(idx) for idx in indices] + return torch.stack(tensors) + + def validate_sample(self, sample: Dict[str, Any]) -> bool: + """ + Validate that a sample has the expected structure. + + Override this method in task-specific loaders to add validation. + + Args: + sample: Sample to validate + + Returns: + True if valid, False otherwise + """ + return True diff --git a/autoware_ml/deployment/core/base_evaluator.py b/autoware_ml/deployment/core/base_evaluator.py new file mode 100644 index 00000000..9d9aa1ae --- /dev/null +++ b/autoware_ml/deployment/core/base_evaluator.py @@ -0,0 +1,138 @@ +""" +Abstract base class for model evaluation in deployment. + +Each task (classification, detection, segmentation, etc.) must implement +a concrete Evaluator that extends this base class to compute task-specific metrics. +""" + +from abc import ABC, abstractmethod +from typing import Any, Dict + +import numpy as np + +from .base_data_loader import BaseDataLoader + + +class BaseEvaluator(ABC): + """ + Abstract base class for task-specific evaluators. + + This class defines the interface that all task-specific evaluators + must implement. It handles running inference on a dataset and computing + evaluation metrics appropriate for the task. + """ + + def __init__(self, config: Dict[str, Any]): + """ + Initialize evaluator. + + Args: + config: Configuration dictionary containing evaluation settings + """ + self.config = config + + @abstractmethod + def evaluate( + self, + model_path: str, + data_loader: BaseDataLoader, + num_samples: int, + backend: str = "pytorch", + device: str = "cpu", + verbose: bool = False, + ) -> Dict[str, Any]: + """ + Run full evaluation on a model. + + Args: + model_path: Path to model checkpoint/weights + data_loader: DataLoader for loading samples + num_samples: Number of samples to evaluate + backend: Backend to use ('pytorch', 'onnx', 'tensorrt') + device: Device to run inference on + verbose: Whether to print detailed progress + + Returns: + Dictionary containing evaluation metrics. The exact metrics + depend on the task, but should include: + - Primary metric(s) for the task + - Per-class metrics (if applicable) + - Inference latency statistics + - Any other relevant metrics + + Example: + For classification: + { + "accuracy": 0.95, + "precision": 0.94, + "recall": 0.96, + "per_class_accuracy": {...}, + "confusion_matrix": [...], + "avg_latency_ms": 5.2, + } + + For detection: + { + "mAP": 0.72, + "mAP_50": 0.85, + "mAP_75": 0.68, + "per_class_ap": {...}, + "avg_latency_ms": 15.3, + } + """ + pass + + @abstractmethod + def print_results(self, results: Dict[str, Any]) -> None: + """ + Pretty print evaluation results. + + Args: + results: Results dictionary returned by evaluate() + """ + pass + + def compute_latency_stats(self, latencies: list) -> Dict[str, float]: + """ + Compute latency statistics from a list of latency measurements. 
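To make the data-loader contract above concrete, here is a toy `BaseDataLoader` subclass. It serves random tensors instead of real samples and exists purely to show which methods a task-specific loader must implement; the channel count and shapes are assumptions for illustration.

```python
from typing import Any, Dict

import torch

from autoware_ml.deployment.core import BaseDataLoader


class RandomDataLoader(BaseDataLoader):
    """Toy loader returning random 5-channel images; illustration only."""

    def load_sample(self, index: int) -> Dict[str, Any]:
        if index >= self.get_num_samples():
            raise IndexError(f"index {index} out of range")
        # A real loader would read image/point-cloud data and labels here.
        return {"img": torch.randn(5, 224, 224), "gt_label": index % 2}

    def preprocess(self, sample: Dict[str, Any]) -> torch.Tensor:
        # A real loader would run the task's transform pipeline here.
        return sample["img"].float()

    def get_num_samples(self) -> int:
        return self.config.get("num_samples", 10)


loader = RandomDataLoader({"num_samples": 4})
batch = loader.get_batch([0, 1])  # tensor of shape [2, 5, 224, 224]
```

The inherited `load_and_preprocess()` and `get_batch()` helpers then work without any extra code in the subclass.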
+ + Args: + latencies: List of latency values in milliseconds + + Returns: + Dictionary with latency statistics + """ + if not latencies: + return { + "mean_ms": 0.0, + "std_ms": 0.0, + "min_ms": 0.0, + "max_ms": 0.0, + "median_ms": 0.0, + } + + latencies_array = np.array(latencies) + + return { + "mean_ms": float(np.mean(latencies_array)), + "std_ms": float(np.std(latencies_array)), + "min_ms": float(np.min(latencies_array)), + "max_ms": float(np.max(latencies_array)), + "median_ms": float(np.median(latencies_array)), + } + + def format_latency_stats(self, stats: Dict[str, float]) -> str: + """ + Format latency statistics as a readable string. + + Args: + stats: Latency statistics dictionary + + Returns: + Formatted string + """ + return ( + f"Latency: {stats['mean_ms']:.2f} ± {stats['std_ms']:.2f} ms " + f"(min: {stats['min_ms']:.2f}, max: {stats['max_ms']:.2f}, " + f"median: {stats['median_ms']:.2f})" + ) diff --git a/autoware_ml/deployment/core/verification.py b/autoware_ml/deployment/core/verification.py new file mode 100644 index 00000000..c1cfe87c --- /dev/null +++ b/autoware_ml/deployment/core/verification.py @@ -0,0 +1,178 @@ +""" +Unified model verification module. + +Provides utilities for verifying exported models against reference PyTorch outputs. +""" + +import logging +from typing import Dict, List, Optional + +import numpy as np +import torch + +from ..backends import BaseBackend, ONNXBackend, PyTorchBackend, TensorRTBackend + +DEFAULT_TOLERANCE = 1e-3 + + +def verify_model_outputs( + pytorch_model: torch.nn.Module, + test_inputs: Dict[str, torch.Tensor], + onnx_path: Optional[str] = None, + tensorrt_path: Optional[str] = None, + device: str = "cpu", + tolerance: float = DEFAULT_TOLERANCE, + logger: logging.Logger = None, +) -> Dict[str, bool]: + """ + Verify exported models against PyTorch reference. 
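+
+    For each test input, the PyTorch model provides the reference output;
+    each requested backend (ONNX and/or TensorRT) is run on the same input
+    and passes when the maximum absolute difference stays below ``tolerance``.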
+ + Args: + pytorch_model: Reference PyTorch model + test_inputs: Dictionary of test inputs (e.g., {'sample1': tensor1, ...}) + onnx_path: Optional path to ONNX model + tensorrt_path: Optional path to TensorRT engine + device: Device for PyTorch inference + tolerance: Maximum allowed difference + logger: Optional logger instance + + Returns: + Dictionary with verification results for each backend + """ + if logger is None: + logger = logging.getLogger(__name__) + + results = {} + + # Run PyTorch inference to get reference outputs + logger.info("=" * 60) + logger.info("Running verification...") + logger.info("=" * 60) + + pytorch_backend = PyTorchBackend(pytorch_model, device=device) + pytorch_backend.load_model() + + # Verify each backend + for sample_name, input_tensor in test_inputs.items(): + logger.info(f"\n{'='*60}") + logger.info(f"Verifying sample: {sample_name}") + logger.info(f"{'='*60}") + + # Get PyTorch reference + logger.info("Running PyTorch inference...") + pytorch_output, pytorch_latency = pytorch_backend.infer(input_tensor) + logger.info(f" PyTorch latency: {pytorch_latency:.2f} ms") + logger.info(f" PyTorch output: {pytorch_output}") + + # Verify ONNX + if onnx_path: + logger.info("\nVerifying ONNX model...") + onnx_success = _verify_backend( + ONNXBackend(onnx_path, device="cpu"), + input_tensor, + pytorch_output, + tolerance, + "ONNX", + logger, + ) + results[f"{sample_name}_onnx"] = onnx_success + + # Verify TensorRT + if tensorrt_path: + logger.info("\nVerifying TensorRT model...") + trt_success = _verify_backend( + TensorRTBackend(tensorrt_path, device="cuda"), + input_tensor, + pytorch_output, + tolerance, + "TensorRT", + logger, + ) + results[f"{sample_name}_tensorrt"] = trt_success + + logger.info(f"\n{'='*60}") + logger.info("Verification Summary") + logger.info(f"{'='*60}") + for key, success in results.items(): + status = "✓ PASSED" if success else "✗ FAILED" + logger.info(f" {key}: {status}") + logger.info(f"{'='*60}") + + return results + + +def _verify_backend( + backend: BaseBackend, + input_tensor: torch.Tensor, + reference_output: np.ndarray, + tolerance: float, + backend_name: str, + logger: logging.Logger, +) -> bool: + """ + Verify a single backend against reference output. + + Args: + backend: Backend instance to verify + input_tensor: Input tensor + reference_output: Reference output from PyTorch + tolerance: Maximum allowed difference + backend_name: Name of backend for logging + logger: Logger instance + + Returns: + True if verification passed + """ + try: + with backend: + output, latency = backend.infer(input_tensor) + + logger.info(f" {backend_name} latency: {latency:.2f} ms") + logger.info(f" {backend_name} output: {output}") + + # Compare outputs + max_diff = np.abs(reference_output - output).max() + mean_diff = np.abs(reference_output - output).mean() + + logger.info(f" Max difference: {max_diff:.6f}") + logger.info(f" Mean difference: {mean_diff:.6f}") + + if max_diff < tolerance: + logger.info(f" {backend_name} verification PASSED ✓") + return True + else: + logger.warning( + f" {backend_name} verification FAILED ✗ " f"(max diff: {max_diff:.6f} > tolerance: {tolerance:.6f})" + ) + return False + + except Exception as e: + logger.error(f" {backend_name} verification failed with error: {e}") + return False + + +def compare_outputs( + output1: np.ndarray, + output2: np.ndarray, + tolerance: float = DEFAULT_TOLERANCE, +) -> Dict[str, float]: + """ + Compare two model outputs and return difference statistics. 
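+
+    The returned dictionary contains ``max_diff``, ``mean_diff``,
+    ``median_diff``, ``std_diff`` and a ``passed`` flag indicating whether
+    ``max_diff`` is below ``tolerance``.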
+ + Args: + output1: First output array + output2: Second output array + tolerance: Tolerance for comparison + + Returns: + Dictionary with comparison statistics + """ + diff = np.abs(output1 - output2) + + return { + "max_diff": float(np.max(diff)), + "mean_diff": float(np.mean(diff)), + "median_diff": float(np.median(diff)), + "std_diff": float(np.std(diff)), + "passed": float(np.max(diff)) < tolerance, + } diff --git a/autoware_ml/deployment/exporters/__init__.py b/autoware_ml/deployment/exporters/__init__.py new file mode 100644 index 00000000..e9e1bbc9 --- /dev/null +++ b/autoware_ml/deployment/exporters/__init__.py @@ -0,0 +1,11 @@ +"""Model exporters for different backends.""" + +from .base_exporter import BaseExporter +from .onnx_exporter import ONNXExporter +from .tensorrt_exporter import TensorRTExporter + +__all__ = [ + "BaseExporter", + "ONNXExporter", + "TensorRTExporter", +] diff --git a/autoware_ml/deployment/exporters/base_exporter.py b/autoware_ml/deployment/exporters/base_exporter.py new file mode 100644 index 00000000..73a89026 --- /dev/null +++ b/autoware_ml/deployment/exporters/base_exporter.py @@ -0,0 +1,67 @@ +""" +Abstract base class for model exporters. + +Provides a unified interface for exporting models to different formats. +""" + +from abc import ABC, abstractmethod +from typing import Any, Dict + +import torch + + +class BaseExporter(ABC): + """ + Abstract base class for model exporters. + + This class defines a unified interface for exporting models + to different backend formats (ONNX, TensorRT, TorchScript, etc.). + """ + + def __init__(self, config: Dict[str, Any]): + """ + Initialize exporter. + + Args: + config: Configuration dictionary for export settings + """ + self.config = config + + @abstractmethod + def export( + self, + model: torch.nn.Module, + sample_input: torch.Tensor, + output_path: str, + ) -> bool: + """ + Export model to target format. + + Args: + model: PyTorch model to export + sample_input: Sample input tensor for tracing/shape inference + output_path: Path to save exported model + + Returns: + True if export succeeded, False otherwise + + Raises: + RuntimeError: If export fails + """ + pass + + def validate_export(self, output_path: str) -> bool: + """ + Validate that the exported model file is valid. + + Override this in subclasses to add format-specific validation. + + Args: + output_path: Path to exported model file + + Returns: + True if valid, False otherwise + """ + import os + + return os.path.exists(output_path) and os.path.getsize(output_path) > 0 diff --git a/autoware_ml/deployment/exporters/onnx_exporter.py b/autoware_ml/deployment/exporters/onnx_exporter.py new file mode 100644 index 00000000..67660c2e --- /dev/null +++ b/autoware_ml/deployment/exporters/onnx_exporter.py @@ -0,0 +1,121 @@ +"""ONNX model exporter.""" + +import logging +from typing import Any, Dict + +import onnx +import onnxsim +import torch + +from .base_exporter import BaseExporter + + +class ONNXExporter(BaseExporter): + """ + ONNX model exporter. + + Exports PyTorch models to ONNX format with optional simplification. + """ + + def __init__(self, config: Dict[str, Any], logger: logging.Logger = None): + """ + Initialize ONNX exporter. + + Args: + config: ONNX export configuration + logger: Optional logger instance + """ + super().__init__(config) + self.logger = logger or logging.getLogger(__name__) + + def export( + self, + model: torch.nn.Module, + sample_input: torch.Tensor, + output_path: str, + ) -> bool: + """ + Export model to ONNX format. 
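+
+        Export settings (opset version, input/output names, dynamic axes,
+        constant folding, optional onnxsim simplification) are read from
+        ``self.config``, with the defaults shown below used as fallbacks.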
+ + Args: + model: PyTorch model to export + sample_input: Sample input tensor + output_path: Path to save ONNX model + + Returns: + True if export succeeded + """ + model.eval() + + self.logger.info("Exporting model to ONNX format...") + self.logger.info(f" Input shape: {sample_input.shape}") + self.logger.info(f" Output path: {output_path}") + self.logger.info(f" Opset version: {self.config.get('opset_version', 16)}") + + try: + with torch.no_grad(): + torch.onnx.export( + model, + sample_input, + output_path, + export_params=self.config.get("export_params", True), + keep_initializers_as_inputs=self.config.get("keep_initializers_as_inputs", False), + opset_version=self.config.get("opset_version", 16), + do_constant_folding=self.config.get("do_constant_folding", True), + input_names=self.config.get("input_names", ["input"]), + output_names=self.config.get("output_names", ["output"]), + dynamic_axes=self.config.get("dynamic_axes"), + verbose=False, + ) + + self.logger.info(f"ONNX export completed: {output_path}") + + # Optional model simplification + if self.config.get("simplify", True): + self._simplify_model(output_path) + + return True + + except Exception as e: + self.logger.error(f"ONNX export failed: {e}") + return False + + def _simplify_model(self, onnx_path: str) -> None: + """ + Simplify ONNX model using onnxsim. + + Args: + onnx_path: Path to ONNX model file + """ + self.logger.info("Simplifying ONNX model...") + try: + model_simplified, success = onnxsim.simplify(onnx_path) + if success: + onnx.save(model_simplified, onnx_path) + self.logger.info(f"ONNX model simplified successfully") + else: + self.logger.warning("ONNX model simplification failed") + except Exception as e: + self.logger.warning(f"ONNX simplification error: {e}") + + def validate_export(self, output_path: str) -> bool: + """ + Validate ONNX model. + + Args: + output_path: Path to ONNX model file + + Returns: + True if valid + """ + if not super().validate_export(output_path): + return False + + try: + model = onnx.load(output_path) + onnx.checker.check_model(model) + self.logger.info("ONNX model validation passed") + return True + except Exception as e: + self.logger.error(f"ONNX model validation failed: {e}") + return False diff --git a/autoware_ml/deployment/exporters/tensorrt_exporter.py b/autoware_ml/deployment/exporters/tensorrt_exporter.py new file mode 100644 index 00000000..fc0c4d04 --- /dev/null +++ b/autoware_ml/deployment/exporters/tensorrt_exporter.py @@ -0,0 +1,157 @@ +"""TensorRT model exporter.""" + +import logging +from typing import Any, Dict + +import tensorrt as trt +import torch + +from .base_exporter import BaseExporter + + +class TensorRTExporter(BaseExporter): + """ + TensorRT model exporter. + + Converts ONNX models to TensorRT engine format with precision policy support. + """ + + def __init__(self, config: Dict[str, Any], logger: logging.Logger = None): + """ + Initialize TensorRT exporter. + + Args: + config: TensorRT export configuration + logger: Optional logger instance + """ + super().__init__(config) + self.logger = logger or logging.getLogger(__name__) + + def export( + self, + model: torch.nn.Module, # Not used for TensorRT, kept for interface compatibility + sample_input: torch.Tensor, + output_path: str, + onnx_path: str = None, + ) -> bool: + """ + Export ONNX model to TensorRT engine. 
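+
+        ``onnx_path`` is required. The precision policy and builder flags
+        are taken from ``self.config`` (``precision_policy`` and
+        ``policy_flags``), and the optimization profile is derived from
+        ``model_inputs`` or the sample input shape.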
+ + Args: + model: Not used (TensorRT converts from ONNX) + sample_input: Sample input for shape configuration + output_path: Path to save TensorRT engine + onnx_path: Path to source ONNX model + + Returns: + True if export succeeded + """ + if onnx_path is None: + self.logger.error("onnx_path is required for TensorRT export") + return False + + precision_policy = self.config.get("precision_policy", "auto") + policy_flags = self.config.get("policy_flags", {}) + + self.logger.info(f"Building TensorRT engine with precision policy: {precision_policy}") + self.logger.info(f" ONNX source: {onnx_path}") + self.logger.info(f" Engine output: {output_path}") + + # Initialize TensorRT + trt_logger = trt.Logger(trt.Logger.WARNING) + trt.init_libnvinfer_plugins(trt_logger, "") + + builder = trt.Builder(trt_logger) + builder_config = builder.create_builder_config() + + max_workspace_size = self.config.get("max_workspace_size", 1 << 30) + builder_config.set_memory_pool_limit(pool=trt.MemoryPoolType.WORKSPACE, pool_size=max_workspace_size) + + # Create network with appropriate flags + flags = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + + # Handle strongly typed flag (network creation flag) + if policy_flags.get("STRONGLY_TYPED"): + flags |= 1 << int(trt.NetworkDefinitionCreationFlag.STRONGLY_TYPED) + self.logger.info("Using strongly typed TensorRT network creation") + + network = builder.create_network(flags) + + # Apply precision flags to builder config + for flag_name, enabled in policy_flags.items(): + if flag_name == "STRONGLY_TYPED": + continue # Already handled + if enabled and hasattr(trt.BuilderFlag, flag_name): + builder_config.set_flag(getattr(trt.BuilderFlag, flag_name)) + self.logger.info(f"BuilderFlag.{flag_name} enabled") + + # Setup optimization profile + profile = builder.create_optimization_profile() + self._configure_input_shapes(profile, sample_input) + builder_config.add_optimization_profile(profile) + + # Parse ONNX model + parser = trt.OnnxParser(network, trt_logger) + + try: + with open(onnx_path, "rb") as f: + if not parser.parse(f.read()): + self._log_parser_errors(parser) + return False + self.logger.info("Successfully parsed ONNX file") + + # Build engine + self.logger.info("Building TensorRT engine (this may take a while)...") + serialized_engine = builder.build_serialized_network(network, builder_config) + + if serialized_engine is None: + self.logger.error("Failed to build TensorRT engine") + return False + + # Save engine + with open(output_path, "wb") as f: + f.write(serialized_engine) + + self.logger.info(f"TensorRT engine saved to {output_path}") + self.logger.info(f"Engine max workspace size: {max_workspace_size / (1024**3):.2f} GB") + + return True + + except Exception as e: + self.logger.error(f"TensorRT export failed: {e}") + return False + + def _configure_input_shapes( + self, + profile: trt.IOptimizationProfile, + sample_input: torch.Tensor, + ) -> None: + """ + Configure input shapes for TensorRT optimization profile. 
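+
+        Shapes are taken from ``model_inputs`` in the config when present;
+        otherwise the sample input's shape is used for min/opt/max.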
+ + Args: + profile: TensorRT optimization profile + sample_input: Sample input tensor + """ + model_inputs = self.config.get("model_inputs", []) + + if model_inputs: + input_shapes = model_inputs[0].get("input_shapes", {}) + for input_name, shapes in input_shapes.items(): + min_shape = shapes.get("min_shape", list(sample_input.shape)) + opt_shape = shapes.get("opt_shape", list(sample_input.shape)) + max_shape = shapes.get("max_shape", list(sample_input.shape)) + + self.logger.info(f"Setting input shapes - min: {min_shape}, " f"opt: {opt_shape}, max: {max_shape}") + profile.set_shape(input_name, min_shape, opt_shape, max_shape) + else: + # Default shapes based on input tensor + input_shape = list(sample_input.shape) + self.logger.info(f"Using default input shape: {input_shape}") + profile.set_shape("input", input_shape, input_shape, input_shape) + + def _log_parser_errors(self, parser: trt.OnnxParser) -> None: + """Log TensorRT parser errors.""" + self.logger.error("Failed to parse ONNX model") + for error in range(parser.num_errors): + self.logger.error(f"Parser error: {parser.get_error(error)}") diff --git a/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py b/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py index 0c4673d8..9aa85cb7 100644 --- a/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py +++ b/projects/CalibrationStatusClassification/configs/deploy/resnet18_5ch.py @@ -8,7 +8,7 @@ # Export Configuration # ============================================================================== export = dict( - mode="both", # Export mode: "onnx", "trt", "both", or "none" + mode="none", # Export mode: "onnx", "trt", "both", or "none" # - "onnx": Export to ONNX only # - "trt": Convert to TensorRT only (requires onnx_file in runtime_io) # - "both": Export to ONNX then convert to TensorRT @@ -38,11 +38,12 @@ enabled=True, # Enable full model evaluation (set to True to run evaluation) num_samples=10, # Number of samples to evaluate from info.pkl verbose=True, # Enable verbose logging showing per-sample results - # Optional: Specify models to evaluate (if None, uses exported models from work_dir) - onnx_model="/workspace/work_dirs/end2end.onnx", # Path to ONNX model file to evaluate (e.g., "/path/to/model.onnx") - tensorrt_model="/workspace/work_dirs/end2end.engine", # Path to TensorRT engine file to evaluate (e.g., "/path/to/model.engine") - # Note: If models are None, will automatically detect and evaluate exported models - # Note: Command line args (--evaluate, --num-samples, --verbose) override these settings + # Specify models to evaluate + models=dict( + onnx="/workspace/work_dirs/end2end.onnx", # Path to ONNX model file + tensorrt="/workspace/work_dirs/end2end.engine", # Path to TensorRT engine file + # pytorch="/workspace/work_dirs/best_accuracy_top1_epoch_28.pth", # Optional: PyTorch checkpoint + ), ) # ============================================================================== diff --git a/projects/CalibrationStatusClassification/deploy/__init__.py b/projects/CalibrationStatusClassification/deploy/__init__.py index 38b7c326..d2c31034 100644 --- a/projects/CalibrationStatusClassification/deploy/__init__.py +++ b/projects/CalibrationStatusClassification/deploy/__init__.py @@ -1,21 +1,21 @@ """ -Calibration Status Classification Model Deployment Package +Calibration Status Classification Model Deployment Package (Refactored) This package provides utilities for exporting, verifying, and evaluating 
-CalibrationStatusClassification models in ONNX and TensorRT formats. +CalibrationStatusClassification models in ONNX and TensorRT formats using +the unified deployment framework. """ -from .config import DeploymentConfig, parse_args, setup_logging -from .evaluator import evaluate_exported_model, print_evaluation_results -from .exporters import export_to_onnx, export_to_tensorrt, run_verification +from .data_loader import CalibrationDataLoader +from .evaluator import ( + ClassificationEvaluator, + get_models_to_evaluate, + run_full_evaluation, +) __all__ = [ - "DeploymentConfig", - "parse_args", - "setup_logging", - "export_to_onnx", - "export_to_tensorrt", - "run_verification", - "evaluate_exported_model", - "print_evaluation_results", + "CalibrationDataLoader", + "ClassificationEvaluator", + "get_models_to_evaluate", + "run_full_evaluation", ] diff --git a/projects/CalibrationStatusClassification/deploy/config.py b/projects/CalibrationStatusClassification/deploy/config.py deleted file mode 100644 index e0071066..00000000 --- a/projects/CalibrationStatusClassification/deploy/config.py +++ /dev/null @@ -1,161 +0,0 @@ -""" -Configuration classes and constants for deployment. -""" - -import argparse -import logging -import os -import os.path as osp -from typing import Dict - -from mmengine.config import Config - -# Constants -DEFAULT_VERIFICATION_TOLERANCE = 1e-3 -DEFAULT_WORKSPACE_SIZE = 1 << 30 # 1 GB -EXPECTED_CHANNELS = 5 # RGB + Depth + Intensity -LABELS = {"0": "miscalibrated", "1": "calibrated"} - -# Precision policy mapping -PRECISION_POLICIES = { - "auto": {}, # No special flags, TensorRT decides - "fp16": {"FP16": True}, - "fp32_tf32": {"TF32": True}, # TF32 for FP32 operations - "explicit_int8": {"INT8": True}, - "strongly_typed": {"STRONGLY_TYPED": True}, # Network creation flag -} - - -class DeploymentConfig: - """Configuration container for deployment settings.""" - - def __init__(self, deploy_cfg: Config): - self.deploy_cfg = deploy_cfg - self._validate_config() - - def _validate_config(self) -> None: - """Validate configuration structure and required fields.""" - if "export" not in self.deploy_cfg: - raise ValueError( - "Missing 'export' section in deploy config. " - "Please update your config to the new format with 'export', 'runtime_io' sections." - ) - - if "runtime_io" not in self.deploy_cfg: - raise ValueError("Missing 'runtime_io' section in deploy config.") - - # Validate export mode - valid_modes = ["onnx", "trt", "both", "none"] - mode = self.export_config.get("mode", "both") - if mode not in valid_modes: - raise ValueError(f"Invalid export mode '{mode}'. Must be one of {valid_modes}") - - # Validate precision policy if present - backend_cfg = self.deploy_cfg.get("backend_config", {}) - common_cfg = backend_cfg.get("common_config", {}) - precision_policy = common_cfg.get("precision_policy", "auto") - if precision_policy not in PRECISION_POLICIES: - raise ValueError( - f"Invalid precision_policy '{precision_policy}'. 
" f"Must be one of {list(PRECISION_POLICIES.keys())}" - ) - - @property - def export_config(self) -> Dict: - """Get export configuration (mode, verify, device, work_dir).""" - return self.deploy_cfg.get("export", {}) - - @property - def runtime_io_config(self) -> Dict: - """Get runtime I/O configuration (info_pkl, sample_idx, onnx_file).""" - return self.deploy_cfg.get("runtime_io", {}) - - @property - def evaluation_config(self) -> Dict: - """Get evaluation configuration (enabled, num_samples, verbose, model paths).""" - return self.deploy_cfg.get("evaluation", {}) - - @property - def should_export_onnx(self) -> bool: - """Check if ONNX export is requested.""" - mode = self.export_config.get("mode", "both") - return mode in ["onnx", "both"] - - @property - def should_export_tensorrt(self) -> bool: - """Check if TensorRT export is requested.""" - mode = self.export_config.get("mode", "both") - return mode in ["trt", "both"] - - @property - def should_verify(self) -> bool: - """Check if verification is requested.""" - return self.export_config.get("verify", False) - - @property - def device(self) -> str: - """Get device for export.""" - return self.export_config.get("device", "cuda:0") - - @property - def work_dir(self) -> str: - """Get working directory.""" - return self.export_config.get("work_dir", os.getcwd()) - - @property - def onnx_settings(self) -> Dict: - """Get ONNX export settings.""" - onnx_config = self.deploy_cfg.get("onnx_config", {}) - return { - "opset_version": onnx_config.get("opset_version", 16), - "do_constant_folding": onnx_config.get("do_constant_folding", True), - "input_names": onnx_config.get("input_names", ["input"]), - "output_names": onnx_config.get("output_names", ["output"]), - "dynamic_axes": onnx_config.get("dynamic_axes"), - "export_params": onnx_config.get("export_params", True), - "keep_initializers_as_inputs": onnx_config.get("keep_initializers_as_inputs", False), - "save_file": onnx_config.get("save_file", "calibration_classifier.onnx"), - } - - @property - def tensorrt_settings(self) -> Dict: - """Get TensorRT export settings with precision policy support.""" - backend_config = self.deploy_cfg.get("backend_config", {}) - common_config = backend_config.get("common_config", {}) - - # Get precision policy - precision_policy = common_config.get("precision_policy", "auto") - policy_flags = PRECISION_POLICIES.get(precision_policy, {}) - - return { - "max_workspace_size": common_config.get("max_workspace_size", DEFAULT_WORKSPACE_SIZE), - "precision_policy": precision_policy, - "policy_flags": policy_flags, - "model_inputs": backend_config.get("model_inputs", []), - } - - -def parse_args() -> argparse.Namespace: - """Parse command line arguments.""" - parser = argparse.ArgumentParser( - description="Export CalibrationStatusClassification model to ONNX/TensorRT.", - formatter_class=argparse.ArgumentDefaultsHelpFormatter, - ) - parser.add_argument("deploy_cfg", help="Deploy config path") - parser.add_argument("model_cfg", help="Model config path") - parser.add_argument( - "checkpoint", nargs="?", default=None, help="Model checkpoint path (optional when mode='none')" - ) - - # Optional overrides - parser.add_argument("--work-dir", help="Override output directory from config") - parser.add_argument("--device", help="Override device from config") - parser.add_argument("--log-level", default="INFO", choices=list(logging._nameToLevel.keys()), help="Logging level") - parser.add_argument("--info-pkl", help="Override info.pkl path from config") - - return 
parser.parse_args() - - -def setup_logging(level: str) -> logging.Logger: - """Setup logging configuration.""" - logging.basicConfig(level=getattr(logging, level), format="%(levelname)s:%(name)s:%(message)s") - return logging.getLogger("mmdeploy") diff --git a/projects/CalibrationStatusClassification/deploy/data_loader.py b/projects/CalibrationStatusClassification/deploy/data_loader.py new file mode 100644 index 00000000..57e344e1 --- /dev/null +++ b/projects/CalibrationStatusClassification/deploy/data_loader.py @@ -0,0 +1,183 @@ +""" +CalibrationStatusClassification DataLoader for deployment. + +This module implements the BaseDataLoader interface for loading and preprocessing +calibration status classification data from info.pkl files. +""" + +import os +import pickle +from typing import Any, Dict, Optional + +import torch +from mmengine.config import Config + +from autoware_ml.calibration_classification.datasets.transforms.calibration_classification_transform import ( + CalibrationClassificationTransform, +) +from autoware_ml.deployment.core import BaseDataLoader + + +class CalibrationDataLoader(BaseDataLoader): + """ + DataLoader for CalibrationStatusClassification task. + + Loads samples from info.pkl files and preprocesses them using + CalibrationClassificationTransform. + """ + + def __init__( + self, + info_pkl_path: str, + model_cfg: Config, + miscalibration_probability: float = 0.0, + device: str = "cpu", + ): + """ + Initialize CalibrationDataLoader. + + Args: + info_pkl_path: Path to info.pkl file containing samples + model_cfg: Model configuration containing transform settings + miscalibration_probability: Probability of loading miscalibrated sample (0.0 or 1.0) + device: Device to load tensors on + """ + super().__init__( + config={ + "info_pkl_path": info_pkl_path, + "miscalibration_probability": miscalibration_probability, + "device": device, + } + ) + + self.info_pkl_path = info_pkl_path + self.model_cfg = model_cfg + self.miscalibration_probability = miscalibration_probability + self.device = device + + # Load samples list + self._samples_list = self._load_info_pkl_file() + + # Create transform + self._transform = self._create_transform() + + def _load_info_pkl_file(self) -> list: + """ + Load and parse info.pkl file. + + Returns: + List of samples from data_list + """ + if not os.path.exists(self.info_pkl_path): + raise FileNotFoundError(f"Info.pkl file not found: {self.info_pkl_path}") + + try: + with open(self.info_pkl_path, "rb") as f: + info_data = pickle.load(f) + except Exception as e: + raise ValueError(f"Failed to load info.pkl file: {e}") + + # Extract samples from info.pkl + if isinstance(info_data, dict): + if "data_list" in info_data: + samples_list = info_data["data_list"] + else: + raise ValueError(f"Expected 'data_list' key in info_data, " f"found keys: {list(info_data.keys())}") + else: + raise ValueError(f"Expected dict format, got {type(info_data)}") + + if not samples_list: + raise ValueError("No samples found in info.pkl") + + return samples_list + + def _create_transform(self) -> CalibrationClassificationTransform: + """ + Create CalibrationClassificationTransform with model configuration. 
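+
+        The transform runs in ``test`` mode with augmentation disabled;
+        ``data_root`` and ``transform_config`` must be present in the model
+        configuration.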
+ + Returns: + Configured transform instance + """ + data_root = self.model_cfg.get("data_root") + if data_root is None: + raise ValueError("data_root not found in model configuration") + + transform_config = self.model_cfg.get("transform_config") + if transform_config is None: + raise ValueError("transform_config not found in model configuration") + + return CalibrationClassificationTransform( + transform_config=transform_config, + mode="test", + max_depth=self.model_cfg.get("max_depth", 128.0), + dilation_size=self.model_cfg.get("dilation_size", 1), + undistort=True, + miscalibration_probability=self.miscalibration_probability, + enable_augmentation=False, + data_root=data_root, + projection_vis_dir=self.model_cfg.get("test_projection_vis_dir", None), + results_vis_dir=self.model_cfg.get("test_results_vis_dir", None), + binary_save_dir=self.model_cfg.get("binary_save_dir", None), + ) + + def load_sample(self, index: int) -> Dict[str, Any]: + """ + Load a single sample from info.pkl. + + Args: + index: Sample index to load + + Returns: + Sample dictionary with 'image', 'lidar_points', etc. + """ + if index >= len(self._samples_list): + raise IndexError(f"Sample index {index} out of range (0-{len(self._samples_list)-1})") + + sample = self._samples_list[index] + + # Validate sample structure + required_keys = ["image", "lidar_points"] + if not all(key in sample for key in required_keys): + raise ValueError(f"Sample {index} has invalid structure. " f"Required keys: {required_keys}") + + return sample + + def preprocess(self, sample: Dict[str, Any]) -> torch.Tensor: + """ + Preprocess sample using CalibrationClassificationTransform. + + Args: + sample: Raw sample data from load_sample() + + Returns: + Preprocessed tensor with shape (1, C, H, W) + """ + # Apply transform + results = self._transform.transform(sample) + input_data_processed = results["fused_img"] # (H, W, 5) + + # Convert numpy array (H, W, C) to tensor (1, C, H, W) + tensor = torch.from_numpy(input_data_processed).permute(2, 0, 1).float() + return tensor.unsqueeze(0).to(self.device) + + def get_num_samples(self) -> int: + """ + Get total number of samples. + + Returns: + Number of samples in info.pkl + """ + return len(self._samples_list) + + def validate_sample(self, sample: Dict[str, Any]) -> bool: + """ + Validate sample structure. + + Args: + sample: Sample to validate + + Returns: + True if valid + """ + required_keys = ["image", "lidar_points"] + return all(key in sample for key in required_keys) diff --git a/projects/CalibrationStatusClassification/deploy/evaluator.py b/projects/CalibrationStatusClassification/deploy/evaluator.py index efc7dd45..951f9aa2 100644 --- a/projects/CalibrationStatusClassification/deploy/evaluator.py +++ b/projects/CalibrationStatusClassification/deploy/evaluator.py @@ -1,386 +1,340 @@ """ -Model evaluation utilities for exported models. +Classification Evaluator for CalibrationStatusClassification. + +This module implements the BaseEvaluator interface for evaluating +calibration status classification models. 
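+
+Example (illustrative sketch; ``model_cfg`` is an mmengine Config,
+``data_loader`` is a CalibrationDataLoader built elsewhere, and the model
+path is a placeholder):
+
+    evaluator = ClassificationEvaluator(model_cfg)
+    results = evaluator.evaluate(
+        model_path="end2end.onnx",
+        data_loader=data_loader,
+        num_samples=10,
+        backend="onnx",
+    )
+    evaluator.print_results(results)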
""" import gc import logging -import os.path as osp -import time -from typing import Any, Tuple +from typing import Any, Dict import numpy as np -import onnxruntime as ort -import pycuda.autoinit -import pycuda.driver as cuda -import tensorrt as trt import torch -import torch.nn.functional as F -from config import LABELS -from exporters import ( - _clear_gpu_memory, - _create_transform_with_config, - _load_info_pkl_file, - _numpy_to_tensor, - _run_tensorrt_inference, - load_info_pkl_data, -) from mmengine.config import Config +from mmpretrain.apis import get_model +from autoware_ml.deployment.backends import ONNXBackend, PyTorchBackend, TensorRTBackend +from autoware_ml.deployment.core import BaseEvaluator -def _load_samples_from_info_pkl(info_pkl_path: str, num_samples: int, logger: logging.Logger) -> list: - """Load and validate samples from info.pkl file.""" - try: - samples_list = _load_info_pkl_file(info_pkl_path) - logger.info(f"Loaded {len(samples_list)} samples from info.pkl") - - # Limit number of samples - num_samples = min(num_samples, len(samples_list)) - logger.info(f"Evaluating {num_samples} samples") - - return samples_list[:num_samples] - - except Exception as e: - logger.error(f"Failed to load info.pkl: {e}") - raise - - -def _create_transform(model_cfg: Config): - """Create and configure CalibrationClassificationTransform for evaluation.""" - return _create_transform_with_config( - model_cfg=model_cfg, - miscalibration_probability=0.0, - projection_vis_dir=None, - results_vis_dir=None, - binary_save_dir=None, - ) - - -def _create_onnx_inference_func(model_path: str, device: str, logger: logging.Logger): - """Create ONNX inference function with configured providers.""" - logger.info(f"Using ONNX model: {model_path}") - logger.info("Creating ONNX InferenceSession...") - - # Configure execution providers - if device.startswith("cuda"): - providers = ["CUDAExecutionProvider", "CPUExecutionProvider"] - logger.info("Attempting to use CUDA acceleration for ONNX inference...") - try: - available_providers = ort.get_available_providers() - if "CUDAExecutionProvider" not in available_providers: - logger.warning( - f"CUDAExecutionProvider not available. Available: {available_providers}. " - "Install onnxruntime-gpu for CUDA acceleration" - ) - providers = ["CPUExecutionProvider"] - else: - logger.info("CUDAExecutionProvider is available") - except Exception as e: - logger.warning(f"Error checking CUDA provider: {e}. 
Falling back to CPU") - providers = ["CPUExecutionProvider"] - else: - providers = ["CPUExecutionProvider"] - logger.info("Using CPU for ONNX inference") - - # Create session - ort_session = ort.InferenceSession(model_path, providers=providers) - logger.info(f"ONNX session using providers: {ort_session.get_providers()}") - - # Return inference function - def inference_func(input_tensor): - input_name = ort_session.get_inputs()[0].name - onnx_input = {input_name: input_tensor.cpu().numpy()} - start_time = time.perf_counter() - onnx_output = ort_session.run(None, onnx_input)[0] - latency = (time.perf_counter() - start_time) * 1000 - return onnx_output, latency - - return inference_func - - -def _create_tensorrt_inference_func(model_path: str, logger: logging.Logger): - """Create TensorRT inference function with loaded engine.""" - logger.info(f"Using TensorRT model: {model_path}") - - # Load TensorRT engine - trt_logger = trt.Logger(trt.Logger.WARNING) - trt.init_libnvinfer_plugins(trt_logger, "") - runtime = trt.Runtime(trt_logger) +from .data_loader import CalibrationDataLoader - with open(model_path, "rb") as f: - engine = runtime.deserialize_cuda_engine(f.read()) +# Label mapping +LABELS = {"0": "miscalibrated", "1": "calibrated"} - if engine is None: - raise RuntimeError("Failed to deserialize TensorRT engine") +# Constants for evaluation +LOG_INTERVAL = 100 # Log progress every N samples +GPU_CLEANUP_INTERVAL = 10 # Clear GPU memory every N samples for TensorRT - # Return inference function - def inference_func(input_tensor): - return _run_tensorrt_inference(engine, input_tensor.cpu(), logger) - return inference_func +class ClassificationEvaluator(BaseEvaluator): + """ + Evaluator for classification tasks. + Computes accuracy, per-class metrics, confusion matrix, and latency statistics. + """ -def _process_single_sample( - sample_idx: int, - info_pkl_path: str, - transform, - inference_func, - device: str, - logger: logging.Logger, - verbose: bool = False, -) -> Tuple[list, list, list, list]: - """Process a single sample with both calibrated and miscalibrated versions. + def __init__(self, model_cfg: Config): + """ + Initialize classification evaluator. + + Args: + model_cfg: Model configuration + """ + super().__init__(config={}) + self.model_cfg = model_cfg + + def evaluate( + self, + model_path: str, + data_loader: CalibrationDataLoader, + num_samples: int, + backend: str = "pytorch", + device: str = "cpu", + verbose: bool = False, + ) -> Dict[str, Any]: + """ + Run full evaluation on a model. 
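+
+        Each sample is evaluated twice: once through the calibrated data
+        loader (ground truth 1) and once through a miscalibrated copy
+        (ground truth 0), so both classes contribute to the metrics.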
+ + Args: + model_path: Path to model checkpoint/weights + data_loader: DataLoader for loading samples (calibrated version) + num_samples: Number of samples to evaluate + backend: Backend to use ('pytorch', 'onnx', 'tensorrt') + device: Device to run inference on + verbose: Whether to print detailed progress + + Returns: + Dictionary containing evaluation metrics + """ + logger = logging.getLogger(__name__) + logger.info(f"\nEvaluating {backend.upper()} model: {model_path}") + logger.info(f"Number of samples: {num_samples}") - Returns: - Tuple of (predictions, ground_truth, probabilities, latencies) for this sample - """ - predictions = [] - ground_truth = [] - probabilities = [] - latencies = [] - - # Test both calibrated and miscalibrated versions - for miscalibration_prob, expected_label in [(0.0, 1), (1.0, 0)]: - # Update transform's miscalibration probability - transform.miscalibration_probability = miscalibration_prob - - # Load and preprocess sample - sample_data = load_info_pkl_data(info_pkl_path, sample_idx) - sample_data["sample_idx"] = sample_idx - - results = transform.transform(sample_data) - input_data_processed = results["fused_img"] - gt_label = results["gt_label"] - - # Convert to tensor - input_tensor = _numpy_to_tensor(input_data_processed, device) - - # Run inference - output_np, latency = inference_func(input_tensor) - - if output_np is None: - logger.error(f"Failed to get output for sample {sample_idx}") - continue - - # Convert logits to probabilities - logits = torch.from_numpy(output_np) - probs = F.softmax(logits, dim=-1) - predicted_class = torch.argmax(probs, dim=-1).item() - confidence = probs.max().item() - - # Store results - predictions.append(predicted_class) - ground_truth.append(gt_label) - probabilities.append(probs.cpu().numpy()) - latencies.append(latency) - - # Print sample results only in verbose mode - if verbose: - logger.info( - f"Sample {sample_idx + 1} (GT={gt_label}): " - f"Pred={predicted_class}, Confidence={confidence:.4f}, Latency={latency:.2f}ms" - ) + # Limit num_samples to available data + total_samples = data_loader.get_num_samples() + num_samples = min(num_samples, total_samples) - return predictions, ground_truth, probabilities, latencies + # Create backend + inference_backend = self._create_backend(backend, model_path, device, logger) + # Create data loaders for both calibrated and miscalibrated versions + # This avoids reinitializing the transform for each sample + data_loader_miscalibrated = CalibrationDataLoader( + info_pkl_path=data_loader.info_pkl_path, + model_cfg=data_loader.model_cfg, + miscalibration_probability=1.0, + device=device, + ) + data_loader_calibrated = data_loader # Already calibrated (prob=0.0) + + # Run inference on all samples + predictions = [] + ground_truths = [] + probabilities = [] + latencies = [] + + with inference_backend: + for idx in range(num_samples): + if idx % LOG_INTERVAL == 0: + logger.info(f"Processing sample {idx + 1}/{num_samples}") + + try: + # Process both calibrated and miscalibrated versions + pred, gt, prob, lat = self._process_sample( + idx, data_loader_calibrated, data_loader_miscalibrated, inference_backend, verbose, logger + ) + + predictions.extend(pred) + ground_truths.extend(gt) + probabilities.extend(prob) + latencies.extend(lat) + + # Clear GPU memory periodically for TensorRT + if backend == "tensorrt" and idx % GPU_CLEANUP_INTERVAL == 0: + self._clear_gpu_memory() + + except Exception as e: + logger.error(f"Error processing sample {idx}: {e}") + continue + + # Convert to numpy 
arrays + predictions = np.array(predictions) + ground_truths = np.array(ground_truths) + probabilities = np.array(probabilities) + latencies = np.array(latencies) + + # Compute metrics + results = self._compute_metrics(predictions, ground_truths, probabilities, latencies) + results["backend"] = backend + results["num_samples"] = len(predictions) + + return results + + def _create_backend( + self, + backend: str, + model_path: str, + device: str, + logger: logging.Logger, + ): + """Create appropriate backend instance.""" + if backend == "pytorch": + # Load PyTorch model + logger.info(f"Loading PyTorch model from {model_path}") + model = get_model(self.model_cfg, model_path, device=device) + return PyTorchBackend(model, device=device) + elif backend == "onnx": + logger.info(f"Loading ONNX model from {model_path}") + return ONNXBackend(model_path, device=device) + elif backend == "tensorrt": + logger.info(f"Loading TensorRT engine from {model_path}") + return TensorRTBackend(model_path, device="cuda") + else: + raise ValueError(f"Unsupported backend: {backend}") + + def _process_sample( + self, + sample_idx: int, + data_loader_calibrated: CalibrationDataLoader, + data_loader_miscalibrated: CalibrationDataLoader, + backend, + verbose: bool, + logger: logging.Logger, + ): + """ + Process a single sample with both calibrated and miscalibrated versions. + + Args: + sample_idx: Index of sample to process + data_loader_calibrated: DataLoader for calibrated samples + data_loader_miscalibrated: DataLoader for miscalibrated samples + backend: Inference backend + verbose: Verbose logging + logger: Logger instance + + Returns: + Tuple of (predictions, ground_truths, probabilities, latencies) + """ + predictions = [] + ground_truths = [] + probabilities = [] + latencies = [] + + # Process both miscalibrated (0) and calibrated (1) versions + for gt_label, loader in [(0, data_loader_miscalibrated), (1, data_loader_calibrated)]: + # Load and preprocess using pre-created data loader + input_tensor = loader.load_and_preprocess(sample_idx) + + # Run inference + output, latency = backend.infer(input_tensor) + + # Get prediction + if output.shape[-1] == 2: # Binary classification + predicted_label = int(np.argmax(output[0])) + prob_scores = output[0] + else: + raise ValueError(f"Unexpected output shape: {output.shape}") + + predictions.append(predicted_label) + ground_truths.append(gt_label) + probabilities.append(prob_scores) + latencies.append(latency) + + if verbose: + logger.info( + f" Sample {sample_idx}, GT: {LABELS[str(gt_label)]}, " + f"Pred: {LABELS[str(predicted_label)]}, " + f"Scores: {prob_scores}, Latency: {latency:.2f}ms" + ) -def evaluate_exported_model( - model_path: str, - model_type: str, - model_cfg: Config, - info_pkl_path: str, - logger: logging.Logger, - device: str = "cpu", - num_samples: int = 10, - verbose: bool = False, -) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: + return predictions, ground_truths, probabilities, latencies + + def _compute_metrics( + self, + predictions: np.ndarray, + ground_truths: np.ndarray, + probabilities: np.ndarray, + latencies: np.ndarray, + ) -> Dict[str, Any]: + """Compute all evaluation metrics.""" + if len(predictions) == 0: + return {"accuracy": 0.0, "error": "No samples were processed successfully"} + + # Overall accuracy + correct = (predictions == ground_truths).sum() + accuracy = correct / len(predictions) + + # Per-class metrics + per_class_acc = {} + per_class_count = {} + for cls in np.unique(ground_truths): + mask = ground_truths 
== cls + cls_correct = (predictions[mask] == ground_truths[mask]).sum() + cls_total = mask.sum() + per_class_acc[int(cls)] = cls_correct / cls_total if cls_total > 0 else 0.0 + per_class_count[int(cls)] = int(cls_total) + + # Confusion matrix + num_classes = len(np.unique(ground_truths)) + confusion_matrix = np.zeros((num_classes, num_classes), dtype=int) + for gt, pred in zip(ground_truths, predictions): + confusion_matrix[int(gt), int(pred)] += 1 + + # Latency statistics + latency_stats = self.compute_latency_stats(latencies.tolist()) + + return { + "accuracy": float(accuracy), + "correct_predictions": int(correct), + "total_samples": len(predictions), + "per_class_accuracy": per_class_acc, + "per_class_count": per_class_count, + "confusion_matrix": confusion_matrix.tolist(), + "latency_stats": latency_stats, + } + + def print_results(self, results: Dict[str, Any]) -> None: + """ + Pretty print evaluation results. + + Args: + results: Results dictionary from evaluate() + """ + logger = logging.getLogger(__name__) + + if "error" in results: + logger.error(f"Evaluation error: {results['error']}") + return + + backend = results.get("backend", "unknown") + + logger.info(f"\n{'='*70}") + logger.info(f"{backend.upper()} Model Evaluation Results") + logger.info(f"{'='*70}") + + # Overall metrics + logger.info(f"Total samples: {results['total_samples']}") + logger.info(f"Correct predictions: {results['correct_predictions']}") + logger.info(f"Accuracy: {results['accuracy']:.4f} ({results['accuracy']*100:.2f}%)") + + # Latency + logger.info(f"\n{self.format_latency_stats(results['latency_stats'])}") + + # Per-class accuracy + logger.info(f"\nPer-class accuracy:") + for cls, acc in results["per_class_accuracy"].items(): + count = results["per_class_count"][cls] + label = LABELS[str(cls)] + logger.info(f" Class {cls} ({label}): {acc:.4f} ({acc*100:.2f}%) - {count} samples") + + # Confusion matrix + logger.info(f"\nConfusion Matrix:") + cm = np.array(results["confusion_matrix"]) + logger.info(f" Predicted →") + logger.info(f" GT ↓ {' '.join([f'{i:>8}' for i in range(len(cm))])}") + for i, row in enumerate(cm): + logger.info(f" {i:>8} {' '.join([f'{val:>8}' for val in row])}") + + logger.info(f"{'='*70}\n") + + def _clear_gpu_memory(self) -> None: + """Clear GPU cache and run garbage collection.""" + if torch.cuda.is_available(): + torch.cuda.empty_cache() + gc.collect() + + +def get_models_to_evaluate(eval_cfg: Dict[str, Any], logger: logging.Logger) -> list: """ - Evaluate exported model (ONNX or TensorRT) using info.pkl data. + Get list of models to evaluate from config. 
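+
+    Only entries under ``evaluation.models`` whose files exist on disk are
+    returned as ``(backend, path)`` tuples; missing files are logged and
+    skipped.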
Args: - model_path: Path to model file (.onnx or .engine) - model_type: Type of model ("onnx" or "tensorrt") - model_cfg: Model configuration - info_pkl_path: Path to info.pkl file + eval_cfg: Evaluation configuration logger: Logger instance - device: Device for preprocessing - num_samples: Number of samples to evaluate - verbose: Enable verbose logging Returns: - Tuple of (predictions, ground_truth, probabilities, latencies) - """ - # Load samples - samples_list = _load_samples_from_info_pkl(info_pkl_path, num_samples, logger) - - # Create transform - transform = _create_transform(model_cfg) - - # Create inference function based on model type - if model_type == "onnx": - inference_func = _create_onnx_inference_func(model_path, device, logger) - elif model_type == "tensorrt": - inference_func = _create_tensorrt_inference_func(model_path, logger) - else: - raise ValueError(f"Unsupported model type: {model_type}") - - # Process all samples - all_predictions = [] - all_ground_truth = [] - all_probabilities = [] - all_latencies = [] - - for sample_idx in range(len(samples_list)): - if sample_idx % 100 == 0: - logger.info(f"Processing sample {sample_idx + 1}/{len(samples_list)}") - - try: - predictions, ground_truth, probabilities, latencies = _process_single_sample( - sample_idx=sample_idx, - info_pkl_path=info_pkl_path, - transform=transform, - inference_func=inference_func, - device=device, - logger=logger, - verbose=verbose, - ) - - all_predictions.extend(predictions) - all_ground_truth.extend(ground_truth) - all_probabilities.extend(probabilities) - all_latencies.extend(latencies) - - # Clear GPU memory periodically - if model_type == "tensorrt" and sample_idx % 10 == 0: - _clear_gpu_memory() - - except Exception as e: - logger.error(f"Error processing sample {sample_idx}: {e}") - continue - - return ( - np.array(all_predictions), - np.array(all_ground_truth), - np.array(all_probabilities), - np.array(all_latencies), - ) - - -def print_evaluation_results( - all_predictions: np.ndarray, - all_ground_truth: np.ndarray, - all_probabilities: np.ndarray, - all_latencies: np.ndarray, - model_type: str, - logger: logging.Logger, -) -> None: - """Print comprehensive evaluation results with metrics and statistics.""" - - if len(all_predictions) == 0: - logger.error("No samples were processed successfully.") - return - - correct_predictions = (all_predictions == all_ground_truth).sum() - total_samples = len(all_predictions) - accuracy = correct_predictions / total_samples - avg_latency = np.mean(all_latencies) - - # Print header - logger.info(f"\n{'='*60}") - logger.info(f"{model_type.upper()} Model Evaluation Results") - logger.info(f"{'='*60}") - logger.info(f"Total samples: {total_samples}") - logger.info(f"Correct predictions: {correct_predictions}") - logger.info(f"Accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)") - logger.info(f"Average latency: {avg_latency:.2f} ms") - - # Calculate per-class accuracy - unique_classes = np.unique(all_ground_truth) - logger.info(f"\nPer-class accuracy:") - for cls in unique_classes: - cls_mask = all_ground_truth == cls - cls_correct = (all_predictions[cls_mask] == all_ground_truth[cls_mask]).sum() - cls_total = cls_mask.sum() - cls_accuracy = cls_correct / cls_total if cls_total > 0 else 0 - logger.info( - f" Class {cls} ({LABELS[str(cls)]}): " - f"{cls_correct}/{cls_total} = {cls_accuracy:.4f} ({cls_accuracy*100:.2f}%)" - ) - - # Calculate average confidence - avg_confidence = np.mean([prob.max() for prob in all_probabilities]) - logger.info(f"\nAverage 
confidence: {avg_confidence:.4f}") - - # Calculate latency statistics - min_latency = np.min(all_latencies) - max_latency = np.max(all_latencies) - std_latency = np.std(all_latencies) - p50_latency = np.percentile(all_latencies, 50) - p95_latency = np.percentile(all_latencies, 95) - p99_latency = np.percentile(all_latencies, 99) - - logger.info(f"\nLatency Statistics:") - logger.info(f" Average: {avg_latency:.2f} ms") - logger.info(f" Median (P50): {p50_latency:.2f} ms") - logger.info(f" P95: {p95_latency:.2f} ms") - logger.info(f" P99: {p99_latency:.2f} ms") - logger.info(f" Min: {min_latency:.2f} ms") - logger.info(f" Max: {max_latency:.2f} ms") - logger.info(f" Std: {std_latency:.2f} ms") - - # Show confusion matrix - logger.info(f"\nConfusion Matrix:") - logger.info(f"{'':>10} Predicted") - logger.info(f"{'Actual':>10} {'0 (misc)':>10} {'1 (calib)':>10}") - logger.info(f"{'-'*32}") - for true_cls in unique_classes: - row_label = f"{true_cls} ({LABELS[str(true_cls)][:4]})" - row = [f"{row_label:>10}"] - for pred_cls in unique_classes: - count = ((all_ground_truth == true_cls) & (all_predictions == pred_cls)).sum() - row.append(f"{count:>10}") - logger.info(" ".join(row)) - - logger.info(f"\n{'='*60}") - logger.info(f"Evaluation Summary:") - logger.info(f" Model Type: {model_type.upper()}") - logger.info(f" Accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)") - logger.info(f" Avg Latency: {avg_latency:.2f} ms") - logger.info(f" Throughput: {1000/avg_latency:.2f} samples/sec") - logger.info(f"{'='*60}\n") - - -def get_models_to_evaluate(eval_cfg: dict, logger: logging.Logger) -> list: - """Determine which models to evaluate based on config. - - Returns: - List of (model_type, model_path) tuples + List of tuples (backend_name, model_path) """ + models_config = eval_cfg.get("models", {}) models_to_evaluate = [] - # Get model paths from config - eval_onnx_path = eval_cfg.get("onnx_model") - eval_trt_path = eval_cfg.get("tensorrt_model") - - # If both paths are None, skip evaluation - if eval_onnx_path is None and eval_trt_path is None: - logger.warning("Both onnx_model and tensorrt_model are None. 
Skipping evaluation.") - logger.warning("To enable evaluation, specify at least one model path in the config.") - return models_to_evaluate - - # Only evaluate models that are explicitly specified (not None) - if eval_onnx_path is not None: - if osp.exists(eval_onnx_path): - models_to_evaluate.append(("onnx", eval_onnx_path)) - logger.info(f"Using config-specified ONNX model: {eval_onnx_path}") - else: - logger.warning(f"Config-specified ONNX model not found: {eval_onnx_path}") + backend_mapping = { + "pytorch": "pytorch", + "onnx": "onnx", + "tensorrt": "tensorrt", + } - if eval_trt_path is not None: - if osp.exists(eval_trt_path): - models_to_evaluate.append(("tensorrt", eval_trt_path)) - logger.info(f"Using config-specified TensorRT model: {eval_trt_path}") - else: - logger.warning(f"Config-specified TensorRT model not found: {eval_trt_path}") + for backend_key, model_path in models_config.items(): + backend_name = backend_mapping.get(backend_key.lower()) + if backend_name and model_path: + import os + + if os.path.exists(model_path): + models_to_evaluate.append((backend_name, model_path)) + logger.info(f" - {backend_name}: {model_path}") + else: + logger.warning(f" - {backend_name}: {model_path} (not found, skipping)") return models_to_evaluate @@ -389,47 +343,72 @@ def run_full_evaluation( models_to_evaluate: list, model_cfg: Config, info_pkl: str, - device: Any, + device: str, num_samples: int, - verbose_mode: bool, + verbose: bool, logger: logging.Logger, ) -> None: - """Run evaluation for all specified models.""" + """ + Run full evaluation on all specified models. + + Args: + models_to_evaluate: List of (backend, model_path) tuples + model_cfg: Model configuration + info_pkl: Path to info.pkl file + device: Device for inference + num_samples: Number of samples to evaluate + verbose: Verbose mode + logger: Logger instance + """ if not models_to_evaluate: - logger.error( - "No models available for evaluation. Please export models first or specify model paths in config." 
- ) + logger.warning("No models specified for evaluation") return - # Evaluate each model - for model_type, model_path in models_to_evaluate: - logger.info(f"\nEvaluating {model_type.upper()} model...") - logger.info(f"Model path: {model_path}") - logger.info(f"Number of samples: {num_samples}") - logger.info(f"Verbose mode: {verbose_mode}") + logger.info(f"\nModels to evaluate:") + for backend, path in models_to_evaluate: + logger.info(f" - {backend}: {path}") + # Create evaluator + evaluator = ClassificationEvaluator(model_cfg) + + # Create data loader (with miscalibration_probability=0.0 as default) + data_loader = CalibrationDataLoader( + info_pkl_path=info_pkl, + model_cfg=model_cfg, + miscalibration_probability=0.0, + device=device, + ) + + # Evaluate each model + all_results = {} + for backend, model_path in models_to_evaluate: try: - # Run evaluation - predictions, ground_truth, probabilities, latencies = evaluate_exported_model( + results = evaluator.evaluate( model_path=model_path, - model_type=model_type, - model_cfg=model_cfg, - info_pkl_path=info_pkl, - logger=logger, - device=device.type if isinstance(device, torch.device) else device, + data_loader=data_loader, num_samples=num_samples, - verbose=verbose_mode, + backend=backend, + device=device, + verbose=verbose, ) - - # Print results - print_evaluation_results(predictions, ground_truth, probabilities, latencies, model_type, logger) - - # Cleanup - _clear_gpu_memory() - + all_results[backend] = results + evaluator.print_results(results) except Exception as e: - logger.error(f"Evaluation failed for {model_type.upper()} model: {e}") + logger.error(f"Failed to evaluate {backend} model: {e}") import traceback logger.error(traceback.format_exc()) - continue + + # Print comparison summary if multiple models + if len(all_results) > 1: + logger.info(f"\n{'='*70}") + logger.info("Comparison Summary") + logger.info(f"{'='*70}") + logger.info(f"{'Backend':<15} {'Accuracy':<12} {'Avg Latency (ms)':<20}") + logger.info(f"{'-'*70}") + for backend, results in all_results.items(): + if "error" not in results: + acc = results["accuracy"] + lat = results["latency_stats"]["mean_ms"] + logger.info(f"{backend:<15} {acc:<12.4f} {lat:<20.2f}") + logger.info(f"{'='*70}\n") diff --git a/projects/CalibrationStatusClassification/deploy/exporters.py b/projects/CalibrationStatusClassification/deploy/exporters.py deleted file mode 100644 index 6d5655ff..00000000 --- a/projects/CalibrationStatusClassification/deploy/exporters.py +++ /dev/null @@ -1,680 +0,0 @@ -""" -Model export and inference verification utilities. -""" - -import gc -import logging -import os -import os.path as osp -import pickle -import time -from typing import Any, Dict, Optional, Tuple - -import numpy as np -import onnx -import onnxruntime as ort -import onnxsim -import pycuda.autoinit -import pycuda.driver as cuda -import tensorrt as trt -import torch -from config import DEFAULT_VERIFICATION_TOLERANCE, LABELS, DeploymentConfig -from mmengine.config import Config - -from autoware_ml.calibration_classification.datasets.transforms.calibration_classification_transform import ( - CalibrationClassificationTransform, -) - -# ============================================================================ -# Data Loading Utilities -# ============================================================================ - - -def _load_info_pkl_file(info_pkl_path: str) -> list: - """ - Load and parse info.pkl file. 
- - Args: - info_pkl_path: Path to the info.pkl file - - Returns: - List of samples from data_list - - Raises: - FileNotFoundError: If info.pkl file doesn't exist - ValueError: If data format is unexpected - """ - if not os.path.exists(info_pkl_path): - raise FileNotFoundError(f"Info.pkl file not found: {info_pkl_path}") - - try: - with open(info_pkl_path, "rb") as f: - info_data = pickle.load(f) - except Exception as e: - raise ValueError(f"Failed to load info.pkl file: {e}") - - # Extract samples from info.pkl - if isinstance(info_data, dict): - if "data_list" in info_data: - samples_list = info_data["data_list"] - else: - raise ValueError(f"Expected 'data_list' key in info_data, found keys: {list(info_data.keys())}") - else: - raise ValueError(f"Expected dict format, got {type(info_data)}") - - if not samples_list: - raise ValueError("No samples found in info.pkl") - - return samples_list - - -def load_info_pkl_data(info_pkl_path: str, sample_idx: int = 0) -> Dict[str, Any]: - """ - Load a single sample from info.pkl file. - - Args: - info_pkl_path: Path to the info.pkl file - sample_idx: Index of the sample to load (default: 0) - - Returns: - Sample dictionary with the required structure for CalibrationClassificationTransform - - Raises: - FileNotFoundError: If info.pkl file doesn't exist - ValueError: If data format is unexpected or sample index is invalid - """ - samples_list = _load_info_pkl_file(info_pkl_path) - - if sample_idx >= len(samples_list): - raise ValueError(f"Sample index {sample_idx} out of range (0-{len(samples_list)-1})") - - sample = samples_list[sample_idx] - - # Validate sample structure - required_keys = ["image", "lidar_points"] - if not all(key in sample for key in required_keys): - raise ValueError(f"Sample {sample_idx} has invalid structure. Required keys: {required_keys}") - - return sample - - -def _create_transform_with_config( - model_cfg: Config, - miscalibration_probability: float = 0.0, - projection_vis_dir: Optional[str] = None, - results_vis_dir: Optional[str] = None, - binary_save_dir: Optional[str] = None, -) -> CalibrationClassificationTransform: - """ - Create CalibrationClassificationTransform with model configuration. - - Args: - model_cfg: Model configuration - miscalibration_probability: Probability of miscalibration - projection_vis_dir: Optional projection visualization directory - results_vis_dir: Optional results visualization directory - binary_save_dir: Optional binary save directory - - Returns: - Configured CalibrationClassificationTransform - """ - data_root = model_cfg.get("data_root") - if data_root is None: - raise ValueError("data_root not found in model configuration") - - transform_config = model_cfg.get("transform_config") - if transform_config is None: - raise ValueError("transform_config not found in model configuration") - - return CalibrationClassificationTransform( - transform_config=transform_config, - mode="test", - max_depth=model_cfg.get("max_depth", 128.0), - dilation_size=model_cfg.get("dilation_size", 1), - undistort=True, - miscalibration_probability=miscalibration_probability, - enable_augmentation=False, - data_root=data_root, - projection_vis_dir=projection_vis_dir, - results_vis_dir=results_vis_dir, - binary_save_dir=binary_save_dir, - ) - - -def _numpy_to_tensor(numpy_array: np.ndarray, device: str = "cpu") -> torch.Tensor: - """ - Convert numpy array (H, W, C) to tensor (1, C, H, W). 
- - Args: - numpy_array: Input numpy array with shape (H, W, C) - device: Device to load tensor on - - Returns: - Tensor with shape (1, C, H, W) - """ - tensor = torch.from_numpy(numpy_array).permute(2, 0, 1).float() # (C, H, W) - return tensor.unsqueeze(0).to(device) # (1, C, H, W) - - -def _clear_gpu_memory() -> None: - """Clear GPU cache and run garbage collection.""" - if torch.cuda.is_available(): - torch.cuda.empty_cache() - gc.collect() - - -def load_sample_data_from_info_pkl( - info_pkl_path: str, - model_cfg: Config, - miscalibration_probability: float, - sample_idx: int = 0, - device: str = "cpu", -) -> torch.Tensor: - """ - Load and preprocess sample data from info.pkl using CalibrationClassificationTransform. - - Args: - info_pkl_path: Path to the info.pkl file - model_cfg: Model configuration containing data_root setting - miscalibration_probability: Probability of loading a miscalibrated sample - sample_idx: Index of the sample to load (default: 0) - device: Device to load tensor on - - Returns: - Preprocessed tensor ready for model inference - """ - # Load sample data from info.pkl - sample_data = load_info_pkl_data(info_pkl_path, sample_idx) - - # Create transform - transform = _create_transform_with_config( - model_cfg=model_cfg, - miscalibration_probability=miscalibration_probability, - projection_vis_dir=model_cfg.get("test_projection_vis_dir", None), - results_vis_dir=model_cfg.get("test_results_vis_dir", None), - binary_save_dir=model_cfg.get("binary_save_dir", None), - ) - - # Apply transform - results = transform.transform(sample_data) - input_data_processed = results["fused_img"] # (H, W, 5) - - # Convert to tensor - return _numpy_to_tensor(input_data_processed, device) - - -# ============================================================================ -# Model Export Functions -# ============================================================================ - - -def export_to_onnx( - model: torch.nn.Module, - input_tensor: torch.Tensor, - output_path: str, - config: DeploymentConfig, - logger: logging.Logger, -) -> None: - """Export model to ONNX format.""" - settings = config.onnx_settings - model.eval() - - logger.info("Exporting model to ONNX format...") - logger.info(f"Input shape: {input_tensor.shape}") - logger.info(f"Output path: {output_path}") - logger.info(f"ONNX opset version: {settings['opset_version']}") - - with torch.no_grad(): - torch.onnx.export( - model, - input_tensor, - output_path, - export_params=settings["export_params"], - keep_initializers_as_inputs=settings["keep_initializers_as_inputs"], - opset_version=settings["opset_version"], - do_constant_folding=settings["do_constant_folding"], - input_names=settings["input_names"], - output_names=settings["output_names"], - dynamic_axes=settings["dynamic_axes"], - verbose=False, - ) - - logger.info(f"ONNX export completed: {output_path}") - - # Optional model simplification - _optimize_onnx_model(output_path, logger) - - -def _optimize_onnx_model(onnx_path: str, logger: logging.Logger) -> None: - """Optimize ONNX model using onnxsim.""" - logger.info("Simplifying ONNX model...") - model_simplified, success = onnxsim.simplify(onnx_path) - if success: - onnx.save(model_simplified, onnx_path) - logger.info(f"ONNX model simplified successfully. 
Saved to {onnx_path}") - else: - logger.warning("ONNX model simplification failed") - - -def export_to_tensorrt( - onnx_path: str, output_path: str, input_tensor: torch.Tensor, config: DeploymentConfig, logger: logging.Logger -) -> bool: - """Export ONNX model to a TensorRT engine with precision policy support.""" - settings = config.tensorrt_settings - precision_policy = settings["precision_policy"] - policy_flags = settings["policy_flags"] - - logger.info(f"Building TensorRT engine with precision policy: {precision_policy}") - - # Initialize TensorRT - trt_logger = trt.Logger(trt.Logger.WARNING) - trt.init_libnvinfer_plugins(trt_logger, "") - - builder = trt.Builder(trt_logger) - builder_config = builder.create_builder_config() - builder_config.set_memory_pool_limit(pool=trt.MemoryPoolType.WORKSPACE, pool_size=settings["max_workspace_size"]) - - # Create network with appropriate flags - flags = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - - # Handle strongly typed flag (network creation flag, not builder flag) - if policy_flags.get("STRONGLY_TYPED"): - flags |= 1 << int(trt.NetworkDefinitionCreationFlag.STRONGLY_TYPED) - logger.info("Using strongly typed TensorRT network creation") - - network = builder.create_network(flags) - - # Apply precision flags to builder config - for flag_name, enabled in policy_flags.items(): - if flag_name == "STRONGLY_TYPED": - continue # Already handled as network creation flag above - if enabled and hasattr(trt.BuilderFlag, flag_name): - builder_config.set_flag(getattr(trt.BuilderFlag, flag_name)) - logger.info(f"BuilderFlag.{flag_name} enabled") - - # Setup optimization profile - profile = builder.create_optimization_profile() - _configure_input_shapes(profile, input_tensor, settings["model_inputs"], logger) - builder_config.add_optimization_profile(profile) - - # Parse ONNX model - parser = trt.OnnxParser(network, trt_logger) - with open(onnx_path, "rb") as f: - if not parser.parse(f.read()): - _log_parser_errors(parser, logger) - return False - logger.info("Successfully parsed the ONNX file") - - # Build engine - logger.info("Building TensorRT engine...") - serialized_engine = builder.build_serialized_network(network, builder_config) - - if serialized_engine is None: - logger.error("Failed to build TensorRT engine") - return False - - # Save engine - with open(output_path, "wb") as f: - f.write(serialized_engine) - - logger.info(f"TensorRT engine saved to {output_path}") - logger.info(f"Engine max workspace size: {settings['max_workspace_size'] / (1024**3):.2f} GB") - - return True - - -def _configure_input_shapes(profile, input_tensor: torch.Tensor, model_inputs: list, logger: logging.Logger) -> None: - """Configure input shapes for TensorRT optimization profile.""" - if model_inputs: - input_shapes = model_inputs[0].get("input_shapes", {}) - for input_name, shapes in input_shapes.items(): - min_shape = shapes.get("min_shape", list(input_tensor.shape)) - opt_shape = shapes.get("opt_shape", list(input_tensor.shape)) - max_shape = shapes.get("max_shape", list(input_tensor.shape)) - - logger.info(f"Setting input shapes - min: {min_shape}, opt: {opt_shape}, max: {max_shape}") - profile.set_shape(input_name, min_shape, opt_shape, max_shape) - else: - # Default shapes based on input tensor - input_shape = list(input_tensor.shape) - logger.info(f"Using default input shape: {input_shape}") - profile.set_shape("input", input_shape, input_shape, input_shape) - - -def _log_parser_errors(parser, logger: logging.Logger) -> None: - """Log TensorRT 
parser errors.""" - logger.error("Failed to parse ONNX model") - for error in range(parser.num_errors): - logger.error(f"Parser error: {parser.get_error(error)}") - - -# ============================================================================ -# Inference and Verification Functions -# ============================================================================ - - -def run_pytorch_inference( - model: torch.nn.Module, input_tensor: torch.Tensor, logger: logging.Logger -) -> Tuple[torch.Tensor, float]: - """Run PyTorch inference on CPU for verification and return output with latency.""" - # Move to CPU to avoid GPU memory issues - model_cpu = model.cpu() - input_cpu = input_tensor.cpu() - - model_cpu.eval() - with torch.no_grad(): - # Measure inference time - start_time = time.perf_counter() - output = model_cpu(input_cpu) - end_time = time.perf_counter() - - latency = (end_time - start_time) * 1000 # Convert to milliseconds - - # Handle different output formats - if hasattr(output, "output"): - output = output.output - elif isinstance(output, dict) and "output" in output: - output = output["output"] - - if not isinstance(output, torch.Tensor): - raise ValueError(f"Unexpected PyTorch output type: {type(output)}") - - logger.info(f"PyTorch inference latency: {latency:.2f} ms") - logger.info(f"Output verification:") - logger.info(f" Output: {output.cpu().numpy()}") - return output, latency - - -def run_onnx_inference( - onnx_path: str, - input_tensor: torch.Tensor, - ref_output: torch.Tensor, - logger: logging.Logger, -) -> bool: - """Verify ONNX model output against PyTorch model.""" - # Clear GPU cache - _clear_gpu_memory() - - # ONNX inference with timing - providers = ["CPUExecutionProvider"] - ort_session = ort.InferenceSession(onnx_path, providers=providers) - onnx_input = {ort_session.get_inputs()[0].name: input_tensor.cpu().numpy()} - - start_time = time.perf_counter() - onnx_output = ort_session.run(None, onnx_input)[0] - end_time = time.perf_counter() - onnx_latency = (end_time - start_time) * 1000 - - logger.info(f"ONNX inference latency: {onnx_latency:.2f} ms") - - # Ensure onnx_output is numpy array before comparison - if not isinstance(onnx_output, np.ndarray): - logger.error(f"Unexpected ONNX output type: {type(onnx_output)}") - return False - - # Compare outputs - return _compare_outputs(ref_output.cpu().numpy(), onnx_output, "ONNX", logger) - - -def run_tensorrt_inference( - tensorrt_path: str, - input_tensor: torch.Tensor, - ref_output: torch.Tensor, - logger: logging.Logger, -) -> bool: - """Verify TensorRT model output against PyTorch model.""" - # Clear GPU cache - _clear_gpu_memory() - - # Load TensorRT engine - trt_logger = trt.Logger(trt.Logger.WARNING) - trt.init_libnvinfer_plugins(trt_logger, "") - runtime = trt.Runtime(trt_logger) - - with open(tensorrt_path, "rb") as f: - engine = runtime.deserialize_cuda_engine(f.read()) - - if engine is None: - logger.error("Failed to deserialize TensorRT engine") - return False - - # Run TensorRT inference with timing - trt_output, latency = _run_tensorrt_inference(engine, input_tensor.cpu(), logger) - logger.info(f"TensorRT inference latency: {latency:.2f} ms") - - # Compare outputs - return _compare_outputs(ref_output.cpu().numpy(), trt_output, "TensorRT", logger) - - -def _run_tensorrt_inference(engine, input_tensor: torch.Tensor, logger: logging.Logger) -> Tuple[np.ndarray, float]: - """Run TensorRT inference and return output with timing.""" - context = engine.create_execution_context() - stream = cuda.Stream() - start = 
cuda.Event() - end = cuda.Event() - - # Get tensor names and shapes - input_name, output_name = None, None - for i in range(engine.num_io_tensors): - tensor_name = engine.get_tensor_name(i) - if engine.get_tensor_mode(tensor_name) == trt.TensorIOMode.INPUT: - input_name = tensor_name - elif engine.get_tensor_mode(tensor_name) == trt.TensorIOMode.OUTPUT: - output_name = tensor_name - - if input_name is None or output_name is None: - raise RuntimeError("Could not find input/output tensor names") - - # Prepare arrays - input_np = input_tensor.numpy().astype(np.float32) - if not input_np.flags["C_CONTIGUOUS"]: - input_np = np.ascontiguousarray(input_np) - - context.set_input_shape(input_name, input_np.shape) - output_shape = context.get_tensor_shape(output_name) - output_np = np.empty(output_shape, dtype=np.float32) - if not output_np.flags["C_CONTIGUOUS"]: - output_np = np.ascontiguousarray(output_np) - - # Allocate GPU memory - d_input = cuda.mem_alloc(input_np.nbytes) - d_output = cuda.mem_alloc(output_np.nbytes) - - try: - # Set tensor addresses - context.set_tensor_address(input_name, int(d_input)) - context.set_tensor_address(output_name, int(d_output)) - - # Run inference with timing - cuda.memcpy_htod_async(d_input, input_np, stream) - start.record(stream) - context.execute_async_v3(stream_handle=stream.handle) - end.record(stream) - cuda.memcpy_dtoh_async(output_np, d_output, stream) - stream.synchronize() - - latency = end.time_since(start) - return output_np, latency - - finally: - # Cleanup - try: - d_input.free() - d_output.free() - except: - pass - - -def _compare_outputs( - pytorch_output: np.ndarray, backend_output: np.ndarray, backend_name: str, logger: logging.Logger -) -> bool: - """Compare outputs between PyTorch and backend.""" - if not isinstance(backend_output, np.ndarray): - logger.error(f"Unexpected {backend_name} output type: {type(backend_output)}") - return False - - max_diff = np.abs(pytorch_output - backend_output).max() - mean_diff = np.abs(pytorch_output - backend_output).mean() - - logger.info(f"Output verification:") - logger.info(f" {backend_name} output: {backend_output}") - logger.info(f" Max difference with PyTorch: {max_diff:.6f}") - logger.info(f" Mean difference with PyTorch: {mean_diff:.6f}") - - success = max_diff < DEFAULT_VERIFICATION_TOLERANCE - if not success: - logger.warning(f"Large difference detected: {max_diff:.6f}") - - return success - - -def run_verification( - model: torch.nn.Module, - onnx_path: str, - trt_path: Optional[str], - input_tensors: Dict[str, torch.Tensor], - logger: logging.Logger, -) -> None: - """Run model verification for available backends.""" - _clear_gpu_memory() - - for key, input_tensor in input_tensors.items(): - logger.info("=" * 50) - logger.info(f"Verifying {LABELS[key]} sample...") - logger.info("-" * 50) - logger.info("Verifying PyTorch model...") - pytorch_output, pytorch_latency = run_pytorch_inference(model, input_tensor, logger) - logger.info( - f"PyTorch output for {LABELS[key]}: [SCORE_MISCALIBRATED, SCORE_CALIBRATED] = {pytorch_output.cpu().numpy()}" - ) - score_calibrated = pytorch_output.cpu().numpy()[0, 1] - pytorch_output.cpu().numpy()[0, 0] - if key == "0" and score_calibrated < 0: - logger.info(f"Negative calibration score detected for {LABELS[key]} sample: {score_calibrated:.6f}") - elif key == "0" and score_calibrated > 0: - logger.warning(f"Positive calibration score detected for {LABELS[key]} sample: {score_calibrated:.6f}") - elif key == "1" and score_calibrated > 0: - logger.info(f"Positive 
calibration score detected for {LABELS[key]} sample: {score_calibrated:.6f}") - elif key == "1" and score_calibrated < 0: - logger.warning(f"Negative calibration score detected for {LABELS[key]} sample: {score_calibrated:.6f}") - - if onnx_path and osp.exists(onnx_path): - logger.info("-" * 50) - logger.info("Verifying ONNX model...") - if run_onnx_inference(onnx_path, input_tensor, pytorch_output, logger): - logger.info("ONNX model verification passed!") - else: - logger.error("ONNX model verification failed!") - - if trt_path and osp.exists(trt_path): - logger.info("-" * 50) - logger.info("Verifying TensorRT model...") - if run_tensorrt_inference(trt_path, input_tensor, pytorch_output, logger): - logger.info("TensorRT model verification passed!") - else: - logger.error("TensorRT model verification failed!") - logger.info("=" * 50) - - -# ============================================================================ -# Export Pipeline Helper Functions -# ============================================================================ - - -def validate_and_prepare_paths( - config: DeploymentConfig, work_dir: str, existing_onnx: Optional[str], logger: logging.Logger -) -> Tuple[Optional[str], Optional[str]]: - """Determine and validate export paths for ONNX and TensorRT models. - - Returns: - Tuple of (onnx_path, trt_path) - """ - onnx_path = None - trt_path = None - - if config.should_export_onnx: - onnx_settings = config.onnx_settings - onnx_path = osp.join(work_dir, onnx_settings["save_file"]) - - if config.should_export_tensorrt: - # Use existing ONNX if provided, otherwise use the one we'll export - if existing_onnx and not config.should_export_onnx: - onnx_path = existing_onnx - if not osp.exists(onnx_path): - logger.error(f"Provided ONNX file does not exist: {onnx_path}") - return None, None - logger.info(f"Using existing ONNX file: {onnx_path}") - elif not onnx_path: - # Need ONNX for TensorRT but neither export nor existing file specified - logger.error("TensorRT export requires ONNX file. Set mode='both' or provide onnx_file in config.") - return None, None - - # Set TensorRT output path - onnx_settings = config.onnx_settings - trt_file = onnx_settings["save_file"].replace(".onnx", ".engine") - trt_path = osp.join(work_dir, trt_file) - - return onnx_path, trt_path - - -def export_models( - model: torch.nn.Module, - config: DeploymentConfig, - onnx_path: Optional[str], - trt_path: Optional[str], - input_tensor_calibrated: torch.Tensor, - device: torch.device, - logger: logging.Logger, -) -> Tuple[bool, torch.device]: - """Export models to ONNX and/or TensorRT formats. 
- - Returns: - Tuple of (success, updated_device) - """ - # Export ONNX - if config.should_export_onnx and onnx_path: - export_to_onnx(model, input_tensor_calibrated, onnx_path, config, logger) - - # Export TensorRT - if config.should_export_tensorrt and trt_path and onnx_path: - logger.info("Converting ONNX to TensorRT...") - - # Ensure CUDA device for TensorRT - if device.type != "cuda": - logger.warning("TensorRT requires CUDA device, switching to cuda") - device = torch.device("cuda") - - success = export_to_tensorrt(onnx_path, trt_path, input_tensor_calibrated, config, logger) - if success: - logger.info(f"TensorRT conversion successful: {trt_path}") - else: - logger.error("TensorRT conversion failed") - return False, device - - return True, device - - -def run_model_verification( - model: torch.nn.Module, - config: DeploymentConfig, - onnx_path: Optional[str], - trt_path: Optional[str], - input_tensor_calibrated: torch.Tensor, - input_tensor_miscalibrated: torch.Tensor, - existing_onnx: Optional[str], - logger: logging.Logger, -) -> None: - """Run model verification for exported formats.""" - if not config.should_verify: - return - - logger.info( - "Running verification for miscalibrated and calibrated samples with " - "an output array [SCORE_MISCALIBRATED, SCORE_CALIBRATED]..." - ) - input_tensors = {"0": input_tensor_miscalibrated, "1": input_tensor_calibrated} - - # Only verify formats that were exported or provided - onnx_path_for_verify = onnx_path if (config.should_export_onnx or existing_onnx) else None - trt_path_for_verify = trt_path if config.should_export_tensorrt else None - - run_verification(model, onnx_path_for_verify, trt_path_for_verify, input_tensors, logger) diff --git a/projects/CalibrationStatusClassification/deploy/main.py b/projects/CalibrationStatusClassification/deploy/main.py index 918bafe7..655d0be5 100644 --- a/projects/CalibrationStatusClassification/deploy/main.py +++ b/projects/CalibrationStatusClassification/deploy/main.py @@ -1,58 +1,70 @@ """ -CalibrationStatusClassification Model Deployment Script +CalibrationStatusClassification Model Deployment Script (Refactored) -This script exports CalibrationStatusClassification models to ONNX and TensorRT formats, -with comprehensive verification and performance benchmarking. +This script exports CalibrationStatusClassification models using the unified +deployment framework, with comprehensive verification and performance benchmarking. 
Features: - ONNX export with optimization - TensorRT conversion with precision policy support -- Dual verification (ONNX + TensorRT) on single samples -- Full model evaluation on multiple samples with metrics +- Unified verification across backends +- Full model evaluation with metrics - Performance benchmarking with latency statistics - Confusion matrix and per-class accuracy analysis """ +import logging import os.path as osp import mmengine import torch -from config import DeploymentConfig, parse_args, setup_logging -from evaluator import get_models_to_evaluate, run_full_evaluation -from exporters import ( - export_models, - load_sample_data_from_info_pkl, - run_model_verification, - validate_and_prepare_paths, -) from mmengine.config import Config from mmpretrain.apis import get_model +from autoware_ml.deployment.core import BaseDeploymentConfig, parse_base_args, setup_logging, verify_model_outputs +from autoware_ml.deployment.exporters import ONNXExporter, TensorRTExporter +from projects.CalibrationStatusClassification.deploy.data_loader import CalibrationDataLoader +from projects.CalibrationStatusClassification.deploy.evaluator import ( + ClassificationEvaluator, + get_models_to_evaluate, + run_full_evaluation, +) + + +class CalibrationDeploymentConfig(BaseDeploymentConfig): + """Extended configuration for CalibrationStatusClassification deployment.""" + + def __init__(self, deploy_cfg: Config): + super().__init__(deploy_cfg) + # CalibrationStatus-specific config can be added here if needed + def main(): """Main deployment function.""" - args = parse_args() + # Parse arguments + parser = parse_base_args() + args = parser.parse_args() logger = setup_logging(args.log_level) # Load configurations logger.info(f"Loading deploy config from: {args.deploy_cfg}") deploy_cfg = Config.fromfile(args.deploy_cfg) - config = DeploymentConfig(deploy_cfg) + config = CalibrationDeploymentConfig(deploy_cfg) logger.info(f"Loading model config from: {args.model_cfg}") model_cfg = Config.fromfile(args.model_cfg) - # Extract runtime configuration - work_dir = config.work_dir - device = config.device - info_pkl = config.runtime_io_config.get("info_pkl") - sample_idx = config.runtime_io_config.get("sample_idx", 0) - existing_onnx = config.runtime_io_config.get("onnx_file") - export_mode = config.export_config.get("mode", "both") + # Get configuration + work_dir = args.work_dir or config.export_config.work_dir + device = args.device or config.export_config.device + info_pkl = config.runtime_config.get("info_pkl") + sample_idx = config.runtime_config.get("sample_idx", 0) + existing_onnx = config.runtime_config.get("onnx_file") + export_mode = config.export_config.mode # Validate required parameters if not info_pkl: - logger.error("info_pkl path must be provided either in config or via --info-pkl") + logger.error("info_pkl path must be provided in config") return # Setup working directory @@ -69,8 +81,7 @@ def main(): eval_enabled = config.evaluation_config.get("enabled", False) if not eval_enabled: logger.error( - "Configuration error: export mode is 'none' (evaluation-only mode) " - "but evaluation.enabled is False. " + "Configuration error: export mode is 'none' but evaluation.enabled is False. " "Please set evaluation.enabled = True in your config." 
) return @@ -82,58 +93,126 @@ def main(): # Export phase if not is_eval_only: + logger.info(f"\n{'='*70}") + logger.info("Starting model export...") + logger.info(f"{'='*70}\n") + # Determine export paths - onnx_path, trt_path = validate_and_prepare_paths(config, work_dir, existing_onnx, logger) - if onnx_path is None and trt_path is None: - return + onnx_path = None + trt_path = None + + onnx_settings = config.get_onnx_settings() + + if config.export_config.should_export_onnx(): + onnx_path = osp.join(work_dir, onnx_settings["save_file"]) + + if config.export_config.should_export_tensorrt(): + if existing_onnx and not config.export_config.should_export_onnx(): + onnx_path = existing_onnx + if not osp.exists(onnx_path): + logger.error(f"Provided ONNX file does not exist: {onnx_path}") + return + elif not onnx_path: + logger.error("TensorRT export requires ONNX file. Set mode='both' or provide onnx_file in config.") + return + + trt_file = onnx_settings["save_file"].replace(".onnx", ".engine") + trt_path = osp.join(work_dir, trt_file) # Load model logger.info(f"Loading model from checkpoint: {args.checkpoint}") - device = torch.device(device) - model = get_model(model_cfg, args.checkpoint, device=device) + torch_device = torch.device(device) + model = get_model(model_cfg, args.checkpoint, device=torch_device) - # Load sample data + # Create data loaders for calibrated and miscalibrated samples logger.info(f"Loading sample data from info.pkl: {info_pkl}") - input_tensor_calibrated = load_sample_data_from_info_pkl(info_pkl, model_cfg, 0.0, sample_idx, device=device) - input_tensor_miscalibrated = load_sample_data_from_info_pkl( - info_pkl, model_cfg, 1.0, sample_idx, device=device + data_loader_calibrated = CalibrationDataLoader( + info_pkl_path=info_pkl, + model_cfg=model_cfg, + miscalibration_probability=0.0, + device=device, + ) + data_loader_miscalibrated = CalibrationDataLoader( + info_pkl_path=info_pkl, + model_cfg=model_cfg, + miscalibration_probability=1.0, + device=device, ) - # Export models - success, device = export_models(model, config, onnx_path, trt_path, input_tensor_calibrated, device, logger) - if not success: - logger.error("Export failed") - return + # Load sample inputs + input_tensor_calibrated = data_loader_calibrated.load_and_preprocess(sample_idx) + input_tensor_miscalibrated = data_loader_miscalibrated.load_and_preprocess(sample_idx) + + # Export ONNX + if config.export_config.should_export_onnx() and onnx_path: + logger.info(f"\nExporting to ONNX...") + onnx_exporter = ONNXExporter(onnx_settings, logger) + success = onnx_exporter.export(model, input_tensor_calibrated, onnx_path) + if success: + logger.info(f"✓ ONNX export successful: {onnx_path}") + onnx_exporter.validate_export(onnx_path) + else: + logger.error("✗ ONNX export failed") + return + + # Export TensorRT + if config.export_config.should_export_tensorrt() and trt_path and onnx_path: + logger.info(f"\nExporting to TensorRT...") + trt_settings = config.get_tensorrt_settings() + trt_exporter = TensorRTExporter(trt_settings, logger) + success = trt_exporter.export(model, input_tensor_calibrated, trt_path, onnx_path=onnx_path) + if success: + logger.info(f"✓ TensorRT export successful: {trt_path}") + else: + logger.error("✗ TensorRT export failed") + return + + # Run verification if requested + if config.export_config.verify: + logger.info(f"\n{'='*70}") + logger.info("Running model verification...") + logger.info(f"{'='*70}\n") + + test_inputs = { + "miscalibrated_sample": input_tensor_miscalibrated, + 
"calibrated_sample": input_tensor_calibrated, + } + + verify_onnx = ( + onnx_path if config.export_config.should_export_onnx() else (existing_onnx if existing_onnx else None) + ) + verify_trt = trt_path if config.export_config.should_export_tensorrt() else None + + verification_results = verify_model_outputs( + pytorch_model=model, + test_inputs=test_inputs, + onnx_path=verify_onnx, + tensorrt_path=verify_trt, + device=device, + logger=logger, + ) - # Update tensors if device changed - if device.type == "cuda": - input_tensor_calibrated = input_tensor_calibrated.to(device) - input_tensor_miscalibrated = input_tensor_miscalibrated.to(device) - - # Run verification - run_model_verification( - model, - config, - onnx_path, - trt_path, - input_tensor_calibrated, - input_tensor_miscalibrated, - existing_onnx, - logger, - ) + # Check if all verifications passed + all_passed = all(verification_results.values()) + if all_passed: + logger.info("✓ All verifications PASSED") + else: + logger.warning("⚠ Some verifications FAILED") # Log exported formats exported_formats = [] - if config.should_export_onnx: + if config.export_config.should_export_onnx(): exported_formats.append("ONNX") - if config.should_export_tensorrt: + if config.export_config.should_export_tensorrt(): exported_formats.append("TensorRT") if exported_formats: - logger.info(f"Exported formats: {', '.join(exported_formats)}") + logger.info(f"\nExported formats: {', '.join(exported_formats)}") + logger.info(f"\n{'='*70}") logger.info("Deployment completed successfully!") + logger.info(f"{'='*70}\n") else: - logger.info("Evaluation-only mode: Skipping model loading and export") + logger.info("Evaluation-only mode: Skipping model loading and export\n") # Evaluation phase eval_cfg = config.evaluation_config @@ -142,20 +221,24 @@ def main(): verbose_mode = eval_cfg.get("verbose", False) if should_evaluate: - logger.info(f"\n{'='*60}") + logger.info(f"\n{'='*70}") logger.info("Starting full model evaluation...") - logger.info(f"{'='*60}") + logger.info(f"{'='*70}\n") models_to_evaluate = get_models_to_evaluate(eval_cfg, logger) - run_full_evaluation( - models_to_evaluate, - model_cfg, - info_pkl, - device, - num_samples, - verbose_mode, - logger, - ) + + if models_to_evaluate: + run_full_evaluation( + models_to_evaluate, + model_cfg, + info_pkl, + device, + num_samples, + verbose_mode, + logger, + ) + else: + logger.warning("No models found for evaluation") if __name__ == "__main__": diff --git a/tools/calibration_classification/README.md b/tools/calibration_classification/README.md index f170e90d..d80f8480 100644 --- a/tools/calibration_classification/README.md +++ b/tools/calibration_classification/README.md @@ -266,8 +266,10 @@ evaluation = dict( enabled=False, # Enable evaluation by default num_samples=100, # Number of samples to evaluate verbose=False, # Enable detailed logging - onnx_path=None, # Optional: path to existing ONNX model - tensorrt_path=None, # Optional: path to existing TensorRT engine + models=dict( + onnx=None, # Optional: path to existing ONNX model + tensorrt=None, # Optional: path to existing TensorRT engine + ), ) ``` From f6f0942e42160c7ebb59fcb48764cee6ca8341d6 Mon Sep 17 00:00:00 2001 From: vividf Date: Wed, 8 Oct 2025 14:35:41 +0900 Subject: [PATCH 18/19] fix: fallback to cpu if cuda fail due to gpu issue Signed-off-by: vividf --- .../deployment/backends/onnx_backend.py | 43 ++++++++++++++++--- 1 file changed, 37 insertions(+), 6 deletions(-) diff --git a/autoware_ml/deployment/backends/onnx_backend.py 
b/autoware_ml/deployment/backends/onnx_backend.py index 3fa51d82..f8e0ed37 100644 --- a/autoware_ml/deployment/backends/onnx_backend.py +++ b/autoware_ml/deployment/backends/onnx_backend.py @@ -1,5 +1,6 @@ """ONNX Runtime inference backend.""" +import logging import os import time from typing import Tuple @@ -28,6 +29,8 @@ def __init__(self, model_path: str, device: str = "cpu"): """ super().__init__(model_path, device) self._session = None + self._fallback_attempted = False + self._logger = logging.getLogger(__name__) def load_model(self) -> None: """Load ONNX model.""" @@ -37,12 +40,15 @@ def load_model(self) -> None: # Select execution provider based on device if self.device.startswith("cuda"): providers = ["CUDAExecutionProvider", "CPUExecutionProvider"] + self._logger.info("Attempting to use CUDA acceleration (will fallback to CPU if needed)...") else: providers = ["CPUExecutionProvider"] + self._logger.info("Using CPU for ONNX inference") try: self._session = ort.InferenceSession(self.model_path, providers=providers) self._model = self._session # For is_loaded check + self._logger.info(f"ONNX session using providers: {self._session.get_providers()}") except Exception as e: raise RuntimeError(f"Failed to load ONNX model: {e}") @@ -65,16 +71,41 @@ def infer(self, input_tensor: torch.Tensor) -> Tuple[np.ndarray, float]: input_name = self._session.get_inputs()[0].name onnx_input = {input_name: input_array} - # Run inference - start_time = time.perf_counter() - output = self._session.run(None, onnx_input)[0] - end_time = time.perf_counter() + try: + # Run inference + start_time = time.perf_counter() + output = self._session.run(None, onnx_input)[0] + end_time = time.perf_counter() + + latency_ms = (end_time - start_time) * 1000 + + return output, latency_ms + except Exception as e: + # Check if this is a CUDA/PTX error and we haven't tried CPU fallback yet + if ("PTX" in str(e) or "CUDA" in str(e)) and not self._fallback_attempted: + self._logger.warning(f"CUDA runtime error detected: {e}") + self._logger.warning("Recreating session with CPU provider...") + self._fallback_attempted = True + + # Recreate session with CPU provider + self._session = ort.InferenceSession(self.model_path, providers=["CPUExecutionProvider"]) + self._logger.info(f"Session recreated with providers: {self._session.get_providers()}") + + # Retry inference with CPU + input_name = self._session.get_inputs()[0].name + onnx_input = {input_name: input_array} + start_time = time.perf_counter() + output = self._session.run(None, onnx_input)[0] + end_time = time.perf_counter() - latency_ms = (end_time - start_time) * 1000 + latency_ms = (end_time - start_time) * 1000 - return output, latency_ms + return output, latency_ms + else: + raise def cleanup(self) -> None: """Clean up ONNX Runtime resources.""" self._session = None self._model = None + self._fallback_attempted = False From c977363f6f0158b156cd8a9fc44d6f1a38914791 Mon Sep 17 00:00:00 2001 From: vividf Date: Wed, 8 Oct 2025 17:21:45 +0900 Subject: [PATCH 19/19] chore: add deployment readme Signed-off-by: vividf --- autoware_ml/deployment/README.md | 352 +++++++++++++++++++++++++++++++ 1 file changed, 352 insertions(+) create mode 100644 autoware_ml/deployment/README.md diff --git a/autoware_ml/deployment/README.md b/autoware_ml/deployment/README.md new file mode 100644 index 00000000..ddfe7936 --- /dev/null +++ b/autoware_ml/deployment/README.md @@ -0,0 +1,352 @@ +# Autoware ML Deployment Framework + +A unified, task-agnostic deployment framework for exporting, 
verifying, and evaluating machine learning models across different backends (ONNX, TensorRT) with comprehensive support for model validation and performance benchmarking. + +## Table of Contents + +- [Overview](#overview) +- [Features](#features) +- [Current Support](#current-support) +- [Architecture](#architecture) +- [Quick Start](#quick-start) +- [Usage Guide](#usage-guide) + - [Basic Export](#basic-export) + - [Export with Verification](#export-with-verification) + - [Export with Full Evaluation](#export-with-full-evaluation) + - [Evaluation Only Mode](#evaluation-only-mode) +- [Configuration Reference](#configuration-reference) + +## Overview + +The Autoware ML Deployment Framework provides a standardized pipeline for deploying trained models to production-ready inference backends. It handles the complete deployment workflow from model export to validation and performance analysis, with a focus on ensuring model quality and correctness across different deployment targets. + +### Key Capabilities + +- **Multi-Backend Export**: Export models to ONNX and TensorRT formats +- **Precision Policy Support**: Flexible precision policies (FP32, FP16, TF32, INT8) +- **Automated Verification**: Cross-backend output validation to ensure correctness +- **Performance Benchmarking**: Comprehensive latency and throughput analysis +- **Full Evaluation**: Complete model evaluation with metrics and confusion matrices +- **Modular Design**: Easy to extend for new tasks and backends + + + +## Current Support + +### Detection 3D +* [ ] BEVFusion +* [ ] CenterPoint +* [ ] TransFusion +* [ ] StreamPETR + +### Detection 2D +* [ ] YOLOX +* [ ] YOLOX_opt (Traffic Light Detection) +* [ ] FRNet +* [ ] GLIP (Grounded Language-Image Pre-training) + +### Classification +* [X] CalibrationStatusClassification +* [ ] MobileNetv2 (Traffic Light Classification) + +### Backbones & Components +* [ ] SwinTransformer +* [ ] ConvNeXt_PC (Point Cloud) +* [ ] SparseConvolution + +### Multimodal +* [ ] BLIP-2 (Vision-Language Model) + +> **Note**: Currently, only **CalibrationStatusClassification** has full deployment framework support. Other models may have custom deployment scripts in their respective project directories but are not yet integrated with the unified deployment framework. + +### Supported Backends + +| Backend | Export | Inference | Verification | +|---------|--------|-----------|--------------| +| **ONNX** | ✅ | ✅ | ✅ | +| **TensorRT** | ✅ | ✅ | ✅ | + +## Architecture + +The deployment framework follows a modular architecture: + +``` +autoware_ml/deployment/ +├── core/ # Core abstractions +│ ├── base_config.py # Configuration management +│ ├── base_data_loader.py # Data loading interface +│ ├── base_evaluator.py # Evaluation interface +│ └── verification.py # Cross-backend verification +├── backends/ # Backend implementations +│ ├── pytorch_backend.py # PyTorch inference +│ ├── onnx_backend.py # ONNX Runtime inference +│ └── tensorrt_backend.py # TensorRT inference +└── exporters/ # Export implementations + ├── onnx_exporter.py # ONNX export + └── tensorrt_exporter.py # TensorRT export +``` + +### Design Principles + +1. **Task-Agnostic Core**: Base classes are independent of specific tasks +2. **Backend Abstraction**: Unified interface across different inference backends +3. **Extensibility**: Easy to add new tasks, backends, or exporters +4. **Configuration-Driven**: All settings managed through Python config files +5. 
**Comprehensive Validation**: Built-in verification at every step + + +## Quick Start + +Here's a minimal example to export and verify a calibration classification model: + +```bash +# Export to both ONNX and TensorRT with verification +python projects/CalibrationStatusClassification/deploy/main.py \ + deploy_config.py \ + model_config.py \ + checkpoint.pth \ + --work-dir work_dirs/deployment +``` + + +## Usage Guide + +### Basic Export + +Export a model to ONNX format: + +**1. Create deployment config** (`deploy_config_onnx.py`): + +```python +export = dict( + mode='onnx', # Export mode: 'onnx', 'trt', 'both', 'none' + verify=False, # Skip verification + device='cuda:0', # Device for export + work_dir='work_dirs/deployment' +) + +# Runtime I/O settings +runtime_io = dict( + info_pkl='path/to/info.pkl', # Dataset info file + sample_idx=0 # Sample index for export +) + +# ONNX configuration +onnx_config = dict( + opset_version=16, + do_constant_folding=True, + input_names=['input'], + output_names=['output'], + save_file='model.onnx', + dynamic_axes={ + 'input': {0: 'batch_size'}, + 'output': {0: 'batch_size'} + } +) + +# Backend configuration +backend_config = dict( + common_config=dict( + precision_policy='auto', # Options: 'auto', 'fp16', 'fp32_tf32', 'int8' + max_workspace_size=1 << 30 # 1 GB for TensorRT + ) +) +``` + +**2. Run export**: + +```bash +python projects/CalibrationStatusClassification/deploy/main.py \ + deploy_config_onnx.py \ + path/to/model_config.py \ + path/to/checkpoint.pth +``` + +### Export with Verification + +Verify that exported models produce correct outputs: + +**Update config**: + +```python +export = dict( + mode='both', # Export to both ONNX and TensorRT + verify=True, # Enable verification + device='cuda:0', + work_dir='work_dirs/deployment' +) + +# ... rest of config ... +``` + +**Run with verification**: + +```bash +python projects/CalibrationStatusClassification/deploy/main.py \ + deploy_config_verify.py \ + path/to/model_config.py \ + path/to/checkpoint.pth +``` + +### Export with Full Evaluation + +Perform complete model evaluation on a validation dataset: + +**Update config** (`deploy_config_eval.py`): + +```python +export = dict( + mode='both', + verify=True, + device='cuda:0', + work_dir='work_dirs/deployment' +) + +# Enable evaluation +evaluation = dict( + enabled=True, + num_samples=1000, # Number of samples to evaluate + verbose=False, # Set True for detailed per-sample output + models_to_evaluate=[ + 'pytorch', # Evaluate PyTorch model + 'onnx', # Evaluate ONNX model + 'tensorrt' # Evaluate TensorRT model + ] +) + +# ... rest of config ... +``` + +**Run with evaluation**: + +```bash +python projects/CalibrationStatusClassification/deploy/main.py \ + deploy_config_eval.py \ + path/to/model_config.py \ + path/to/checkpoint.pth +``` + +**Output includes**: +- Per-model accuracy and performance metrics +- Confusion matrices for each backend +- Latency statistics (min, max, mean, median, p95, p99) +- Per-class accuracy breakdown + +### Evaluation Only Mode + +Run evaluation without exporting (useful for testing existing deployments): + +**Config** (`deploy_config_eval_only.py`): + +```python +export = dict( + mode='none', # Skip export + device='cuda:0', + work_dir='work_dirs/deployment' +) + +evaluation = dict( + enabled=True, + num_samples=1000, + models_to_evaluate=['onnx', 'tensorrt'] # Evaluate existing models +) + +runtime_io = dict( + info_pkl='path/to/info.pkl', + onnx_file='work_dirs/deployment/model.onnx' # Path to existing ONNX +) + +# ... 
rest of config ...
+```
+
+**Run**:
+
+```bash
+# No checkpoint needed in eval-only mode
+python projects/CalibrationStatusClassification/deploy/main.py \
+    deploy_config_eval_only.py \
+    path/to/model_config.py
+```
+
+## Configuration Reference
+
+### Export Configuration
+
+```python
+export = dict(
+    mode='both',          # 'onnx', 'trt', 'both', 'none'
+    verify=True,          # Enable cross-backend verification
+    device='cuda:0',      # Device for export/inference
+    work_dir='work_dirs'  # Output directory
+)
+```
+
+### Runtime I/O Configuration
+
+```python
+runtime_io = dict(
+    info_pkl='path/to/dataset/info.pkl',     # Required: dataset info file
+    sample_idx=0,                            # Sample index for export
+    onnx_file='path/to/existing/model.onnx'  # Optional: use existing ONNX
+)
+```
+
+### ONNX Configuration
+
+```python
+onnx_config = dict(
+    opset_version=16,             # ONNX opset version
+    do_constant_folding=True,     # Enable constant folding optimization
+    input_names=['input'],        # Input tensor names
+    output_names=['output'],      # Output tensor names
+    save_file='model.onnx',       # Output filename
+    export_params=True,           # Export model parameters
+    dynamic_axes={                # Dynamic dimensions
+        'input': {0: 'batch_size'},
+        'output': {0: 'batch_size'}
+    },
+    keep_initializers_as_inputs=False  # Keep weights as initializers, not graph inputs
+)
+```
+
+### Backend Configuration
+
+```python
+backend_config = dict(
+    common_config=dict(
+        precision_policy='fp16',    # Precision policy (see below)
+        max_workspace_size=1 << 30  # TensorRT workspace size in bytes (1 GiB here)
+    ),
+    model_inputs=[                  # Optional: input specifications
+        dict(
+            name='input',
+            shape=(1, 5, 512, 512),
+            dtype='float32'
+        )
+    ]
+)
+```
+
+### Precision Policies
+
+| Policy | Description | Use Case |
+|--------|-------------|----------|
+| `auto` | Let TensorRT decide | Default, balanced performance |
+| `fp16` | Half precision (FP16) | Typically ~2x faster with minimal accuracy loss |
+| `fp32_tf32` | TensorFloat-32 (TF32) | Good balance for Ampere+ GPUs |
+| `strongly_typed` | Strict type enforcement | For debugging |
+
+### Evaluation Configuration
+
+```python
+evaluation = dict(
+    enabled=True,                   # Enable evaluation
+    num_samples=1000,               # Number of samples to evaluate
+    verbose=False,                  # Detailed per-sample output
+    models_to_evaluate=[            # Backends to evaluate
+        'pytorch',
+        'onnx',
+        'tensorrt'
+    ]
+)
+```
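+
+### Putting It Together
+
+The reference sections above can be combined into a single deployment config. The sketch below is illustrative only: the file name (`deploy_config_full.py`) and all paths are placeholders, and the field names follow the examples in this README — adjust them if your project's config schema differs.
+
+```python
+# deploy_config_full.py -- illustrative sketch; replace placeholder paths with your own
+export = dict(
+    mode='both',      # Export ONNX, then build a TensorRT engine from it
+    verify=True,      # Cross-check PyTorch / ONNX / TensorRT outputs
+    device='cuda:0',
+    work_dir='work_dirs/deployment',
+)
+
+runtime_io = dict(
+    info_pkl='path/to/dataset/info.pkl',  # Placeholder: dataset info file
+    sample_idx=0,                         # Sample used for export and verification
+)
+
+onnx_config = dict(
+    opset_version=16,
+    do_constant_folding=True,
+    input_names=['input'],
+    output_names=['output'],
+    save_file='model.onnx',
+    export_params=True,
+    dynamic_axes={'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}},
+    keep_initializers_as_inputs=False,
+)
+
+backend_config = dict(
+    common_config=dict(
+        precision_policy='fp16',     # See the precision policy table above
+        max_workspace_size=1 << 30,  # 1 GiB TensorRT workspace
+    ),
+)
+
+evaluation = dict(
+    enabled=True,
+    num_samples=100,
+    verbose=False,
+    models_to_evaluate=['pytorch', 'onnx', 'tensorrt'],
+)
+```
+
+Run it the same way as the earlier examples:
+
+```bash
+python projects/CalibrationStatusClassification/deploy/main.py \
+    deploy_config_full.py \
+    path/to/model_config.py \
+    path/to/checkpoint.pth
+```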