diff --git a/DeepDataMiningLearning/detection/dataset.py b/DeepDataMiningLearning/detection/dataset.py
index ed252f6..d55d7af 100644
--- a/DeepDataMiningLearning/detection/dataset.py
+++ b/DeepDataMiningLearning/detection/dataset.py
@@ -8,6 +8,7 @@ from PIL import Image
 import csv
 
 from DeepDataMiningLearning.detection.dataset_kitti import KittiDataset
+from DeepDataMiningLearning.detection.dataset_nuscene import NuscenesDataset
 from DeepDataMiningLearning.detection.dataset_waymococo import WaymoCOCODataset
 from collections import defaultdict
 
@@ -157,6 +158,8 @@ def get_dataset(datasetname, is_train, is_val, args):
         ds, num_classes = get_waymococodataset(is_train, is_val, args)
     elif datasetname.lower() == 'yolo':
         ds, num_classes = get_yolodataset(is_train, is_val, args)
+    elif datasetname.lower() == 'nuscene':
+        ds, num_classes = get_nuscenedataset(is_train, is_val, args)
     return ds, num_classes
 
 def get_transform(is_train, args):
@@ -185,6 +188,22 @@ def get_cocodataset(is_train, is_val, args):
     )
     return ds, num_classes
 
+# Utility method to load the nuScenes dataset
+def get_nuscenedataset(is_train, is_val, args):
+    rootPath = args.data_path
+    if is_val:
+        transformfunc = get_transform(False, args)
+        metadata_subdir = 'v1.0-mini' # small dataset for development
+        metadata_subdir = 'v1.0-val' # full validation dataset
+        dataset = NuscenesDataset(rootPath, metadata_subdir=metadata_subdir, train=True, transform=transformfunc)
+    else: # Training
+        transformfunc = get_transform(True, args) # add augmentation
+        metadata_subdir = 'v1.0-mini' # small dataset for development
+        metadata_subdir = 'v1.0-train' # full training dataset
+        dataset = NuscenesDataset(rootPath, metadata_subdir=metadata_subdir, train=is_train, transform=transformfunc)
+
+    num_classes = dataset.numclass
+    return dataset, num_classes
 
 def get_kittidataset(is_train, is_val, args):
     rootPath=args.data_path
diff --git a/DeepDataMiningLearning/detection/dataset_nuscene.py b/DeepDataMiningLearning/detection/dataset_nuscene.py
new file mode 100644
index 0000000..4fb53ad
--- /dev/null
+++ b/DeepDataMiningLearning/detection/dataset_nuscene.py
@@ -0,0 +1,224 @@
+import random
+import torch
+from torch.utils.data import Dataset
+from typing import Any, Callable, List, Optional, Tuple
+import os
+import numpy as np
+import json
+from PIL import Image
+from pathlib import Path
+
+# Nuscenes dataset class
+# This class is used to load the nuScenes dataset.
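+# The metadata subdirectory passed to the constructor (e.g. v1.0-mini) is
+# expected to contain at least category.json, sample_data.json, and
+# object_ann.json (read below); image paths are resolved relative to data_dir.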
+# Ref: https://www.nuscenes.org/
+#
+# Mini dataset for development
+# metadata and images - https://motional-nuscenes.s3-ap-northeast-1.amazonaws.com/public/nuimages-v1.0/nuimages-v1.0-mini.tgz
+# Full dataset for training
+# metadata - https://motional-nuscenes.s3-ap-northeast-1.amazonaws.com/public/nuimages-v1.0/nuimages-v1.0-all-metadata.tgz
+# images - https://motional-nuscenes.s3-ap-northeast-1.amazonaws.com/public/nuimages-v1.0/nuimages-v1.0-all-samples.tgz
+# Extract all archives into the same data directory (passed below as data_dir).
+class NuscenesDataset(Dataset):
+    ## Nuscenes dataset class initialization
+    # Args:
+    #   data_dir: str: Path to the data directory
+    #   metadata_subdir: str: Metadata subdirectory, e.g. 'v1.0-mini', 'v1.0-train' or 'v1.0-val'
+    #   train: bool: True if training dataset, False otherwise
+    #   transform: Optional[Callable]: Transform to apply to the image and target
+    def __init__(self,
+                 data_dir: str,
+                 metadata_subdir: str = 'v1.0-mini',
+                 train: bool = True,
+                 transform: Optional[Callable] = None):
+        self.data_dir = data_dir
+        self.train = train
+        self.transform = transform
+        self.metadata_dir = os.path.join(data_dir, metadata_subdir)
+
+        # Read categories
+        self.read_categories()
+        self.numclass = len(self.INSTANCE_CATEGORY_NAMES)
+
+        # Read annotations
+        self.read_annotations()
+
+        # Read sample_data.json
+        self.sample_data_list = []
+        sample_data_path = os.path.join(self.metadata_dir, 'sample_data.json')
+        with open(sample_data_path, 'r') as f:
+            samples_list = json.load(f)
+        # only keep files for which annotations are available
+        for sample in samples_list:
+            token = sample['token']
+            if token in self.annotations:
+                self.sample_data_list.append({
+                    'image_id': len(self.sample_data_list),
+                    'filename': sample['filename'],
+                    'token': token
+                })
+
+    def __len__(self):
+        return len(self.sample_data_list)
+
+    def __getitem__(self, idx: int) -> Tuple[Any, Any]:
+        """Get the item at a given index.
+
+        Args:
+            idx (int): Index
+        Returns:
+            tuple: (image, target), where
+            target is a dictionary with the following keys (see get_label):
+
+            - boxes: float32 tensor [N, 4], in [xmin, ymin, xmax, ymax] format
+            - labels: int64 tensor [N]
+            - image_id: int
+            - area: float32 tensor [N]
+            - iscrowd: int64 tensor [N]
+
+        """
+        if idx >= len(self.sample_data_list):
+            raise IndexError(f"Index {idx} is out of range")
+        sample_data = self.sample_data_list[idx]
+        image_file = sample_data['filename']
+        image_path = Path(os.path.join(self.data_dir, image_file))
+        if not image_path.exists():
+            raise FileNotFoundError(f"Image file {image_path} does not exist")
+        image = Image.open(image_path).convert('RGB')
+        # targets are needed for both training and evaluation
+        target = self.get_label(idx) #dict of tensors
+
+        if self.transform:
+            image, target = self.transform(image, target)
+        return image, target
+
+    # read annotation files
+    # In the nuScenes dataset, each image has a unique token and the corresponding
+    # annotations are stored in the object_ann.json file.
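+    # For illustration, an object_ann.json entry contains fields like the
+    # following (subset shown; format assumed from the fields read below):
+    #   {"sample_data_token": "...", "category_token": "...",
+    #    "bbox": [xmin, ymin, xmax, ymax], ...}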
+    # The token in sample_data.json maps to sample_data_token in object_ann.json
+    def read_annotations(self):
+        anns = {}
+        with open(os.path.join(self.metadata_dir, 'object_ann.json'), 'r') as f:
+            ann_list = json.load(f)
+        for ann in ann_list:
+            image_key = ann['sample_data_token']
+            # create the per-image annotation list on first use, then append to it
+            anns.setdefault(image_key, []).append({
+                'bbox': ann['bbox'],
+                'category_token': ann['category_token']
+            })
+        self.annotations = anns
+        return
+
+    # Read category.json and create a list of category names
+    # The nuScenes dataset has a token (unique id) for each category; create a map
+    # from token to index in the list of category names
+    def read_categories(self):
+        category_token_idx_map = {}
+        category_names = []
+
+        with open(os.path.join(self.metadata_dir, 'category.json'), 'r') as f:
+            categories = json.load(f)
+        # populate category_token_idx_map and category_names
+        for i, category in enumerate(categories):
+            category_token_idx_map[category['token']] = i
+            category_names.append(category['name'])
+
+        self.INSTANCE_CATEGORY_NAMES = category_names
+        self.INSTANCE_CATEGORY_TOKEN2IDX = category_token_idx_map
+        return
+
+    # Get the label for the image at a given index
+    # Returns a dictionary with keys: boxes, labels, image_id, area, iscrowd
+    def get_label(self, idx: int) -> dict:
+        annotations = self.annotations.get(self.sample_data_list[idx]['token'], [])
+        boxes = []
+        labels = []
+        for ann in annotations:
+            category_idx = self.INSTANCE_CATEGORY_TOKEN2IDX[ann['category_token']]
+            boxes.append(ann['bbox'])
+            labels.append(category_idx)
+
+        num_objs = len(labels)
+        newtarget = {}
+        newtarget["image_id"] = idx
+        if num_objs > 0:
+            # convert everything into a torch.Tensor
+            boxes = torch.as_tensor(boxes, dtype=torch.float32)
+            newtarget["boxes"] = boxes
+            newtarget["labels"] = torch.as_tensor(labels, dtype=torch.int64)
+            #newtarget["masks"] = masks
+            # bbox format is [xmin, ymin, xmax, ymax]
+            newtarget["area"] = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
+            # suppose all instances are not crowd
+            newtarget["iscrowd"] = torch.zeros((num_objs,), dtype=torch.int64)
+        else:
+            # negative example, ref: https://github.com/pytorch/vision/issues/2144
+            # the tensors must be empty with the correct shape, not missing
+            newtarget["boxes"] = torch.zeros((0, 4), dtype=torch.float32)
+            newtarget["labels"] = torch.zeros((0,), dtype=torch.int64)
+            newtarget["area"] = torch.zeros((0,), dtype=torch.float32)
+            newtarget["iscrowd"] = torch.zeros((0,), dtype=torch.int64)
+        return newtarget
+
+
+# Test the dataset
+# It loads the dataset, draws bounding boxes and the corresponding labels on a
+# random image, and saves the result to out.png in the data directory for verification.
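+# Usage (assuming the repository root is on PYTHONPATH and data_dir is adjusted):
+#   python -m DeepDataMiningLearning.detection.dataset_nuscene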
+#
+# This demonstrates that the dataset is loaded properly and can be used for training
+if __name__ == "__main__":
+    data_dir = '/home/nadeem/sjsu/data/nuScenes/'
+
+    # metadata subdirectory to test (the last uncommented assignment wins)
+    # train
+    metadata_subdir='v1.0-train'
+    # validation
+    metadata_subdir='v1.0-val'
+    # mini
+    metadata_subdir='v1.0-mini'
+
+    import DeepDataMiningLearning.detection.transforms as T
+    def get_transformsimple():
+        transforms = []
+        transforms.append(T.PILToTensor())
+        transforms.append(T.ToDtype(torch.float, scale=True))
+        return T.Compose(transforms)
+
+    ds = NuscenesDataset(data_dir=data_dir, metadata_subdir=metadata_subdir, train=True, transform=get_transformsimple())
+    print("Size of dataset", len(ds))
+
+    test_idx = random.randint(0, len(ds) - 1)
+    img, target = ds[test_idx]
+
+    print(img.shape) #img is a CxHxW tensor after the transform
+    print(target.keys()) #['boxes', 'labels', 'image_id', 'area', 'iscrowd']
+    boxes = target['boxes']
+    labels = target['labels']
+    print(boxes.shape) #torch.Size([n, 4])
+    print(labels)
+    labels = [ds.INSTANCE_CATEGORY_NAMES[int(label)] for label in labels]
+    print(labels)
+
+    # Draw bounding boxes; convert the float image back to uint8 first,
+    # since draw_bounding_boxes expects a uint8 image tensor
+    import torchvision
+    img = (img * 255).to(torch.uint8)
+    img = torchvision.utils.draw_bounding_boxes(img, boxes, labels, width=2, colors="yellow", fill=False)
+
+    img = torchvision.transforms.ToPILImage()(img)
+
+    # save output
+    img.save(os.path.join(data_dir, "out.png"))
diff --git a/DeepDataMiningLearning/detection/mytrain.py b/DeepDataMiningLearning/detection/mytrain.py
index af7d843..257dd07 100644
--- a/DeepDataMiningLearning/detection/mytrain.py
+++ b/DeepDataMiningLearning/detection/mytrain.py
@@ -222,7 +222,7 @@ def main(args):
     if args.rpn_score_thresh is not None:
         kwargs["rpn_score_thresh"] = args.rpn_score_thresh
 
-    model, preprocess, classes = create_detectionmodel(args.model, num_classes, args.trainable)
+    model, preprocess, classes = create_detectionmodel(args.model, num_classes=num_classes, trainable_layers=args.trainable, device=device)
     model.to(device)
 
     if args.distributed and args.sync_bn:
@@ -330,7 +330,7 @@ def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq, sc
         images = list(image.to(device) for image in images) #list of [3, 1280, 1920]
         targets = [{k: v.to(device) if isinstance(v, torch.Tensor) else v for k, v in t.items()} for t in targets] #tuple to list
         #with torch.cuda.amp.autocast(enabled=scaler is not None):
-        with torch.amp.autocast(enabled=scaler is not None):
+        with torch.amp.autocast(device_type=device.type, enabled=scaler is not None):
            loss_dict = model(images, targets)
            losses = sum(loss for loss in loss_dict.values()) #single value
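
As a quick sanity check outside the diff, the new dataset can be wired into a
standard detection DataLoader. The following is a minimal sketch under stated
assumptions: the data path is a placeholder, 'v1.0-mini' is the chosen split,
and collate_fn is not part of this diff but follows the usual torchvision
detection convention of batching variable-sized targets as tuples.

    import torch
    from torch.utils.data import DataLoader
    import DeepDataMiningLearning.detection.transforms as T
    from DeepDataMiningLearning.detection.dataset_nuscene import NuscenesDataset

    def collate_fn(batch):
        # detection targets vary in size per image, so keep them as tuples
        return tuple(zip(*batch))

    # same transform chain as get_transformsimple in the test block above
    transform = T.Compose([T.PILToTensor(), T.ToDtype(torch.float, scale=True)])
    ds = NuscenesDataset(data_dir='/path/to/nuScenes/',  # placeholder path
                         metadata_subdir='v1.0-mini', train=True, transform=transform)
    loader = DataLoader(ds, batch_size=2, shuffle=True, collate_fn=collate_fn)

    images, targets = next(iter(loader))
    print(len(images), targets[0]['boxes'].shape)  # e.g. 2 torch.Size([N, 4])

The tuple-based collate matches what train_one_epoch in mytrain.py expects,
since it iterates over images and targets as parallel sequences.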