Support for nuScenes dataset #1

Open · wants to merge 1 commit into base: main
19 changes: 19 additions & 0 deletions DeepDataMiningLearning/detection/dataset.py
@@ -8,6 +8,7 @@
from PIL import Image
import csv
from DeepDataMiningLearning.detection.dataset_kitti import KittiDataset
+from DeepDataMiningLearning.detection.dataset_nuscene import NuscenesDataset
from DeepDataMiningLearning.detection.dataset_waymococo import WaymoCOCODataset
from collections import defaultdict

@@ -157,6 +158,8 @@ def get_dataset(datasetname, is_train, is_val, args):
        ds, num_classes = get_waymococodataset(is_train, is_val, args)
    elif datasetname.lower() == 'yolo':
        ds, num_classes = get_yolodataset(is_train, is_val, args)
+    elif datasetname.lower() == 'nuscene':
+        ds, num_classes = get_nuscenedataset(is_train, is_val, args)
    return ds, num_classes

def get_transform(is_train, args):
@@ -185,6 +188,22 @@ def get_cocodataset(is_train, is_val, args):
    )
    return ds, num_classes

+# Utility method to load the nuScenes dataset
+def get_nuscenedataset(is_train, is_val, args):
+    rootPath = args.data_path
+    if is_val == True:
+        transformfunc = get_transform(False, args)
+        #metadata_subdir = 'v1.0-mini' # small dataset for development
+        metadata_subdir = 'v1.0-val' # full validation dataset
+        dataset = NuscenesDataset(rootPath, metadata_subdir=metadata_subdir, train=True, transform=transformfunc) # train=True keeps labels for evaluation
+    else: # training
+        transformfunc = get_transform(True, args) # add augmentation
+        #metadata_subdir = 'v1.0-mini' # small dataset for development
+        metadata_subdir = 'v1.0-train' # full training dataset
+        dataset = NuscenesDataset(rootPath, metadata_subdir=metadata_subdir, train=is_train, transform=transformfunc)
+
+    num_classes = dataset.numclass
+    return dataset, num_classes

def get_kittidataset(is_train, is_val, args):
    rootPath=args.data_path
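A minimal sketch of how the new 'nuscene' branch would be exercised (illustrative only: get_nuscenedataset reads args.data_path, and any further fields needed by get_transform, such as the data_augmentation field used here, are assumptions, not part of this diff):

from types import SimpleNamespace
from DeepDataMiningLearning.detection.dataset import get_dataset

args = SimpleNamespace(data_path='/data/nuScenes/', data_augmentation='hflip') # fields beyond data_path are assumed
train_ds, num_classes = get_dataset('nuscene', is_train=True, is_val=False, args=args)
val_ds, _ = get_dataset('nuscene', is_train=False, is_val=True, args=args)
print(len(train_ds), num_classes)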
224 changes: 224 additions & 0 deletions DeepDataMiningLearning/detection/dataset_nuscene.py
@@ -0,0 +1,224 @@
import random
import torch
from torch.utils.data import Dataset
from typing import Any, Callable, List, Optional, Tuple
import os
import numpy as np
import json
from PIL import Image
from pathlib import Path

# Nuscenes dataset class
# This class is used to load the nuScenes (nuImages) dataset.
# Ref: https://www.nuscenes.org/
#
# Mini dataset for development
#   metadata and images -
#   https://motional-nuscenes.s3-ap-northeast-1.amazonaws.com/public/nuimages-v1.0/nuimages-v1.0-mini.tgz
# Full dataset for training
#   metadata - https://motional-nuscenes.s3-ap-northeast-1.amazonaws.com/public/nuimages-v1.0/nuimages-v1.0-all-metadata.tgz
#   images - https://motional-nuscenes.s3-ap-northeast-1.amazonaws.com/public/nuimages-v1.0/nuimages-v1.0-all-samples.tgz
# Extract all files into <data_dir>
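#
# Expected on-disk layout after extraction (a sketch assumed from the paths
# used below; only these three metadata files plus the images are read):
#   <data_dir>/
#       v1.0-mini/ (or v1.0-train/, v1.0-val/)   <- metadata_subdir
#           category.json
#           object_ann.json
#           sample_data.json
#       samples/...   <- images, at the 'filename' paths from sample_data.json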
class NuscenesDataset(Dataset):
    ## Nuscenes dataset class initialization
    # Args:
    #   data_dir: str: Path to the data directory
    #   metadata_subdir: str: Metadata sub-directory to load (e.g. 'v1.0-mini')
    #   train: bool: True if training dataset, False otherwise
    #   transform: Optional[Callable]: Transform to apply to the image
    def __init__(self,
                 data_dir: str,
                 metadata_subdir: str = 'v1.0-mini',
                 train: bool = True,
                 transform: Optional[Callable] = None):
        self.data_dir = data_dir
        self.train = train
        self.transform = transform
        self.metadata_dir = os.path.join(data_dir, metadata_subdir)

        # Read categories
        self.read_categories()
        self.numclass = len(self.INSTANCE_CATEGORY_NAMES)

        # Read annotations
        self.read_annotations()

        # Read sample_data.json; only keep entries for which annotations are available
        self.sample_data_list = []
        sample_data_path = os.path.join(self.metadata_dir, 'sample_data.json')
        with open(sample_data_path, 'r') as f:
            samples_list = json.load(f)
        i = -1
        for sample in samples_list:
            token = sample['token']
            if token in self.annotations:
                i = i + 1
                self.sample_data_list.append({
                    'image_id': i,
                    'filename': sample['filename'],
                    'token': sample['token']
                })
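    # Illustrative sample_data.json entry (abridged; the field set is assumed
    # from the nuImages devkit schema, and only 'token' and 'filename' are used):
    #   {"token": "...", "filename": "samples/CAM_FRONT/xxx.jpg",
    #    "width": 1600, "height": 900, "is_key_frame": true, ...}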


    def __len__(self):
        return len(self.sample_data_list)

    def __getitem__(self, idx: int) -> Tuple[Any, Any]:
        """Get item at a given index.

        Args:
            idx (int): Index
        Returns:
            tuple: (image, target), where target is a dictionary
            with the following keys (see get_label):

            - boxes: float[n, 4]
            - labels: int[n]
            - image_id: int
            - area: float[n]
            - iscrowd: int[n]

        """
        if idx >= len(self.sample_data_list):
            raise IndexError(f"Index {idx} is out of range")
        sample_data = self.sample_data_list[idx]
        image_file = sample_data['filename']
        image_path = Path(os.path.join(self.data_dir, image_file))
        if not image_path.exists():
            raise FileNotFoundError(f"Image file {image_path} does not exist")
        image = Image.open(image_path)
        # labels are only loaded in training mode
        target = self.get_label(idx) if self.train else None #dict of tensors

        if self.transform:
            image, target = self.transform(image, target)
        return image, target

    # Read annotation files.
    # In the nuScenes dataset each image has a unique token, and the corresponding
    # annotations are stored in the object_ann.json file.
    # 'token' in sample_data.json maps to 'sample_data_token' in object_ann.json.
    def read_annotations(self):
        anns = {}
        with open(os.path.join(self.metadata_dir, 'object_ann.json'), 'r') as f:
            ann_list = json.load(f)
        for ann in ann_list:
            image_key = ann['sample_data_token']
            # group annotations by the image (sample_data token) they belong to
            anns.setdefault(image_key, []).append({
                'bbox': ann['bbox'],
                'category_token': ann['category_token']
            })
        self.annotations = anns
        return
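    # Illustrative object_ann.json entry (abridged; the field set is assumed
    # from the nuImages devkit schema, and only 'sample_data_token', 'bbox',
    # and 'category_token' are used):
    #   {"token": "...", "sample_data_token": "...", "category_token": "...",
    #    "bbox": [xmin, ymin, xmax, ymax], ...}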

    # Read category.json and create a list of category names.
    # The nuScenes dataset has a token (unique id) for each category; create a map
    # from token to index in the list of category names.
    def read_categories(self):
        category_token_idx_map = {}
        category_names = []

        with open(os.path.join(self.metadata_dir, 'category.json'), 'r') as f:
            categories = json.load(f)
        # populate category_token_idx_map and category_names
        for i, category in enumerate(categories):
            category_token_idx_map[category['token']] = i
            category_names.append(category['name'])

        self.INSTANCE_CATEGORY_NAMES = category_names
        self.INSTANCE_CATEGORY_TOKEN2IDX = category_token_idx_map
        return
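    # Illustrative category.json entry (abridged; assumed from the nuImages
    # devkit schema, and only 'token' and 'name' are used):
    #   {"token": "...", "name": "vehicle.car", "description": "..."}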

    # Get label for image at a given index.
    # Returns a dictionary with keys: boxes, labels, image_id, area, iscrowd.
    # Values are tensors, except image_id which is the integer index.
    def get_label(self, idx: int) -> dict:
        annotations = self.annotations.get(self.sample_data_list[idx]['token'], [])
        boxes = []
        labels = []
        for ann in annotations:
            category_idx = self.INSTANCE_CATEGORY_TOKEN2IDX[ann['category_token']]
            boxes.append(ann['bbox'])
            labels.append(category_idx)

        num_objs = len(labels)
        newtarget = {}
        if num_objs > 0:
            # convert everything into a torch.Tensor
            boxes = torch.as_tensor(boxes, dtype=torch.float32)
            labels = torch.as_tensor(labels, dtype=torch.int64)
            # bbox is [xmin, ymin, xmax, ymax], so area = height * width
            area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
            # suppose all instances are not crowd
            iscrowd = torch.zeros((num_objs,), dtype=torch.int64)
            newtarget["boxes"] = boxes
            newtarget["labels"] = labels
            newtarget["image_id"] = idx
            newtarget["area"] = area
            newtarget["iscrowd"] = iscrowd
        else:
            # negative example, ref: https://github.com/pytorch/vision/issues/2144
            # (empty tensors of the right shape so downstream indexing still works)
            newtarget["boxes"] = torch.zeros((0, 4), dtype=torch.float32)
            newtarget["labels"] = torch.zeros((0,), dtype=torch.int64)
            newtarget["image_id"] = idx
            newtarget["area"] = torch.zeros((0,), dtype=torch.float32)
            newtarget["iscrowd"] = torch.zeros((0,), dtype=torch.int64)
        return newtarget


# Test the dataset.
# It loads the dataset, draws bounding boxes and the corresponding labels on a
# random image, and saves the result to disk (<data_dir>/out.png) for verification.
#
# This demonstrates that the dataset is loaded properly and can be used for training.
if __name__ == "__main__":
    data_dir = '/home/nadeem/sjsu/data/nuScenes/'

    # metadata split to load:
    #metadata_subdir = 'v1.0-train' # train
    #metadata_subdir = 'v1.0-val' # validation
    metadata_subdir = 'v1.0-mini' # mini

    import DeepDataMiningLearning.detection.transforms as T
    def get_transformsimple():
        transforms = []
        transforms.append(T.PILToTensor())
        transforms.append(T.ToDtype(torch.float, scale=True))
        return T.Compose(transforms)

    ds = NuscenesDataset(data_dir=data_dir, metadata_subdir=metadata_subdir, train=True, transform=get_transformsimple())
    print("Size of dataset", len(ds))

    test_idx = random.randint(0, len(ds) - 1)
    img, target = ds[test_idx]

    print(img.shape) # the transform converts the PIL image to a tensor
    print(target.keys()) #['boxes', 'labels', 'image_id', 'area', 'iscrowd']
    boxes = target['boxes']
    labels = target['labels']
    print(boxes.shape) #e.g. torch.Size([3, 4]), i.e. (n, 4)
    print(labels)
    labels = [ds.INSTANCE_CATEGORY_NAMES[int(label)] for label in labels]
    print(labels)

    # Draw bounding boxes (note: older torchvision versions require a uint8 image tensor here)
    import torchvision
    img = torchvision.utils.draw_bounding_boxes(img, boxes, labels, width=2, colors="yellow", fill=False)

    img = torchvision.transforms.ToPILImage()(img)

    # save output
    img.save(os.path.join(data_dir, "out.png"))
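Since __getitem__ returns an (image, target) pair whose target size varies per image, the default DataLoader collate cannot stack a batch. A minimal sketch of feeding this dataset to a DataLoader, using the usual torchvision detection convention of keeping batches as tuples (illustrative, not part of this PR; ds is the dataset built in the test block above):

from torch.utils.data import DataLoader

def detection_collate(batch):
    # keep images and targets as parallel tuples instead of stacking tensors
    return tuple(zip(*batch))

loader = DataLoader(ds, batch_size=2, shuffle=True, collate_fn=detection_collate)
images, targets = next(iter(loader)) # tuple of 2 image tensors, tuple of 2 target dicts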
4 changes: 2 additions & 2 deletions DeepDataMiningLearning/detection/mytrain.py
@@ -222,7 +222,7 @@ def main(args):
    if args.rpn_score_thresh is not None:
        kwargs["rpn_score_thresh"] = args.rpn_score_thresh

-    model, preprocess, classes = create_detectionmodel(args.model, num_classes, args.trainable)
+    model, preprocess, classes = create_detectionmodel(args.model, num_classes=num_classes, trainable_layers=args.trainable, device=device)
    model.to(device)

    if args.distributed and args.sync_bn:
@@ -330,7 +330,7 @@ def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq, scaler
        images = list(image.to(device) for image in images) #list of [3, 1280, 1920]
        targets = [{k: v.to(device) if isinstance(v, torch.Tensor) else v for k, v in t.items()} for t in targets] #tuple to list
        #with torch.cuda.amp.autocast(enabled=scaler is not None):
-        with torch.amp.autocast(enabled=scaler is not None):
+        with torch.amp.autocast(device_type=device.type, enabled=scaler is not None):
            loss_dict = model(images, targets)
            losses = sum(loss for loss in loss_dict.values()) #single value
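The autocast fix above follows the newer torch.amp API, which requires an explicit device_type. A minimal sketch of the full mixed-precision step this enables (assumptions: a CUDA device and a recent PyTorch where GradScaler is exposed as torch.amp.GradScaler; older releases use torch.cuda.amp.GradScaler):

import torch

device = torch.device('cuda')
scaler = torch.amp.GradScaler(enabled=device.type == 'cuda')

# inside the training loop; model, images, targets, optimizer as in train_one_epoch
optimizer.zero_grad()
with torch.amp.autocast(device_type=device.type, enabled=scaler is not None):
    loss_dict = model(images, targets)
    losses = sum(loss for loss in loss_dict.values())
scaler.scale(losses).backward() # scale the loss to avoid fp16 gradient underflow
scaler.step(optimizer) # unscales gradients, then runs the optimizer step
scaler.update()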