diff --git a/src/kabr_tools/miniscene2behavior.py b/src/kabr_tools/miniscene2behavior.py index 8347350..4ce2fc1 100644 --- a/src/kabr_tools/miniscene2behavior.py +++ b/src/kabr_tools/miniscene2behavior.py @@ -19,12 +19,15 @@ def get_input_clip(cap: cv2.VideoCapture, cfg: CfgNode, keyframe_idx: int) -> li # https://github.com/facebookresearch/SlowFast/blob/bac7b672f40d44166a84e8c51d1a5ba367ace816/slowfast/visualization/ava_demo_precomputed_boxes.py seq_length = cfg.DATA.NUM_FRAMES * cfg.DATA.SAMPLING_RATE total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) + assert keyframe_idx < total_frames, f"keyframe_idx: {keyframe_idx}" \ + f" >= total_frames: {total_frames}" seq = get_sequence( keyframe_idx, seq_length // 2, cfg.DATA.SAMPLING_RATE, total_frames, ) + clip = [] for frame_idx in seq: cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx) @@ -124,29 +127,34 @@ def annotate_miniscene(cfg: CfgNode, model: torch.nn.Module, # find all tracks tracks = [] + frames = {} for track in root.iterfind("track"): track_id = track.attrib["id"] tracks.append(track_id) + frames[track_id] = [] - # find all frames - # TODO: rewrite - some tracks may have different frames - assert len(tracks) > 0, "No tracks found in track file" - frames = [] - for box in track.iterfind("box"): - frames.append(int(box.attrib["frame"])) + # find all frames + for box in track.iterfind("box"): + frames[track_id].append(int(box.attrib["frame"])) # run model on miniscene for track in tracks: video_file = f"{miniscene_path}/{track}.mp4" cap = cv2.VideoCapture(video_file) - for frame in tqdm(frames, desc=f"{track} frames"): - inputs = get_input_clip(cap, cfg, frame) + index = 0 + for frame in tqdm(frames[track], desc=f"{track} frames"): + try: + inputs = get_input_clip(cap, cfg, index) + except AssertionError as e: + print(e) + break + index += 1 if cfg.NUM_GPUS: # transfer the data to the current GPU device. if isinstance(inputs, (list,)): - for i in range(len(inputs)): - inputs[i] = inputs[i].cuda(non_blocking=True) + for i, input_clip in enumerate(inputs): + inputs[i] = input_clip.cuda(non_blocking=True) else: inputs = inputs.cuda(non_blocking=True) @@ -163,6 +171,7 @@ def annotate_miniscene(cfg: CfgNode, model: torch.nn.Module, if frame % 20 == 0: pd.DataFrame(label_data).to_csv( output_path, sep=" ", index=False) + cap.release() pd.DataFrame(label_data).to_csv(output_path, sep=" ", index=False) diff --git a/tests/test_miniscene2behavior.py b/tests/test_miniscene2behavior.py index 7875e2d..ddb8f5d 100644 --- a/tests/test_miniscene2behavior.py +++ b/tests/test_miniscene2behavior.py @@ -5,6 +5,7 @@ import requests from unittest.mock import Mock, patch import torch +from lxml import etree import numpy as np import pandas as pd from kabr_tools import ( @@ -97,12 +98,13 @@ def test_run(self): @patch('kabr_tools.miniscene2behavior.process_cv2_inputs') @patch('kabr_tools.miniscene2behavior.cv2.VideoCapture') def test_matching_tracks(self, video_capture, process_cv2_inputs): - - # Create fake model that always returns a prediction of 1 + # create fake model that weights class 98 mock_model = Mock() - mock_model.return_value = torch.tensor([1]) + prob = torch.zeros(99) + prob[-1] = 1 + mock_model.return_value = prob - # Create fake cfg + # create fake cfg mock_config = Mock( DATA=Mock(NUM_FRAMES=16, SAMPLING_RATE=5, @@ -111,25 +113,36 @@ def test_matching_tracks(self, video_capture, process_cv2_inputs): OUTPUT_DIR='' ) - # Create fake video capture + # create fake video capture vc = video_capture.return_value vc.read.return_value = True, np.zeros((8, 8, 3), np.uint8) - vc.get.return_value = 1 + vc.get.return_value = 21 self.output = '/tmp/annotation_data.csv' + miniscene_dir = os.path.join(EXAMPLESDIR, "MINISCENE1") + video_name = "DJI" annotate_miniscene(cfg=mock_config, model=mock_model, - miniscene_path=os.path.join( - EXAMPLESDIR, "MINISCENE1"), - video='DJI', + miniscene_path=miniscene_dir, + video=video_name, output_path=self.output) - # Read in output CSV and make sure we have the expected columns and at least one row + # check output CSV df = pd.read_csv(self.output, sep=' ') self.assertEqual(list(df.columns), [ "video", "track", "frame", "label"]) - self.assertGreater(len(df.index), 0) + row_ct = 0 + + root = etree.parse( + f"{miniscene_dir}/metadata/DJI_tracks.xml").getroot() + for track in root.iterfind("track"): + track_id = int(track.get("id")) + for box in track.iterfind("box"): + row_val = [video_name, track_id, int(box.get("frame")), 98] + self.assertEqual(list(df.loc[row_ct]), row_val) + row_ct += 1 + self.assertEqual(len(df.index), row_ct) @patch('kabr_tools.miniscene2behavior.process_cv2_inputs') @patch('kabr_tools.miniscene2behavior.cv2.VideoCapture') @@ -151,9 +164,11 @@ def test_nonmatching_tracks(self, video_capture, process_cv2_inputs): # Create fake video capture vc = video_capture.return_value vc.read.return_value = True, np.zeros((8, 8, 3), np.uint8) - vc.get.return_value = 1 + vc.get.return_value = 21 self.output = '/tmp/annotation_data.csv' + miniscene_dir = os.path.join(EXAMPLESDIR, "MINISCENE2") + video_name = "DJI" annotate_miniscene(cfg=mock_config, model=mock_model, @@ -162,11 +177,22 @@ def test_nonmatching_tracks(self, video_capture, process_cv2_inputs): video='DJI', output_path=self.output) - # Read in output CSV and make sure we have the expected columns and at least one row + # check output CSV df = pd.read_csv(self.output, sep=' ') self.assertEqual(list(df.columns), [ "video", "track", "frame", "label"]) - self.assertGreater(len(df.index), 0) + row_ct = 0 + + root = etree.parse( + f"{miniscene_dir}/metadata/DJI_tracks.xml").getroot() + for track in root.iterfind("track"): + track_id = int(track.get("id")) + for box in track.iterfind("box"): + row_val = [video_name, track_id, int(box.get("frame")), 0] + self.assertEqual(list(df.loc[row_ct]), row_val) + row_ct += 1 + self.assertEqual(len(df.index), row_ct) + def test_parse_arg_min(self): # parse arguments