-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathucf_extract_frames.py
101 lines (80 loc) · 3.59 KB
/
ucf_extract_frames.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
"""
After moving all the files using the 1_ file, we run this one to extract
the images from the videos and also create a data file we can use
for training and testing later.
"""
import csv
import glob
import os
import os.path
from subprocess import call
def _run_ffmpeg(src, dest):
    """Dump the frames of the video ``src`` to the image pattern ``dest``.

    Returns True when ffmpeg exited with status 0, False otherwise. A
    failure is reported on stdout but never raised, so one bad video does
    not abort the whole extraction run.
    """
    # An argument list (no shell) keeps quotes, spaces, `$` and similar
    # characters in file names from being interpreted by a shell.
    command = ['ffmpeg', '-hide_banner', '-loglevel', 'panic',
               '-i', src, dest]
    try:
        status = call(command)
    except OSError as err:
        # Typically: the ffmpeg executable is missing or not executable.
        print("Could not run ffmpeg for %s: %s" % (src, err))
        return False
    if status != 0:
        print("ffmpeg exited with status %d for %s" % (status, src))
        return False
    return True


def extract_files():
    """Extract JPEG frames from every video and write ``data_file.csv``.

    Walks ``./train/`` and ``./test/`` (relative to the current working
    directory), treating every entry inside them as a class folder and
    every ``*.avi`` file inside a class folder as a video. For each video
    whose frames are not already present, ffmpeg is run to write its frames
    next to the video as ``<name>__frame_NNNN.jpg``. One row per video is
    then written to ``./data_file.csv``:

        [train|test], class, filename (no extension), nb frames

    The frame count is taken from what is on disk after the extraction
    attempt, so a video whose extraction failed is still listed (possibly
    with 0 frames).
    """
    data_file = []
    folders = ['./train/', './test/']

    for folder in folders:
        # Sorted so the row order of the CSV is reproducible between runs;
        # glob returns entries in arbitrary order.
        for vid_class in sorted(glob.glob(folder + '*')):
            # Escape the class folder so characters such as `[` in its
            # name are matched literally rather than as glob syntax.
            class_files = sorted(glob.glob(glob.escape(vid_class) + '/*.avi'))

            for video_path in class_files:
                # Get the parts of the file.
                video_parts = get_video_parts(video_path)
                train_or_test, classname, filename_no_ext, filename = video_parts

                # Only extract if we haven't done it yet. Otherwise, just
                # get the info.
                if not check_already_extracted(video_parts):
                    src = os.path.join(train_or_test, classname, filename)
                    dest = os.path.join(train_or_test, classname,
                                        filename_no_ext + '__frame_%04d.jpg')
                    _run_ffmpeg(src, dest)

                # Now get how many frames it is.
                nb_frames = get_nb_frames_for_video(video_parts)

                data_file.append([train_or_test, classname, filename_no_ext, nb_frames])

                print("Generated %d frames for %s" % (nb_frames, filename_no_ext))

    # newline='' lets the csv module control line endings itself; without
    # it an extra blank line appears after every row on Windows.
    with open('./data_file.csv', 'w', newline='') as fout:
        writer = csv.writer(fout)
        writer.writerows(data_file)

    print("Extracted and wrote %d video files." % (len(data_file)))
def get_nb_frames_for_video(video_parts):
    """Return the number of extracted frames found on disk for a video.

    ``video_parts`` is the ``(train_or_test, classname, filename_no_ext,
    filename)`` tuple produced by ``get_video_parts``. Frames are looked up
    as ``<train_or_test>/<classname>/<filename_no_ext>__frame_*.jpg``
    relative to the current working directory, which is the naming used
    when the frames are extracted. Returns 0 when nothing has been
    extracted.
    """
    train_or_test, classname, filename_no_ext, _ = video_parts

    # Escape the literal parts so glob metacharacters (`[`, `*`, `?`) in a
    # folder or video name are matched as-is.
    frame_dir = glob.escape(train_or_test + '/' + classname)

    # Anchoring on the `__frame_` separator keeps the frames of another
    # video whose name merely starts with this one (e.g. `clip` vs
    # `clip2`) from being counted.
    pattern = frame_dir + '/' + glob.escape(filename_no_ext) + '__frame_*.jpg'
    return len(glob.glob(pattern))
def get_video_parts(video_path):
    """Split a video path into its dataset components.

    The last three components of ``video_path`` are interpreted as
    ``<train_or_test>/<classname>/<filename>``; any leading components
    (such as ``.``) are ignored.

    Returns a tuple ``(train_or_test, classname, filename_no_ext,
    filename)`` where ``filename_no_ext`` is ``filename`` with only its
    final extension removed.

    Raises ValueError if the path has fewer than three components.
    """
    # normpath drops a leading `./` and collapses duplicate separators, so
    # the same code handles `./train/A/x.avi` and `train/A/x.avi`.
    parts = [p for p in os.path.normpath(video_path).split(os.sep) if p]
    if len(parts) < 3:
        raise ValueError(
            "expected <train|test>/<class>/<file>, got %r" % (video_path,))

    train_or_test, classname, filename = parts[-3:]

    # splitext strips only the last extension, so `clip.v2.avi` keeps the
    # distinguishing `clip.v2` stem instead of being cut down to `clip`.
    filename_no_ext = os.path.splitext(filename)[0]

    return train_or_test, classname, filename_no_ext, filename
def check_already_extracted(video_parts):
    """Return True if the first extracted frame of this video exists.

    ``video_parts`` is the ``(train_or_test, classname, filename_no_ext,
    filename)`` tuple produced by ``get_video_parts``. Frames are written
    as ``<filename_no_ext>__frame_NNNN.jpg`` next to the video, so the
    presence of frame 0001 is used as the marker that extraction has
    already been done. The path is resolved relative to the current
    working directory.
    """
    train_or_test, classname, filename_no_ext, _ = video_parts
    # Must match the output pattern used for extraction
    # (`__frame_%04d.jpg`); otherwise every run re-extracts every video.
    first_frame = os.path.join(train_or_test, classname,
                               filename_no_ext + '__frame_0001.jpg')
    return os.path.exists(first_frame)
def main():
    """Script entry point: run the frame extraction.

    Delegates to ``extract_files``, which extracts images from the videos
    and builds the data input file with rows of the form:

        [train|test], class, filename, nb frames
    """
    extract_files()
if __name__ == '__main__':
    # The extraction looks up ./train/ and ./test/ relative to the current
    # working directory, so switch into the data folder before starting.
    os.chdir('./ucf_data')
    main()