-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdbCreateAndPopulate.py
141 lines (109 loc) · 3.63 KB
/
dbCreateAndPopulate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
import asyncio
import sqlite3 as sl
import pathlib
import os
import configparser
from random import randrange
import uuid
# TODO document how to set up the needed folders and paths in configuration.txt
# TODO document how to use recreateDb
# TODO indices
configParser = configparser.RawConfigParser()
configFilePath = r'configuration.txt'
configParser.read(configFilePath)
datasetPathVideo = configParser.get('COMMON', 'datasetPathVideo')
recreateDb = int(configParser.get('dbCreateAndPopulate', 'recreateDb'))
datasetPathDatabase = configParser.get('COMMON', 'datasetPathDatabase') + '/dataset.db'
if(recreateDb != 0):
filename = uuid.uuid4().hex
try:
os.rename(datasetPathDatabase,configParser.get('COMMON', 'datasetPathDatabase') + '/' + str(uuid.uuid4().hex))
except:
pass
con = sl.connect(datasetPathDatabase)
if(recreateDb != 0):
con.execute("""
CREATE TABLE VIDEO (
ID INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
VIDEO_PATH TEXT NOT NULL,
AGE INTEGER,
ETHNICITY INTEGER,
GENDER INTEGER,
AUDIO_PRE INTEGER,
FACES_PRE INTEGER,
TRAINED INTEGER
);
""")
con.execute("""
CREATE TABLE AUDIO (
ID INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
VIDEO_ID INTEGER NOT NULL,
AUDIO_LENGTH INTEGER,
SPEAKER_EMB BLOB,
LANG TEXT,
AUDIO_EMB BLOB,
AUDIO_EMB2 BLOB,
WAV_TO_VEC BLOB,
PYANNOTE_TITANET BLOB,
AUDIO_FEATURES BLOB
);
""")
con.execute("""
CREATE TABLE FACE (
ID INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
FACE_PATH TEXT,
VIDEO_ID INTEGER,
FACE_NORM_PATH TEXT,
LATENT_REP BLOB,
VGG_BLURRED INTEGER
);
""")
con.execute("""
CREATE TABLE CODE_TABLE (
ID INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
TABLE_NAME TEXT NOT NULL ,
COLUMN_NAME TEXT NOT NULL ,
CODE_INTEGER INTEGER NOT NULL ,
CODE_MEANING TEXT NOT NULL
);
""")
con.execute("""
CREATE INDEX AUDIO_VIDEO_ID_IDX ON AUDIO (VIDEO_ID);
""")
con.execute("""
CREATE INDEX FACE_VIDEO_ID_IDX ON FACE (VIDEO_ID);
""")
con.execute("""
CREATE INDEX AUDIO_AUDIO_LENGTH_IDX ON AUDIO (AUDIO_LENGTH);
""")
con.execute("""
CREATE INDEX FACE_VGG_BLURRED_IDX ON FACE (VGG_BLURRED);
""")
con.execute("""
CREATE INDEX VIDEO_TRAINED_IDX ON VIDEO (TRAINED);
""")
def insertIntoDb(file,root):
#await asyncio.sleep(10)
#print(file)
sql = 'SELECT COUNT (*) FROM VIDEO WHERE VIDEO_PATH = ?'
data = [(os.path.join(root, file))]
result = con.execute(sql, data)
if(result.fetchall()[0][0] == 0):
sql = 'INSERT INTO VIDEO (VIDEO_PATH, AUDIO_PRE, FACES_PRE) VALUES (?,?,?)'
data = [(os.path.join(root, file)),0,0]
with con:
con.execute(sql, data)
def insertAllIntoDb():
for root, dirs, files in os.walk(datasetPathVideo):
#tasks = [insertIntoDb(file,root) for file in files]
# TODO split into chunks of say 4 files
#await asyncio.wait(tasks)
#await asyncio.gather(*tasks)
for file in files:
insertIntoDb(file,root)
#asyncio.run(insertAllIntoDb())
insertAllIntoDb()# TODO Better display of progress and make indicies
with con:
data = con.execute("SELECT COUNT(*) FROM VIDEO")
for row in data:
print(str(row) + " VIDEO FILES INSERTED")