-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathnikl.py
35 lines (27 loc) · 1.21 KB
/
nikl.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import argparse
import os
import io
import shutil
import tarfile
import wget
import subprocess
from utils import create_manifest
# Command-line options for the NIKL preparation script. Parsing happens at
# module level so that main() can read the resulting ``args`` namespace.
parser = argparse.ArgumentParser(description='Processes nikl.')
parser.add_argument(
    '--target-dir',
    help='Path to save dataset',
    default='./data/nikl_dataset',
)
# Duration bounds are whole seconds; main() forwards them to create_manifest
# for the training manifest only.
parser.add_argument(
    '--min-duration',
    type=int,
    default=1,
    help='Prunes training samples shorter than the min duration (given in seconds, default 1)',
)
parser.add_argument(
    '--max-duration',
    type=int,
    default=15,
    help='Prunes training samples longer than the max duration (given in seconds, default 15)',
)
args = parser.parse_args()
def main():
    """Prepare the NIKL dataset and write its train/validation manifests.

    Steps, in order:

    1. Create ``args.target_dir`` if it does not already exist.
    2. Run ``local/clean_corpus.sh`` and then ``local/data_prep.sh``, each
       with the corpus directory and ``args.target_dir`` as arguments.
    3. Call ``create_manifest`` for ``<target_dir>/train/`` (passing the
       min/max duration options) and for ``<target_dir>/test/``.

    Reads the module-level ``args`` namespace; returns ``None``.
    """
    if not os.path.isdir(args.target_dir):
        os.makedirs(args.target_dir)
    train_path = args.target_dir + '/train/'
    test_path = args.target_dir + '/test/'

    # subprocess.call with an argument list does not involve a shell, so a
    # literal "$HOME" would be handed to the scripts unexpanded. Expand the
    # environment variable here so the scripts receive a real path.
    # NOTE(review): "copora" looks like a typo for "corpora" -- confirm the
    # actual on-disk directory name before changing it.
    corpus_dir = os.path.expandvars("$HOME/copora/NIKL")

    # NOTE(review): the scripts' exit statuses are ignored, as before;
    # consider failing fast if either preparation step returns non-zero.
    subprocess.call(["local/clean_corpus.sh", corpus_dir, args.target_dir])
    subprocess.call(["local/data_prep.sh", corpus_dir, args.target_dir])

    print ('\n', 'Creating manifests...')
    # Duration limits are passed for the training manifest only; the
    # validation manifest is built with create_manifest's own defaults.
    create_manifest(train_path, 'nikl_train_manifest.csv', args.min_duration, args.max_duration)
    create_manifest(test_path, 'nikl_val_manifest.csv')
# Run the preparation pipeline only when this file is executed as a script.
if __name__ == "__main__":
    main()