Skip to content

Commit 3a131f3

Browse files
committed
Add options --choice_sensors and --choice_runs_file
1 parent 79cc864 commit 3a131f3

File tree

2 files changed

+41
-6
lines changed

2 files changed

+41
-6
lines changed

example_list.txt

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
2014-05-19-13-20-57
2+
2014-06-26-09-31-18

scrape_mrgdatashare.py

+39-6
Original file line numberDiff line numberDiff line change
@@ -31,15 +31,17 @@
3131
file_extension = ".tar"
3232
downloads_dir_example = os.path.expanduser("~/Downloads")
3333

34-
# throttle params
34+
# throttle params (durations in seconds) - limit the request rate so the server is not overloaded and does not block us for sending too many requests in a short time
3535
default_period_duration = 10 * 60
3636
default_chunks_per_period = 1000
3737
default_chunk_length = 1 * 1024
3838

39-
# download errors handling params
39+
# download error handling params (durations in seconds) - to avoid overloading the server and to avoid losing data due to network errors
4040
default_relogin_duration = 10 * 60
4141
default_nb_tries_reconnection = 5
4242
default_reconnection_duration = 10 * 60
43+
default_choice_sensors = 'all'
44+
default_choice_runs_file = 'all'
4345

4446

4547
class Datasets:
@@ -62,7 +64,10 @@ def __init__(self, parse_args):
6264
self.datasets_file = Datasets.get_dataset_file(parse_args)
6365

6466
# read datasets file
65-
self.datasets = Datasets.get_datasets(self.datasets_file)
67+
self.choice_sensors = parse_args.choice_sensors.split(',')
68+
self.choice_runs_file = parse_args.choice_runs_file
69+
self.datasets = Datasets.get_datasets(self.datasets_file, self.choice_runs_file, self.choice_sensors)
70+
6671

6772
@staticmethod
6873
def get_dataset_file(parse_args):
@@ -86,7 +91,7 @@ def get_dataset_file(parse_args):
8691
parse_args.datasets_file)
8792

8893
@staticmethod
89-
def get_datasets(datasets_file):
94+
def get_datasets(datasets_file, choice_runs_file, choice_sensors):
9095
"""Reads known datasets list and file patterns from input file.
9196
9297
Args:
@@ -99,12 +104,28 @@ def get_datasets(datasets_file):
99104

100105
print("reading datasets_file: " + datasets_file)
101106
datasets = []
107+
# choose dataset and sensor type to download
108+
if choice_runs_file == 'all':
109+
choice_runs = 'all'
110+
else:
111+
with open(choice_runs_file, 'r') as f:
112+
choice_runs = f.read().splitlines()
102113
with open(datasets_file, "r") as file_handle:
103114
lines = file_handle.readlines()
104115
for line in lines:
105116
line = line.strip("\n").split(",")
106-
dataset = {"dataset": line[0], "file_patterns": line[1:]}
107-
datasets.append(dataset)
117+
if choice_runs == 'all' or line[0] in choice_runs: # choose this run
118+
if choice_sensors[0] == 'all':
119+
dataset = {"dataset": line[0], "file_patterns": line[1:]}
120+
else: # not all sensors
121+
exist_sensors = [] # sensors that will be downloaded
122+
for exist_sensor in line[1:]: # enumerate exist_sensor in this run
123+
for choice_sensor in choice_sensors: # enumerate choice_sensor the user want to download
124+
if choice_sensor in exist_sensor: # if choice_sensor is in exist_sensor
125+
exist_sensors.append(exist_sensor) # add exist_sensor to exist_sensors
126+
dataset = {"dataset": line[0], "file_patterns": exist_sensors}
127+
datasets.append(dataset)
128+
108129
print("got num_datasets: " + str(len(datasets)))
109130
return datasets
110131

@@ -695,6 +716,18 @@ def get_local_file_path(file_url, dataset_handler):
695716
default=default_nb_tries_reconnection,
696717
help="Number of downloading tries for a file e.g. " +
697718
str(default_nb_tries_reconnection))
719+
argument_parser.add_argument(
720+
"--choice_sensors",
721+
dest="choice_sensors",
722+
type=str,
723+
default=default_choice_sensors,
724+
help="choice of sensors in [tags, stereo_centre, stereo_left, stereo_right, vo, mono_left, mono_right, mono_rear, lms_front, lms_rear, ldmrs, gps, all] to download e.g. " + default_choice_sensors)
725+
argument_parser.add_argument(
726+
"--choice_runs_file",
727+
dest="choice_runs_file",
728+
type=str,
729+
default=default_choice_runs_file,
730+
help="choice of runs recorded in a file to download, if 'all' all runs are downloaded")
698731

699732
# parse CL
700733
args = argument_parser.parse_args()

0 commit comments

Comments
 (0)