-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcrawl.py
35 lines (24 loc) · 820 Bytes
/
crawl.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
from db.service import BasicService, RatingService
from imdb import crawl
def update_basics(basic_service, rating_service, crawl_titles):
    """Re-crawl and persist the basic records that have ratings, batch by batch.

    Rating batches are requested at offsets 0, 1, 2, ... until an empty
    batch is returned. For every batch, the matching basic records are looked
    up by title id, their title ids are handed to ``crawl_titles``, and the
    result is saved through ``basic_service.save_all``.

    Args:
        basic_service: object providing ``get_by_ids(ids)`` and
            ``save_all(objects)``.
        rating_service: object providing ``get_by_offset(offset=...)``.
        crawl_titles: callable that takes a list of title ids and returns
            the objects to save.
    """
    offset = 0
    while True:
        ratings = rating_service.get_by_offset(offset=offset)
        # An empty batch means there is no record left to update.
        if not ratings:
            break
        # Advance once here so every path through the loop body moves on to
        # the next batch (the skip below included).
        # NOTE(review): the offset grows by 1 per batch, so get_by_offset
        # presumably treats it as a batch index rather than a row offset --
        # confirm against RatingService.
        offset += 1

        # Title ids referenced by the rating objects.
        rating_ids = [rating.title_id for rating in ratings]
        # Basic objects that exist for those title ids.
        basics = basic_service.get_by_ids(rating_ids)
        if not basics:
            # Nothing to crawl for this batch.
            continue

        # Only crawl the titles that actually have a basic record.
        ids = [basic.title_id for basic in basics]
        # Save the updated objects returned by the crawler.
        basic_service.save_all(crawl_titles(ids))


if __name__ == '__main__':
    update_basics(BasicService(), RatingService(), crawl)