Showing 45 changed files with 431,290 additions and 0 deletions.
The first change touches a short list of dataset artifacts:

@@ -1,2 +1,7 @@
HeriGraph_Dataset.zip
dataset/Amsterdam/Edge_List.csv
dataset/Suzhou/Edge_List.csv
dataset/Venice-XL/Textual_Features.csv
dataset/Venice-XL/Visual_Features.csv
dataset/Venice/Edge_List.csv
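For reference, the per-city edge lists and feature tables added above are plain CSV files; below is a minimal preview sketch. The delimiter and column layout are not shown in this commit, so nothing specific is assumed about them.

import pandas as pd

# Hypothetical preview of one of the newly added files; adjust sep/encoding to
# whatever the file actually uses -- the commit itself does not show its layout.
edges = pd.read_csv('dataset/Amsterdam/Edge_List.csv')
print(edges.shape)
print(edges.columns.tolist())
print(edges.head())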
@@ -0,0 +1,162 @@ — a new Python file (162 added lines): a script that walks the Flickr API for geo-tagged photos around a grid of query points, downloads small renditions, and checkpoints their metadata.
import json
import flickrapi
import pandas as pd
import numpy as np
from argparse import Namespace
from collections import Counter
import pickle
import os
import urllib.request  # urllib.request must be imported explicitly for urlretrieve below

args = Namespace(
    # Data and Path information
    api_key = u'[api_key]',            # Flickr API key (placeholder)
    api_secret = u'[api_secret]',      # Flickr API secret (placeholder)
    radius = 2,                        # search radius around the query point (km by default)
    save_dir = 'data_storage/',        # where photo-id CSVs and progress files are written
    tags = None,                       # optional tag filter for the Flickr search
    len_grid = 1,                      # size of the (x, y) grid of query points
    image_dir = 'data_storage/images/' # where downloaded images and metadata are stored
)

def get_latlon(id_x, id_y, num = args.len_grid):
    # Placeholder: with len_grid = 1 every grid cell maps to the same point
    # (central Amsterdam); extend this to derive lat/lon from id_x, id_y for larger grids.
    lat = 52.365
    lon = 4.887777778
    return lat, lon


def collect_ids(flickr, lat, lon, radius, x, y, tags = None):
    # Resume from a previously saved id list for this grid cell, if one exists.
    if 'photo_ids_{}_{}.csv'.format(x, y) in os.listdir(args.save_dir):
        Ids = pd.read_csv(args.save_dir + 'photo_ids_{}_{}.csv'.format(x, y),
                          sep='\t')['ids'].astype(str).unique().tolist()
    else:
        Ids = []

    # Walk the paginated search results for geo-tagged photos around (lat, lon).
    walk = flickr.walk(has_geo = 1, lat = lat, lon = lon, radius = radius, tags = tags)
    for photo in walk:
        id_now = photo.get('id')
        if id_now in Ids:
            continue
        Ids.append(id_now)
        if len(Ids) % 200 == 0:
            # Checkpoint the id list every 200 new photos.
            print('{} photo ids collected'.format(len(Ids)))
            pd.Series(Ids, name = 'ids').to_csv(args.save_dir + 'photo_ids_{}_{}.csv'.format(x, y), index=False)
        if len(Ids) >= 5000:
            # Cap each grid cell at 5000 photos.
            return Ids
    pd.Series(Ids, name = 'ids').to_csv(args.save_dir + 'photo_ids_{}_{}.csv'.format(x, y), index=False)
    return Ids

def update_df(Photos):
    # Stub: the photo dictionary is returned unchanged.
    return Photos


def get_photos(flickr, Photos, Ids):
    Photos = {str(k): v for k, v in Photos.items()}
    processed = Photos.keys()  # live view: grows as new photos are added below
    print(len(processed))
    for id_now in Ids:
        if id_now in processed:
            continue
        else:
            Photos[id_now] = {}
            try:
                # Fetch the available renditions and the full metadata record;
                # both calls sit inside the try so that a single failing photo is skipped.
                sizes = json.loads(flickr.photos.getSizes(photo_id = id_now, format='json'))
                info = json.loads(flickr.photos.getInfo(photo_id = id_now, format='json'))
                # Fixed indices into the size list (150px square, 320px, a mid-size
                # rendition, and the largest available); photos exposing fewer sizes
                # fall through to the except branch.
                url_c = sizes['sizes']['size'][8]['source']
                url_q = sizes['sizes']['size'][1]['source']
                url_n = sizes['sizes']['size'][4]['source']
                url_largest = sizes['sizes']['size'][-1]['source']
                can = sizes['sizes']['candownload']
                Photos[id_now]['candownload'] = can
                Photos[id_now]['url_c'] = url_c
                Photos[id_now]['url_q'] = url_q
                Photos[id_now]['url_n'] = url_n
                Photos[id_now]['url_largest'] = url_largest
                Photos[id_now]['others'] = sizes
                Photos[id_now]['info'] = info
                Photos[id_now]['owner'] = info['photo']['owner']['nsid']
                Photos[id_now]['owner_loc'] = info['photo']['owner']['location']
                Photos[id_now]['title'] = info['photo']['title']['_content']
                Photos[id_now]['description'] = info['photo']['description']['_content']
                Photos[id_now]['comments'] = info['photo']['comments']['_content']
                Photos[id_now]['taken'] = info['photo']['dates']['taken']
                Photos[id_now]['views'] = info['photo']['views']
                Photos[id_now]['people'] = info['photo']['people']['haspeople']
                Photos[id_now]['tags'] = info['photo']['tags']['tag']
                Photos[id_now]['lat'] = info['photo']['location']['latitude']
                Photos[id_now]['lon'] = info['photo']['location']['longitude']
                Photos[id_now]['neighbourhood'] = info['photo']['location']['neighbourhood']['_content']
                Photos[id_now]['url'] = info['photo']['urls']['url'][0]['_content']

                if can:
                    # Download the 150px and 320px renditions when the owner allows it.
                    urllib.request.urlretrieve(url_q, args.image_dir + '150/{}.jpg'.format(id_now))
                    urllib.request.urlretrieve(url_n, args.image_dir + '320/{}.jpg'.format(id_now))

                if len(processed) % 20 == 1:
                    # Periodic checkpoint of everything collected so far.
                    print('{}/{} photos collected'.format(len(processed), len(Ids)))
                    with open(args.image_dir + 'Photo_sizes_pre_sep.p', 'wb') as fp:
                        pickle.dump(Photos, fp, protocol=pickle.HIGHEST_PROTOCOL)
                    with open(args.image_dir + 'Photo_sizes.p', 'wb') as fp:
                        pickle.dump(Photos, fp, protocol=pickle.HIGHEST_PROTOCOL)
                    photo_df = pd.DataFrame(Photos).T.drop(['others', 'info'], axis=1)
                    photo_df.to_csv(args.image_dir + 'photos_sizes.csv', sep='\t', encoding='utf-8-sig')
            except Exception as e:
                # Skip photos with missing fields, restricted metadata, or API errors.
                print(e)
                print(id_now)
                continue

    # Final dump of the full metadata dictionary and a flat CSV without the raw API payloads.
    with open(args.image_dir + 'Photo_sizes_pre.p', 'wb') as fp:
        pickle.dump(Photos, fp, protocol=pickle.HIGHEST_PROTOCOL)

    with open(args.image_dir + 'Photo_sizes.p', 'wb') as fp:
        pickle.dump(Photos, fp, protocol=pickle.HIGHEST_PROTOCOL)
    photo_df = pd.DataFrame(Photos).T.drop(['others', 'info'], axis=1)
    photo_df.to_csv(args.image_dir + 'photos_sizes.csv', sep='\t', encoding='utf-8-sig')
    return Photos

def main():

    flickr = flickrapi.FlickrAPI(args.api_key, args.api_secret)

    # Make sure the output directories exist before listing or writing into them.
    os.makedirs(args.save_dir, exist_ok=True)
    os.makedirs(args.image_dir + '150/', exist_ok=True)
    os.makedirs(args.image_dir + '320/', exist_ok=True)

    # Resume bookkeeping: which grid cells have already been fully processed
    # (loaded from the same location it is saved to below).
    if 'completed.p' in os.listdir(args.save_dir):
        with open(args.save_dir + 'completed.p', 'rb') as fp:
            completed = pickle.load(fp)
    else:
        completed = {}

    # Resume the photo metadata collected in earlier runs, if any.
    if 'Photo_sizes.p' in os.listdir(args.image_dir):
        with open(args.image_dir + 'Photo_sizes.p', 'rb') as fp:
            Photos = pickle.load(fp)
    else:
        Photos = {}

    for x in range(args.len_grid):
        for y in range(args.len_grid):
            if (x, y) in completed.keys():
                continue

            lat, lon = get_latlon(x, y)

            # collect_ids resumes from photo_ids_{x}_{y}.csv on its own if it exists.
            Ids = collect_ids(flickr, lat, lon, args.radius, tags=args.tags, x=x, y=y)

            Photos = get_photos(flickr, Photos, Ids)

            # Record this grid cell as done and checkpoint the progress table.
            completed[(x, y)] = {}
            completed[(x, y)]['lat'] = lat
            completed[(x, y)]['lon'] = lon
            completed[(x, y)]['collected'] = len(Ids)
            completed[(x, y)]['total'] = len(Photos)

            with open(args.save_dir + 'completed.p', 'wb') as fp:
                pickle.dump(completed, fp, protocol=pickle.HIGHEST_PROTOCOL)

            completed_df = pd.DataFrame(completed).T
            completed_df.to_csv(args.save_dir + 'completed.csv')


if __name__ == "__main__":
    main()

"""## END"""