Skip to content

Commit 689dc88

Browse files
committed
generate stats, remove redundant queries #205, upgrade sentry sdk
1 parent 901a86b commit 689dc88

File tree

2 files changed

+10
-146
lines changed

2 files changed

+10
-146
lines changed

mapswipe_workers/mapswipe_workers/generate_stats.py

Lines changed: 9 additions & 145 deletions
Original file line number | Diff line number | Diff line change
@@ -42,14 +42,9 @@ def generate_stats(only_new_results):
4242

4343
filename = f'{DATA_PATH}/api-data/agg_res_by_project_id.csv'
4444
get_aggregated_results_by_project_id(filename)
45+
csv_to_geojson(filename, 'geom')
46+
csv_to_geojson(filename, 'centroid')
4547

46-
filename = f'{DATA_PATH}/api-data/agg_res_by_project_id_geom.csv'
47-
get_aggregated_results_by_project_id_geom(filename)
48-
csv_to_geojson(filename)
49-
50-
filename = f'{DATA_PATH}/api-data/agg_res_by_project_id_centroid.csv'
51-
get_aggregated_results_by_project_id_centroid(filename)
52-
csv_to_geojson(filename)
5348

5449
filename = f'{DATA_PATH}/api-data/agg_projects.csv'
5550
get_aggregated_projects(filename)
@@ -62,14 +57,8 @@ def generate_stats(only_new_results):
6257

6358
filename = f'{DATA_PATH}/api-data/agg_progress_by_project_id.csv'
6459
get_aggregated_progress_by_project_id(filename)
65-
66-
filename = f'{DATA_PATH}/api-data/agg_progress_by_project_id_geom.csv'
67-
get_aggregated_progress_by_project_id_geom(filename)
68-
csv_to_geojson(filename)
69-
70-
filename = f'{DATA_PATH}/api-data/agg_progress_by_project_id_centroid.csv'
71-
get_aggregated_progress_by_project_id_centroid(filename)
72-
csv_to_geojson(filename)
60+
csv_to_geojson(filename, 'geom')
61+
csv_to_geojson(filename, 'centroid')
7362

7463
logger.info('start to export csv file for %s projects based on given project_id_list' % len(project_id_list))
7564
for project_id in project_id_list:
@@ -240,63 +229,14 @@ def get_aggregated_results_by_project_id(filename):
240229
filename: str
241230
'''
242231

243-
pg_db = auth.postgresDB()
244-
sql_query = "COPY (SELECT * FROM aggregated_results_by_project_id) TO STDOUT WITH CSV HEADER"
245-
246-
with open(filename, 'w') as f:
247-
pg_db.copy_expert(sql_query, f)
248-
249-
del pg_db
250-
251-
logger.info('saved aggregated results by project_id to %s' % filename)
252-
253-
254-
def get_aggregated_results_by_project_id_geom(filename):
255-
'''
256-
Export results aggregated on project_id basis as csv file.
257-
258-
Parameters
259-
----------
260-
filename: str
261-
'''
262-
263232
pg_db = auth.postgresDB()
264233
sql_query = """COPY (
265234
SELECT
266235
r.*
267236
,p.name
268237
,p.project_details
269238
,ST_AsText(p.geom) as geom
270-
FROM
271-
aggregated_results_by_project_id as r , projects as p
272-
WHERE
273-
r.project_id = p.project_id
274-
) TO STDOUT WITH (FORMAT CSV, HEADER, FORCE_QUOTE(project_id, name, project_details))"""
275-
276-
with open(filename, 'w') as f:
277-
pg_db.copy_expert(sql_query, f)
278-
279-
del pg_db
280-
281-
logger.info('saved aggregated results by project_id to %s' % filename)
282-
283-
284-
def get_aggregated_results_by_project_id_centroid(filename):
285-
'''
286-
Export results aggregated on project_id basis as csv file.
287-
288-
Parameters
289-
----------
290-
filename: str
291-
'''
292-
293-
pg_db = auth.postgresDB()
294-
sql_query = """COPY (
295-
SELECT
296-
r.*
297-
,p.name
298-
,p.project_details
299-
,ST_AsText(ST_Centroid(p.geom)) as geom
239+
,ST_AsText(ST_Centroid(p.geom)) as centroid
300240
FROM
301241
aggregated_results_by_project_id as r , projects as p
302242
WHERE
@@ -403,26 +343,6 @@ def get_aggregated_progress_by_project_id(filename):
403343
filename: str
404344
'''
405345

406-
pg_db = auth.postgresDB()
407-
sql_query = "COPY (SELECT * FROM aggregated_progress_by_project_id) TO STDOUT WITH CSV HEADER"
408-
409-
with open(filename, 'w') as f:
410-
pg_db.copy_expert(sql_query, f)
411-
412-
del pg_db
413-
414-
logger.info('saved aggregated progress by project_id to %s' % filename)
415-
416-
417-
def get_aggregated_progress_by_project_id_geom(filename):
418-
'''
419-
Export aggregated progress on a project_id basis as csv file.
420-
421-
Parameters
422-
----------
423-
filename: str
424-
'''
425-
426346
pg_db = auth.postgresDB()
427347
sql_query = """
428348
COPY (
@@ -431,37 +351,7 @@ def get_aggregated_progress_by_project_id_geom(filename):
431351
,p.name
432352
,p.project_details
433353
,ST_AsText(p.geom) as geom
434-
FROM
435-
aggregated_progress_by_project_id as r,
436-
projects as p
437-
WHERE
438-
p.project_id = r.project_id
439-
) TO STDOUT WITH (FORMAT CSV, HEADER, FORCE_QUOTE(project_id, name, project_details))"""
440-
441-
with open(filename, 'w') as f:
442-
pg_db.copy_expert(sql_query, f)
443-
444-
del pg_db
445-
logger.info('saved aggregated progress by project_id to %s' % filename)
446-
447-
448-
def get_aggregated_progress_by_project_id_centroid(filename):
449-
'''
450-
Export aggregated progress on a project_id basis as csv file.
451-
452-
Parameters
453-
----------
454-
filename: str
455-
'''
456-
457-
pg_db = auth.postgresDB()
458-
sql_query = """
459-
COPY (
460-
SELECT
461-
r.*
462-
,p.name
463-
,p.project_details
464-
,ST_AsText(ST_Centroid(p.geom)) as geom
354+
,ST_AsText(ST_Centroid(p.geom)) as centroid
465355
FROM
466356
aggregated_progress_by_project_id as r,
467357
projects as p
@@ -597,38 +487,12 @@ def get_last_result():
597487
return last_update
598488

599489

600-
def csv_to_geojson(filename):
490+
def csv_to_geojson(filename, geometry_field='geom'):
601491
'''
602492
Use ogr2ogr to convert csv file to GeoJSON
603493
'''
604494

605-
outfile = filename.replace('csv', 'geojson')
606-
# need to remove file here because ogr2ogr can't overwrite when choosing GeoJSON
607-
if os.path.isfile(outfile):
608-
os.remove(outfile)
609-
filename_without_path = filename.split('/')[-1].replace('.csv', '')
610-
# TODO: remove geom column from normal attributes in sql query
611-
subprocess.run([
612-
"ogr2ogr",
613-
"-f",
614-
"GeoJSON",
615-
outfile,
616-
filename,
617-
"-sql",
618-
f'SELECT *, CAST(geom as geometry) FROM "{filename_without_path}"'
619-
], check=True)
620-
logger.info(f'converted {filename} to {outfile}.')
621-
622-
cast_datatypes_for_geojson(outfile)
623-
624-
625-
def csv_to_geojson_centroids(filename):
626-
'''
627-
Use ogr2ogr to convert csv file to GeoJSON
628-
'''
629-
630-
outfile = filename.replace('.csv', '_centroids.geojson')
631-
495+
outfile = filename.replace('.csv', f'_{geometry_field}.geojson')
632496
# need to remove file here because ogr2ogr can't overwrite when choosing GeoJSON
633497
if os.path.isfile(outfile):
634498
os.remove(outfile)
@@ -641,7 +505,7 @@ def csv_to_geojson_centroids(filename):
641505
outfile,
642506
filename,
643507
"-sql",
644-
f'SELECT *, ST_Centroid(CAST(geom as geometry)) FROM "{filename_without_path}"'
508+
f'SELECT *, CAST({geometry_field} as geometry) FROM "{filename_without_path}"'
645509
], check=True)
646510
logger.info(f'converted {filename} to {outfile}.')
647511

mapswipe_workers/requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,5 +4,5 @@ mapswipe-workers==3.0
44
psycopg2-binary==2.8.3
55
schedule==0.6.0
66
slackclient==2.1.0
7-
sentry-sdk==0.11.2
7+
sentry-sdk==0.12.3
88
python-dateutil==2.8.0

0 commit comments

Comments
 (0)