From 2e89ea9fc8b012638f609f63665d56247ccca80e Mon Sep 17 00:00:00 2001 From: "douglas.petronilio" Date: Fri, 20 Mar 2020 17:15:28 -0300 Subject: [PATCH 1/4] get info from bucket --- .../management/commands/update_report.py | 21 ++++++++++++++++--- api/settings.py | 6 ++++++ docker-compose.yaml | 7 ++++++- requirements.txt | 1 + 4 files changed, 31 insertions(+), 4 deletions(-) diff --git a/api/report/management/commands/update_report.py b/api/report/management/commands/update_report.py index dc29ceb..adf58c8 100644 --- a/api/report/management/commands/update_report.py +++ b/api/report/management/commands/update_report.py @@ -7,6 +7,8 @@ from apscheduler.schedulers.blocking import BlockingScheduler from django.core.management.base import BaseCommand from django.utils.timezone import make_aware +from django.conf import settings +import boto3 from api.report.models import * @@ -17,11 +19,24 @@ def cron(*args, **options): if 6 <= datetime.now().hour <= 20: print(f"Cron job is running. The time is {datetime.now()}") - request = requests.get(url) - content = request.content.decode('utf8').replace('var database=', '') + s3resource = boto3.resource('s3', + aws_access_key_id=settings.AWS_ACCESS_KEY_ID, + aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY) + + bucket_name = settings.AWS_STORAGE_BUCKET_NAME + + obj = s3resource.Object(bucket_name,"ministerio_saude_brasil/2020-03-20/16-47/rawData.json") + + # request = requests.get(url) + + # content = request.content.decode('utf8').replace('var database=', '') + # data = json.loads(content) + content = obj.get()['Body'].read().decode('utf-8') data = json.loads(content) + print('Object body: {}'.format(data['brazil'])) + for record in data['brazil']: date_time = datetime.strptime(f"{record['date']} {record['time']}", '%d/%m/%Y %H:%M') @@ -57,6 +72,6 @@ def handle(self, *args, **options): print('Cron started! Wait the job starts!') scheduler = BlockingScheduler() - scheduler.add_job(cron, 'cron', minutes=20, timezone='America/Maceio') + scheduler.add_job(cron, 'interval', minutes=1, timezone=settings.TIME_ZONE) scheduler.start() diff --git a/api/settings.py b/api/settings.py index a0b3969..58b61e0 100644 --- a/api/settings.py +++ b/api/settings.py @@ -62,3 +62,9 @@ # https://docs.djangoproject.com/en/3.0/howto/static-files/ STATIC_URL = '/static/' + +AWS_ACCESS_KEY_ID = os.environ['AWS_ACCESS_KEY_ID'] +AWS_SECRET_ACCESS_KEY = os.environ['AWS_SECRET_ACCESS_KEY'] +AWS_STORAGE_BUCKET_NAME = os.environ['AWS_STORAGE_BUCKET_NAME'] +AWS_LOCATION = os.environ['AWS_LOCATION'] + diff --git a/docker-compose.yaml b/docker-compose.yaml index ef9910f..5437416 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -6,4 +6,9 @@ services: context: . target: release image: hummingbird - \ No newline at end of file + environment: + AWS_ACCESS_KEY_ID: + AWS_SECRET_ACCESS_KEY: + AWS_STORAGE_BUCKET_NAME: + AWS_LOCATION: + diff --git a/requirements.txt b/requirements.txt index 7b5a57c..88ad692 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,3 +10,4 @@ requests==2.23.0 sqlparse==0.3.1 urllib3==1.25.8 apscheduler==3.6.3 +boto3==1.12.25 From 1d42f288d86ebc72e14195bfc10f7c1143095528 Mon Sep 17 00:00:00 2001 From: "douglas.petronilio" Date: Fri, 20 Mar 2020 18:31:35 -0300 Subject: [PATCH 2/4] get url from bino --- api/report/management/commands/update_report.py | 7 +++---- api/settings.py | 2 +- docker-compose.yaml | 8 +++++--- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/api/report/management/commands/update_report.py b/api/report/management/commands/update_report.py index adf58c8..a9b4062 100644 --- a/api/report/management/commands/update_report.py +++ b/api/report/management/commands/update_report.py @@ -25,13 +25,12 @@ def cron(*args, **options): aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY) bucket_name = settings.AWS_STORAGE_BUCKET_NAME + url = "%s%s"%(settings.BINO_URL, 'crawl/ministerio_saude_brasil') + response = requests.post(url = url, data = {}) + file_name = json.loads(response.content.decode('utf-8'))['path'] obj = s3resource.Object(bucket_name,"ministerio_saude_brasil/2020-03-20/16-47/rawData.json") - # request = requests.get(url) - - # content = request.content.decode('utf8').replace('var database=', '') - # data = json.loads(content) content = obj.get()['Body'].read().decode('utf-8') data = json.loads(content) diff --git a/api/settings.py b/api/settings.py index 58b61e0..eadf475 100644 --- a/api/settings.py +++ b/api/settings.py @@ -15,7 +15,6 @@ # Build paths inside the project like this: os.path.join(BASE_DIR, ...) BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - # Quick-start development settings - unsuitable for production # See https://docs.djangoproject.com/en/3.0/howto/deployment/checklist/ @@ -67,4 +66,5 @@ AWS_SECRET_ACCESS_KEY = os.environ['AWS_SECRET_ACCESS_KEY'] AWS_STORAGE_BUCKET_NAME = os.environ['AWS_STORAGE_BUCKET_NAME'] AWS_LOCATION = os.environ['AWS_LOCATION'] +BINO_URL = os.environ['BINO_URL'] diff --git a/docker-compose.yaml b/docker-compose.yaml index 5437416..3ae1162 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -7,8 +7,10 @@ services: target: release image: hummingbird environment: - AWS_ACCESS_KEY_ID: - AWS_SECRET_ACCESS_KEY: - AWS_STORAGE_BUCKET_NAME: + AWS_ACCESS_KEY_ID: + AWS_SECRET_ACCESS_KEY: + AWS_STORAGE_BUCKET_NAME: AWS_LOCATION: + BINO_URL: 'http://localhost:8080/' + network_mode: host From 72d0f70773f88eb9cf7d7649c1af17f616df16f0 Mon Sep 17 00:00:00 2001 From: "douglas.petronilio" Date: Fri, 20 Mar 2020 18:47:59 -0300 Subject: [PATCH 3/4] Improve path to crawl --- api/report/management/commands/update_report.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/report/management/commands/update_report.py b/api/report/management/commands/update_report.py index a9b4062..745afad 100644 --- a/api/report/management/commands/update_report.py +++ b/api/report/management/commands/update_report.py @@ -25,7 +25,7 @@ def cron(*args, **options): aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY) bucket_name = settings.AWS_STORAGE_BUCKET_NAME - url = "%s%s"%(settings.BINO_URL, 'crawl/ministerio_saude_brasil') + url = "%s%s"%(settings.BINO_URL, '/crawl/ministerio_saude_brasil') response = requests.post(url = url, data = {}) file_name = json.loads(response.content.decode('utf-8'))['path'] From b23c77ef15db61cb00bd33ae65042bd5f220c781 Mon Sep 17 00:00:00 2001 From: "douglas.petronilio" Date: Fri, 20 Mar 2020 18:50:49 -0300 Subject: [PATCH 4/4] change line bucket_name to be together where is used --- api/report/management/commands/update_report.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/report/management/commands/update_report.py b/api/report/management/commands/update_report.py index 745afad..629774c 100644 --- a/api/report/management/commands/update_report.py +++ b/api/report/management/commands/update_report.py @@ -24,11 +24,11 @@ def cron(*args, **options): aws_access_key_id=settings.AWS_ACCESS_KEY_ID, aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY) - bucket_name = settings.AWS_STORAGE_BUCKET_NAME url = "%s%s"%(settings.BINO_URL, '/crawl/ministerio_saude_brasil') response = requests.post(url = url, data = {}) file_name = json.loads(response.content.decode('utf-8'))['path'] + bucket_name = settings.AWS_STORAGE_BUCKET_NAME obj = s3resource.Object(bucket_name,"ministerio_saude_brasil/2020-03-20/16-47/rawData.json") content = obj.get()['Body'].read().decode('utf-8')