From 789399ae2e3f5be44ef8c2d818bb3615826581cd Mon Sep 17 00:00:00 2001 From: israr-ulhaq Date: Tue, 9 Jul 2024 11:56:32 +0100 Subject: [PATCH 1/3] Add pages for downloading files from find --- app/main/download_data.py | 82 +++++++++++++++++++++++ app/main/forms.py | 4 ++ app/main/routes.py | 52 +++++++++----- app/static/src/css/custom.css | 10 --- app/templates/main/file-not-found.html | 26 +++++++ app/templates/main/request-received.html | 2 +- app/templates/main/retrieve-download.html | 17 +++++ tests/test_download_data.py | 0 tests/test_routes.py | 62 +++++++++++++++++ 9 files changed, 228 insertions(+), 27 deletions(-) create mode 100644 app/templates/main/file-not-found.html create mode 100644 app/templates/main/retrieve-download.html create mode 100644 tests/test_download_data.py diff --git a/app/main/download_data.py b/app/main/download_data.py index a5befa6..a69b1e8 100644 --- a/app/main/download_data.py +++ b/app/main/download_data.py @@ -1,9 +1,12 @@ import io import json +from collections import namedtuple from datetime import datetime from enum import StrEnum from typing import Any +from urllib.parse import urlencode +import requests from flask import abort, current_app from app.const import MIMETYPE @@ -238,3 +241,82 @@ def process_api_response(query_params: dict) -> tuple: return abort(500), f"Unknown content type: {content_type}" return content_type, file_content + + +def process_async_download(query_params: dict): + """process async download request to start the background process. + :param query_params: (dict): Query parameters for the API request. + """ + request_url = ( + Config.DATA_STORE_API_HOST + + "/trigger_async_download" + + ("?" + urlencode(query_params, doseq=True) if query_params else "") + ) + requests.post(request_url, timeout=20) + + +def get_presigned_url(filename: str): + """Get the presigned link for the short time to retrieve the file from s3 bucket. + :param filename (str): object name which needs to be retrieved from s3 if exists + Raises:ValueError: If object doest not exists in S3, it will raise an error. + Returns:Returns the response the API. + """ + if not filename: + raise ValueError("filename is required") + + response = get_response(Config.DATA_STORE_API_HOST, f"/get-presigned-url/{filename}") + return response.json()["presigned_url"] + + +FileMetadata = namedtuple("FileMetadata", ["response_status_code", "formated_date", "file_format", "file_size_str"]) + + +def get_find_download_file_metadata(filename: str) -> FileMetadata: + """To get the object metadata from S3 using the ovject Key + :param filename (str): object name to get the metadata + + Raises: + ValueError: If object doest not exists in S3, it will raise an error. + + Returns: FileMetadata: + - Returns the last modified date, + -file format, and human-readable file size. + """ + response = get_response(Config.DATA_STORE_API_HOST, f"/get-find-download-metadata/{filename}") + response_status_code = response.status_code + + if response_status_code == 200: + metadata = response.json() + file_size = metadata["content_length"] + file_size_str = get_human_readable_file_size(file_size) + last_modified_date = metadata["last_modified"] + content_type = metadata["content_type"] + + date = datetime.fromisoformat(last_modified_date) + formated_date = date.strftime("%d %B %Y") + + if content_type == MIMETYPE.XLSX: + file_format = "Microsoft Excel spreadsheet" + elif content_type == MIMETYPE.JSON: + file_format = "JSON" + else: + file_format = "Unknown type" + + return FileMetadata(response_status_code, formated_date, file_format, file_size_str) + else: + return FileMetadata(response_status_code, None, None, None) + + +def get_human_readable_file_size(file_size_bytes: int) -> str: + """Return a human-readable file size string. + :param file_size_bytes: file size in bytes, + :return: human-readable file size, + """ + + file_size_kb = round(file_size_bytes / 1024, 1) + if file_size_kb < 1024: + return f"{round(file_size_kb, 1)} KB" + elif file_size_kb < 1024 * 1024: + return f"{round(file_size_kb / 1024, 1)} MB" + else: + return f"{round(file_size_kb / (1024 * 1024), 1)} GB" diff --git a/app/main/forms.py b/app/main/forms.py index ef30226..cf2585d 100644 --- a/app/main/forms.py +++ b/app/main/forms.py @@ -34,3 +34,7 @@ class DownloadForm(FlaskForm): default=None, ) download = SubmitField("Download", widget=GovSubmitInput()) + + +class RetrieveForm(FlaskForm): + download = SubmitField("Download Your Data", widget=GovSubmitInput()) diff --git a/app/main/routes.py b/app/main/routes.py index c218cf0..e6ba3db 100644 --- a/app/main/routes.py +++ b/app/main/routes.py @@ -1,5 +1,4 @@ # isort: off -from datetime import datetime from flask import ( flash, @@ -7,7 +6,6 @@ render_template, request, url_for, - send_file, abort, current_app, g, @@ -24,14 +22,16 @@ FormNames, financial_quarter_from_mapping, financial_quarter_to_mapping, + get_find_download_file_metadata, get_fund_checkboxes, get_org_checkboxes, get_outcome_checkboxes, + get_presigned_url, get_region_checkboxes, get_returns, - process_api_response, + process_async_download, ) -from app.main.forms import DownloadForm +from app.main.forms import DownloadForm, RetrieveForm @bp.route("/", methods=["GET"]) @@ -86,8 +86,6 @@ def download(): to_quarter = request.form.get("to-quarter") to_year = request.form.get("to-year") - current_datetime = datetime.now().strftime("%Y-%m-%d-%H%M%S") - reporting_period_start = ( financial_quarter_from_mapping(quarter=from_quarter, year=from_year) if to_quarter and to_year else None ) @@ -96,7 +94,7 @@ def download(): financial_quarter_to_mapping(quarter=to_quarter, year=to_year) if to_quarter and to_year else None ) - query_params = {"file_format": file_format} + query_params = {"email_address": g.user.email, "file_format": file_format} if orgs: query_params["organisations"] = orgs if regions: @@ -110,7 +108,7 @@ def download(): if reporting_period_end: query_params["rp_end"] = reporting_period_end - content_type, file_content = process_api_response(query_params) + process_async_download(query_params) # Todo: Change it to async current_app.logger.info( "Request for download by {user_id} with {query_params}", @@ -121,21 +119,43 @@ def download(): "request_type": "download", }, ) - return send_file( - file_content, - download_name=f"download-{current_datetime}.{file_format}", - as_attachment=True, - mimetype=content_type, - ) + + return redirect(url_for("main.request_received")) @bp.route("/request-received", methods=["GET", "POST"]) @login_required(return_app=SupportedApp.POST_AWARD_FRONTEND) def request_received(): + return render_template("request-received.html", user_email=g.user.email) + + +@bp.route("/retrieve-download/", methods=["GET", "POST"]) +@login_required(return_app=SupportedApp.POST_AWARD_FRONTEND) +def retrieve_download(filename: str): + """Get file from S3, send back to user with presigned link + and file metadata, if file is not exist + return file not found page + :param: filename (str):filename of the file which needs to be retrieved with metadata + Returns: redirect to presigned url + """ + file_metadata = get_find_download_file_metadata(filename) + if file_metadata.response_status_code == 404: + if request.method == "POST": + return redirect(url_for(".retrieve_download", filename=filename)) + return render_template("file-not-found.html") + form = RetrieveForm() context = { - "user_email": g.user.email, + "filename": filename, + "file_size": file_metadata.file_size_str, + "file_format": file_metadata.file_format, + "date": file_metadata.formated_date, } - return render_template("request-received.html", context=context) + if form.validate_on_submit(): + presigned_url = get_presigned_url(filename) + return redirect(presigned_url) + + else: + return render_template("retrieve-download.html", context=context, form=form) @bp.route("/accessibility", methods=["GET"]) diff --git a/app/static/src/css/custom.css b/app/static/src/css/custom.css index 474a424..30da754 100644 --- a/app/static/src/css/custom.css +++ b/app/static/src/css/custom.css @@ -3,16 +3,6 @@ } -.govuk-button { - background-color: #1d70b8; -} - - -.govuk-button:hover { - background-color: #12066d; -} - - .govuk-footer__meta { display: flex; margin-right: -15px; diff --git a/app/templates/main/file-not-found.html b/app/templates/main/file-not-found.html new file mode 100644 index 0000000..570fb27 --- /dev/null +++ b/app/templates/main/file-not-found.html @@ -0,0 +1,26 @@ +{% extends "base.html" %} + +{%- from 'govuk_frontend_jinja/components/back-link/macro.html' import govukBackLink -%} + +{% block pageTitle %} +Page not found – {{ config['SERVICE_NAME'] }} – GOV.UK +{% endblock pageTitle %} + +{% set mainClasses = "govuk-main-wrapper--l" %} + +{% block beforeContent %} + {{ govukBackLink({ + 'text': "Back", + 'href': url_for('main.download') + }) }} +{% endblock beforeContent %} + +{% block content %} +
+
+

Your link to download data has expired

+

Your data download link has expired.

+

You can create a new data download using new filter options.

+
+
+{% endblock content %} diff --git a/app/templates/main/request-received.html b/app/templates/main/request-received.html index 8ad5f23..cd3ea8c 100644 --- a/app/templates/main/request-received.html +++ b/app/templates/main/request-received.html @@ -12,7 +12,7 @@

What happens next

- We will email a link to {{ context["user_email"] }}. + We will email a link to {{ user_email }}.

This may take up to 5 minutes to be delivered to your inbox.

diff --git a/app/templates/main/retrieve-download.html b/app/templates/main/retrieve-download.html new file mode 100644 index 0000000..1ab72ae --- /dev/null +++ b/app/templates/main/retrieve-download.html @@ -0,0 +1,17 @@ +{% extends "base.html" %} + +{% block content %} +
+
+

Your data is ready to be downloaded

+
+

You requested a data download on {{ context.date }}. +
File format: {{ context.file_format }}, {{ context.file_size }}

+
+
+ {{ form.csrf_token }} + {{ form.download }} +
+
+
+{% endblock content %} diff --git a/tests/test_download_data.py b/tests/test_download_data.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_routes.py b/tests/test_routes.py index 9252f08..053a0f4 100644 --- a/tests/test_routes.py +++ b/tests/test_routes.py @@ -5,6 +5,8 @@ import pytest from bs4 import BeautifulSoup +from app.main.download_data import FileMetadata + def test_index_page_redirect(flask_test_client): response = flask_test_client.get("/") @@ -152,3 +154,63 @@ def test_user_not_signed(unauthenticated_flask_test_client): response = unauthenticated_flask_test_client.get("/request-received") assert response.status_code == 302 assert response.location == "authenticator/sessions/sign-out?return_app=post-award-frontend" + + +def test_download_file_exist(flask_test_client): + file_metadata = FileMetadata(200, "06 July 2024", "Microsoft Excel spreadsheet", "1 MB") + + with patch("app.main.routes.get_find_download_file_metadata", return_value=file_metadata): + response = flask_test_client.get( + "/retrieve-download/fund-monitoring-data-2024-07-05-11:18:45-e4c77136-18ca-4ba3-9896-0ce572984e72.json" + ) + + assert response.status_code == 200 + page = BeautifulSoup(response.text) + download_button = page.select_one("button#download") + assert download_button is not None + + +def test_file_not_found(flask_test_client): + file_metadata = FileMetadata(404, None, None, None) + + with patch("app.main.routes.get_find_download_file_metadata", return_value=file_metadata): + response = flask_test_client.get( + "/retrieve-download/fund-monitoring-data-2024-07-05-11:18:45-e4c77136-18ca-4ba3-9896-0ce572984e72.json" + ) + + assert response.status_code == 200 + page = BeautifulSoup(response.text) + download_button = page.select_one("button#download") + assert download_button is None + assert b"Your link to download data has expired" in response.data + + +def test_presigned_url( + flask_test_client, +): + presigned_url = "https://example/presigned-url" + file_metadata = FileMetadata(200, "06 July 2024", "Microsoft Excel spreadsheet", "1 MB") + with ( + patch("app.main.routes.get_find_download_file_metadata", return_value=file_metadata), + patch("app.main.routes.get_presigned_url", return_value=presigned_url), + ): + response = flask_test_client.post( + "/retrieve-download/fund-monitoring-data-2024-07-05-11:18:45-e4c77136-18ca-4ba3-9896-0ce572984e72.json" + ) + + assert response.status_code == 302 + assert response.location == presigned_url + + +def test_file_not_exist(flask_test_client): + file_metadata = FileMetadata(404, None, None, None) + with patch("app.main.routes.get_find_download_file_metadata", return_value=file_metadata): + response = flask_test_client.post( + "/retrieve-download/fund-monitoring-data-2024-07-05-11:18:45-e4c77136-18ca-4ba3-9896-0ce572984e72.json" + ) + + assert response.status_code == 302 + assert ( + response.location + == "/retrieve-download/fund-monitoring-data-2024-07-05-11:18:45-e4c77136-18ca-4ba3-9896-0ce572984e72.json" + ) From a3ca360c220c684001fb0d2466639a002bd2e949 Mon Sep 17 00:00:00 2001 From: Samuel Williams Date: Tue, 9 Jul 2024 11:58:44 +0100 Subject: [PATCH 2/3] Fix tests and remove change to async download --- app/main/download_data.py | 36 +++++++++++--------------- app/main/routes.py | 17 +++++++++--- app/templates/main/file-not-found.html | 2 +- tests/test_download_data.py | 33 +++++++++++++++++++++++ 4 files changed, 62 insertions(+), 26 deletions(-) diff --git a/app/main/download_data.py b/app/main/download_data.py index a69b1e8..61aa9e4 100644 --- a/app/main/download_data.py +++ b/app/main/download_data.py @@ -4,9 +4,7 @@ from datetime import datetime from enum import StrEnum from typing import Any -from urllib.parse import urlencode -import requests from flask import abort, current_app from app.const import MIMETYPE @@ -243,18 +241,6 @@ def process_api_response(query_params: dict) -> tuple: return content_type, file_content -def process_async_download(query_params: dict): - """process async download request to start the background process. - :param query_params: (dict): Query parameters for the API request. - """ - request_url = ( - Config.DATA_STORE_API_HOST - + "/trigger_async_download" - + ("?" + urlencode(query_params, doseq=True) if query_params else "") - ) - requests.post(request_url, timeout=20) - - def get_presigned_url(filename: str): """Get the presigned link for the short time to retrieve the file from s3 bucket. :param filename (str): object name which needs to be retrieved from s3 if exists @@ -294,19 +280,27 @@ def get_find_download_file_metadata(filename: str) -> FileMetadata: date = datetime.fromisoformat(last_modified_date) formated_date = date.strftime("%d %B %Y") - - if content_type == MIMETYPE.XLSX: - file_format = "Microsoft Excel spreadsheet" - elif content_type == MIMETYPE.JSON: - file_format = "JSON" - else: - file_format = "Unknown type" + file_format = get_file_format_from_content_type(content_type) return FileMetadata(response_status_code, formated_date, file_format, file_size_str) else: return FileMetadata(response_status_code, None, None, None) +def get_file_format_from_content_type(file_extension: str) -> str: + """Return nice file format name based on the file extension. + :param file_extension: file extension, + :return: nice file format name, + """ + + file_format = "Unknown file" + if file_extension == MIMETYPE.XLSX: + file_format = "Microsoft Excel spreadsheet" + elif file_extension == MIMETYPE.JSON: + file_format = "JSON file" + return file_format + + def get_human_readable_file_size(file_size_bytes: int) -> str: """Return a human-readable file size string. :param file_size_bytes: file size in bytes, diff --git a/app/main/routes.py b/app/main/routes.py index e6ba3db..58aa481 100644 --- a/app/main/routes.py +++ b/app/main/routes.py @@ -1,4 +1,5 @@ # isort: off +from datetime import datetime from flask import ( flash, @@ -9,6 +10,7 @@ abort, current_app, g, + send_file, ) # isort: on @@ -29,7 +31,7 @@ get_presigned_url, get_region_checkboxes, get_returns, - process_async_download, + process_api_response, ) from app.main.forms import DownloadForm, RetrieveForm @@ -86,6 +88,8 @@ def download(): to_quarter = request.form.get("to-quarter") to_year = request.form.get("to-year") + current_datetime = datetime.now().strftime("%Y-%m-%d-%H%M%S") + reporting_period_start = ( financial_quarter_from_mapping(quarter=from_quarter, year=from_year) if to_quarter and to_year else None ) @@ -94,7 +98,7 @@ def download(): financial_quarter_to_mapping(quarter=to_quarter, year=to_year) if to_quarter and to_year else None ) - query_params = {"email_address": g.user.email, "file_format": file_format} + query_params = {"file_format": file_format} if orgs: query_params["organisations"] = orgs if regions: @@ -108,7 +112,7 @@ def download(): if reporting_period_end: query_params["rp_end"] = reporting_period_end - process_async_download(query_params) # Todo: Change it to async + content_type, file_content = process_api_response(query_params) current_app.logger.info( "Request for download by {user_id} with {query_params}", @@ -120,7 +124,12 @@ def download(): }, ) - return redirect(url_for("main.request_received")) + return send_file( + file_content, + download_name=f"download-{current_datetime}.{file_format}", + as_attachment=True, + mimetype=content_type, + ) @bp.route("/request-received", methods=["GET", "POST"]) diff --git a/app/templates/main/file-not-found.html b/app/templates/main/file-not-found.html index 570fb27..09217f2 100644 --- a/app/templates/main/file-not-found.html +++ b/app/templates/main/file-not-found.html @@ -3,7 +3,7 @@ {%- from 'govuk_frontend_jinja/components/back-link/macro.html' import govukBackLink -%} {% block pageTitle %} -Page not found – {{ config['SERVICE_NAME'] }} – GOV.UK +Your link to download data has expired – {{ config['SERVICE_NAME'] }} – GOV.UK {% endblock pageTitle %} {% set mainClasses = "govuk-main-wrapper--l" %} diff --git a/tests/test_download_data.py b/tests/test_download_data.py index e69de29..e620afd 100644 --- a/tests/test_download_data.py +++ b/tests/test_download_data.py @@ -0,0 +1,33 @@ +import pytest + +from app.main.download_data import get_file_format_from_content_type, get_human_readable_file_size + + +@pytest.mark.parametrize( + "file_extension, expected_file_format", + [ + ("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "Microsoft Excel spreadsheet"), + ("application/json", "JSON file"), + ("plain/text", "Unknown file"), + ("", "Unknown file"), + ], +) +def test_get_file_format_from_content_type(file_extension, expected_file_format): + """Test get_file_format_from_content_type() function with various file extensions.""" + assert get_file_format_from_content_type(file_extension) == expected_file_format + + +@pytest.mark.parametrize( + "file_size_bytes, expected_file_size_str", + [ + (1024, "1.0 KB"), + (1024 * 20 + 512, "20.5 KB"), + (1024 * 1024, "1.0 MB"), + (1024 * 1024 * 10.67, "10.7 MB"), + (1024 * 1024 * 1024, "1.0 GB"), + (1024 * 1024 * 1024 * 2.58, "2.6 GB"), + ], +) +def test_get_human_readable_file_size(file_size_bytes, expected_file_size_str): + """Test get_human_readable_file_size() function with various file sizes.""" + assert get_human_readable_file_size(file_size_bytes) == expected_file_size_str From 4dae423705cc346d3ff0efbc37f2784449fb6f99 Mon Sep 17 00:00:00 2001 From: Samuel Williams Date: Tue, 9 Jul 2024 12:04:52 +0100 Subject: [PATCH 3/3] Align implementation with prototype and design --- app/main/forms.py | 2 +- app/templates/main/file-not-found.html | 7 ------- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/app/main/forms.py b/app/main/forms.py index cf2585d..5683149 100644 --- a/app/main/forms.py +++ b/app/main/forms.py @@ -37,4 +37,4 @@ class DownloadForm(FlaskForm): class RetrieveForm(FlaskForm): - download = SubmitField("Download Your Data", widget=GovSubmitInput()) + download = SubmitField("Download your data", widget=GovSubmitInput()) diff --git a/app/templates/main/file-not-found.html b/app/templates/main/file-not-found.html index 09217f2..112615b 100644 --- a/app/templates/main/file-not-found.html +++ b/app/templates/main/file-not-found.html @@ -8,13 +8,6 @@ {% set mainClasses = "govuk-main-wrapper--l" %} -{% block beforeContent %} - {{ govukBackLink({ - 'text': "Back", - 'href': url_for('main.download') - }) }} -{% endblock beforeContent %} - {% block content %}