From 69bf8a52272efc218bd6e6a7e209b35a45c7bde7 Mon Sep 17 00:00:00 2001 From: israr-ulhaq Date: Fri, 5 Jul 2024 12:29:13 +0100 Subject: [PATCH] retrieving file from S3 and return presigned url to the user --- app/main/download_data.py | 78 ++++++++++++++++++++--- app/main/forms.py | 2 +- app/main/routes.py | 57 ++++++++--------- app/static/src/css/custom.css | 10 --- app/templates/main/request-received.html | 2 +- app/templates/main/retrieve-download.html | 29 ++------- 6 files changed, 100 insertions(+), 78 deletions(-) diff --git a/app/main/download_data.py b/app/main/download_data.py index 83603c4..1ab76f0 100644 --- a/app/main/download_data.py +++ b/app/main/download_data.py @@ -1,5 +1,6 @@ import io import json +from collections import namedtuple from datetime import datetime from enum import StrEnum from typing import Any @@ -243,23 +244,80 @@ def process_api_response(query_params: dict) -> tuple: def process_async_download(query_params: dict): + """Process an async download request to start the background process. + :param query_params (dict): Query parameters for the API request. + """ request_url = ( Config.DATA_STORE_API_HOST + "/trigger_async_download" + ("?" + urlencode(query_params, doseq=True) if query_params else "") ) - requests.get(request_url) + requests.post(request_url, timeout=20) -def retrieve_download_file(UUID: str): - if not UUID: - raise ValueError("UUID parameter is required") +def get_presigned_url(filename: str): + """Get a short-lived presigned link to retrieve the file from the S3 bucket. + :param filename (str): object name to be retrieved from S3, if it exists + Raises: ValueError: If filename is empty. + Returns: The response from the API. 
+ """ + if not filename: + raise ValueError("filename is required") - response = get_response(Config.DATA_STORE_API_HOST, f"/retrieve-download/{UUID}") + response = get_response(Config.DATA_STORE_API_HOST, f"/get-presigned-url/{filename}") + return response - # if content_type == MIMETYPE.JSON: - # file_content = io.BytesIO(json.dumps(response.json()).encode("UTF-8")) - # elif content_type == MIMETYPE.XLSX: - # file_content = io.BytesIO(response.content) - return response +FileMetadata = namedtuple("FileMetadata", ["last_modified_date", "file_format", "file_size_str"]) + + +def get_find_download_file_metadata(filename: str) -> FileMetadata: + """Get the object metadata from S3 using the object key + :param filename (str): object name to get the metadata + + Raises: + ValueError: If filename is empty. + + Returns: FileMetadata: + - the last modified date, + - file format, and human-readable file size. + """ + if not filename: + raise ValueError("filename is required") + + try: + response = get_response(Config.DATA_STORE_API_HOST, f"/get-find-download-metadata/{filename}") + print(response.status_code) + metadata = response.json() + file_size = metadata["ContentLength"] + file_size_str = get_human_readable_file_size(file_size) + last_modified_date = metadata["LastModified"] + content_type = metadata["ContentType"] + file_format = "" + + if content_type == MIMETYPE.XLSX: + file_format = "Microsoft Excel spreadsheet" + elif content_type == MIMETYPE.JSON: + file_format = "JSON" + else: + file_format = "Unknown type" + + return FileMetadata(last_modified_date, file_format, file_size_str) + + except requests.exceptions.RequestException as req_err: + raise RuntimeError(f"Request error when getting find file metadata: {req_err}") from req_err + + +def get_human_readable_file_size(file_size_bytes: int) -> str: + """Return a human-readable file size string. 
+ :param file_size_bytes: file size in bytes, + :return: human-readable file size, + """ + + file_size_kb = round(file_size_bytes / 1024, 1) + if file_size_kb < 1024: + return f"{round(file_size_kb, 1)} KB" + elif file_size_kb < 1024 * 1024: + return f"{round(file_size_kb / 1024, 1)} MB" + else: + return f"{round(file_size_kb / (1024 * 1024), 1)} GB" diff --git a/app/main/forms.py b/app/main/forms.py index aa5c272..cf2585d 100644 --- a/app/main/forms.py +++ b/app/main/forms.py @@ -37,4 +37,4 @@ class DownloadForm(FlaskForm): class RetrieveForm(FlaskForm): - download = SubmitField("Download Your Data") + download = SubmitField("Download Your Data", widget=GovSubmitInput()) diff --git a/app/main/routes.py b/app/main/routes.py index 79f6f2e..f2edaf2 100644 --- a/app/main/routes.py +++ b/app/main/routes.py @@ -10,7 +10,6 @@ current_app, g, jsonify, - Response, ) # isort: on @@ -19,19 +18,19 @@ from fsd_utils.authentication.decorators import login_requested, login_required from werkzeug.exceptions import HTTPException -from app.const import MIMETYPE from app.main import bp from app.main.download_data import ( FormNames, financial_quarter_from_mapping, financial_quarter_to_mapping, + get_find_download_file_metadata, get_fund_checkboxes, get_org_checkboxes, get_outcome_checkboxes, + get_presigned_url, get_region_checkboxes, get_returns, process_async_download, - retrieve_download_file, ) from app.main.forms import DownloadForm, RetrieveForm @@ -131,47 +130,43 @@ def request_received(): return render_template("request-received.html", user_email=g.user.email) -@bp.route("/retrieve-download/", methods=["GET", "POST"]) +@bp.route("/get-presigned-url/", methods=["GET", "POST"]) @login_required(return_app=SupportedApp.POST_AWARD_FRONTEND) -def retrieve_download(UUID: str): - response = retrieve_download_file(UUID) - file_size = int(response.headers.get("content-length", -1)) - content_type = response.headers.get("content-type") - file_format = "" - - print(content_type) - if 
content_type == "application/octet-stream": - file_format = "Microsoft spreadsheet" - elif content_type == MIMETYPE.JSON: - file_format = "JSON" - else: - file_format = "Unknow type" - - form = RetrieveForm() - context = {"UUID": UUID, "file_format": file_format, "file_size": file_size} +def retrieve_download(filename: str): + """Get file from S3, send back to user with presigned link + and file metadata; if the file does not exist, + return the file-not-found page + :param filename (str): name of the file to be retrieved with its metadata + Returns: redirect to presigned URL + """ + response = get_presigned_url(filename) if response.status_code == 404: return render_template("file-not-found.html") + + file_metadata = get_find_download_file_metadata(filename) + form = RetrieveForm() + context = { + "filename": filename, + "file_size": file_metadata.file_size_str, + "file_format": file_metadata.file_format, + "date": file_metadata.last_modified_date, + } if form.validate_on_submit(): if response.status_code == 200: + presigned_url = response.json() try: + user_id = (g.account_id,) current_app.logger.info( - "Request for download by {{user_id=}}", + "Request for download by user_id={user_id}", extra={ - "user_id": g.account_id, + "user_id": user_id, "email": g.user.email, }, ) - return Response( - response.iter_content(chunk_size=10 * 1024), - headers={ - "Content-Disposition": response.headers.get("Content-Disposition"), - "Content-Type": response.headers.get("Content-Type"), - }, - ) + return redirect(presigned_url) + except ValueError: return jsonify({"error": "Invalid response from data store"}), 500 - else: - return jsonify({"error": f"Error retrieving file: {response.status_code}"}), response.status_code else: return render_template("retrieve-download.html", context=context, form=form) diff --git a/app/static/src/css/custom.css b/app/static/src/css/custom.css index 474a424..30da754 100644 --- a/app/static/src/css/custom.css +++ b/app/static/src/css/custom.css 
@@ -3,16 +3,6 @@ } -.govuk-button { - background-color: #1d70b8; -} - - -.govuk-button:hover { - background-color: #12066d; -} - - .govuk-footer__meta { display: flex; margin-right: -15px; diff --git a/app/templates/main/request-received.html b/app/templates/main/request-received.html index 8ad5f23..cd3ea8c 100644 --- a/app/templates/main/request-received.html +++ b/app/templates/main/request-received.html @@ -12,7 +12,7 @@

What happens next

- We will email a link to {{ context["user_email"] }}. + We will email a link to {{ user_email }}.

This may take up to 5 minutes to be delivered to your inbox.

diff --git a/app/templates/main/retrieve-download.html b/app/templates/main/retrieve-download.html index d418ef1..1ab72ae 100644 --- a/app/templates/main/retrieve-download.html +++ b/app/templates/main/retrieve-download.html @@ -5,34 +5,13 @@

Your data is ready to be downloaded

-

You requested a data download on {date}. -
File format: {{ context.file_format }}, {{ context.file_size }} KB

+

You requested a data download on {{ context.date }}. +
File format: {{ context.file_format }}, {{ context.file_size }}

-
+ {{ form.csrf_token }} - - - + {{ form.download }}
{% endblock content %} - -{% block bodyEnd %} - -{% endblock bodyEnd %}