Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions apigateway/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
from functools import wraps
from typing import Tuple
from urllib.parse import urljoin
import re
import os

import jsondiff as jd
import requests
Expand Down Expand Up @@ -251,8 +253,20 @@ def _construct_remote_url(self) -> str:
Returns:
str: The URL of the remote server.
"""
verify_url_regex = re.compile(r"([12]\d\d\d[A-Za-z&\.]{5}[A-Za-z0-9\.]{9}[A-Z\.]/verify_url\:)(http[s]?\://)(.*)")

path = request.full_path.replace(self._deploy_path, "", 1)
path = path[1:] if path.startswith("/") else path
# This block exists because of an incompatibility between urlparse.urljoin and urllib.parse.urljoin.
# The incompatibility collapses http(s):// to http(s):/ when the protocol specifier occurs in the middle of the URL.
try:
resolver_check = verify_url_regex.match(path)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will the bibcode+verify_url:https://path... always be at the beginning of the path?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the particular issue we are handling, yes. We will need to adapt this when we move away from bibcodes, but I would hope that would just be a matter of mimicking the fix in resolver-gateway.

if resolver_check:
resolver_groups = resolver_check.groups()
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pretty sure this will only ever return one item in the tuple, since there's only one capturing group in your regex.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah... not sure what happened but an older version of the regex was pushed with the newer normalizing code. Should be fixed now.

return str(self._remote_base_url) + "/" + os.path.normpath(resolver_groups[0]) \
+ resolver_groups[1] + os.path.normpath(resolver_groups[2])
except ValueError:
current_app.logger.exception("Failed to properly check url path for resolver verify_url path.")
return urljoin(self._remote_base_url, path)


Expand Down