Commits (36)
08e8c93
[#38] initial migration to use pydantic v2
pkdash Aug 14, 2023
dca1631
[#38] pinning dependencies
pkdash Aug 14, 2023
00be384
[#38] updating with develop
pkdash Aug 16, 2023
d997914
[#38] merge branch 'develop' of https://github.com/I-GUIDE/catalogapi…
pkdash Sep 12, 2023
d205fdd
[#38] leaving a note Rocketry is not compatible with pydantic v2 for …
pkdash Sep 12, 2023
a0938f0
[#38] using a custom annotated type for url to string conversion
pkdash Sep 12, 2023
e76482a
[#38] setting default for optional model field
pkdash Sep 21, 2023
cba00f9
[#38] replacing rocketry (not compatible with pydantic v2) module wit…
pkdash Sep 21, 2023
ccb107e
[#38] replacing default_factory with default
pkdash Sep 26, 2023
c68dd11
[#38] replacing anyOf for type with type as string in schema generation
pkdash Sep 26, 2023
f993e4b
[#38] updating tests to check for field None value
pkdash Sep 26, 2023
a5ca4d4
[#38] updating default for optional list type model fields
pkdash Sep 26, 2023
eaacbde
[#38] updated json schema for dataset
pkdash Sep 26, 2023
3d9deff
Merge branch 'develop' of https://github.com/I-GUIDE/catalogapi into …
pkdash Oct 10, 2023
d455438
[#38] more customization of schema generation
pkdash Oct 10, 2023
4bad6ef
[#38] updated schema.json file
pkdash Oct 10, 2023
bd69aac
[#38] removing deprecated Config class
pkdash Oct 16, 2023
d761cae
[#38] updating beanie version to remove pydantic v2 warnings
pkdash Oct 16, 2023
0f61ba7
[#38] custom serialization of url field of Submission model
pkdash Oct 16, 2023
67298c5
[#38] custom serialization of field of type HttpUrl
pkdash Oct 16, 2023
c4c22db
[#38] moving extra keys of Field to json_schema_extra
pkdash Oct 16, 2023
486dba1
[#38] making SearchQuery model fields optional
pkdash Oct 16, 2023
71f7550
[#38] removing the key 'default' from generated schema if value is nu…
pkdash Oct 26, 2023
43d2d0a
[#38] updated json schema
pkdash Oct 26, 2023
513c88e
[#38] updating with develop
pkdash Jan 24, 2024
bf0b427
[#38] adding requests module to requirements
pkdash Jan 24, 2024
807eff1
[#38] re-generating the schema.json file
pkdash Jan 24, 2024
a5f0dae
[#38] updating tests for the schema changes
pkdash Jan 24, 2024
f5cd531
[#38] moving json schema customization to base class
pkdash Jan 30, 2024
a552324
[#38] replacing pydantic data type HttpUrl with custom data type Http…
pkdash Jan 31, 2024
337eae2
[#38] latest version of beanie fixes the extra field 'revision_id' issue
pkdash Feb 2, 2024
adc37da
[#38] removing duplicate make command
pkdash Feb 2, 2024
e2120ba
[#38] using custom type HttpUrlStr
pkdash Feb 16, 2024
43896a6
[#38] removing unused function
pkdash Feb 16, 2024
798a1b9
[#38] upgrading to latest version of FastAPI
pkdash Feb 16, 2024
18d3608
[#38] updating with develop and making pydantic v2 specific code changes
pkdash May 29, 2024
98 changes: 53 additions & 45 deletions api/adapters/hydroshare.py
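
A note for anyone scanning the diffs below: most of the mechanical changes in this PR are pydantic v1 to v2 API renames. A minimal sketch of the correspondence, based on the public pydantic v2 migration guide (the Creator model here is hypothetical, not the one in this repo):

from pydantic import BaseModel

class Creator(BaseModel):
    name: str = "unknown"

creator = Creator.model_construct()   # pydantic v1: Creator.construct()
schema = Creator.model_json_schema()  # pydantic v1: Creator.schema()
data = creator.model_dump()           # pydantic v1: creator.dict()
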
@@ -3,7 +3,10 @@
 from typing import List, Optional, Union
 from pydantic import BaseModel, EmailStr, HttpUrl
 
-from api.adapters.base import AbstractRepositoryMetadataAdapter, AbstractRepositoryRequestHandler
+from api.adapters.base import (
+    AbstractRepositoryMetadataAdapter,
+    AbstractRepositoryRequestHandler,
+)
 from api.adapters.utils import RepositoryType, register_adapter
 from api.exceptions import RepositoryException
 from api.models import schema
@@ -12,31 +15,31 @@
 
 
 class Creator(BaseModel):
-    name: Optional[str]
-    email: Optional[EmailStr]
-    organization: Optional[str]
-    homepage: Optional[HttpUrl]
-    address: Optional[str]
+    name: Optional[str] = None
+    email: Optional[EmailStr] = None
+    organization: Optional[str] = None
+    homepage: Optional[HttpUrl] = None
+    address: Optional[str] = None
     identifiers: Optional[dict] = {}
 
     def to_dataset_creator(self):
        if self.name:
-            creator = schema.Creator.construct()
+            creator = schema.Creator.model_construct()
             creator.name = self.name
             if self.email:
                 creator.email = self.email
             if self.organization:
-                affiliation = schema.Organization.construct()
+                affiliation = schema.Organization.model_construct()
                 affiliation.name = self.organization
                 creator.affiliation = affiliation
             _ORCID_identifier = self.identifiers.get("ORCID", "")
             if _ORCID_identifier:
                 creator.identifier = _ORCID_identifier
         else:
-            creator = schema.Organization.construct()
+            creator = schema.Organization.model_construct()
             creator.name = self.organization
         if self.homepage:
-            creator.url = self.homepage
+            creator.url = str(self.homepage)
         if self.address:
             creator.address = self.address
@@ -45,23 +48,23 @@ def to_dataset_creator(self):
 
 class Award(BaseModel):
     funding_agency_name: str
-    title: Optional[str]
-    number: Optional[str]
-    funding_agency_url: Optional[HttpUrl]
+    title: Optional[str] = None
+    number: Optional[str] = None
+    funding_agency_url: Optional[HttpUrl] = None
 
     def to_dataset_grant(self):
-        grant = schema.Grant.construct()
+        grant = schema.Grant.model_construct()
         if self.title:
             grant.name = self.title
         else:
             grant.name = self.funding_agency_name
         if self.number:
             grant.identifier = self.number
 
-        funder = schema.Organization.construct()
+        funder = schema.Organization.model_construct()
         funder.name = self.funding_agency_name
         if self.funding_agency_url:
-            funder.url = self.funding_agency_url
+            funder.url = str(self.funding_agency_url)

Review comment (Collaborator):
Should the Organization.funder.url attribute be updated to an HttpUrl instead of casting to a string here?

         grant.funder = funder
         return grant
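
On the reviewer's question above: part of the background is that in pydantic v2, HttpUrl instances are no longer str subclasses, so anything downstream that expects a plain string breaks without an explicit cast. A minimal illustration with a hypothetical model (not code from this PR):

from pydantic import BaseModel, HttpUrl

class Link(BaseModel):
    url: HttpUrl

link = Link(url="https://www.hydroshare.org/")
print(isinstance(link.url, str))  # True on pydantic v1, False on v2 (a Url object)
print(str(link.url))              # explicit conversion yields the plain string
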
@@ -72,7 +75,7 @@ class TemporalCoverage(BaseModel):
     end: datetime
 
     def to_dataset_temporal_coverage(self):
-        temp_cov = schema.TemporalCoverage.construct()
+        temp_cov = schema.TemporalCoverage.model_construct()
         if self.start:
             temp_cov.startDate = self.start
         if self.end:
@@ -81,32 +84,34 @@ def to_dataset_temporal_coverage(self):
 
 
 class SpatialCoverageBox(BaseModel):
-    name: Optional[str]
+    name: Optional[str] = None
     northlimit: float
     eastlimit: float
     southlimit: float
     westlimit: float
 
     def to_dataset_spatial_coverage(self):
-        place = schema.Place.construct()
+        place = schema.Place.model_construct()
         if self.name:
             place.name = self.name
 
-        place.geo = schema.GeoShape.construct()
-        place.geo.box = f"{self.northlimit} {self.eastlimit} {self.southlimit} {self.westlimit}"
+        place.geo = schema.GeoShape.model_construct()
+        place.geo.box = (
+            f"{self.northlimit} {self.eastlimit} {self.southlimit} {self.westlimit}"
+        )
         return place
 
 
 class SpatialCoveragePoint(BaseModel):
-    name: Optional[str]
+    name: Optional[str] = None
     north: float
     east: float
 
     def to_dataset_spatial_coverage(self):
-        place = schema.Place.construct()
+        place = schema.Place.model_construct()
         if self.name:
             place.name = self.name
-        place.geo = schema.GeoCoordinates.construct()
+        place.geo = schema.GeoCoordinates.model_construct()
         place.geo.latitude = self.north
         place.geo.longitude = self.east
         return place
@@ -122,8 +127,8 @@ class ContentFile(BaseModel):
     checksum: str
 
     def to_dataset_media_object(self):
-        media_object = schema.MediaObject.construct()
-        media_object.contentUrl = self.url
+        media_object = schema.MediaObject.model_construct()
+        media_object.contentUrl = str(self.url)
         media_object.encodingFormat = self.content_type
         media_object.contentSize = f"{self.size/1000.00} KB"
         media_object.name = self.file_name
@@ -137,13 +142,13 @@ class Relation(BaseModel):
     def to_dataset_part_relation(self, relation_type: str):
         relation = None
         if relation_type == "IsPartOf" and self.type.endswith("is part of"):
-            relation = schema.IsPartOf.construct()
+            relation = schema.IsPartOf.model_construct()
         elif relation_type == "HasPart" and self.type.endswith("resource includes"):
-            relation = schema.HasPart.construct()
+            relation = schema.HasPart.model_construct()
         else:
             return relation
 
-        description, url = self.value.rsplit(',', 1)
+        description, url = self.value.rsplit(",", 1)
         relation.description = description.strip()
         relation.url = url.strip()
         relation.name = self.value
@@ -155,22 +160,23 @@ class Rights(BaseModel):
     url: HttpUrl
 
     def to_dataset_license(self):
-        _license = schema.License.construct()
+        _license = schema.License.model_construct()
         _license.name = self.statement
-        _license.url = self.url
+        _license.url = str(self.url)
         return _license
 
 
 class _HydroshareRequestHandler(AbstractRepositoryRequestHandler):
-
     def get_metadata(self, record_id: str):
         hs_meta_url = self.settings.hydroshare_meta_read_url % record_id
         hs_file_url = self.settings.hydroshare_file_read_url % record_id
 
         def make_request(url, file_list=False) -> Union[dict, List[dict]]:
             response = requests.get(url)
             if response.status_code != 200:
-                raise RepositoryException(status_code=response.status_code, detail=response.text)
+                raise RepositoryException(
+                    status_code=response.status_code, detail=response.text
+                )
             if not file_list:
                 return response.json()
 
@@ -180,7 +186,9 @@ def make_request(url, file_list=False) -> Union[dict, List[dict]]:
             while response.json()["next"]:
                 response = requests.get(response.json()["next"])
                 if response.status_code != 200:
-                    raise RepositoryException(status_code=response.status_code, detail=response.text)
+                    raise RepositoryException(
+                        status_code=response.status_code, detail=response.text
+                    )
                 content_files.extend(response.json()["results"])
             return content_files

@@ -224,16 +232,16 @@ class _HydroshareResourceMetadata(BaseModel):
     creators: List[Creator]
     created: datetime
     modified: datetime
-    published: Optional[datetime]
-    subjects: Optional[List[str]]
+    published: Optional[datetime] = None
+    subjects: Optional[List[str]] = []
     language: str
     rights: Rights
-    awards: Optional[List[Award]]
-    spatial_coverage: Optional[Union[SpatialCoverageBox, SpatialCoveragePoint]]
-    period_coverage: Optional[TemporalCoverage]
-    relations: Optional[List[Relation]]
+    awards: Optional[List[Award]] = []
+    spatial_coverage: Optional[Union[SpatialCoverageBox, SpatialCoveragePoint]] = None
+    period_coverage: Optional[TemporalCoverage] = None
+    relations: Optional[List[Relation]] = []
     citation: str
-    content_files: Optional[List[ContentFile]]
+    content_files: Optional[List[ContentFile]] = []
 
     def to_dataset_creators(self):
         creators = []
@@ -287,18 +295,18 @@ def to_dataset_license(self):
 
     @staticmethod
     def to_dataset_provider():
-        provider = schema.Organization.construct()
+        provider = schema.Organization.model_construct()
         provider.name = RepositoryType.HYDROSHARE
         provider.url = "https://www.hydroshare.org/"
         return provider
 
     def to_catalog_dataset(self):
-        dataset = DatasetMetadataDOC.construct()
+        dataset = DatasetMetadataDOC.model_construct()
         dataset.provider = self.to_dataset_provider()
         dataset.name = self.title
         dataset.description = self.abstract
-        dataset.url = self.url
-        dataset.identifier = [self.identifier]
+        dataset.url = str(self.url)
+        dataset.identifier = [str(self.identifier)]
         dataset.creator = self.to_dataset_creators()
         dataset.dateCreated = self.created
         dataset.dateModified = self.modified
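
One pattern repeats across this file and in JwtDecodeOptions below: in pydantic v2, Optional[X] no longer implies a default of None, so every optional field must declare its default explicitly. A minimal sketch of the behavior change, with a hypothetical model:

from typing import Optional
from pydantic import BaseModel, ValidationError

class Meta(BaseModel):
    published: Optional[str]        # v2: required (may be None, but must be passed)
    subjects: Optional[str] = None  # explicit default restores the v1 behavior

try:
    Meta()  # fails in v2: 'published' is required
except ValidationError as err:
    print(err)

print(Meta(published=None))  # ok: published=None subjects=None
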
36 changes: 18 additions & 18 deletions api/authentication/fastapi_resource_server.py
@@ -47,24 +47,24 @@
 
 
 class JwtDecodeOptions(BaseModel):
-    verify_signature: Optional[bool]
-    verify_aud: Optional[bool]
-    verify_iat: Optional[bool]
-    verify_exp: Optional[bool]
-    verify_nbf: Optional[bool]
-    verify_iss: Optional[bool]
-    verify_sub: Optional[bool]
-    verify_jti: Optional[bool]
-    verify_at_hash: Optional[bool]
-    require_aud: Optional[bool]
-    require_iat: Optional[bool]
-    require_exp: Optional[bool]
-    require_nbf: Optional[bool]
-    require_iss: Optional[bool]
-    require_sub: Optional[bool]
-    require_jti: Optional[bool]
-    require_at_hash: Optional[bool]
-    leeway: Optional[int]
+    verify_signature: Optional[bool] = None
+    verify_aud: Optional[bool] = None
+    verify_iat: Optional[bool] = None
+    verify_exp: Optional[bool] = None
+    verify_nbf: Optional[bool] = None
+    verify_iss: Optional[bool] = None
+    verify_sub: Optional[bool] = None
+    verify_jti: Optional[bool] = None
+    verify_at_hash: Optional[bool] = None
+    require_aud: Optional[bool] = None
+    require_iat: Optional[bool] = None
+    require_exp: Optional[bool] = None
+    require_nbf: Optional[bool] = None
+    require_iss: Optional[bool] = None
+    require_sub: Optional[bool] = None
+    require_jti: Optional[bool] = None
+    require_at_hash: Optional[bool] = None
+    leeway: Optional[int] = None
 
 
 class OidcResourceServer(SecurityBase):
Expand Down
8 changes: 5 additions & 3 deletions api/config/__init__.py
@@ -1,7 +1,8 @@
 from functools import lru_cache
 from typing import Any
 
-from pydantic import BaseSettings, HttpUrl
+from pydantic import HttpUrl
+from pydantic_settings import BaseSettings, SettingsConfigDict
 from dotenv import load_dotenv
 
 # had to use load_dotenv() to get the env variables to work during testing
@@ -24,13 +25,14 @@ def __init__(self, **data: Any) -> None:
         super().__init__(**data)
         if self.testing:
             self.database_name = f"{self.database_name}"
+        self.hydroshare_meta_read_url = str(self.hydroshare_meta_read_url)
+        self.hydroshare_file_read_url = str(self.hydroshare_file_read_url)

Review comment (Collaborator):
hmm, there must be something else going on with the HttpUrl -> str conversions that I'm not understanding. Why are these lines in here?

Reply (Contributor Author):
This also was a big headache for me when doing this upgrade. See this pydantic discussion, which describes the problem with the pydantic HttpUrl type:
pydantic/pydantic#6395
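
The commit list above also mentions replacing HttpUrl with a custom HttpUrlStr type. A minimal sketch of one way to build such an annotated type in pydantic v2, validating input like HttpUrl but storing a plain str; this illustrates the pattern and is not necessarily the PR's exact implementation:

from typing import Annotated
from pydantic import BaseModel, BeforeValidator, HttpUrl, TypeAdapter

_http_url_adapter = TypeAdapter(HttpUrl)

# Validate like HttpUrl, then downcast so the field holds and serializes a plain str.
HttpUrlStr = Annotated[
    str, BeforeValidator(lambda value: str(_http_url_adapter.validate_python(value)))
]

class Submission(BaseModel):
    url: HttpUrlStr

print(Submission(url="https://www.hydroshare.org/").model_dump())
# {'url': 'https://www.hydroshare.org/'}
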


     @property
     def db_connection_string(self):
         return f"{self.db_protocol}://{self.db_username}:{self.db_password}@{self.db_host}/?retryWrites=true&w=majority"
 
-    class Config:
-        env_file = ".env"
+    model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8")

Review comment (Collaborator):
Just noting that I have since learned that if we leave the env_file config out, it just reads environment variables, which eases deployments. This comment is not relevant to these changes; just making a note here.
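
A sketch of the alternative described in that note, assuming stock pydantic-settings behavior (hypothetical Settings class, not this repo's):

from pydantic_settings import BaseSettings, SettingsConfigDict

class Settings(BaseSettings):
    database_name: str = "catalog"

    # With no env_file configured, values come from process environment
    # variables only (e.g. DATABASE_NAME), which eases containerized deployments.
    model_config = SettingsConfigDict()
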



@lru_cache()
9 changes: 7 additions & 2 deletions api/models/catalog.py
@@ -1,4 +1,5 @@
 import datetime
+from typing import Optional
 
 from beanie import Document

@@ -19,7 +20,7 @@ class Settings:
         ),
         datetime.datetime: lambda dt: datetime.datetime(
             year=dt.year, month=dt.month, day=dt.day, hour=dt.hour, minute=dt.minute, second=dt.second
-        )
+        ),
     }
 
     def as_submission(self) -> Submission:
@@ -31,6 +32,10 @@ def as_submission(self) -> Submission:
             url=self.url,
         )
 
+    def delete_revision_id(self):
+        if hasattr(self, "revision_id"):
+            del self.revision_id
+
 
 class DatasetMetadataDOC(CoreMetadataDOC):
-    repository_identifier: str = None
+    repository_identifier: Optional[str] = None
17 changes: 9 additions & 8 deletions api/models/management/generate_schema.py
@@ -7,22 +7,23 @@
 
 
 def main(output_name: str = "api/models/schemas/schema.json"):
-    schema = DatasetSchema.schema()
-    json_schema = DatasetSchema.schema_json()#indent=2)
+    schema = DatasetSchema.model_json_schema()
+    json_schema = json.dumps(schema)
     # Have to run it a few times for the definitions to get updated before inserted into another model
-    while "#/definitions/" in json_schema:
-        for definition in schema["definitions"]:
-            class_definition = schema["definitions"][definition]
+    while "#/$defs/" in json_schema:
+        for definition in schema["$defs"]:
+            class_definition = schema["$defs"][definition]
             # replace allOf with a single definition
             json_schema = json_schema.replace(
-                f'"allOf": [{{"$ref": "#/definitions/{definition}"}}]',
+                f'"allOf": [{{"$ref": "#/$defs/{definition}"}}]',
                 json.dumps(class_definition)[1:-1]
             )
-            #replace definition directly
+            # replace definition directly
             json_schema = json_schema.replace(
-                f'"$ref": "#/definitions/{definition}"',
+                f'"$ref": "#/$defs/{definition}"',
                 json.dumps(class_definition)[1:-1]
             )
 
     embedded_schema = json.loads(json_schema)
     current_directory = absolute_directory(output_name)
     with open(current_directory, "w") as f:
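
Background for the rewritten script above: pydantic v2 renamed the JSON-schema bucket for nested models from "definitions" to "$defs", and model_json_schema() replaced schema(), which is what the string replacements now target. A quick check with hypothetical models:

import json
from pydantic import BaseModel

class Funder(BaseModel):
    name: str

class Grant(BaseModel):
    funder: Funder

schema = Grant.model_json_schema()
print("$defs" in schema)        # True in v2 (v1 used "definitions")
print(json.dumps(schema)[:80])  # nested Funder model appears under "$defs"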