Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit. Hold shift + click to select a range
08e8c93
[#38] initial migration to use pydantic v2
pkdash Aug 14, 2023
dca1631
[#38] pinning dependencies
pkdash Aug 14, 2023
00be384
[#38] updating with develop
pkdash Aug 16, 2023
d997914
[#38] merge branch 'develop' of https://github.com/I-GUIDE/catalogapi…
pkdash Sep 12, 2023
d205fdd
[#38] leaving a note Rocketry is not compatible with pydantic v2 for …
pkdash Sep 12, 2023
a0938f0
[#38] using a custom annotated type for url to string conversion
pkdash Sep 12, 2023
e76482a
[#38] setting default for optional model field
pkdash Sep 21, 2023
cba00f9
[#38] replacing rocketry (not compatible with pydantic v2) module wit…
pkdash Sep 21, 2023
ccb107e
[#38] replacing default_factory with default
pkdash Sep 26, 2023
c68dd11
[#38] replacing anyOf for type with type as string in schema generation
pkdash Sep 26, 2023
f993e4b
[#38] updating tests to check for field None value
pkdash Sep 26, 2023
a5ca4d4
[#38] updating default for optional list type model fields
pkdash Sep 26, 2023
eaacbde
[#38] updated json schema for dataset
pkdash Sep 26, 2023
3d9deff
Merge branch 'develop' of https://github.com/I-GUIDE/catalogapi into …
pkdash Oct 10, 2023
d455438
[#38] more customization of schema generation
pkdash Oct 10, 2023
4bad6ef
[#38] updated schema.json file
pkdash Oct 10, 2023
bd69aac
[#38] removing deprecated Config class
pkdash Oct 16, 2023
d761cae
[#38] updating beanie version to remove pydantic v2 warnings
pkdash Oct 16, 2023
0f61ba7
[#38] custom serialization of url field of Submission model
pkdash Oct 16, 2023
67298c5
[#38] custom serialization of field of type HttpUrl
pkdash Oct 16, 2023
c4c22db
[#38] moving extra keys of Field to json_schema_extra
pkdash Oct 16, 2023
486dba1
[#38] making SearchQuery model fields optional
pkdash Oct 16, 2023
71f7550
[#38] removing the key 'default' from generated schema if value is nu…
pkdash Oct 26, 2023
43d2d0a
[#38] updated json schema
pkdash Oct 26, 2023
513c88e
[#38] updating with develop
pkdash Jan 24, 2024
bf0b427
[#38] adding requests module to requirements
pkdash Jan 24, 2024
807eff1
[#38] re-generating the schema.json file
pkdash Jan 24, 2024
a5f0dae
[#38] updating tests for the schema changes
pkdash Jan 24, 2024
f5cd531
[#38] moving json schema customization to base class
pkdash Jan 30, 2024
a552324
[#38] replacing pydantic data type HttpUrl with custom data type Http…
pkdash Jan 31, 2024
337eae2
[#38] latest version of beanie fixes the extra field 'revision_id' issue
pkdash Feb 2, 2024
adc37da
[#38] removing duplicate make command
pkdash Feb 2, 2024
e2120ba
[#38] using custom type HttpUrlStr
pkdash Feb 16, 2024
43896a6
[#38] removing unused function
pkdash Feb 16, 2024
798a1b9
[#38] upgrading to latest version of FastAPI
pkdash Feb 16, 2024
18d3608
[#38] updating with develop and making pydantic v2 specific code changes
pkdash May 29, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,4 @@ test:

.PHONY: pre-post
pre-post:
docker-compose run catalog-trigger python /app/triggers/management/change_streams_pre_and_post.py
docker-compose run catalog-trigger python /app/api/models/management/change_streams_pre_and_post.py
96 changes: 53 additions & 43 deletions api/adapters/hydroshare.py
Original file line number Diff line number Diff line change
@@ -1,39 +1,43 @@
import requests
from datetime import datetime
from typing import List, Optional, Union
from pydantic import BaseModel, EmailStr, HttpUrl
from pydantic import BaseModel, EmailStr

from api.adapters.base import AbstractRepositoryMetadataAdapter, AbstractRepositoryRequestHandler
from api.adapters.base import (
AbstractRepositoryMetadataAdapter,
AbstractRepositoryRequestHandler,
)
from api.adapters.utils import RepositoryType, register_adapter
from api.exceptions import RepositoryException
from api.models import schema
from api.models.catalog import DatasetMetadataDOC
from api.models.user import Submission, SubmissionType
from api.models.schema import HttpUrlStr


class Creator(BaseModel):
name: Optional[str]
email: Optional[EmailStr]
organization: Optional[str]
homepage: Optional[HttpUrl]
address: Optional[str]
name: Optional[str] = None
email: Optional[EmailStr] = None
organization: Optional[str] = None
homepage: Optional[HttpUrlStr] = None
address: Optional[str] = None
identifiers: Optional[dict] = {}

def to_dataset_creator(self):
if self.name:
creator = schema.Creator.construct()
creator = schema.Creator.model_construct()
creator.name = self.name
if self.email:
creator.email = self.email
if self.organization:
affiliation = schema.Organization.construct()
affiliation = schema.Organization.model_construct()
affiliation.name = self.organization
creator.affiliation = affiliation
_ORCID_identifier = self.identifiers.get("ORCID", "")
if _ORCID_identifier:
creator.identifier = _ORCID_identifier
else:
creator = schema.Organization.construct()
creator = schema.Organization.model_construct()
creator.name = self.organization
if self.homepage:
creator.url = self.homepage
Expand All @@ -45,20 +49,20 @@ def to_dataset_creator(self):

class Award(BaseModel):
funding_agency_name: str
title: Optional[str]
number: Optional[str]
funding_agency_url: Optional[HttpUrl]
title: Optional[str] = None
number: Optional[str] = None
funding_agency_url: Optional[HttpUrlStr] = None

def to_dataset_grant(self):
grant = schema.Grant.construct()
grant = schema.Grant.model_construct()
if self.title:
grant.name = self.title
else:
grant.name = self.funding_agency_name
if self.number:
grant.identifier = self.number

funder = schema.Organization.construct()
funder = schema.Organization.model_construct()
funder.name = self.funding_agency_name
if self.funding_agency_url:
funder.url = self.funding_agency_url
Expand All @@ -72,7 +76,7 @@ class TemporalCoverage(BaseModel):
end: datetime

def to_dataset_temporal_coverage(self):
temp_cov = schema.TemporalCoverage.construct()
temp_cov = schema.TemporalCoverage.model_construct()
if self.start:
temp_cov.startDate = self.start
if self.end:
Expand All @@ -81,48 +85,50 @@ def to_dataset_temporal_coverage(self):


class SpatialCoverageBox(BaseModel):
name: Optional[str]
name: Optional[str] = None
northlimit: float
eastlimit: float
southlimit: float
westlimit: float

def to_dataset_spatial_coverage(self):
place = schema.Place.construct()
place = schema.Place.model_construct()
if self.name:
place.name = self.name

place.geo = schema.GeoShape.construct()
place.geo.box = f"{self.northlimit} {self.eastlimit} {self.southlimit} {self.westlimit}"
place.geo = schema.GeoShape.model_construct()
place.geo.box = (
f"{self.northlimit} {self.eastlimit} {self.southlimit} {self.westlimit}"
)
return place


class SpatialCoveragePoint(BaseModel):
name: Optional[str]
name: Optional[str] = None
north: float
east: float

def to_dataset_spatial_coverage(self):
place = schema.Place.construct()
place = schema.Place.model_construct()
if self.name:
place.name = self.name
place.geo = schema.GeoCoordinates.construct()
place.geo = schema.GeoCoordinates.model_construct()
place.geo.latitude = self.north
place.geo.longitude = self.east
return place


class ContentFile(BaseModel):
file_name: str
url: HttpUrl
url: HttpUrlStr
size: int
content_type: str
logical_file_type: str
modified_time: datetime
checksum: str

def to_dataset_media_object(self):
media_object = schema.MediaObject.construct()
media_object = schema.MediaObject.model_construct()
media_object.contentUrl = self.url
media_object.encodingFormat = self.content_type
media_object.contentSize = f"{self.size/1000.00} KB"
Expand All @@ -138,13 +144,13 @@ class Relation(BaseModel):
def to_dataset_part_relation(self, relation_type: str):
relation = None
if relation_type == "IsPartOf" and self.type.endswith("is part of"):
relation = schema.IsPartOf.construct()
relation = schema.IsPartOf.model_construct()
elif relation_type == "HasPart" and self.type.endswith("resource includes"):
relation = schema.HasPart.construct()
relation = schema.HasPart.model_construct()
else:
return relation

description, url = self.value.rsplit(',', 1)
description, url = self.value.rsplit(",", 1)
relation.description = description.strip()
relation.url = url.strip()
relation.name = self.value
Expand All @@ -153,10 +159,10 @@ def to_dataset_part_relation(self, relation_type: str):

class Rights(BaseModel):
statement: str
url: HttpUrl
url: HttpUrlStr

def to_dataset_license(self):
_license = schema.License.construct()
_license = schema.License.model_construct()
_license.name = self.statement
_license.url = self.url
return _license
Expand All @@ -170,7 +176,9 @@ def get_metadata(self, record_id: str):
def make_request(url, file_list=False) -> Union[dict, List[dict]]:
response = requests.get(url)
if response.status_code != 200:
raise RepositoryException(status_code=response.status_code, detail=response.text)
raise RepositoryException(
status_code=response.status_code, detail=response.text
)
if not file_list:
return response.json()

Expand All @@ -180,7 +188,9 @@ def make_request(url, file_list=False) -> Union[dict, List[dict]]:
while response.json()["next"]:
response = requests.get(response.json()["next"])
if response.status_code != 200:
raise RepositoryException(status_code=response.status_code, detail=response.text)
raise RepositoryException(
status_code=response.status_code, detail=response.text
)
content_files.extend(response.json()["results"])
return content_files

Expand Down Expand Up @@ -219,21 +229,21 @@ def update_submission(submission: Submission, repo_record_id: str) -> Submission
class _HydroshareResourceMetadata(BaseModel):
title: str
abstract: str
url: HttpUrl
identifier: HttpUrl
url: HttpUrlStr
identifier: HttpUrlStr
creators: List[Creator]
created: datetime
modified: datetime
published: Optional[datetime]
subjects: Optional[List[str]]
published: Optional[datetime] = None
subjects: Optional[List[str]] = []
language: str
rights: Rights
awards: Optional[List[Award]]
spatial_coverage: Optional[Union[SpatialCoverageBox, SpatialCoveragePoint]]
period_coverage: Optional[TemporalCoverage]
relations: Optional[List[Relation]]
awards: Optional[List[Award]] = []
spatial_coverage: Optional[Union[SpatialCoverageBox, SpatialCoveragePoint]] = None
period_coverage: Optional[TemporalCoverage] = None
relations: Optional[List[Relation]] = []
citation: str
content_files: Optional[List[ContentFile]]
content_files: Optional[List[ContentFile]] = []

def to_dataset_creators(self):
creators = []
Expand Down Expand Up @@ -287,13 +297,13 @@ def to_dataset_license(self):

@staticmethod
def to_dataset_provider():
provider = schema.Organization.construct()
provider = schema.Organization.model_construct()
provider.name = RepositoryType.HYDROSHARE
provider.url = "https://www.hydroshare.org/"
return provider

def to_catalog_dataset(self):
dataset = DatasetMetadataDOC.construct()
dataset = DatasetMetadataDOC.model_construct()
dataset.provider = self.to_dataset_provider()
dataset.name = self.title
dataset.description = self.abstract
Expand Down
36 changes: 18 additions & 18 deletions api/authentication/fastapi_resource_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,24 +47,24 @@ def fetch_jwks(well_known: dict) -> dict:


class JwtDecodeOptions(BaseModel):
verify_signature: Optional[bool]
verify_aud: Optional[bool]
verify_iat: Optional[bool]
verify_exp: Optional[bool]
verify_nbf: Optional[bool]
verify_iss: Optional[bool]
verify_sub: Optional[bool]
verify_jti: Optional[bool]
verify_at_hash: Optional[bool]
require_aud: Optional[bool]
require_iat: Optional[bool]
require_exp: Optional[bool]
require_nbf: Optional[bool]
require_iss: Optional[bool]
require_sub: Optional[bool]
require_jti: Optional[bool]
require_at_hash: Optional[bool]
leeway: Optional[int]
verify_signature: Optional[bool] = None
verify_aud: Optional[bool] = None
verify_iat: Optional[bool] = None
verify_exp: Optional[bool] = None
verify_nbf: Optional[bool] = None
verify_iss: Optional[bool] = None
verify_sub: Optional[bool] = None
verify_jti: Optional[bool] = None
verify_at_hash: Optional[bool] = None
require_aud: Optional[bool] = None
require_iat: Optional[bool] = None
require_exp: Optional[bool] = None
require_nbf: Optional[bool] = None
require_iss: Optional[bool] = None
require_sub: Optional[bool] = None
require_jti: Optional[bool] = None
require_at_hash: Optional[bool] = None
leeway: Optional[int] = None


class OidcResourceServer(SecurityBase):
Expand Down
13 changes: 8 additions & 5 deletions api/config/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
from functools import lru_cache
from typing import Any

from pydantic import BaseSettings, HttpUrl
from pydantic_settings import BaseSettings, SettingsConfigDict
from dotenv import load_dotenv

from api.models.schema import HttpUrlStr

# had to use load_dotenv() to get the env variables to work during testing
load_dotenv()

Expand All @@ -17,21 +19,22 @@ class Settings(BaseSettings):
testing: bool = False

oidc_issuer: str
hydroshare_meta_read_url: HttpUrl
hydroshare_file_read_url: HttpUrl
hydroshare_meta_read_url: HttpUrlStr
hydroshare_file_read_url: HttpUrlStr
search_relevance_score_threshold: float = 1.4

def __init__(self, **data: Any) -> None:
super().__init__(**data)
if self.testing:
self.database_name = f"{self.database_name}"
self.hydroshare_meta_read_url = self.hydroshare_meta_read_url
self.hydroshare_file_read_url = self.hydroshare_file_read_url

@property
def db_connection_string(self):
return f"{self.db_protocol}://{self.db_username}:{self.db_password}@{self.db_host}/?retryWrites=true&w=majority"

class Config:
env_file = ".env"
model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just noting that I have since learned that if we leave the env_file Config out, then it just reads environment variables, which eases deployments. This comment is not relevant to these changes; I am just making a note here.



@lru_cache()
Expand Down
14 changes: 10 additions & 4 deletions api/main.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import asyncio
import os
from contextlib import asynccontextmanager

import uvicorn
from beanie import init_beanie
Expand All @@ -21,7 +22,13 @@
from api.exceptions import RepositoryException


app = FastAPI()
@asynccontextmanager
async def lifespan(app_: FastAPI):
await startup_db_client()
yield
await shutdown_db_client()

app = FastAPI(lifespan=lifespan)

app.add_middleware(
CORSMiddleware,
Expand All @@ -43,15 +50,13 @@ async def validation_exception_handler(request, exc: ValidationError):
status_code=status.HTTP_400_BAD_REQUEST)


@app.on_event("startup")
async def startup_db_client():
settings = get_settings()
app.mongodb_client = AsyncIOMotorClient(settings.db_connection_string)
app.mongodb = app.mongodb_client[settings.database_name]
await init_beanie(database=app.mongodb, document_models=[DatasetMetadataDOC, User, Submission])


@app.on_event("shutdown")
async def shutdown_db_client():
app.mongodb_client.close()

Expand Down Expand Up @@ -85,7 +90,8 @@ def handle_exit(self, sig: int, frame) -> None:
async def main():
"""Run FastAPI"""

server = Server(config=uvicorn.Config(app, workers=1, loop="asyncio", host="0.0.0.0", port=8000, forwarded_allow_ips="*"))
server = Server(config=uvicorn.Config(app, workers=1, loop="asyncio", host="0.0.0.0", port=8000,
forwarded_allow_ips="*"))
api = asyncio.create_task(server.serve())

await asyncio.wait([api])
Expand Down
Loading