File renamed without changes.
4 changes: 3 additions & 1 deletion .github/workflows/ci.yaml
@@ -3,7 +3,9 @@ name: Run I-GUIDE Builds and Config Saturations
on:
workflow_dispatch:
push:
branches: ['*']
branches-ignore:
- productionalization
- develop

env:
DOMAIN: iguide.cuahsi.io
96 changes: 0 additions & 96 deletions .github/workflows/deploy-dev.yaml

This file was deleted.

35 changes: 21 additions & 14 deletions .github/workflows/deploy.yaml
@@ -4,28 +4,30 @@ on:
workflow_dispatch:
push:
branches:
- 'develop'
- 'productionalization'

env:
DOMAIN: iguide.cuahsi.io
TAG: latest
DEPLOY_TO_PRODUCTION: ${{ github.ref == 'refs/heads/productionalization' && true || false }}
DOMAIN: ${{ env.DEPLOY_TO_PRODUCTION == false && 'iguide-dev.cuahsi.io' || 'iguide.cuahsi.io' }}
IP: ${{ env.DEPLOY_TO_PRODUCTION == false && 'iguide-dev' || 'iguide' }}
KUBE_CLUSTER_NAME: ${{ env.DEPLOY_TO_PRODUCTION == false && 'iguide-dev' || 'iguide' }}
TAG: ${{ env.DEPLOY_TO_PRODUCTION == false && github.sha || 'latest' }}
TESTING: false
IP: iguide
OIDC_ISSUER: https://orcid.org
DATABASE_NAME: iguide_beta
# why are we using iguide_beta for production deployment? Should it be iguide_demo?
DATABASE_NAME: ${{ env.DEPLOY_TO_PRODUCTION == false && 'iguide_dev' || 'iguide_beta' }}
DB_PROTOCOL: mongodb+srv
HYDROSHARE_META_READ_URL: https://www.hydroshare.org/hsapi2/resource/%s/json/
HYDROSHARE_FILE_READ_URL: https://www.hydroshare.org/hsapi/resource/%s/files/
VITE_APP_NAME: I-GUIDE
VITE_APP_URL: https://iguide.cuahsi.io
VITE_APP_API_URL: https://iguide.cuahsi.io/api
VITE_APP_URL: ${{ env.DEPLOY_TO_PRODUCTION == false && 'https://iguide-dev.cuahsi.io' || 'https://iguide.cuahsi.io' }}
VITE_APP_API_URL: ${{ env.DEPLOY_TO_PRODUCTION == false && 'https://iguide-dev.cuahsi.io/api' || 'https://iguide.cuahsi.io/api' }}
VITE_APP_LOGIN_URL: https://orcid.org/oauth/authorize
VITE_APP_GOOGLE_MAPS_API_KEY: ""
VITE_APP_SUPPORT_EMAIL: [email protected]
VITE_APP_CLIENT_ID: APP-4ZA8C8BYAH3QHNE9
SEARCH_RELEVANCE_SCORE_THRESHOLD: 1.4


jobs:
deploy:
runs-on: ubuntu-latest
@@ -43,9 +45,9 @@ jobs:

- name: Compile the root env file
env:
DB_HOST: ${{ secrets.DB_HOST }}
DB_USERNAME: ${{ secrets.DB_USERNAME }}
DB_PASSWORD: ${{ secrets.DB_PASSWORD }}
DB_HOST: ${{ env.DEPLOY_TO_PRODUCTION == false && secrets.DB_HOST_BETA || secrets.DB_HOST }}
DB_USERNAME: ${{ env.DEPLOY_TO_PRODUCTION == false && secrets.DB_USERNAME_BETA || secrets.DB_USERNAME }}
DB_PASSWORD: ${{ env.DEPLOY_TO_PRODUCTION == false && secrets.DB_PASSWORD_BETA || secrets.DB_PASSWORD }}
run: |
variables=("OIDC_ISSUER" "DB_USERNAME" "DB_PASSWORD" "DB_HOST" "DATABASE_NAME" "DB_PROTOCOL" "TESTING" "VITE_APP_LOGIN_URL" "HYDROSHARE_META_READ_URL" "HYDROSHARE_FILE_READ_URL" "SEARCH_RELEVANCE_SCORE_THRESHOLD")

@@ -58,6 +60,9 @@ jobs:
done

- name: Compile the frontend env file
env:
VITE_APP_GOOGLE_MAPS_API_KEY: ${{env.DEPLOY_TO_PRODUCTION == false && secrets.VITE_APP_GOOGLE_MAPS_API_KEY || ''}}

run: |
variables=("VITE_APP_NAME" "VITE_APP_API_URL" "VITE_APP_SUPPORT_EMAIL" "VITE_APP_URL" "VITE_APP_LOGIN_URL" "VITE_APP_CLIENT_ID" "VITE_APP_GOOGLE_MAPS_API_KEY")

@@ -86,11 +91,13 @@ jobs:
USE_GKE_GCLOUD_AUTH_PLUGIN: True
GOOGLE_PROJECT: ${{ secrets.GOOGLE_PROJECT }}
run: |
gcloud container clusters get-credentials iguide --region us-central1
find ./kubernetes -type f | xargs -i sed -i "s/GOOGLE_PROJECT/$GOOGLE_PROJECT/g" {}
gcloud container clusters get-credentials $KUBE_CLUSTER_NAME --region us-central1
find ./kubernetes -type f | xargs -i sed -i "s/GOOGLE_PROJECT/$GOOGLE_PROJECT/g" {}
find ./kubernetes -type f | xargs -i sed -i "s/IGUIDE_TAG/$TAG/g" {}
find ./kubernetes -type f | xargs -i sed -i "s/IGUIDE_DOMAIN/$DOMAIN/g" {}
find ./kubernetes -type f | xargs -i sed -i "s/IGUIDE_IP/$IP/g" {}
if [[ "${{ env.DEPLOY_TO_PRODUCTION }}" == true ]]; then
find ./kubernetes -type f | xargs -i sed -i "s/IGUIDE_IP/$IP/g" {}
fi
kubectl apply -f kubernetes/
# Refresh pods
kubectl delete pods --all
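
Note on the env-file steps above: the "Compile the root env file" and "Compile the frontend env file" steps iterate over a list of variable names and write each one as a NAME=value line into a dotenv file for the containers. A minimal Python sketch of that idea, assuming illustrative variable names and an illustrative output path (neither is taken from the workflow):

import os

def compile_env_file(variable_names, output_path):
    """Write NAME=value lines for the selected environment variables."""
    # Hypothetical helper; mirrors the bash loop in the workflow steps above.
    with open(output_path, "w") as env_file:
        for name in variable_names:
            value = os.environ.get(name, "")
            env_file.write(f"{name}={value}\n")

# Example usage (illustrative variable names only):
# compile_env_file(["OIDC_ISSUER", "DATABASE_NAME", "DB_PROTOCOL"], ".env")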
2 changes: 1 addition & 1 deletion Makefile
@@ -33,4 +33,4 @@ test:

.PHONY: pre-post
pre-post:
docker-compose run catalog-trigger python /app/triggers/management/change_streams_pre_and_post.py
docker-compose run catalog-trigger python /app/api/models/management/change_streams_pre_and_post.py
2 changes: 1 addition & 1 deletion api/adapters/hydroshare.py
@@ -8,7 +8,7 @@
from api.exceptions import RepositoryException
from api.models import schema
from api.models.catalog import DatasetMetadataDOC
from api.models.user import Submission, SubmissionType
from api.models.user import Submission


class Creator(BaseModel):
31 changes: 24 additions & 7 deletions api/adapters/s3.py
@@ -1,12 +1,16 @@
import boto3
import json
from botocore.client import Config
from http import HTTPStatus

import boto3
from botocore import UNSIGNED
from botocore.client import Config
from botocore.exceptions import ClientError as S3ClientError

from api.adapters.base import AbstractRepositoryMetadataAdapter, AbstractRepositoryRequestHandler
from api.adapters.utils import RepositoryType, register_adapter
from api.exceptions import RepositoryException
from api.models.catalog import DatasetMetadataDOC
from api.models.user import Submission, SubmissionType
from api.models.user import Submission


class _S3RequestHandler(AbstractRepositoryRequestHandler):
@@ -16,12 +20,25 @@ def get_metadata(self, record_id: str):
file_key = record_id.split("+")[2]

s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED), endpoint_url=endpoint_url)

response = s3.get_object(Bucket=bucket_name, Key=file_key)
json_content = response['Body'].read().decode('utf-8')
try:
response = s3.get_object(Bucket=bucket_name, Key=file_key)
except S3ClientError as ex:
if ex.response["Error"]["Code"] == "NoSuchKey":
raise RepositoryException(
detail=f"Specified metadata file was not found in S3: {bucket_name}/{file_key}",
status_code=HTTPStatus.NOT_FOUND
)
else:
err_msg = f"Error accessing S3 file({bucket_name}/{file_key}): {str(ex)}"
raise RepositoryException(detail=err_msg, status_code=HTTPStatus.BAD_REQUEST)

json_content = response['Body'].read().decode('utf-8')
# Parse the JSON content
data = json.loads(json_content)
try:
data = json.loads(json_content)
except json.JSONDecodeError as ex:
err_msg = f"Invalid JSON content in S3 file ({file_key}). Error: {str(ex)}"
raise RepositoryException(detail=err_msg, status_code=HTTPStatus.BAD_REQUEST)

return data

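
For reference, a self-contained sketch of the fetch-and-parse pattern the updated adapter uses: an unsigned boto3 client, a 404-style path for missing keys, and explicit handling of malformed JSON. Bucket, key, and endpoint are placeholders, and plain built-in exceptions stand in for RepositoryException:

import json

import boto3
from botocore import UNSIGNED
from botocore.client import Config
from botocore.exceptions import ClientError

def fetch_json_from_s3(endpoint_url: str, bucket: str, key: str) -> dict:
    # Anonymous (unsigned) client, as in the adapter above.
    s3 = boto3.client("s3", config=Config(signature_version=UNSIGNED), endpoint_url=endpoint_url)
    try:
        response = s3.get_object(Bucket=bucket, Key=key)
    except ClientError as ex:
        if ex.response["Error"]["Code"] == "NoSuchKey":
            raise FileNotFoundError(f"Metadata file not found in S3: {bucket}/{key}") from ex
        raise RuntimeError(f"Error accessing S3 file ({bucket}/{key}): {ex}") from ex
    body = response["Body"].read().decode("utf-8")
    try:
        return json.loads(body)
    except json.JSONDecodeError as ex:
        raise ValueError(f"Invalid JSON content in S3 file ({key}): {ex}") from ex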
5 changes: 5 additions & 0 deletions api/models/user.py
@@ -30,6 +30,11 @@ def identifier(self):
identifier = f"{endpoint_url}/{self.bucket}/{self.path}"
return identifier

@property
def fetch_identifier(self):
# This is the identifier that is used to fetch the file from S3
return f"{self.endpoint_url}+{self.bucket}+{self.path}"


class Submission(Document):
title: str = None
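
The model now exposes two identifier forms: the "/"-joined identifier stored with the submission and a "+"-joined fetch_identifier that the S3 adapter later splits back apart. A hedged sketch of the round trip, with an abbreviated stand-in for the real S3Path model (the trailing-slash handling in identifier is an assumption, not shown in the diff):

from pydantic import BaseModel

class S3PathSketch(BaseModel):
    endpoint_url: str
    bucket: str
    path: str

    @property
    def identifier(self) -> str:
        # Human-readable identifier stored with the submission (slash handling assumed).
        return f"{self.endpoint_url.rstrip('/')}/{self.bucket}/{self.path}"

    @property
    def fetch_identifier(self) -> str:
        # "+"-delimited form the S3 adapter splits to locate the object.
        return f"{self.endpoint_url}+{self.bucket}+{self.path}"

# Round trip used in _save_to_db: fetch_identifier -> S3Path -> identifier
s3_path = S3PathSketch(endpoint_url="https://example-minio.local/", bucket="demo-bucket", path="data/metadata.json")
endpoint, bucket, key = s3_path.fetch_identifier.split("+")
assert S3PathSketch(endpoint_url=endpoint, bucket=bucket, path=key).identifier == s3_path.identifier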
26 changes: 19 additions & 7 deletions api/routes/catalog.py
@@ -144,14 +144,18 @@ async def refresh_dataset_from_hydroshare(identifier: str, user: Annotated[User,


@router.put("/repository/s3", response_model=DatasetMetadataDOC)
async def register_s3_dataset(request_model: S3Path, user: Annotated[User, Depends(get_current_user)]):
async def register_s3_dataset(s3_path: S3Path, user: Annotated[User, Depends(get_current_user)]):
"""User provides the path to the S3 object. The metadata is fetched from the s3 object and saved to the catalog."""
path = request_model.path
bucket = request_model.bucket
endpoint_url = request_model.endpoint_url
identifier = f"{endpoint_url}+{bucket}+{path}"

identifier = s3_path.identifier
submission: Submission = user.submission_by_repository(repo_type=RepositoryType.S3, identifier=identifier)
dataset = await _save_to_db(repository_type=RepositoryType.S3, identifier=identifier, user=user,
if submission is not None:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail="This S3 dataset has already been submitted by this user",
)
fetch_identifier = s3_path.fetch_identifier
dataset = await _save_to_db(repository_type=RepositoryType.S3, identifier=fetch_identifier, user=user,
submission=submission)
return dataset

@@ -171,7 +175,7 @@ async def create_dataset_s3(
if submission is not None:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail="Dataset metadata record was not found",
detail="This S3 dataset has already been submitted by this user",
)
await document.insert()
submission = document.as_submission()
@@ -221,11 +225,17 @@ async def _save_to_db(repository_type: RepositoryType, identifier: str, user: Us
adapter = get_adapter_by_type(repository_type=repository_type)
# fetch metadata from repository as catalog dataset
repo_dataset: DatasetMetadataDOC = await _get_repo_meta_as_catalog_record(adapter=adapter, identifier=identifier)
s3_path = None
if repository_type == RepositoryType.S3:
s3_endpoint_url, bucket, path = identifier.split("+")
s3_path = S3Path(endpoint_url=s3_endpoint_url, bucket=bucket, path=path)
identifier = s3_path.identifier
if submission is None:
# new registration
await repo_dataset.insert()
submission = repo_dataset.as_submission()
submission = adapter.update_submission(submission=submission, repo_record_id=identifier)
submission.s3_path = s3_path
user.submissions.append(submission)
await user.save(link_rule=WriteRules.WRITE)
dataset = repo_dataset
@@ -239,12 +249,14 @@ async def _save_to_db(repository_type: RepositoryType, identifier: str, user: Us
updated_submission = adapter.update_submission(submission=updated_submission, repo_record_id=identifier)
updated_submission.id = submission.id
updated_submission.submitted = submission.submitted
updated_submission.s3_path = s3_path
await updated_submission.replace()
dataset = updated_dataset
submission = updated_submission

dataset = inject_repository_identifier(submission, dataset)
dataset = inject_submission_type(submission, dataset)
dataset = inject_submission_s3_path(submission, dataset)
return dataset


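
The registration routes above now reject duplicate submissions up front with a 400 instead of passing an existing submission through to _save_to_db. A minimal FastAPI-style sketch of that guard, using an in-memory set as a hypothetical stand-in for the real per-user submission lookup and persistence:

from fastapi import FastAPI, HTTPException, status
from pydantic import BaseModel

app = FastAPI()

class S3PathIn(BaseModel):
    endpoint_url: str
    bucket: str
    path: str

# Hypothetical in-memory record of identifiers already submitted; the real route
# consults user.submission_by_repository() instead.
_already_submitted: set[str] = set()

@app.put("/repository/s3")
async def register_s3_dataset(s3_path: S3PathIn):
    identifier = f"{s3_path.endpoint_url.rstrip('/')}/{s3_path.bucket}/{s3_path.path}"
    if identifier in _already_submitted:
        # Mirrors the 400 returned when the same user re-submits the same object.
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="This S3 dataset has already been submitted by this user",
        )
    _already_submitted.add(identifier)
    # The real route fetches the metadata via the S3 adapter and persists it in _save_to_db.
    return {"registered": identifier}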
25 changes: 25 additions & 0 deletions tests/test_dataset_routes.py
@@ -402,6 +402,31 @@ async def test_get_datasets_exclude_none(client_test, dataset_data):
assert "measurementTechnique" not in a_property


@pytest.mark.asyncio
async def test_register_minio_s3_dataset(client_test):
"""Testing registering metadata for a generic dataset stored on minIO s3"""

# set the path to the generic metadata file on minIO s3
s3_path = {
"path": "data/.hs/dataset_metadata.json",
"bucket": "catalog-api-test",
"endpoint_url": "https://api.minio.cuahsi.io/",
}

dataset_response = await client_test.put(
"api/catalog/repository/s3", json=s3_path
)
assert dataset_response.status_code == 200
ds_metadata = dataset_response.json()
expected_repository_identifier = f"{s3_path['endpoint_url']}{s3_path['bucket']}/{s3_path['path']}"
assert ds_metadata["repository_identifier"] == expected_repository_identifier

# retrieve the record from the db
record_id = ds_metadata.get('_id')
response = await client_test.get(f"api/catalog/dataset/{record_id}")
assert response.status_code == 200


@pytest.mark.parametrize("multiple", [True, False])
@pytest.mark.asyncio
async def test_get_submissions_1(client_test, dataset_data, multiple):
4 changes: 2 additions & 2 deletions triggers/scheduler.py
@@ -71,8 +71,8 @@ async def do_daily():
else:
# couldn't retrieve matching repository record
await db["discovery"].delete_one({"_id": submission.identifier})
except:
logger.exception(f"Failed to collect submission {submission.url}")
except Exception as exp:
logger.exception(f"Failed to collect submission {submission.url}, Error: {str(exp)}")


def main():
4 changes: 2 additions & 2 deletions triggers/update_catalog.py
@@ -23,8 +23,8 @@ async def _main():
while True:
try:
await watch_catalog(db)
except:
logger.exception("Submission Watch Task failed, restarting the task")
except Exception as exp:
logger.exception(f"Submission Watch Task failed. Error:{str(exp)}, restarting the task")
finally:
db.close()

4 changes: 2 additions & 2 deletions triggers/update_typeahead.py
@@ -19,8 +19,8 @@ async def _main():
while True:
try:
await watch_discovery(db)
except:
logger.exception("Discovery Watch Task failed, restarting the task")
except Exception as exp:
logger.exception(f"Discovery Watch Task failed. Error:{str(exp)}, restarting the task")
finally:
db.close()

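
All three trigger loops above replace a bare except with catching Exception and including the error text in the log message. A minimal sketch of that restart-loop shape, assuming placeholder names for the watch coroutine and database factory (the backoff sleep is an assumption, not in the diff):

import asyncio
import logging

logger = logging.getLogger(__name__)

async def run_watch_task(watch, get_db):
    # Restart the watch task whenever it fails, logging the reason each time.
    while True:
        db = get_db()
        try:
            await watch(db)
        except Exception as exp:
            # logger.exception records the traceback; the message adds the error text.
            logger.exception(f"Watch task failed. Error: {str(exp)}, restarting the task")
        finally:
            db.close()
        await asyncio.sleep(1)  # small backoff before restarting (assumption)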