From 8ad9505be5d467fafde4b0913acbfe07d5824f4f Mon Sep 17 00:00:00 2001
From: Goga Koreli
Date: Tue, 18 Feb 2025 15:59:31 -0800
Subject: [PATCH 1/4] refresh drives config credentials using a timer

---
 jupyter_drives/base.py | 38 +++++++++++++++++++++++++++++++-------
 1 file changed, 31 insertions(+), 7 deletions(-)

diff --git a/jupyter_drives/base.py b/jupyter_drives/base.py
index fc81e4d..9db5fee 100644
--- a/jupyter_drives/base.py
+++ b/jupyter_drives/base.py
@@ -4,10 +4,14 @@
 from traitlets import Enum, Unicode, default
 from traitlets.config import Configurable
 import boto3
+from tornado.ioloop import PeriodicCallback
 
 # Supported third-party services
 MANAGERS = {}
 
+# 15 minutes
+CREDENTIALS_REFRESH = 15 * 60 * 1000
+
 # Moved to the architecture of having one provider independent manager.
 # Keeping the loop in case of future developments that need this feature.
 for entry in entrypoints.get_group_all("jupyter_drives.manager_v1"):
@@ -19,6 +23,7 @@ class DrivesConfig(Configurable):
     """
     Allows configuration of supported drives via jupyter_notebook_config.py
 
+    Implements singleton pattern
     """
 
     session_token = Unicode(
@@ -48,7 +53,7 @@ class DrivesConfig(Configurable):
         allow_none=True,
         help = "Region name.",
     )
-
+
     api_base_url = Unicode(
         config=True,
         help="Base URL of the provider service REST API.",
@@ -59,7 +64,7 @@ def set_default_api_base_url(self):
         # for AWS S3 drives
         if self.provider == "s3":
             return "https://s3.amazonaws.com/" # region? https://s3..amazonaws.com/
-
+
         # for Google Cloud Storage drives
         elif self.provider == "gcs":
             return "https://www.googleapis.com/"
@@ -71,15 +76,34 @@ def set_default_api_base_url(self):
         help="The source control provider.",
     )
 
+    _instance = None
+
+    def __new__(cls, **kwargs):
+        if cls._instance is None:
+            cls._instance = super(DrivesConfig, cls).__new__(cls)
+            cls._instance._initialized = False
+        return cls._instance
+
     def __init__(self, **kwargs):
+        if self._initialized:
+            return
+
         super().__init__(**kwargs)
-        self._load_credentials()
-
-    def _load_credentials(self):
+        self._initialize_credentials_refresh()
+        self._initialized = True
+
+    def _initialize_credentials_refresh(self):
         # check if credentials were already set in jupyter_notebook_config.py
         if self.access_key_id is not None and self.secret_access_key is not None:
             return
-
+
+        self._load_credentials()
+        self._credential_refresh = PeriodicCallback(
+            self._load_credentials, CREDENTIALS_REFRESH
+        )
+        self._credential_refresh.start()
+
+    def _load_credentials(self):
         # automatically extract credentials for S3 drives
         try:
             s = boto3.Session()
@@ -110,4 +134,4 @@ def _load_credentials(self):
             self.access_key_id = c.access_key
             self.secret_access_key = c.secret_key
             self.region_name = s.region_name
-            self.session_token = c.token
\ No newline at end of file
+            self.session_token = c.token

From 51007843e4ac6ef4bd89882d5ce01db49d9263de Mon Sep 17 00:00:00 2001
From: Goga Koreli
Date: Tue, 18 Feb 2025 22:30:20 -0800
Subject: [PATCH 2/4] consolidate drives refresh logic into drives manager

---
 jupyter_drives/base.py    | 41 +++++----------------
 jupyter_drives/manager.py | 77 ++++++++++++++++++++++++++++-----------
 2 files changed, 64 insertions(+), 54 deletions(-)

diff --git a/jupyter_drives/base.py b/jupyter_drives/base.py
index 9db5fee..32f536b 100644
--- a/jupyter_drives/base.py
+++ b/jupyter_drives/base.py
@@ -4,14 +4,10 @@
 from traitlets import Enum, Unicode, default
 from traitlets.config import Configurable
 import boto3
-from tornado.ioloop import PeriodicCallback
 
 # Supported third-party services
 MANAGERS = {}
 
-# 15 minutes
-CREDENTIALS_REFRESH = 15 * 60 * 1000
-
 # Moved to the architecture of having one provider independent manager.
 # Keeping the loop in case of future developments that need this feature.
 for entry in entrypoints.get_group_all("jupyter_drives.manager_v1"):
@@ -23,7 +19,6 @@ class DrivesConfig(Configurable):
     """
     Allows configuration of supported drives via jupyter_notebook_config.py
 
-    Implements singleton pattern
     """
 
     session_token = Unicode(
@@ -53,7 +48,7 @@ class DrivesConfig(Configurable):
         allow_none=True,
         help = "Region name.",
     )
-
+
     api_base_url = Unicode(
         config=True,
         help="Base URL of the provider service REST API.",
@@ -64,7 +59,7 @@ def set_default_api_base_url(self):
         # for AWS S3 drives
         if self.provider == "s3":
             return "https://s3.amazonaws.com/" # region? https://s3..amazonaws.com/
-
+
         # for Google Cloud Storage drives
         elif self.provider == "gcs":
             return "https://www.googleapis.com/"
@@ -76,34 +71,16 @@ def set_default_api_base_url(self):
         help="The source control provider.",
     )
 
-    _instance = None
-
-    def __new__(cls, **kwargs):
-        if cls._instance is None:
-            cls._instance = super(DrivesConfig, cls).__new__(cls)
-            cls._instance._initialized = False
-        return cls._instance
-
     def __init__(self, **kwargs):
-        if self._initialized:
-            return
-
         super().__init__(**kwargs)
-        self._initialize_credentials_refresh()
-        self._initialized = True
-
-    def _initialize_credentials_refresh(self):
         # check if credentials were already set in jupyter_notebook_config.py
-        if self.access_key_id is not None and self.secret_access_key is not None:
+        self.credentials_already_set = self.access_key_id is not None and self.secret_access_key is not None
+        self.load_credentials()
+
+    def load_credentials(self):
+        if self.credentials_already_set:
             return
-
-        self._load_credentials()
-        self._credential_refresh = PeriodicCallback(
-            self._load_credentials, CREDENTIALS_REFRESH
-        )
-        self._credential_refresh.start()
-
-    def _load_credentials(self):
+
         # automatically extract credentials for S3 drives
         try:
             s = boto3.Session()
@@ -134,4 +111,4 @@ def _load_credentials(self):
             self.access_key_id = c.access_key
             self.secret_access_key = c.secret_key
             self.region_name = s.region_name
-            self.session_token = c.token
+            self.session_token = c.token
\ No newline at end of file
diff --git a/jupyter_drives/manager.py b/jupyter_drives/manager.py
index 10c105e..e7295d0 100644
--- a/jupyter_drives/manager.py
+++ b/jupyter_drives/manager.py
@@ -25,9 +25,14 @@
 import re
 
+from tornado.ioloop import PeriodicCallback
+
 # constant used as suffix to deal with directory objects
 EMPTY_DIR_SUFFIX = '/.jupyter_drives_fix_dir'
 
+# 15 minutes
+CREDENTIALS_REFRESH = 15 * 60 * 1000
+
 class JupyterDrivesManager():
     """
     Jupyter-drives manager class.
@@ -46,21 +51,12 @@ def __init__(self, config: traitlets.config.Config) -> None:
         self._client = httpx.AsyncClient()
         self._content_managers = {}
         self._max_files_listed = 1025
+        self._drives = None
 
         # instate fsspec file system
         self._file_system = fsspec.filesystem(self._config.provider, asynchronous=True)
 
-        # initiate aiobotocore session if we are dealing with S3 drives
-        if self._config.provider == 's3':
-            if self._config.access_key_id and self._config.secret_access_key:
-                self._s3_clients = {}
-                self._s3_session = get_session()
-                self._file_system = s3fs.S3FileSystem(anon=False, asynchronous=True, key=self._config.access_key_id, secret=self._config.secret_access_key, token=self._config.session_token)
-            else:
-                raise tornado.web.HTTPError(
-                    status_code= httpx.codes.BAD_REQUEST,
-                    reason="No credentials specified. Please set them in your user jupyter_server_config file.",
-                )
+        self._initialize_credentials_refresh()
 
     @property
     def base_api_url(self) -> str:
@@ -81,6 +77,45 @@ def per_page_argument(self) -> Optional[Tuple[str, int]]:
         """
         return ("per_page", 100)
 
+    def _initialize_credentials_refresh(self):
+        self._drives_refresh_callback()
+        if not self._config.credentials_already_set:
+            self._drives_refresh_timer = PeriodicCallback(
+                self._drives_refresh_callback, CREDENTIALS_REFRESH
+            )
+            self._drives_refresh_timer.start()
+
+    def _drives_refresh_callback(self):
+        self._config.load_credentials()
+        self._initialize_s3_file_system()
+        self._initialize_drives()
+
+    def _initialize_s3_file_system(self):
+        # initiate aiobotocore session if we are dealing with S3 drives
+        if self._config.provider == 's3':
+            if self._config.access_key_id and self._config.secret_access_key:
+                self._s3_session = get_session()
+                self._file_system = s3fs.S3FileSystem(
+                    anon=False,
+                    asynchronous=True,
+                    key=self._config.access_key_id,
+                    secret=self._config.secret_access_key,
+                    token=self._config.session_token,
+                )
+            else:
+                raise tornado.web.HTTPError(
+                    status_code=httpx.codes.BAD_REQUEST,
+                    reason="No credentials specified. Please set them in your user jupyter_server_config file.",
+                )
+
+    def _initialize_drives(self):
+        if self._config.provider == "s3":
+            S3Drive = get_driver(Provider.S3)
+            self._drives = [S3Drive(self._config.access_key_id, self._config.secret_access_key, True, None, None, None, self._config.session_token)]
+        elif self._config.provider == 'gcs':
+            GCSDrive = get_driver(Provider.GOOGLE_STORAGE)
+            self._drives = [GCSDrive(self._config.access_key_id, self._config.secret_access_key)] # verfiy credentials needed
+
     def set_listing_limit(self, new_limit):
         """Set new limit for listing.
@@ -105,23 +140,21 @@ async def list_drives(self):
         """
         data = []
         if self._config.access_key_id and self._config.secret_access_key:
-            if self._config.provider == "s3":
-                S3Drive = get_driver(Provider.S3)
-                drives = [S3Drive(self._config.access_key_id, self._config.secret_access_key, True, None, None, None, self._config.session_token)]
-
-            elif self._config.provider == 'gcs':
-                GCSDrive = get_driver(Provider.GOOGLE_STORAGE)
-                drives = [GCSDrive(self._config.access_key_id, self._config.secret_access_key)] # verfiy credentials needed
-
-            else:
+            if self._drives is None:
                 raise tornado.web.HTTPError(
                     status_code= httpx.codes.NOT_IMPLEMENTED,
                     reason="Listing drives not supported for given provider.",
                 )
             results = []
-            for drive in drives:
-                results += drive.list_containers()
+            for drive in self._drives:
+                try:
+                    results += drive.list_containers()
+                except Exception as e:
+                    raise tornado.web.HTTPError(
+                        status_code=httpx.codes.BAD_REQUEST,
+                        reason=f"The following error occured when listing drives: {e}",
+                    )
 
             for result in results:
                 data.append(

From 5165ae0c5164f0a2d90085014df244287425825f Mon Sep 17 00:00:00 2001
From: Goga Koreli
Date: Wed, 19 Feb 2025 14:02:44 -0800
Subject: [PATCH 3/4] fix mount drives and refresh drive content managers

---
 jupyter_drives/manager.py | 78 +++++++++++++++++++++------------------
 src/contents.ts           |  5 ++-
 2 files changed, 45 insertions(+), 38 deletions(-)

diff --git a/jupyter_drives/manager.py b/jupyter_drives/manager.py
index e7295d0..47028e2 100644
--- a/jupyter_drives/manager.py
+++ b/jupyter_drives/manager.py
@@ -89,6 +89,7 @@ def _drives_refresh_callback(self):
         self._config.load_credentials()
         self._initialize_s3_file_system()
         self._initialize_drives()
+        self._initialize_content_managers()
 
     def _initialize_s3_file_system(self):
         # initiate aiobotocore session if we are dealing with S3 drives
@@ -116,6 +117,43 @@ def _initialize_drives(self):
             GCSDrive = get_driver(Provider.GOOGLE_STORAGE)
             self._drives = [GCSDrive(self._config.access_key_id, self._config.secret_access_key)] # verfiy credentials needed
 
+    def _initialize_content_managers(self):
+        for drive_name, content_manager in self._content_managers.items():
+            self._initialize_content_manager(drive_name, content_manager["provider"], content_manager["location"])
+
+    def _initialize_content_manager(self, drive_name, provider, region=None):
+        try:
+            if provider == 's3':
+                if self._config.session_token is None:
+                    configuration = {
+                        "aws_access_key_id": self._config.access_key_id,
+                        "aws_secret_access_key": self._config.secret_access_key,
+                        "aws_region": region,
+                    }
+                else:
+                    configuration = {
+                        "aws_access_key_id": self._config.access_key_id,
+                        "aws_secret_access_key": self._config.secret_access_key,
+                        "aws_session_token": self._config.session_token,
+                        "aws_region": region,
+                    }
+                store = obs.store.S3Store.from_url("s3://" + drive_name + "/", config = configuration)
+            elif provider == 'gcs':
+                store = obs.store.GCSStore.from_url("gs://" + drive_name + "/", config = {}) # add gcs config
+            elif provider == 'http':
+                store = obs.store.HTTPStore.from_url(drive_name, client_options = {}) # add http client config
+
+            self._content_managers[drive_name] = {
+                "store": store,
+                "location": region,
+                "provider": provider,
+            }
+        except Exception as e:
+            raise tornado.web.HTTPError(
+                status_code=httpx.codes.BAD_REQUEST,
+                reason=f"The following error occured when initializing the content manager: {e}",
+            )
+
     def set_listing_limit(self, new_limit):
         """Set new limit for listing.
@@ -183,42 +221,10 @@ async def mount_drive(self, drive_name, provider):
         Args:
             drive_name: name of drive to mount
         """
-        try:
-            # check if content manager doesn't already exist
-            if drive_name not in self._content_managers or self._content_managers[drive_name] is None:
-                if provider == 's3':
-                    # get region of drive
-                    region = await self._get_drive_location(drive_name)
-                    if self._config.session_token is None:
-                        configuration = {
-                            "aws_access_key_id": self._config.access_key_id,
-                            "aws_secret_access_key": self._config.secret_access_key,
-                            "aws_region": region
-                        }
-                    else:
-                        configuration = {
-                            "aws_access_key_id": self._config.access_key_id,
-                            "aws_secret_access_key": self._config.secret_access_key,
-                            "aws_session_token": self._config.session_token,
-                            "aws_region": region
-                        }
-                    store = obs.store.S3Store.from_url("s3://" + drive_name + "/", config = configuration)
-                elif provider == 'gcs':
-                    store = obs.store.GCSStore.from_url("gs://" + drive_name + "/", config = {}) # add gcs config
-                elif provider == 'http':
-                    store = obs.store.HTTPStore.from_url(drive_name, client_options = {}) # add http client config
-
-                self._content_managers[drive_name] = {
-                    "store": store,
-                    "location": region
-                }
-
-            else:
-                raise tornado.web.HTTPError(
-                    status_code= httpx.codes.CONFLICT,
-                    reason= "Drive already mounted."
-                )
-
+        try:
+            if provider == 's3':
+                region = await self._get_drive_location(drive_name)
+            self._initialize_content_manager(drive_name, provider, region)
         except Exception as e:
             raise tornado.web.HTTPError(
                 status_code= httpx.codes.BAD_REQUEST,
diff --git a/src/contents.ts b/src/contents.ts
index 8a81f2a..52dc47b 100644
--- a/src/contents.ts
+++ b/src/contents.ts
@@ -230,10 +230,11 @@ export class Drive implements Contents.IDrive {
       // when accessed the first time, mount drive
       if (currentDrive.mounted === false) {
        try {
-          await mountDrive(localPath, {
+          const driveName = currentDrive.name;
+          await mountDrive(driveName, {
             provider: currentDrive.provider
           });
-          this._drivesList.filter(x => x.name === localPath)[0].mounted = true;
+          this._drivesList.filter(x => x.name === driveName)[0].mounted = true;
         } catch (e) {
           // it will give an error if drive is already mounted
         }

From b6ba60ca9f007bf5694df1099b39f90d93f3917b Mon Sep 17 00:00:00 2001
From: Goga Koreli
Date: Thu, 20 Feb 2025 11:12:34 -0800
Subject: [PATCH 4/4] fix github actions v3 deprecation

---
 .github/workflows/build.yml         | 8 ++++----
 .github/workflows/check-release.yml | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index b87aab3..501735e 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -53,7 +53,7 @@ jobs:
         pip uninstall -y "jupyter_drives" jupyterlab
 
       - name: Upload extension packages
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@v4
         with:
           name: extension-artifacts
           path: dist/jupyter_drives*
@@ -69,7 +69,7 @@ jobs:
         with:
           python-version: '3.9'
           architecture: 'x64'
-      - uses: actions/download-artifact@v3
+      - uses: actions/download-artifact@v4
         with:
           name: extension-artifacts
       - name: Install and Test
@@ -105,7 +105,7 @@ jobs:
         uses: jupyterlab/maintainer-tools/.github/actions/base-setup@v1
 
       - name: Download extension package
-        uses: actions/download-artifact@v3
+        uses: actions/download-artifact@v4
         with:
          name: extension-artifacts
 
@@ -139,7 +139,7 @@ jobs:
 
       - name: Upload Playwright Test report
         if: always()
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@v4
         with:
           name: jupyter_drives-playwright-tests
          path: |
diff --git a/.github/workflows/check-release.yml b/.github/workflows/check-release.yml
index 4c83b85..2ec8049 100644
--- a/.github/workflows/check-release.yml
+++ b/.github/workflows/check-release.yml
@@ -20,7 +20,7 @@ jobs:
           token: ${{ secrets.GITHUB_TOKEN }}
 
       - name: Upload Distributions
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@v4
         with:
           name: jupyter_drives-releaser-dist-${{ github.run_number }}
          path: .jupyter_releaser_checkout/dist
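
An illustrative aside, not part of the patch series: taken together, patches 1-3 converge on one refresh pattern, a tornado PeriodicCallback that fires every 15 minutes, reloads credentials from the ambient boto3 session, and rebuilds the s3fs filesystem (and, in the real manager, the libcloud drive objects and per-drive obstore content managers) with the fresh keys. The minimal sketch below shows only that core loop; CredentialRefresher and CREDENTIALS_REFRESH_MS are hypothetical names, and it assumes tornado, boto3 and s3fs are installed with a tornado IOLoop already running, as jupyter-server provides.

# Minimal sketch of the refresh loop the patches implement; illustrative only,
# not the extension's actual API.
import boto3
import s3fs
from tornado.ioloop import PeriodicCallback

CREDENTIALS_REFRESH_MS = 15 * 60 * 1000  # same 15-minute period the patches use


class CredentialRefresher:
    """Periodically reload AWS credentials and rebuild the S3 filesystem."""

    def __init__(self):
        self.file_system = None
        self._refresh()  # initial load, mirroring _initialize_credentials_refresh()
        self._timer = PeriodicCallback(self._refresh, CREDENTIALS_REFRESH_MS)
        self._timer.start()  # scheduling requires a running tornado IOLoop

    def _refresh(self):
        # boto3 resolves credentials from the environment, shared config or an
        # attached role; re-reading them picks up rotated session tokens.
        creds = boto3.Session().get_credentials()
        if creds is None:
            return  # nothing found; the real manager raises an HTTP 400 instead
        self.file_system = s3fs.S3FileSystem(
            anon=False,
            asynchronous=True,
            key=creds.access_key,
            secret=creds.secret_key,
            token=creds.token,
        )

Keeping the timer in JupyterDrivesManager rather than DrivesConfig (patch 2) leaves the Configurable as a plain credential holder and lets a single callback rebuild every S3-backed object that caches keys.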