Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: experimental transfer sharepoint authentication to AzureAD #326

Open
wants to merge 16 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
## 0.3.16-dev1

### Enchancements

* **Sharepoint coonner no longer uses depracted authorisation method. Uses EntraID instead via msal.**

## 0.3.16-dev0

### Fixes
Expand Down
89 changes: 89 additions & 0 deletions test/integration/connectors/test_sharepoint.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
# test_sharepoint_integration.py
import os

import pytest
from pydantic import SecretStr
from test.integration.connectors.utils.validation.source import (
SourceValidationConfigs,
source_connector_validation,
)
from test.integration.utils import requires_env
from unstructured_ingest.v2.processes.connectors.sharepoint import (
CONNECTOR_TYPE,
SharepointAccessConfig,
SharepointConnectionConfig,
SharepointDownloader,
SharepointDownloaderConfig,
SharepointIndexer,
SharepointIndexerConfig,
SharepointPermissionsConfig,
)

SOURCE_TAG = "source"


@pytest.mark.asyncio
@pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG)
@requires_env(
"SHAREPOINT_CLIENT_ID", "SHAREPOINT_CRED", "SHAREPOINT_PERMISSIONS_TENANT", "SHAREPOINT_SITE"
)
async def test_sharepoint_source(temp_dir):
"""
Integration test that:
1) Creates a SharepointIndexer to list/enumerate items in a given site
2) Creates a SharepointDownloader to fetch each enumerated item
3) Runs a validation helper to confirm the end-to-end pipeline
"""
client_id = os.getenv("SHAREPOINT_CLIENT_ID")
client_cred = os.getenv("SHAREPOINT_CRED")
tenant = os.getenv("SHAREPOINT_PERMISSIONS_TENANT")
site_url = os.getenv("SHAREPOINT_SITE")

access_config = SharepointAccessConfig(client_cred=client_cred)

permissions_config = SharepointPermissionsConfig(
permissions_application_id=client_id,
permissions_client_cred=SecretStr(client_cred),
permissions_tenant=tenant,
authority_url=SecretStr("https://login.microsoftonline.com/"),
)

connection_config = SharepointConnectionConfig(
client_id=client_id,
site=site_url,
access_config=access_config,
permissions_config=permissions_config,
)

index_config = SharepointIndexerConfig(
path=None,
recursive=True,
omit_files=False,
omit_pages=False,
omit_lists=True,
)

download_config = SharepointDownloaderConfig(
download_dir=temp_dir # Directory where the files get saved
)

indexer = SharepointIndexer(
connection_config=connection_config,
index_config=index_config,
)
downloader = SharepointDownloader(
connection_config=connection_config,
download_config=download_config,
)

expected_files = 7

await source_connector_validation(
indexer=indexer,
downloader=downloader,
configs=SourceValidationConfigs(
test_id="sharepoint",
expected_num_files=expected_files,
validate_downloaded_files=True,
),
)
2 changes: 1 addition & 1 deletion unstructured_ingest/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.3.16-dev0" # pragma: no cover
__version__ = "0.3.16-dev1" # pragma: no cover
29 changes: 20 additions & 9 deletions unstructured_ingest/v2/processes/connectors/sharepoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,34 +91,45 @@ class SharepointConnectionConfig(ConnectionConfig):
access_config: Secret[SharepointAccessConfig]
permissions_config: Optional[SharepointPermissionsConfig] = None

@requires_dependencies(["office365"], extras="sharepoint")
@requires_dependencies(["msal", "office365"], extras="sharepoint")
def get_client(self) -> "ClientContext":
from office365.runtime.auth.client_credential import ClientCredential
from msal import ConfidentialClientApplication
from office365.sharepoint.client_context import ClientContext
from types import SimpleNamespace

try:
credentials = ClientCredential(
self.client_id, self.access_config.get_secret_value().client_cred
)
site_client = ClientContext(self.site).with_credentials(credentials)
token_scope = f"{self.site}/.default"
token_result = self.get_permissions_token(token_scope=token_scope)
if "access_token" not in token_result:
raise SourceConnectionNetworkError(
f"Failed to obtain token for SharePoint: \
{token_result.get('error_description', '')}"
)
access_token = token_result["access_token"]

def token_callback():
return SimpleNamespace(tokenType="Bearer", accessToken=access_token)
# Then set up the SharePoint client context with that token
site_client = ClientContext(self.site).with_access_token(token_callback)

except Exception as e:
logger.error(f"Couldn't set Sharepoint client: {e}")
raise e
return site_client

@requires_dependencies(["msal"], extras="sharepoint")
def get_permissions_token(self):
def get_permissions_token(self, token_scope: Optional[str] = "https://graph.microsoft.com/.default"):
from msal import ConfidentialClientApplication

try:
client_credential = self.permissions_config.permissions_client_cred.get_secret_value()
app = ConfidentialClientApplication(
authority=f"{self.permissions_config.authority_url.get_secret_value()}/"
authority=f"{self.permissions_config.authority_url.get_secret_value()}"
f"{self.permissions_config.permissions_tenant}",
client_id=self.permissions_config.permissions_application_id,
client_credential=client_credential,
)
token = app.acquire_token_for_client(scopes=["https://graph.microsoft.com/.default"])
token = app.acquire_token_for_client(scopes=[token_scope])
except ValueError as exc:
logger.error("Couldn't set up credentials for Sharepoint")
raise exc
Expand Down