-
Notifications
You must be signed in to change notification settings - Fork 39
feat: experimental transfer sharepoint authentication to AzureAD #326
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 9 commits
acc293f
252ccc0
06e3409
9a3ab36
1ce2811
b1a5aa0
ea937fc
9e39761
cf9211c
c84d20c
d4c8a6b
a3b6882
f5cd905
eab8adf
986d203
644ca1f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
# test_sharepoint_integration.py | ||
import os | ||
|
||
import pytest | ||
|
||
from test.integration.connectors.utils.validation.source import ( | ||
SourceValidationConfigs, | ||
source_connector_validation, | ||
) | ||
from test.integration.utils import requires_env | ||
from unstructured_ingest.v2.processes.connectors.sharepoint import ( | ||
CONNECTOR_TYPE, | ||
SharepointAccessConfig, | ||
SharepointConnectionConfig, | ||
SharepointDownloader, | ||
SharepointDownloaderConfig, | ||
SharepointIndexer, | ||
SharepointIndexerConfig, | ||
SharepointPermissionsConfig, | ||
) | ||
|
||
SOURCE_TAG = "source" | ||
|
||
|
||
@pytest.mark.asyncio | ||
@pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG) | ||
@requires_env( | ||
"SHAREPOINT_CLIENT_ID", "SHAREPOINT_CRED", "SHAREPOINT_PERMISSIONS_TENANT", "SHAREPOINT_SITE" | ||
) | ||
async def test_sharepoint_source(temp_dir): | ||
""" | ||
Integration test that: | ||
1) Creates a SharepointIndexer to list/enumerate items in a given site | ||
2) Creates a SharepointDownloader to fetch each enumerated item | ||
3) Runs a validation helper to confirm the end-to-end pipeline | ||
""" | ||
client_id = os.getenv("SHAREPOINT_CLIENT_ID") | ||
client_cred = os.getenv("SHAREPOINT_CRED") | ||
tenant = os.getenv("SHAREPOINT_PERMISSIONS_TENANT") | ||
site_url = os.getenv("SHAREPOINT_SITE") | ||
|
||
access_config = SharepointAccessConfig(client_cred=client_cred) | ||
|
||
permissions_config = SharepointPermissionsConfig( | ||
permissions_application_id=None, | ||
permissions_tenant=tenant, | ||
permissions_client_cred=None, # or SecretStr(...) | ||
) | ||
|
||
connection_config = SharepointConnectionConfig( | ||
client_id=client_id, | ||
site=site_url, | ||
access_config=access_config, | ||
permissions_config=permissions_config, | ||
) | ||
|
||
index_config = SharepointIndexerConfig( | ||
path=None, | ||
recursive=True, | ||
omit_files=False, | ||
omit_pages=False, | ||
omit_lists=True, | ||
) | ||
|
||
download_config = SharepointDownloaderConfig( | ||
download_dir=temp_dir # Directory where the files get saved | ||
) | ||
|
||
indexer = SharepointIndexer( | ||
connection_config=connection_config, | ||
index_config=index_config, | ||
) | ||
downloader = SharepointDownloader( | ||
connection_config=connection_config, | ||
download_config=download_config, | ||
) | ||
|
||
expected_files = 7 | ||
|
||
await source_connector_validation( | ||
indexer=indexer, | ||
downloader=downloader, | ||
configs=SourceValidationConfigs( | ||
test_id="sharepoint", | ||
expected_num_files=expected_files, | ||
validate_downloaded_files=True, | ||
), | ||
) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
__version__ = "0.3.13-dev1" # pragma: no cover | ||
__version__ = "0.3.13-dev2" # pragma: no cover |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -91,16 +91,33 @@ class SharepointConnectionConfig(ConnectionConfig): | |
access_config: Secret[SharepointAccessConfig] | ||
permissions_config: Optional[SharepointPermissionsConfig] = None | ||
|
||
@requires_dependencies(["office365"], extras="sharepoint") | ||
@requires_dependencies(["msal"], extras="sharepoint") | ||
def get_client(self) -> "ClientContext": | ||
from office365.runtime.auth.client_credential import ClientCredential | ||
from msal import ConfidentialClientApplication | ||
from office365.sharepoint.client_context import ClientContext | ||
|
||
try: | ||
credentials = ClientCredential( | ||
self.client_id, self.access_config.get_secret_value().client_cred | ||
# Acquire the token using MSAL, similar to get_permissions_token | ||
app = ConfidentialClientApplication( | ||
authority=f"{self.permissions_config.authority_url.get_secret_value()}/" | ||
f"{self.permissions_config.permissions_tenant}", | ||
client_id=self.permissions_config.permissions_application_id, | ||
mateuszkuprowski marked this conversation as resolved.
Show resolved
Hide resolved
|
||
client_credential=self.permissions_config.permissions_client_cred.get_secret_value(), # noqa: E501 | ||
) | ||
token_result = app.acquire_token_for_client( | ||
scopes=[ | ||
f"https://{self.permissions_config.permissions_tenant}.sharepoint.com/.default" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. when I use SHAREPOINT_PERMISSIONS_TENANT value in Keeper, this line reports an error. After I remove the second part of the string and only keeps the first part, it works. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I might need a little more explanation on this not sure which part do you mean? Like it works without the /.default/ but it doesn't with it? |
||
] | ||
) | ||
site_client = ClientContext(self.site).with_credentials(credentials) | ||
if "access_token" not in token_result: | ||
raise SourceConnectionNetworkError( | ||
f"Failed to obtain token for SharePoint: \ | ||
{token_result.get('error_description', '')}" | ||
) | ||
access_token = token_result["access_token"] | ||
|
||
# Then set up the SharePoint client context with that token | ||
site_client = ClientContext(self.site).with_access_token(access_token) | ||
except Exception as e: | ||
logger.error(f"Couldn't set Sharepoint client: {e}") | ||
raise e | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
should we provide with a value for permissions_client_cred and permissions_application_id here? otherwise this integration test would fail in CI pipeline
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It won't actually fail (and it doesn't) as this connector can work in 2 modes, either with or without permissions. There's a separate e2e test for that in the test_e2e/dest/sharepoint.sh and sharepoint_with_permissions.sh