8383
ASPX_EXTENSION = ".aspx"

# Default endpoints. The connector constructor uses these as the defaults for
# its `authority_host`, `graph_api_host` and `sharepoint_domain_suffix`
# parameters, so callers can override them per instance.
DEFAULT_AUTHORITY_HOST = "https://login.microsoftonline.com"
DEFAULT_GRAPH_API_HOST = "https://graph.microsoft.com"
DEFAULT_SHAREPOINT_DOMAIN_SUFFIX = "sharepoint.com"

# Module-level Graph base URL derived from the default host. The host and the
# version path must be joined without any whitespace, otherwise every URL
# built from this value is malformed.
GRAPH_API_BASE = f"{DEFAULT_GRAPH_API_HOST}/v1.0"
# NOTE(review): presumably the retry budget and the HTTP statuses treated as
# transient for Graph requests; the retry loop is not visible here - confirm.
GRAPH_API_MAX_RETRIES = 5
GRAPH_API_RETRYABLE_STATUSES = frozenset({429, 500, 502, 503, 504})
8993
@@ -285,10 +289,12 @@ def load_certificate_from_pfx(pfx_data: bytes, password: str) -> CertificateData
285289
286290
def acquire_token_for_rest(
    msal_app: msal.ConfidentialClientApplication,
    sp_tenant_domain: str,
    sharepoint_domain_suffix: str = DEFAULT_SHAREPOINT_DOMAIN_SUFFIX,
) -> TokenResponse:
    """Acquire an app-only token scoped to the tenant's SharePoint host.

    Args:
        msal_app: MSAL confidential client used to request the token.
        sp_tenant_domain: Tenant name; it becomes the first label of the
            SharePoint host name.
        sharepoint_domain_suffix: Domain suffix appended after the tenant
            name. Defaults to ``DEFAULT_SHAREPOINT_DOMAIN_SUFFIX`` so that
            existing two-argument callers keep working.

    Returns:
        The MSAL result passed through ``TokenResponse.from_json``.
    """
    # The scope must be a single whitespace-free URL: the resource host
    # followed by "/.default".
    resource = f"https://{sp_tenant_domain}.{sharepoint_domain_suffix}"
    token = msal_app.acquire_token_for_client(scopes=[f"{resource}/.default"])
    return TokenResponse.from_json(token)
294300
@@ -403,12 +409,13 @@ def _download_via_graph_api(
403409 drive_id : str ,
404410 item_id : str ,
405411 bytes_allowed : int ,
412+ graph_api_base : str ,
406413) -> bytes :
407414 """Download a drive item via the Graph API /content endpoint with a byte cap.
408415
409416 Raises SizeCapExceeded if the cap is exceeded.
410417 """
411- url = f"{ GRAPH_API_BASE } /drives/{ drive_id } /items/{ item_id } /content"
418+ url = f"{ graph_api_base } /drives/{ drive_id } /items/{ item_id } /content"
412419 headers = {"Authorization" : f"Bearer { access_token } " }
413420 with requests .get (
414421 url , headers = headers , stream = True , timeout = REQUEST_TIMEOUT_SECONDS
@@ -429,6 +436,7 @@ def _convert_driveitem_to_document_with_permissions(
429436 drive_name : str ,
430437 ctx : ClientContext | None ,
431438 graph_client : GraphClient ,
439+ graph_api_base : str ,
432440 include_permissions : bool = False ,
433441 parent_hierarchy_raw_node_id : str | None = None ,
434442 access_token : str | None = None ,
@@ -485,6 +493,7 @@ def _convert_driveitem_to_document_with_permissions(
485493 driveitem .drive_id ,
486494 driveitem .id ,
487495 SHAREPOINT_CONNECTOR_SIZE_THRESHOLD ,
496+ graph_api_base = graph_api_base ,
488497 )
489498 except SizeCapExceeded :
490499 logger .warning (
@@ -804,6 +813,9 @@ def __init__(
804813 sites : list [str ] = [],
805814 include_site_pages : bool = True ,
806815 include_site_documents : bool = True ,
816+ authority_host : str = DEFAULT_AUTHORITY_HOST ,
817+ graph_api_host : str = DEFAULT_GRAPH_API_HOST ,
818+ sharepoint_domain_suffix : str = DEFAULT_SHAREPOINT_DOMAIN_SUFFIX ,
807819 ) -> None :
808820 self .batch_size = batch_size
809821 self .sites = list (sites )
@@ -819,6 +831,10 @@ def __init__(
819831 self ._cached_rest_ctx : ClientContext | None = None
820832 self ._cached_rest_ctx_url : str | None = None
821833 self ._cached_rest_ctx_created_at : float = 0.0
834+ self .authority_host = authority_host .rstrip ("/" )
835+ self .graph_api_host = graph_api_host .rstrip ("/" )
836+ self .graph_api_base = f"{ self .graph_api_host } /v1.0"
837+ self .sharepoint_domain_suffix = sharepoint_domain_suffix
822838
823839 def validate_connector_settings (self ) -> None :
824840 # Validate that at least one content type is enabled
@@ -875,8 +891,9 @@ def _create_rest_client_context(self, site_url: str) -> ClientContext:
875891
876892 msal_app = self .msal_app
877893 sp_tenant_domain = self .sp_tenant_domain
894+ sp_domain_suffix = self .sharepoint_domain_suffix
878895 self ._cached_rest_ctx = ClientContext (site_url ).with_access_token (
879- lambda : acquire_token_for_rest (msal_app , sp_tenant_domain )
896+ lambda : acquire_token_for_rest (msal_app , sp_tenant_domain , sp_domain_suffix )
880897 )
881898 self ._cached_rest_ctx_url = site_url
882899 self ._cached_rest_ctx_created_at = time .monotonic ()
@@ -1148,7 +1165,7 @@ def _fetch_site_pages(
11481165 site_id = site .id
11491166
11501167 page_url : str | None = (
1151- f"{ GRAPH_API_BASE } /sites/{ site_id } /pages/microsoft.graph.sitePage"
1168+ f"{ self . graph_api_base } /sites/{ site_id } " f" /pages/microsoft.graph.sitePage"
11521169 )
11531170 params : dict [str , str ] | None = {"$expand" : "canvasLayout" }
11541171 total_yielded = 0
@@ -1175,7 +1192,7 @@ def _acquire_token(self) -> dict[str, Any]:
11751192 raise RuntimeError ("MSAL app is not initialized" )
11761193
11771194 token = self .msal_app .acquire_token_for_client (
1178- scopes = ["https://graph.microsoft.com /.default" ]
1195+ scopes = [f" { self . graph_api_host } /.default" ]
11791196 )
11801197 return token
11811198
@@ -1248,7 +1265,7 @@ def _iter_drive_items_paged(
12481265 Performs BFS folder traversal manually, fetching one page of children
12491266 at a time so that memory usage stays bounded regardless of drive size.
12501267 """
1251- base = f"{ GRAPH_API_BASE } /drives/{ drive_id } "
1268+ base = f"{ self . graph_api_base } /drives/{ drive_id } "
12521269 if folder_path :
12531270 start_url = f"{ base } /root:/{ folder_path } :/children"
12541271 else :
@@ -1308,7 +1325,7 @@ def _iter_drive_items_delta(
13081325 """
13091326 use_timestamp_token = start is not None and start > _EPOCH
13101327
1311- initial_url = f"{ GRAPH_API_BASE } /drives/{ drive_id } /root/delta"
1328+ initial_url = f"{ self . graph_api_base } /drives/{ drive_id } /root/delta"
13121329 if use_timestamp_token :
13131330 assert start is not None # mypy
13141331 token = quote (start .isoformat (timespec = "seconds" ))
@@ -1354,7 +1371,7 @@ def _iter_delta_pages(
13541371 drive_id ,
13551372 )
13561373 yield from self ._iter_delta_pages (
1357- initial_url = f"{ GRAPH_API_BASE } /drives/{ drive_id } /root/delta" ,
1374+ initial_url = f"{ self . graph_api_base } /drives/{ drive_id } /root/delta" ,
13581375 drive_id = drive_id ,
13591376 start = start ,
13601377 end = end ,
@@ -1471,7 +1488,7 @@ def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None
14711488 sp_private_key = credentials .get ("sp_private_key" )
14721489 sp_certificate_password = credentials .get ("sp_certificate_password" )
14731490
1474- authority_url = f"https://login.microsoftonline.com /{ sp_directory_id } "
1491+ authority_url = f"{ self . authority_host } /{ sp_directory_id } "
14751492
14761493 if auth_method == SharepointAuthMethod .CERTIFICATE .value :
14771494 logger .info ("Using certificate authentication" )
@@ -1512,7 +1529,7 @@ def _acquire_token_for_graph() -> dict[str, Any]:
15121529 raise ConnectorValidationError ("MSAL app is not initialized" )
15131530
15141531 token = self .msal_app .acquire_token_for_client (
1515- scopes = ["https://graph.microsoft.com /.default" ]
1532+ scopes = [f" { self . graph_api_host } /.default" ]
15161533 )
15171534 if token is None :
15181535 raise ConnectorValidationError ("Failed to acquire token for graph" )
@@ -1941,6 +1958,7 @@ def _load_from_checkpoint(
19411958 self .graph_client ,
19421959 include_permissions = include_permissions ,
19431960 parent_hierarchy_raw_node_id = parent_hierarchy_url ,
1961+ graph_api_base = self .graph_api_base ,
19441962 access_token = access_token ,
19451963 )
19461964
0 commit comments