Skip to content

Commit 9ed9b69

Browse files
committed
feat: added ability to create indexes through the sdk
# Conflicts: # pyproject.toml
1 parent 1abc6a1 commit 9ed9b69

3 files changed

Lines changed: 857 additions & 22 deletions

File tree

src/uipath/_services/context_grounding_service.py

Lines changed: 304 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
from typing import Any, List, Optional, Tuple, Union
1+
import json
2+
from typing import Any, Dict, List, Optional, Tuple, Union
23

34
import httpx
45
from pydantic import TypeAdapter
@@ -9,6 +10,11 @@
910
from .._folder_context import FolderContext
1011
from .._utils import Endpoint, RequestSpec, header_folder, infer_bindings
1112
from .._utils.constants import (
13+
CONFLUENCE_DATA_SOURCE,
14+
DROPBOX_DATA_SOURCE,
15+
GOOGLE_DRIVE_DATA_SOURCE,
16+
LLMV4,
17+
ONEDRIVE_DATA_SOURCE,
1218
ORCHESTRATOR_STORAGE_BUCKET_DATA_SOURCE,
1319
)
1420
from ..models import IngestionInProgressException
@@ -312,6 +318,122 @@ async def retrieve_by_id_async(
312318

313319
return response.json()
314320

321+
@traced(name="contextgrounding_create_index", run_type="uipath")
@infer_bindings(resource_type="index")
def create_index(
    self,
    name: str,
    source: Dict[str, Any],
    description: Optional[str] = None,
    cron_expression: Optional[str] = None,
    time_zone_id: Optional[str] = None,
    advanced_ingestion: Optional[bool] = True,
    preprocessing_request: Optional[str] = LLMV4,
    folder_key: Optional[str] = None,
    folder_path: Optional[str] = None,
) -> ContextGroundingIndex:
    """Create a new context grounding index.

    Args:
        name (str): The name of the index to create.
        source (Dict[str, Any]): Source configuration dictionary; the "type"
            key selects the data source builder:
            - For buckets: type="bucket", bucket_name, folder_path, directory_path="/" (optional), file_type (optional)
            - For Google Drive: type="google_drive", connection_name, connection_id, leaf_folder_id, directory_path, folder_path, file_type (optional)
            - For Dropbox: type="dropbox", connection_name, connection_id, directory_path, folder_path, file_type (optional)
            - For OneDrive: type="onedrive", connection_name, connection_id, leaf_folder_id, directory_path, folder_path, file_type (optional)
            - For Confluence: type="confluence", connection_name, connection_id, space_id, directory_path, folder_path, file_type (optional)
        description (Optional[str]): Description of the index.
        cron_expression (Optional[str]): Cron expression for scheduled indexing (e.g., "0 0 18 ? * 2" for Tuesdays at 6 PM).
        time_zone_id (Optional[str]): Valid Windows Timezone ID for the cron expression (e.g., "UTC", "Pacific Standard Time", "GTB Standard Time").
        advanced_ingestion (Optional[bool]): Enable advanced ingestion with preprocessing. Defaults to True.
        preprocessing_request (Optional[str]): The OData type for preprocessing request. Defaults to LLMV4.
        folder_key (Optional[str]): The key of the folder where the index will be created.
        folder_path (Optional[str]): The path of the folder where the index will be created.

    Returns:
        ContextGroundingIndex: The created index information.

    Raises:
        ValueError: If ``source["type"]`` is unsupported or a field required
            by the selected data source type is missing.
    """
    spec = self._create_spec(
        name=name,
        description=description,
        source=source,
        cron_expression=cron_expression,
        time_zone_id=time_zone_id,
        # Callers may explicitly pass None; coerce back to the documented
        # defaults so _create_spec always receives concrete values.
        advanced_ingestion=advanced_ingestion
        if advanced_ingestion is not None
        else True,
        preprocessing_request=preprocessing_request or LLMV4,
        folder_path=folder_path,
        folder_key=folder_key,
    )

    response = self.request(
        spec.method,
        spec.endpoint,
        content=spec.content,
        headers=spec.headers,
    )

    return ContextGroundingIndex.model_validate(response.json())
378+
379+
@traced(name="contextgrounding_create_index", run_type="uipath")
@infer_bindings(resource_type="index")
async def create_index_async(
    self,
    name: str,
    source: Dict[str, Any],
    description: Optional[str] = None,
    cron_expression: Optional[str] = None,
    time_zone_id: Optional[str] = None,
    advanced_ingestion: Optional[bool] = True,
    preprocessing_request: Optional[str] = LLMV4,
    folder_key: Optional[str] = None,
    folder_path: Optional[str] = None,
) -> ContextGroundingIndex:
    """Asynchronously create a new context grounding index.

    Args:
        name (str): The name of the index to create.
        source (Dict[str, Any]): Source configuration dictionary; the "type"
            key selects the data source builder:
            - For buckets: type="bucket", bucket_name, folder_path, directory_path="/" (optional), file_type (optional)
            - For Google Drive: type="google_drive", connection_name, connection_id, leaf_folder_id, directory_path, folder_path, file_type (optional)
            - For Dropbox: type="dropbox", connection_name, connection_id, directory_path, folder_path, file_type (optional)
            - For OneDrive: type="onedrive", connection_name, connection_id, leaf_folder_id, directory_path, folder_path, file_type (optional)
            - For Confluence: type="confluence", connection_name, connection_id, space_id, directory_path, folder_path, file_type (optional)
        description (Optional[str]): Description of the index.
        cron_expression (Optional[str]): Cron expression for scheduled indexing (e.g., "0 0 18 ? * 2" for Tuesdays at 6 PM).
        time_zone_id (Optional[str]): Valid Windows Timezone ID for the cron expression (e.g., "UTC", "Pacific Standard Time", "GTB Standard Time").
        advanced_ingestion (Optional[bool]): Enable advanced ingestion with preprocessing. Defaults to True.
        preprocessing_request (Optional[str]): The OData type for preprocessing request. Defaults to LLMV4.
        folder_key (Optional[str]): The key of the folder where the index will be created.
        folder_path (Optional[str]): The path of the folder where the index will be created.

    Returns:
        ContextGroundingIndex: The created index information.
    """
    # Normalize explicitly-passed None values back to the documented defaults.
    effective_advanced = True if advanced_ingestion is None else advanced_ingestion
    effective_preprocessing = preprocessing_request or LLMV4

    spec = self._create_spec(
        name=name,
        description=description,
        source=source,
        cron_expression=cron_expression,
        time_zone_id=time_zone_id,
        advanced_ingestion=effective_advanced,
        preprocessing_request=effective_preprocessing,
        folder_path=folder_path,
        folder_key=folder_key,
    )

    response = await self.request_async(
        spec.method,
        spec.endpoint,
        content=spec.content,
        headers=spec.headers,
    )

    return ContextGroundingIndex.model_validate(response.json())
436+
315437
@traced(name="contextgrounding_search", run_type="uipath")
316438
def search(
317439
self,
def _create_spec(
    self,
    name: str,
    description: Optional[str],
    source: Dict[str, Any],
    advanced_ingestion: bool,
    preprocessing_request: str,
    cron_expression: Optional[str] = None,
    time_zone_id: Optional[str] = None,
    folder_key: Optional[str] = None,
    folder_path: Optional[str] = None,
) -> RequestSpec:
    """Build the request spec for an index-creation call.

    Args:
        name: Index name.
        description: Index description.
        source: Source configuration dictionary (see ``create_index``).
        advanced_ingestion: Whether to enable advanced ingestion with
            preprocessing.
        preprocessing_request: OData type for the preprocessing request.
        cron_expression: Optional cron expression for scheduled indexing.
        time_zone_id: Optional timezone for the cron expression.
        folder_key: Optional folder key.
        folder_path: Optional folder path.

    Returns:
        RequestSpec for the create index request.
    """
    resolved_folder_key = self._resolve_folder_key(folder_key, folder_path)

    # Restrict indexing to a single extension when file_type is given,
    # otherwise index every file.
    file_type = source.get("file_type")
    glob_pattern = f"**/*.{file_type}" if file_type else "**/*"

    data_source = self._build_data_source(
        source.get("type", "").lower(), source, glob_pattern
    )

    # Scheduled ingestion is optional; the timezone falls back to UTC.
    if cron_expression:
        data_source["indexer"] = {
            "cronExpression": cron_expression,
            "timeZoneId": time_zone_id or "UTC",
        }

    body: Dict[str, Any] = {
        "name": name,
        "description": description or "",
        "dataSource": data_source,
    }
    if advanced_ingestion and preprocessing_request:
        body["preProcessing"] = {
            "@odata.type": preprocessing_request,
        }

    return RequestSpec(
        method="POST",
        endpoint=Endpoint("/ecs_/v2/indexes/create"),
        content=json.dumps(body),
        headers={
            **header_folder(resolved_folder_key, None),
            "Content-Type": "application/json",
        },
    )
611758

759+
def _build_data_source(
760+
self, source_type: str, source: Dict[str, Any], file_name_glob: str
761+
) -> Dict[str, Any]:
762+
"""Build data source configuration based on type."""
763+
if source_type == "bucket":
764+
return self._build_bucket_data_source(source, file_name_glob)
765+
elif source_type in ["google_drive"]:
766+
return self._build_google_drive_data_source(source, file_name_glob)
767+
elif source_type == "dropbox":
768+
return self._build_dropbox_data_source(source, file_name_glob)
769+
elif source_type == "onedrive":
770+
return self._build_onedrive_data_source(source, file_name_glob)
771+
elif source_type == "confluence":
772+
return self._build_confluence_data_source(source, file_name_glob)
773+
else:
774+
raise ValueError(
775+
f"Unsupported data source type: {source_type}. "
776+
f"Supported types: bucket, google_drive, dropbox, onedrive, confluence"
777+
)
778+
779+
def _build_bucket_data_source(
780+
self, source: Dict[str, Any], file_name_glob: str
781+
) -> Dict[str, Any]:
782+
"""Build data source configuration for storage bucket."""
783+
required_fields = ["bucket_name", "folder_path"]
784+
for field in required_fields:
785+
if not source.get(field):
786+
raise ValueError(f"{field} is required for bucket data source")
787+
788+
return {
789+
"@odata.type": ORCHESTRATOR_STORAGE_BUCKET_DATA_SOURCE,
790+
"folder": source["folder_path"],
791+
"bucketName": source["bucket_name"],
792+
"fileNameGlob": file_name_glob,
793+
"directoryPath": source.get("directory_path", "/"),
794+
}
795+
796+
def _build_google_drive_data_source(
797+
self, source: Dict[str, Any], file_name_glob: str
798+
) -> Dict[str, Any]:
799+
"""Build data source configuration for Google Drive."""
800+
required_fields = [
801+
"connection_id",
802+
"connection_name",
803+
"leaf_folder_id",
804+
"directory_path",
805+
"folder_path",
806+
]
807+
for field in required_fields:
808+
if not source.get(field):
809+
raise ValueError(f"{field} is required for Google Drive data source")
810+
811+
return {
812+
"@odata.type": GOOGLE_DRIVE_DATA_SOURCE,
813+
"folder": source["folder_path"],
814+
"connectionId": source["connection_id"],
815+
"connectionName": source["connection_name"],
816+
"leafFolderId": source["leaf_folder_id"],
817+
"directoryPath": source["directory_path"],
818+
"fileNameGlob": file_name_glob,
819+
}
820+
821+
def _build_dropbox_data_source(
822+
self, source: Dict[str, Any], file_name_glob: str
823+
) -> Dict[str, Any]:
824+
"""Build data source configuration for Dropbox."""
825+
required_fields = [
826+
"connection_id",
827+
"connection_name",
828+
"directory_path",
829+
"folder_path",
830+
]
831+
for field in required_fields:
832+
if not source.get(field):
833+
raise ValueError(f"{field} is required for Dropbox data source")
834+
835+
return {
836+
"@odata.type": DROPBOX_DATA_SOURCE,
837+
"folder": source["folder_path"],
838+
"connectionId": source["connection_id"],
839+
"connectionName": source["connection_name"],
840+
"directoryPath": source["directory_path"],
841+
"fileNameGlob": file_name_glob,
842+
}
843+
844+
def _build_onedrive_data_source(
845+
self, source: Dict[str, Any], file_name_glob: str
846+
) -> Dict[str, Any]:
847+
"""Build data source configuration for OneDrive."""
848+
required_fields = [
849+
"connection_id",
850+
"connection_name",
851+
"leaf_folder_id",
852+
"directory_path",
853+
"folder_path",
854+
]
855+
for field in required_fields:
856+
if not source.get(field):
857+
raise ValueError(f"{field} is required for OneDrive data source")
858+
859+
return {
860+
"@odata.type": ONEDRIVE_DATA_SOURCE,
861+
"folder": source["folder_path"],
862+
"connectionId": source["connection_id"],
863+
"connectionName": source["connection_name"],
864+
"leafFolderId": source["leaf_folder_id"],
865+
"directoryPath": source["directory_path"],
866+
"fileNameGlob": file_name_glob,
867+
}
868+
869+
def _build_confluence_data_source(
870+
self, source: Dict[str, Any], file_name_glob: str
871+
) -> Dict[str, Any]:
872+
"""Build data source configuration for Confluence."""
873+
required_fields = [
874+
"connection_id",
875+
"connection_name",
876+
"directory_path",
877+
"folder_path",
878+
"space_id",
879+
]
880+
for field in required_fields:
881+
if not source.get(field):
882+
raise ValueError(f"{field} is required for Confluence data source")
883+
884+
return {
885+
"@odata.type": CONFLUENCE_DATA_SOURCE,
886+
"folder": source["folder_path"],
887+
"connectionId": source["connection_id"],
888+
"connectionName": source["connection_name"],
889+
"directoryPath": source["directory_path"],
890+
"fileNameGlob": file_name_glob,
891+
"spaceId": source["space_id"],
892+
}
893+
612894
def _retrieve_by_id_spec(
613895
self,
614896
id: str,

src/uipath/_utils/constants.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,18 @@
2525
# OData model namespace shared by all Context Grounding request types.
_VDBS_V20_MODELS = "#UiPath.Vdbs.Domain.Api.V20Models"

# Data source request types
ORCHESTRATOR_STORAGE_BUCKET_DATA_SOURCE = (
    f"{_VDBS_V20_MODELS}.StorageBucketDataSourceRequest"
)
CONFLUENCE_DATA_SOURCE = f"{_VDBS_V20_MODELS}.ConfluenceDataSourceRequest"
DROPBOX_DATA_SOURCE = f"{_VDBS_V20_MODELS}.DropboxDataSourceRequest"
GOOGLE_DRIVE_DATA_SOURCE = f"{_VDBS_V20_MODELS}.GoogleDriveDataSourceRequest"
ONEDRIVE_DATA_SOURCE = f"{_VDBS_V20_MODELS}.OneDriveDataSourceRequest"

# Preprocessing request types
LLMV3Mini = f"{_VDBS_V20_MODELS}.LLMV3MiniPreProcessingRequest"
LLMV4 = f"{_VDBS_V20_MODELS}.LLMV4PreProcessingRequest"
NativeV1 = f"{_VDBS_V20_MODELS}.NativeV1PreProcessingRequest"


# Local storage
TEMP_ATTACHMENTS_FOLDER = "uipath_attachments"

0 commit comments

Comments
 (0)