1 | | -from typing import Any, List, Optional, Tuple, Union |
| 1 | +import json |
| 2 | +from typing import Any, Dict, List, Optional, Tuple, Union |
2 | 3 |
3 | 4 | import httpx |
4 | 5 | from pydantic import TypeAdapter |
9 | 10 | from .._folder_context import FolderContext |
10 | 11 | from .._utils import Endpoint, RequestSpec, header_folder, infer_bindings |
11 | 12 | from .._utils.constants import ( |
| 13 | + CONFLUENCE_DATA_SOURCE, |
| 14 | + DROPBOX_DATA_SOURCE, |
| 15 | + GOOGLE_DRIVE_DATA_SOURCE, |
| 16 | + LLMV4, |
| 17 | + ONEDRIVE_DATA_SOURCE, |
12 | 18 | ORCHESTRATOR_STORAGE_BUCKET_DATA_SOURCE, |
13 | 19 | ) |
14 | 20 | from ..models import IngestionInProgressException |
@@ -312,6 +318,122 @@ async def retrieve_by_id_async( |
312 | 318 |
313 | 319 | return response.json() |
314 | 320 |
| 321 | + @traced(name="contextgrounding_create_index", run_type="uipath") |
| 322 | + @infer_bindings(resource_type="index") |
| 323 | + def create_index( |
| 324 | + self, |
| 325 | + name: str, |
| 326 | + source: Dict[str, Any], |
| 327 | + description: Optional[str] = None, |
| 328 | + cron_expression: Optional[str] = None, |
| 329 | + time_zone_id: Optional[str] = None, |
| 330 | + advanced_ingestion: Optional[bool] = True, |
| 331 | + preprocessing_request: Optional[str] = LLMV4, |
| 332 | + folder_key: Optional[str] = None, |
| 333 | + folder_path: Optional[str] = None, |
| 334 | + ) -> ContextGroundingIndex: |
| 335 | + """Create a new context grounding index. |
| 336 | +
| 337 | + Args: |
| 338 | + name (str): The name of the index to create. |
| 339 | + source (Dict[str, Any]): Source configuration dictionary: |
| 340 | + - For buckets: type="bucket", bucket_name, folder_path, directory_path="/" (optional), file_type (optional) |
| 341 | + - For Google Drive: type="google_drive", connection_name, connection_id, leaf_folder_id, directory_path, folder_path, file_type (optional) |
| 342 | + - For Dropbox: type="dropbox", connection_name, connection_id, directory_path, folder_path, file_type (optional) |
| 343 | + - For OneDrive: type="onedrive", connection_name, connection_id, leaf_folder_id, directory_path, folder_path, file_type (optional) |
| 344 | + - For Confluence: type="confluence", connection_name, connection_id, space_id, directory_path, folder_path, file_type (optional) |
| 345 | + description (Optional[str]): Description of the index. |
| 346 | + cron_expression (Optional[str]): Cron expression for scheduled indexing (e.g., "0 0 18 ? * 2" for Tuesdays at 6 PM). |
| 347 | + time_zone_id (Optional[str]): Valid Windows Timezone ID for the cron expression (e.g., "UTC", "Pacific Standard Time", "GTB Standard Time"). |
| 348 | + advanced_ingestion (Optional[bool]): Enable advanced ingestion with preprocessing. Defaults to True. |
| 349 | + preprocessing_request (Optional[str]): The OData type for preprocessing request. Defaults to LLMV4. |
| 350 | + folder_key (Optional[str]): The key of the folder where the index will be created. |
| 351 | + folder_path (Optional[str]): The path of the folder where the index will be created. |
| 352 | +
| 353 | + Returns: |
| 354 | + ContextGroundingIndex: The created index information. |
| 355 | + """ |
| 356 | + spec = self._create_spec( |
| 357 | + name=name, |
| 358 | + description=description, |
| 359 | + source=source, |
| 360 | + cron_expression=cron_expression, |
| 361 | + time_zone_id=time_zone_id, |
| 362 | + advanced_ingestion=advanced_ingestion |
| 363 | + if advanced_ingestion is not None |
| 364 | + else True, |
| 365 | + preprocessing_request=preprocessing_request or LLMV4, |
| 366 | + folder_path=folder_path, |
| 367 | + folder_key=folder_key, |
| 368 | + ) |
| 369 | + |
| 370 | + response = self.request( |
| 371 | + spec.method, |
| 372 | + spec.endpoint, |
| 373 | + content=spec.content, |
| 374 | + headers=spec.headers, |
| 375 | + ) |
| 376 | + |
| 377 | + return ContextGroundingIndex.model_validate(response.json()) |
| 378 | + |
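A minimal usage sketch for the bucket case, assuming `service` is an instance of this context grounding service; the index, bucket, and folder names below are placeholders:

```python
# Hypothetical example: index the PDFs in an Orchestrator storage bucket and
# re-index on a schedule. file_type="pdf" expands to fileNameGlob "**/*.pdf".
index = service.create_index(
    name="contracts-index",
    source={
        "type": "bucket",
        "bucket_name": "contracts-bucket",  # required for bucket sources
        "folder_path": "Shared",            # Orchestrator folder containing the bucket
        "file_type": "pdf",                 # optional filter
    },
    description="Contract PDFs for retrieval",
    cron_expression="0 0 18 ? * 2",         # optional scheduled re-indexing
    time_zone_id="UTC",
)
print(index)
```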
| 379 | + @traced(name="contextgrounding_create_index", run_type="uipath") |
| 380 | + @infer_bindings(resource_type="index") |
| 381 | + async def create_index_async( |
| 382 | + self, |
| 383 | + name: str, |
| 384 | + source: Dict[str, Any], |
| 385 | + description: Optional[str] = None, |
| 386 | + cron_expression: Optional[str] = None, |
| 387 | + time_zone_id: Optional[str] = None, |
| 388 | + advanced_ingestion: Optional[bool] = True, |
| 389 | + preprocessing_request: Optional[str] = LLMV4, |
| 390 | + folder_key: Optional[str] = None, |
| 391 | + folder_path: Optional[str] = None, |
| 392 | + ) -> ContextGroundingIndex: |
| 393 | + """Create a new context grounding index. |
| 394 | +
| 395 | + Args: |
| 396 | + name (str): The name of the index to create. |
| 397 | + source (Dict[str, Any]): Source configuration dictionary: |
| 398 | + - For buckets: type="bucket", bucket_name, folder_path, directory_path="/" (optional), file_type (optional) |
| 399 | + - For Google Drive: type="google_drive", connection_name, connection_id, leaf_folder_id, directory_path, folder_path, file_type (optional) |
| 400 | + - For Dropbox: type="dropbox", connection_name, connection_id, directory_path, folder_path, file_type (optional) |
| 401 | + - For OneDrive: type="onedrive", connection_name, connection_id, leaf_folder_id, directory_path, folder_path, file_type (optional) |
| 402 | + - For Confluence: type="confluence", connection_name, connection_id, space_id, directory_path, folder_path, file_type (optional) |
| 403 | + description (Optional[str]): Description of the index. |
| 404 | + cron_expression (Optional[str]): Cron expression for scheduled indexing (e.g., "0 0 18 ? * 2" for Tuesdays at 6 PM). |
| 405 | + time_zone_id (Optional[str]): Valid Windows Timezone ID for the cron expression (e.g., "UTC", "Pacific Standard Time", "GTB Standard Time"). |
| 406 | + advanced_ingestion (Optional[bool]): Enable advanced ingestion with preprocessing. Defaults to True. |
| 407 | + preprocessing_request (Optional[str]): The OData type for preprocessing request. Defaults to LLMV4. |
| 408 | + folder_key (Optional[str]): The key of the folder where the index will be created. |
| 409 | + folder_path (Optional[str]): The path of the folder where the index will be created. |
| 410 | +
| 411 | + Returns: |
| 412 | + ContextGroundingIndex: The created index information. |
| 413 | + """ |
| 414 | + spec = self._create_spec( |
| 415 | + name=name, |
| 416 | + description=description, |
| 417 | + source=source, |
| 418 | + cron_expression=cron_expression, |
| 419 | + time_zone_id=time_zone_id, |
| 420 | + advanced_ingestion=advanced_ingestion |
| 421 | + if advanced_ingestion is not None |
| 422 | + else True, |
| 423 | + preprocessing_request=preprocessing_request or LLMV4, |
| 424 | + folder_path=folder_path, |
| 425 | + folder_key=folder_key, |
| 426 | + ) |
| 427 | + |
| 428 | + response = await self.request_async( |
| 429 | + spec.method, |
| 430 | + spec.endpoint, |
| 431 | + content=spec.content, |
| 432 | + headers=spec.headers, |
| 433 | + ) |
| 434 | + |
| 435 | + return ContextGroundingIndex.model_validate(response.json()) |
| 436 | + |
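The async variant takes the same arguments; a sketch with a Confluence source, where the connection name and id are hypothetical stand-ins for an existing Integration Service connection:

```python
import asyncio

async def main() -> None:
    index = await service.create_index_async(
        name="wiki-index",
        source={
            "type": "confluence",
            "connection_name": "my-confluence",  # placeholder connection
            "connection_id": "conn-123",         # placeholder id
            "space_id": "ENG",                   # Confluence space to ingest
            "directory_path": "/",
            "folder_path": "Shared",
        },
    )
    print(index)

asyncio.run(main())
```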
315 | 437 | @traced(name="contextgrounding_search", run_type="uipath") |
316 | 438 | def search( |
317 | 439 | self, |
@@ -575,40 +697,200 @@ def _create_spec( |
575 | 697 | self, |
576 | 698 | name: str, |
577 | 699 | description: Optional[str], |
578 | | - storage_bucket_name: Optional[str], |
579 | | - file_name_glob: Optional[str], |
580 | | - storage_bucket_folder_path: Optional[str], |
| 700 | + source: Dict[str, Any], |
| 701 | + advanced_ingestion: bool, |
| 702 | + preprocessing_request: str, |
| 703 | + cron_expression: Optional[str] = None, |
| 704 | + time_zone_id: Optional[str] = None, |
581 | 705 | folder_key: Optional[str] = None, |
582 | 706 | folder_path: Optional[str] = None, |
583 | 707 | ) -> RequestSpec: |
| 708 | + """Create request spec for index creation. |
| 709 | +
| 710 | + Args: |
| 711 | + name: Index name |
| 712 | + description: Index description |
| 713 | + source: Source configuration dictionary |
| 714 | + advanced_ingestion: Whether to enable advanced ingestion with preprocessing |
| 715 | + preprocessing_request: OData type for preprocessing request |
| 716 | + cron_expression: Optional cron expression for scheduled indexing |
| 717 | + time_zone_id: Optional timezone for cron expression |
| 718 | + folder_key: Optional folder key |
| 719 | + folder_path: Optional folder path |
| 720 | +
| 721 | + Returns: |
| 722 | + RequestSpec for the create index request |
| 723 | + """ |
| 724 | + source_type = source.get("type", "").lower() |
| 725 | + |
584 | 726 | folder_key = self._resolve_folder_key(folder_key, folder_path) |
| 727 | + file_type = source.get("file_type") |
| 728 | + file_name_glob = f"**/*.{file_type}" if file_type else "**/*" |
| 729 | + |
| 730 | + data_source = self._build_data_source(source_type, source, file_name_glob) |
| 731 | + |
| 732 | + if cron_expression: |
| 733 | + data_source["indexer"] = { |
| 734 | + "cronExpression": cron_expression, |
| 735 | + "timeZoneId": time_zone_id or "UTC", |
| 736 | + } |
| 737 | + |
| 738 | + payload = { |
| 739 | + "name": name, |
| 740 | + "description": description or "", |
| 741 | + "dataSource": data_source, |
| 742 | + } |
| 743 | + |
| 744 | + if advanced_ingestion and preprocessing_request: |
| 745 | + payload["preProcessing"] = { |
| 746 | + "@odata.type": preprocessing_request, |
| 747 | + } |
585 | 748 |
586 | | - storage_bucket_folder_path = ( |
587 | | - storage_bucket_folder_path |
588 | | - if storage_bucket_folder_path |
589 | | - else self._folder_path |
590 | | - ) |
591 | 749 | return RequestSpec( |
592 | 750 | method="POST", |
593 | 751 | endpoint=Endpoint("/ecs_/v2/indexes/create"), |
594 | | - json={ |
595 | | - "name": name, |
596 | | - "description": description, |
597 | | - "dataSource": { |
598 | | - "@odata.type": ORCHESTRATOR_STORAGE_BUCKET_DATA_SOURCE, |
599 | | - "folder": storage_bucket_folder_path, |
600 | | - "bucketName": storage_bucket_name, |
601 | | - "fileNameGlob": file_name_glob |
602 | | - if file_name_glob is not None |
603 | | - else "*", |
604 | | - "directoryPath": "/", |
605 | | - }, |
606 | | - }, |
| 752 | + content=json.dumps(payload), |
607 | 753 | headers={ |
608 | 754 | **header_folder(folder_key, None), |
| 755 | + "Content-Type": "application/json", |
609 | 756 | }, |
610 | 757 | ) |
611 | 758 |
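For reference, a sketch of the JSON body `_create_spec` assembles for a scheduled bucket source; the literal values are placeholders, and `ORCHESTRATOR_STORAGE_BUCKET_DATA_SOURCE` / `LLMV4` are the constants imported at the top of the module:

```python
payload = {
    "name": "contracts-index",
    "description": "",
    "dataSource": {
        "@odata.type": ORCHESTRATOR_STORAGE_BUCKET_DATA_SOURCE,
        "folder": "Shared",
        "bucketName": "contracts-bucket",
        "fileNameGlob": "**/*.pdf",  # derived from file_type="pdf"
        "directoryPath": "/",
        # only present when cron_expression is supplied:
        "indexer": {"cronExpression": "0 0 18 ? * 2", "timeZoneId": "UTC"},
    },
    # only present when advanced ingestion is enabled:
    "preProcessing": {"@odata.type": LLMV4},
}
```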
| 759 | + def _build_data_source( |
| 760 | + self, source_type: str, source: Dict[str, Any], file_name_glob: str |
| 761 | + ) -> Dict[str, Any]: |
| 762 | + """Build data source configuration based on type.""" |
| 763 | + if source_type == "bucket": |
| 764 | + return self._build_bucket_data_source(source, file_name_glob) |
| 765 | + elif source_type == "google_drive": |
| 766 | + return self._build_google_drive_data_source(source, file_name_glob) |
| 767 | + elif source_type == "dropbox": |
| 768 | + return self._build_dropbox_data_source(source, file_name_glob) |
| 769 | + elif source_type == "onedrive": |
| 770 | + return self._build_onedrive_data_source(source, file_name_glob) |
| 771 | + elif source_type == "confluence": |
| 772 | + return self._build_confluence_data_source(source, file_name_glob) |
| 773 | + else: |
| 774 | + raise ValueError( |
| 775 | + f"Unsupported data source type: {source_type}. " |
| 776 | + f"Supported types: bucket, google_drive, dropbox, onedrive, confluence" |
| 777 | + ) |
| 778 | + |
| 779 | + def _build_bucket_data_source( |
| 780 | + self, source: Dict[str, Any], file_name_glob: str |
| 781 | + ) -> Dict[str, Any]: |
| 782 | + """Build data source configuration for storage bucket.""" |
| 783 | + required_fields = ["bucket_name", "folder_path"] |
| 784 | + for field in required_fields: |
| 785 | + if not source.get(field): |
| 786 | + raise ValueError(f"{field} is required for bucket data source") |
| 787 | + |
| 788 | + return { |
| 789 | + "@odata.type": ORCHESTRATOR_STORAGE_BUCKET_DATA_SOURCE, |
| 790 | + "folder": source["folder_path"], |
| 791 | + "bucketName": source["bucket_name"], |
| 792 | + "fileNameGlob": file_name_glob, |
| 793 | + "directoryPath": source.get("directory_path", "/"), |
| 794 | + } |
| 795 | + |
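Each `_build_*_data_source` helper validates its required keys before mapping the snake_case input to the camelCase fields the ECS API expects, so a malformed source fails fast instead of producing a bad request; a hypothetical illustration:

```python
# Omitting folder_path raises before any request is sent:
try:
    service._build_bucket_data_source({"type": "bucket", "bucket_name": "b"}, "**/*")
except ValueError as err:
    print(err)  # folder_path is required for bucket data source
```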
| 796 | + def _build_google_drive_data_source( |
| 797 | + self, source: Dict[str, Any], file_name_glob: str |
| 798 | + ) -> Dict[str, Any]: |
| 799 | + """Build data source configuration for Google Drive.""" |
| 800 | + required_fields = [ |
| 801 | + "connection_id", |
| 802 | + "connection_name", |
| 803 | + "leaf_folder_id", |
| 804 | + "directory_path", |
| 805 | + "folder_path", |
| 806 | + ] |
| 807 | + for field in required_fields: |
| 808 | + if not source.get(field): |
| 809 | + raise ValueError(f"{field} is required for Google Drive data source") |
| 810 | + |
| 811 | + return { |
| 812 | + "@odata.type": GOOGLE_DRIVE_DATA_SOURCE, |
| 813 | + "folder": source["folder_path"], |
| 814 | + "connectionId": source["connection_id"], |
| 815 | + "connectionName": source["connection_name"], |
| 816 | + "leafFolderId": source["leaf_folder_id"], |
| 817 | + "directoryPath": source["directory_path"], |
| 818 | + "fileNameGlob": file_name_glob, |
| 819 | + } |
| 820 | + |
| 821 | + def _build_dropbox_data_source( |
| 822 | + self, source: Dict[str, Any], file_name_glob: str |
| 823 | + ) -> Dict[str, Any]: |
| 824 | + """Build data source configuration for Dropbox.""" |
| 825 | + required_fields = [ |
| 826 | + "connection_id", |
| 827 | + "connection_name", |
| 828 | + "directory_path", |
| 829 | + "folder_path", |
| 830 | + ] |
| 831 | + for field in required_fields: |
| 832 | + if not source.get(field): |
| 833 | + raise ValueError(f"{field} is required for Dropbox data source") |
| 834 | + |
| 835 | + return { |
| 836 | + "@odata.type": DROPBOX_DATA_SOURCE, |
| 837 | + "folder": source["folder_path"], |
| 838 | + "connectionId": source["connection_id"], |
| 839 | + "connectionName": source["connection_name"], |
| 840 | + "directoryPath": source["directory_path"], |
| 841 | + "fileNameGlob": file_name_glob, |
| 842 | + } |
| 843 | + |
| 844 | + def _build_onedrive_data_source( |
| 845 | + self, source: Dict[str, Any], file_name_glob: str |
| 846 | + ) -> Dict[str, Any]: |
| 847 | + """Build data source configuration for OneDrive.""" |
| 848 | + required_fields = [ |
| 849 | + "connection_id", |
| 850 | + "connection_name", |
| 851 | + "leaf_folder_id", |
| 852 | + "directory_path", |
| 853 | + "folder_path", |
| 854 | + ] |
| 855 | + for field in required_fields: |
| 856 | + if not source.get(field): |
| 857 | + raise ValueError(f"{field} is required for OneDrive data source") |
| 858 | + |
| 859 | + return { |
| 860 | + "@odata.type": ONEDRIVE_DATA_SOURCE, |
| 861 | + "folder": source["folder_path"], |
| 862 | + "connectionId": source["connection_id"], |
| 863 | + "connectionName": source["connection_name"], |
| 864 | + "leafFolderId": source["leaf_folder_id"], |
| 865 | + "directoryPath": source["directory_path"], |
| 866 | + "fileNameGlob": file_name_glob, |
| 867 | + } |
| 868 | + |
| 869 | + def _build_confluence_data_source( |
| 870 | + self, source: Dict[str, Any], file_name_glob: str |
| 871 | + ) -> Dict[str, Any]: |
| 872 | + """Build data source configuration for Confluence.""" |
| 873 | + required_fields = [ |
| 874 | + "connection_id", |
| 875 | + "connection_name", |
| 876 | + "directory_path", |
| 877 | + "folder_path", |
| 878 | + "space_id", |
| 879 | + ] |
| 880 | + for field in required_fields: |
| 881 | + if not source.get(field): |
| 882 | + raise ValueError(f"{field} is required for Confluence data source") |
| 883 | + |
| 884 | + return { |
| 885 | + "@odata.type": CONFLUENCE_DATA_SOURCE, |
| 886 | + "folder": source["folder_path"], |
| 887 | + "connectionId": source["connection_id"], |
| 888 | + "connectionName": source["connection_name"], |
| 889 | + "directoryPath": source["directory_path"], |
| 890 | + "fileNameGlob": file_name_glob, |
| 891 | + "spaceId": source["space_id"], |
| 892 | + } |
| 893 | + |
612 | 894 | def _retrieve_by_id_spec( |
613 | 895 | self, |
614 | 896 | id: str, |