From 60dbe3bf83d8f3f378cfef58d8802907b37d1691 Mon Sep 17 00:00:00 2001
From: Jedr Blaszyk
Date: Mon, 8 Jul 2024 13:10:31 +0200
Subject: [PATCH 1/8] WIP docstrings work, constructor works

---
 Makefile                                            |  8 +-
 package/__init__.py                                 |  0
 package/generated/azure_blob_storage.py             | 58 +++++++++++
 package/generated/box.py                            | 53 ++++++++++
 package/generated/confluence.py                     | 97 +++++++++++++++
 package/generated/dir.py                            | 35 +++++++
 package/generated/dropbox.py                        | 69 +++++++++++++
 package/generated/github.py                         | 89 +++++++++++++++
 package/generated/gmail.py                          | 53 ++++++++++
 package/generated/google_cloud_storage.py           | 42 ++++++++
 package/generated/google_drive.py                   | 73 ++++++++++++
 package/generated/graphql.py                        | 84 ++++++++++++++
 package/generated/jira.py                           | 93 ++++++++++++++++
 package/generated/microsoft_teams.py                | 49 ++++++++++
 package/generated/mongodb.py                        | 68 +++++++++++
 package/generated/mssql.py                          | 78 +++++++++++++++
 package/generated/mysql.py                          | 69 +++++++++++++
 package/generated/network_drive.py                  | 67 +++++++++++++
 package/generated/notion.py                         | 50 ++++++++++
 package/generated/onedrive.py                       | 59 +++++++++++
 package/generated/oracle.py                         | 82 ++++++++++++++++
 package/generated/outlook.py                        | 86 ++++++++++++++++
 package/generated/postgresql.py                     | 74 ++++++++++++++
 package/generated/redis.py                          | 70 +++++++++++++
 package/generated/s3.py                             | 63 ++++++++++++
 package/generated/salesforce.py                     | 54 +++++++++++
 package/generated/servicenow.py                     | 64 ++++++++++++
 package/generated/sharepoint_online.py              | 90 +++++++++++++++++
 package/generated/sharepoint_server.py              | 77 +++++++++++++++
 package/generated/slack.py                          | 49 ++++++++++
 package/generated/zoom.py                           | 56 +++++++++++
 requirements/package-dev.txt                        |  2 +
 scripts/codegen/generate_wrappers.py                | 51 ++++++++++
 scripts/codegen/templates/datasource_wrapper.jinja2 | 30 ++++++
 34 files changed, 2041 insertions(+), 1 deletion(-)
 create mode 100644 package/__init__.py
 create mode 100644 package/generated/azure_blob_storage.py
 create mode 100644 package/generated/box.py
 create mode 100644 package/generated/confluence.py
 create mode 100644 package/generated/dir.py
 create mode 100644 package/generated/dropbox.py
 create mode 100644 package/generated/github.py
 create mode 100644 package/generated/gmail.py
 create mode 100644 package/generated/google_cloud_storage.py
 create mode 100644 package/generated/google_drive.py
 create mode 100644 package/generated/graphql.py
 create mode 100644 package/generated/jira.py
 create mode 100644 package/generated/microsoft_teams.py
 create mode 100644 package/generated/mongodb.py
 create mode 100644 package/generated/mssql.py
 create mode 100644 package/generated/mysql.py
 create mode 100644 package/generated/network_drive.py
 create mode 100644 package/generated/notion.py
 create mode 100644 package/generated/onedrive.py
 create mode 100644 package/generated/oracle.py
 create mode 100644 package/generated/outlook.py
 create mode 100644 package/generated/postgresql.py
 create mode 100644 package/generated/redis.py
 create mode 100644 package/generated/s3.py
 create mode 100644 package/generated/salesforce.py
 create mode 100644 package/generated/servicenow.py
 create mode 100644 package/generated/sharepoint_online.py
 create mode 100644 package/generated/sharepoint_server.py
 create mode 100644 package/generated/slack.py
 create mode 100644 package/generated/zoom.py
 create mode 100644 requirements/package-dev.txt
 create mode 100644 scripts/codegen/generate_wrappers.py
 create mode 100644 scripts/codegen/templates/datasource_wrapper.jinja2

diff --git a/Makefile b/Makefile
index b0531803e..7108a3ac7 100644
--- a/Makefile
+++ b/Makefile
@@ -27,7 +27,6 @@ bin/elastic-ingest: bin/python
 bin/black: bin/python
 	bin/pip install -r requirements/$(ARCH).txt
 	bin/pip install -r requirements/tests.txt
-
 bin/pytest: bin/python
 	bin/pip install -r requirements/$(ARCH).txt
@@ -85,3 +84,10 @@ docker-run:
 docker-push:
 	docker push $(DOCKER_IMAGE_NAME):$(VERSION)-SNAPSHOT
+
+bin/package-dev: requirements/package-dev.txt
+	bin/pip install -r requirements/$(ARCH).txt
+	bin/pip install -r requirements/package-dev.txt
+
+generate_wrappers: bin/package-dev
+	bin/python scripts/codegen/generate_wrappers.py
diff --git a/package/__init__.py b/package/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/package/generated/azure_blob_storage.py b/package/generated/azure_blob_storage.py
new file mode 100644
index 000000000..81adb6c39
--- /dev/null
+++ b/package/generated/azure_blob_storage.py
@@ -0,0 +1,58 @@
+from application.base import BaseDataSource
+
+
+class AzureBlobStorageDataSource(BaseDataSource):
+    """
+    AzureBlobStorageDataSource class generated for connecting to the data source.
+
+    Args:
+
+        account_name (str): Azure Blob Storage account name
+
+        account_key (str): Azure Blob Storage account key
+
+        blob_endpoint (str): Azure Blob Storage blob endpoint
+
+        containers (list): Azure Blob Storage containers
+
+        retry_count (int): Retries per request
+
+        concurrent_downloads (int): Maximum concurrent downloads
+
+        use_text_extraction_service (bool): Use text extraction service
+        - Requires a separate deployment of the Elastic Text Extraction Service. Requires that pipeline settings disable text extraction.
+
+    """
+
+    def __init__(
+        self,
+        account_name=None,
+        account_key=None,
+        blob_endpoint=None,
+        containers=None,
+        retry_count=None,
+        concurrent_downloads=None,
+        use_text_extraction_service=False,
+    ):
+        configuration = self.get_default_configuration()
+
+        # Apply the user provided configuration in the class constructor
+        args = locals()
+        for key in configuration.keys():
+            if args[key] is not None:
+                configuration[key]["value"] = args[key]
+
+        # Check if all fields marked as 'required' in config are present with values, if not raise an exception
+        for key, value in configuration.items():
+            if value["value"] is None and value.get("required", True):
+                raise ValueError(f"Missing required configuration field: {key}")
+
+        super().__init__(configuration)
+
+        self.account_name = account_name
+        self.account_key = account_key
+        self.blob_endpoint = blob_endpoint
+        self.containers = containers
+        self.retry_count = retry_count
+        self.concurrent_downloads = concurrent_downloads
+        self.use_text_extraction_service = use_text_extraction_service
diff --git a/package/generated/box.py b/package/generated/box.py
new file mode 100644
index 000000000..69371f074
--- /dev/null
+++ b/package/generated/box.py
@@ -0,0 +1,53 @@
+from application.base import BaseDataSource
+
+
+class BoxDataSource(BaseDataSource):
+    """
+    BoxDataSource class generated for connecting to the data source.
+
+    Args:
+
+        is_enterprise (str): Box Account
+
+        client_id (str): Client ID
+
+        client_secret (str): Client Secret
+
+        refresh_token (str): Refresh Token
+
+        enterprise_id (int): Enterprise ID
+
+        concurrent_downloads (int): Maximum concurrent downloads
+
+    """
+
+    def __init__(
+        self,
+        is_enterprise="box_free",
+        client_id=None,
+        client_secret=None,
+        refresh_token=None,
+        enterprise_id=None,
+        concurrent_downloads=None,
+    ):
+        configuration = self.get_default_configuration()
+
+        # Apply the user provided configuration in the class constructor
+        args = locals()
+        for key in configuration.keys():
+            if args[key] is not None:
+                configuration[key]["value"] = args[key]
+
+        # Check if all fields marked as 'required' in config are present with values, if not raise an exception
+        for key, value in configuration.items():
+            if value["value"] is None and value.get("required", True):
+                raise ValueError(f"Missing required configuration field: {key}")
+
+        super().__init__(configuration)
+
+        self.is_enterprise = is_enterprise
+        self.client_id = client_id
+        self.client_secret = client_secret
+        self.refresh_token = refresh_token
+        self.enterprise_id = enterprise_id
+        self.concurrent_downloads = concurrent_downloads
diff --git a/package/generated/confluence.py b/package/generated/confluence.py
new file mode 100644
index 000000000..e0f27e394
--- /dev/null
+++ b/package/generated/confluence.py
@@ -0,0 +1,97 @@
+from application.base import BaseDataSource
+
+
+class ConfluenceDataSource(BaseDataSource):
+    """
+    ConfluenceDataSource class generated for connecting to the data source.
+
+    Args:
+
+        data_source (str): Confluence data source
+
+        username (str): Confluence Server username
+
+        password (str): Confluence Server password
+
+        data_center_username (str): Confluence Data Center username
+
+        data_center_password (str): Confluence Data Center password
+
+        account_email (str): Confluence Cloud account email
+
+        api_token (str): Confluence Cloud API token
+
+        confluence_url (str): Confluence URL
+
+        spaces (list): Confluence space keys
+        - This configurable field is ignored when Advanced Sync Rules are used.
+
+        index_labels (bool): Enable indexing labels
+        - Enabling this will increase the amount of network calls to the source, and may decrease performance
+
+        ssl_enabled (bool): Enable SSL
+
+        ssl_ca (str): SSL certificate
+
+        retry_count (int): Retries per request
+
+        concurrent_downloads (int): Maximum concurrent downloads
+
+        use_document_level_security (bool): Enable document level security
+        - Document level security ensures identities and permissions set in Confluence are maintained in Elasticsearch. This enables you to restrict and personalize read-access users have to documents in this index. Access control syncs ensure this metadata is kept up to date in your Elasticsearch documents.
+
+        use_text_extraction_service (bool): Use text extraction service
+        - Requires a separate deployment of the Elastic Text Extraction Service. Requires that pipeline settings disable text extraction.
+ + """ + + def __init__( + self, + data_source="confluence_server", + username=None, + password=None, + data_center_username=None, + data_center_password=None, + account_email=None, + api_token=None, + confluence_url=None, + spaces=None, + index_labels=False, + ssl_enabled=False, + ssl_ca=None, + retry_count=None, + concurrent_downloads=None, + use_document_level_security=False, + use_text_extraction_service=False, + ): + configuration = self.get_default_configuration() + + # Apply the user provided configuration in the class constructor + args = locals() + for key in configuration.keys(): + if args[key] is not None: + configuration[key]["value"] = args[key] + + # Check if all fields marked as 'required' in config are present with values, if not raise an exception + for key, value in configuration.items(): + if value["value"] is None and value.get("required", True): + raise ValueError(f"Missing required configuration field: {key}") + + super().__init__(configuration) + + self.data_source = data_source + self.username = username + self.password = password + self.data_center_username = data_center_username + self.data_center_password = data_center_password + self.account_email = account_email + self.api_token = api_token + self.confluence_url = confluence_url + self.spaces = spaces + self.index_labels = index_labels + self.ssl_enabled = ssl_enabled + self.ssl_ca = ssl_ca + self.retry_count = retry_count + self.concurrent_downloads = concurrent_downloads + self.use_document_level_security = use_document_level_security + self.use_text_extraction_service = use_text_extraction_service diff --git a/package/generated/dir.py b/package/generated/dir.py new file mode 100644 index 000000000..b1ca6bff3 --- /dev/null +++ b/package/generated/dir.py @@ -0,0 +1,35 @@ +from application.base import BaseDataSource + + +class DirectoryDataSource(DirectoryDataSource): + """ + DirectoryDataSource class generated for connecting to the data source. + + Args: + + directory (str): Directory path + + pattern (str): File glob-like pattern + + """ + + def __init__( + self, directory="/Users/jedr/connectors/connectors/sources", pattern="**/*.*" + ): + configuration = self.get_default_configuration() + + # Apply the user provided configuration in the class constructor + args = locals() + for key in configuration.keys(): + if args[key] is not None: + configuration[key]["value"] = args[key] + + # Check if all fields marked as 'required' in config are present with values, if not raise an exception + for key, value in configuration.items(): + if value["value"] is None and value.get("required", True): + raise ValueError(f"Missing required configuration field: {key}") + + super().__init__(configuration) + + self.directory = directory + self.pattern = pattern diff --git a/package/generated/dropbox.py b/package/generated/dropbox.py new file mode 100644 index 000000000..49922a2a6 --- /dev/null +++ b/package/generated/dropbox.py @@ -0,0 +1,69 @@ +from application.base import BaseDataSource + + +class DropboxDataSource(DropboxDataSource): + """ + DropboxDataSource class generated for connecting to the data source. + + Args: + + path (str): Path to fetch files/folders + - Path is ignored when Advanced Sync Rules are used. 
+ + app_key (str): App Key + + app_secret (str): App secret + + refresh_token (str): Refresh token + + retry_count (int): Retries per request + + concurrent_downloads (int): Maximum concurrent downloads + + use_text_extraction_service (bool): Use text extraction service + - Requires a separate deployment of the Elastic Text Extraction Service. Requires that pipeline settings disable text extraction. + + use_document_level_security (bool): Enable document level security + - Document level security ensures identities and permissions set in Dropbox are maintained in Elasticsearch. This enables you to restrict and personalize read-access users and groups have to documents in this index. Access control syncs ensure this metadata is kept up to date in your Elasticsearch documents. + + include_inherited_users_and_groups (bool): Include groups and inherited users + - Include groups and inherited users when indexing permissions. Enabling this configurable field will cause a significant performance degradation. + + """ + + def __init__( + self, + path=None, + app_key=None, + app_secret=None, + refresh_token=None, + retry_count=None, + concurrent_downloads=None, + use_text_extraction_service=False, + use_document_level_security=False, + include_inherited_users_and_groups=False, + ): + configuration = self.get_default_configuration() + + # Apply the user provided configuration in the class constructor + args = locals() + for key in configuration.keys(): + if args[key] is not None: + configuration[key]["value"] = args[key] + + # Check if all fields marked as 'required' in config are present with values, if not raise an exception + for key, value in configuration.items(): + if value["value"] is None and value.get("required", True): + raise ValueError(f"Missing required configuration field: {key}") + + super().__init__(configuration) + + self.path = path + self.app_key = app_key + self.app_secret = app_secret + self.refresh_token = refresh_token + self.retry_count = retry_count + self.concurrent_downloads = concurrent_downloads + self.use_text_extraction_service = use_text_extraction_service + self.use_document_level_security = use_document_level_security + self.include_inherited_users_and_groups = include_inherited_users_and_groups diff --git a/package/generated/github.py b/package/generated/github.py new file mode 100644 index 000000000..601219390 --- /dev/null +++ b/package/generated/github.py @@ -0,0 +1,89 @@ +from application.base import BaseDataSource + + +class GitHubDataSource(GitHubDataSource): + """ + GitHubDataSource class generated for connecting to the data source. + + Args: + + data_source (str): Data source + + host (str): Server URL + + auth_method (str): Authentication method + + token (str): Token + + repo_type (str): Repository Type + - The Document Level Security feature is not available for the Other Repository Type + + org_name (str): Organization Name + + app_id (int): App ID + + private_key (str): App private key + + repositories (list): List of repositories + - This configurable field is ignored when Advanced Sync Rules are used. + + ssl_enabled (bool): Enable SSL + + ssl_ca (str): SSL certificate + + retry_count (int): Maximum retries per request + + use_text_extraction_service (bool): Use text extraction service + - Requires a separate deployment of the Elastic Text Extraction Service. Requires that pipeline settings disable text extraction. 
+
+        use_document_level_security (bool): Enable document level security
+        - Document level security ensures identities and permissions set in GitHub are maintained in Elasticsearch. This enables you to restrict and personalize read-access users and groups have to documents in this index. Access control syncs ensure this metadata is kept up to date in your Elasticsearch documents.
+
+    """
+
+    def __init__(
+        self,
+        data_source="github_server",
+        host=None,
+        auth_method="personal_access_token",
+        token=None,
+        repo_type="other",
+        org_name=None,
+        app_id=None,
+        private_key=None,
+        repositories=None,
+        ssl_enabled=False,
+        ssl_ca=None,
+        retry_count=3,
+        use_text_extraction_service=False,
+        use_document_level_security=False,
+    ):
+        configuration = self.get_default_configuration()
+
+        # Apply the user provided configuration in the class constructor
+        args = locals()
+        for key in configuration.keys():
+            if args[key] is not None:
+                configuration[key]["value"] = args[key]
+
+        # Check if all fields marked as 'required' in config are present with values, if not raise an exception
+        for key, value in configuration.items():
+            if value["value"] is None and value.get("required", True):
+                raise ValueError(f"Missing required configuration field: {key}")
+
+        super().__init__(configuration)
+
+        self.data_source = data_source
+        self.host = host
+        self.auth_method = auth_method
+        self.token = token
+        self.repo_type = repo_type
+        self.org_name = org_name
+        self.app_id = app_id
+        self.private_key = private_key
+        self.repositories = repositories
+        self.ssl_enabled = ssl_enabled
+        self.ssl_ca = ssl_ca
+        self.retry_count = retry_count
+        self.use_text_extraction_service = use_text_extraction_service
+        self.use_document_level_security = use_document_level_security
diff --git a/package/generated/gmail.py b/package/generated/gmail.py
new file mode 100644
index 000000000..52429c26f
--- /dev/null
+++ b/package/generated/gmail.py
@@ -0,0 +1,53 @@
+from application.base import BaseDataSource
+
+
+class GMailDataSource(BaseDataSource):
+    """
+    GMailDataSource class generated for connecting to the data source.
+
+    Args:
+
+        service_account_credentials (str): GMail service account JSON
+
+        subject (str): Google Workspace admin email
+        - Admin account email address
+
+        customer_id (str): Google customer id
+        - Google admin console -> Account -> Settings -> Customer Id
+
+        include_spam_and_trash (bool): Include spam and trash emails
+        - Will include spam and trash emails when set to true.
+
+        use_document_level_security (bool): Enable document level security
+        - Document level security ensures identities and permissions set in GMail are maintained in Elasticsearch. This enables you to restrict and personalize read-access users have to documents in this index. Access control syncs ensure this metadata is kept up to date in your Elasticsearch documents.
+ + """ + + def __init__( + self, + service_account_credentials=None, + subject=None, + customer_id=None, + include_spam_and_trash=False, + use_document_level_security=True, + ): + configuration = self.get_default_configuration() + + # Apply the user provided configuration in the class constructor + args = locals() + for key in configuration.keys(): + if args[key] is not None: + configuration[key]["value"] = args[key] + + # Check if all fields marked as 'required' in config are present with values, if not raise an exception + for key, value in configuration.items(): + if value["value"] is None and value.get("required", True): + raise ValueError(f"Missing required configuration field: {key}") + + super().__init__(configuration) + + self.service_account_credentials = service_account_credentials + self.subject = subject + self.customer_id = customer_id + self.include_spam_and_trash = include_spam_and_trash + self.use_document_level_security = use_document_level_security diff --git a/package/generated/google_cloud_storage.py b/package/generated/google_cloud_storage.py new file mode 100644 index 000000000..aa2934cae --- /dev/null +++ b/package/generated/google_cloud_storage.py @@ -0,0 +1,42 @@ +from application.base import BaseDataSource + + +class GoogleCloudStorageDataSource(GoogleCloudStorageDataSource): + """ + GoogleCloudStorageDataSource class generated for connecting to the data source. + + Args: + + buckets (list): Google Cloud Storage buckets + + service_account_credentials (str): Google Cloud service account JSON + + use_text_extraction_service (bool): Use text extraction service + - Requires a separate deployment of the Elastic Text Extraction Service. Requires that pipeline settings disable text extraction. + + """ + + def __init__( + self, + buckets=None, + service_account_credentials=None, + use_text_extraction_service=False, + ): + configuration = self.get_default_configuration() + + # Apply the user provided configuration in the class constructor + args = locals() + for key in configuration.keys(): + if args[key] is not None: + configuration[key]["value"] = args[key] + + # Check if all fields marked as 'required' in config are present with values, if not raise an exception + for key, value in configuration.items(): + if value["value"] is None and value.get("required", True): + raise ValueError(f"Missing required configuration field: {key}") + + super().__init__(configuration) + + self.buckets = buckets + self.service_account_credentials = service_account_credentials + self.use_text_extraction_service = use_text_extraction_service diff --git a/package/generated/google_drive.py b/package/generated/google_drive.py new file mode 100644 index 000000000..f94301281 --- /dev/null +++ b/package/generated/google_drive.py @@ -0,0 +1,73 @@ +from application.base import BaseDataSource + + +class GoogleDriveDataSource(GoogleDriveDataSource): + """ + GoogleDriveDataSource class generated for connecting to the data source. + + Args: + + service_account_credentials (str): Google Drive service account JSON + - This connectors authenticates as a service account to synchronize content from Google Drive. + + use_domain_wide_delegation_for_sync (bool): Use domain-wide delegation for data sync + - Enable domain-wide delegation to automatically sync content from all shared and personal drives in the Google workspace. This eliminates the need to manually share Google Drive data with your service account, though it may increase sync time. 
If disabled, only items and folders manually shared with the service account will be synced. Please refer to the connector documentation to ensure domain-wide delegation is correctly configured and has the appropriate scopes. + + google_workspace_admin_email_for_data_sync (str): Google Workspace admin email + - Provide the admin email to be used with domain-wide delegation for data sync. This email enables the connector to utilize the Admin Directory API for listing organization users. Please refer to the connector documentation to ensure domain-wide delegation is correctly configured and has the appropriate scopes. + + google_workspace_email_for_shared_drives_sync (str): Google Workspace email for syncing shared drives + - Provide the Google Workspace user email for discovery and syncing of shared drives. Only the shared drives this user has access to will be synced. + + use_document_level_security (bool): Enable document level security + - Document level security ensures identities and permissions set in Google Drive are maintained in Elasticsearch. This enables you to restrict and personalize read-access users and groups have to documents in this index. Access control syncs ensure this metadata is kept up to date in your Elasticsearch documents. + + google_workspace_admin_email (str): Google Workspace admin email + - In order to use Document Level Security you need to enable Google Workspace domain-wide delegation of authority for your service account. A service account with delegated authority can impersonate admin user with sufficient permissions to fetch all users and their corresponding permissions. Please refer to the connector documentation to ensure domain-wide delegation is correctly configured and has the appropriate scopes. + + max_concurrency (int): Maximum concurrent HTTP requests + - This setting determines the maximum number of concurrent HTTP requests sent to the Google API to fetch data. Increasing this value can improve data retrieval speed, but it may also place higher demands on system resources and network bandwidth. + + use_text_extraction_service (bool): Use text extraction service + - Requires a separate deployment of the Elastic Text Extraction Service. Requires that pipeline settings disable text extraction. 
+ + """ + + def __init__( + self, + service_account_credentials=None, + use_domain_wide_delegation_for_sync=False, + google_workspace_admin_email_for_data_sync=None, + google_workspace_email_for_shared_drives_sync=None, + use_document_level_security=False, + google_workspace_admin_email=None, + max_concurrency=None, + use_text_extraction_service=False, + ): + configuration = self.get_default_configuration() + + # Apply the user provided configuration in the class constructor + args = locals() + for key in configuration.keys(): + if args[key] is not None: + configuration[key]["value"] = args[key] + + # Check if all fields marked as 'required' in config are present with values, if not raise an exception + for key, value in configuration.items(): + if value["value"] is None and value.get("required", True): + raise ValueError(f"Missing required configuration field: {key}") + + super().__init__(configuration) + + self.service_account_credentials = service_account_credentials + self.use_domain_wide_delegation_for_sync = use_domain_wide_delegation_for_sync + self.google_workspace_admin_email_for_data_sync = ( + google_workspace_admin_email_for_data_sync + ) + self.google_workspace_email_for_shared_drives_sync = ( + google_workspace_email_for_shared_drives_sync + ) + self.use_document_level_security = use_document_level_security + self.google_workspace_admin_email = google_workspace_admin_email + self.max_concurrency = max_concurrency + self.use_text_extraction_service = use_text_extraction_service diff --git a/package/generated/googledrivedatasource.py b/package/generated/googledrivedatasource.py new file mode 100644 index 000000000..2177e7121 --- /dev/null +++ b/package/generated/googledrivedatasource.py @@ -0,0 +1,73 @@ +from application.base import BaseDataSource + + +class GoogleDriveDataSource(GoogleDriveDataSource): + """ + GoogleDriveDataSource class generated for connecting to the data source. + + Args: + + service_account_credentials (): Google Drive service account JSON + - This connectors authenticates as a service account to synchronize content from Google Drive. + + use_domain_wide_delegation_for_sync (): Use domain-wide delegation for data sync + - Enable domain-wide delegation to automatically sync content from all shared and personal drives in the Google workspace. This eliminates the need to manually share Google Drive data with your service account, though it may increase sync time. If disabled, only items and folders manually shared with the service account will be synced. Please refer to the connector documentation to ensure domain-wide delegation is correctly configured and has the appropriate scopes. + + google_workspace_admin_email_for_data_sync (): Google Workspace admin email + - Provide the admin email to be used with domain-wide delegation for data sync. This email enables the connector to utilize the Admin Directory API for listing organization users. Please refer to the connector documentation to ensure domain-wide delegation is correctly configured and has the appropriate scopes. + + google_workspace_email_for_shared_drives_sync (): Google Workspace email for syncing shared drives + - Provide the Google Workspace user email for discovery and syncing of shared drives. Only the shared drives this user has access to will be synced. + + use_document_level_security (): Enable document level security + - Document level security ensures identities and permissions set in Google Drive are maintained in Elasticsearch. 
This enables you to restrict and personalize read-access users and groups have to documents in this index. Access control syncs ensure this metadata is kept up to date in your Elasticsearch documents. + + google_workspace_admin_email (): Google Workspace admin email + - In order to use Document Level Security you need to enable Google Workspace domain-wide delegation of authority for your service account. A service account with delegated authority can impersonate admin user with sufficient permissions to fetch all users and their corresponding permissions. Please refer to the connector documentation to ensure domain-wide delegation is correctly configured and has the appropriate scopes. + + max_concurrency (): Maximum concurrent HTTP requests + - This setting determines the maximum number of concurrent HTTP requests sent to the Google API to fetch data. Increasing this value can improve data retrieval speed, but it may also place higher demands on system resources and network bandwidth. + + use_text_extraction_service (): Use text extraction service + - Requires a separate deployment of the Elastic Text Extraction Service. Requires that pipeline settings disable text extraction. + + """ + + def __init__( + self, + service_account_credentials=None, + use_domain_wide_delegation_for_sync=False, + google_workspace_admin_email_for_data_sync=None, + google_workspace_email_for_shared_drives_sync=None, + use_document_level_security=False, + google_workspace_admin_email=None, + max_concurrency=None, + use_text_extraction_service=False, + ): + configuration = self.get_default_configuration() + + # Apply the user provided configuration in the class constructor + args = locals() + for key in configuration.keys(): + if args[key] is not None: + configuration[key]["value"] = args[key] + + # Check if all fields marked as 'required' in config are present with values, if not raise an exception + for key, value in configuration.items(): + if value["value"] is None and value.get("required", True): + raise ValueError(f"Missing required configuration field: {key}") + + super().__init__(configuration) + + self.service_account_credentials = service_account_credentials + self.use_domain_wide_delegation_for_sync = use_domain_wide_delegation_for_sync + self.google_workspace_admin_email_for_data_sync = ( + google_workspace_admin_email_for_data_sync + ) + self.google_workspace_email_for_shared_drives_sync = ( + google_workspace_email_for_shared_drives_sync + ) + self.use_document_level_security = use_document_level_security + self.google_workspace_admin_email = google_workspace_admin_email + self.max_concurrency = max_concurrency + self.use_text_extraction_service = use_text_extraction_service diff --git a/package/generated/graphql.py b/package/generated/graphql.py new file mode 100644 index 000000000..a56d64849 --- /dev/null +++ b/package/generated/graphql.py @@ -0,0 +1,84 @@ +from application.base import BaseDataSource + + +class GraphQLDataSource(GraphQLDataSource): + """ + GraphQLDataSource class generated for connecting to the data source. 
+
+    Args:
+
+        http_endpoint (str): GraphQL HTTP endpoint
+
+        http_method (str): HTTP method for GraphQL requests
+
+        authentication_method (str): Authentication Method
+
+        username (str): Username
+
+        password (str): Password
+
+        token (str): Bearer Token
+
+        graphql_query (str): GraphQL Body
+
+        graphql_variables (str): GraphQL Variables
+
+        graphql_object_to_id_map (str): GraphQL Objects to ID mapping
+        - Specifies which GraphQL objects should be indexed as individual documents. This allows finer control over indexing, ensuring only relevant data sections from the GraphQL response are stored as separate documents. Use a JSON with key as the GraphQL object name and value as string field within the document, with the requirement that each document must have a distinct value for this field. Use '.' to provide full path of the object from the root of the response. For example {'organization.users.nodes': 'id'}
+
+        headers (str): Headers
+
+        pagination_model (str): Pagination model
+        - For cursor-based pagination, add 'pageInfo' and an 'after' argument variable in your query at the desired node (Pagination key). Use 'after' query argument with a variable to iterate through pages. Detailed examples and setup instructions are available in the docs.
+
+        pagination_key (str): Pagination key
+        - Specifies which GraphQL object is used for pagination. Use '.' to provide full path of the object from the root of the response. For example 'organization.users'
+
+        connection_timeout (int): Connection Timeout
+
+    """
+
+    def __init__(
+        self,
+        http_endpoint=None,
+        http_method="post",
+        authentication_method="none",
+        username=None,
+        password=None,
+        token=None,
+        graphql_query=None,
+        graphql_variables=None,
+        graphql_object_to_id_map=None,
+        headers=None,
+        pagination_model="no_pagination",
+        pagination_key=None,
+        connection_timeout=None,
+    ):
+        configuration = self.get_default_configuration()
+
+        # Apply the user provided configuration in the class constructor
+        args = locals()
+        for key in configuration.keys():
+            if args[key] is not None:
+                configuration[key]["value"] = args[key]
+
+        # Check if all fields marked as 'required' in config are present with values, if not raise an exception
+        for key, value in configuration.items():
+            if value["value"] is None and value.get("required", True):
+                raise ValueError(f"Missing required configuration field: {key}")
+
+        super().__init__(configuration)
+
+        self.http_endpoint = http_endpoint
+        self.http_method = http_method
+        self.authentication_method = authentication_method
+        self.username = username
+        self.password = password
+        self.token = token
+        self.graphql_query = graphql_query
+        self.graphql_variables = graphql_variables
+        self.graphql_object_to_id_map = graphql_object_to_id_map
+        self.headers = headers
+        self.pagination_model = pagination_model
+        self.pagination_key = pagination_key
+        self.connection_timeout = connection_timeout
diff --git a/package/generated/jira.py b/package/generated/jira.py
new file mode 100644
index 000000000..fca5b788d
--- /dev/null
+++ b/package/generated/jira.py
@@ -0,0 +1,93 @@
+from application.base import BaseDataSource
+
+
+class JiraDataSource(BaseDataSource):
+    """
+    JiraDataSource class generated for connecting to the data source.
+ + Args: + + data_source (str): Jira data source + + username (str): Jira Server username + + password (str): Jira Server password + + data_center_username (str): Jira Data Center username + + data_center_password (str): Jira Data Center password + + account_email (str): Jira Cloud email address + - Email address associated with Jira Cloud account. E.g. jane.doe@gmail.com + + api_token (str): Jira Cloud API token + + jira_url (str): Jira host url + + projects (list): Jira project keys + - This configurable field is ignored when Advanced Sync Rules are used. + + ssl_enabled (bool): Enable SSL + + ssl_ca (str): SSL certificate + + retry_count (int): Retries for failed requests + + concurrent_downloads (int): Maximum concurrent downloads + + use_document_level_security (bool): Enable document level security + - Document level security ensures identities and permissions set in Jira are maintained in Elasticsearch. This enables you to restrict and personalize read-access users and groups have to documents in this index. Access control syncs ensure this metadata is kept up to date in your Elasticsearch documents. Only 1000 users can be fetched for Jira Data Center. + + use_text_extraction_service (bool): Use text extraction service + - Requires a separate deployment of the Elastic Text Extraction Service. Requires that pipeline settings disable text extraction. + + """ + + def __init__( + self, + data_source="jira_cloud", + username=None, + password=None, + data_center_username=None, + data_center_password=None, + account_email=None, + api_token=None, + jira_url=None, + projects=None, + ssl_enabled=False, + ssl_ca=None, + retry_count=None, + concurrent_downloads=None, + use_document_level_security=False, + use_text_extraction_service=False, + ): + configuration = self.get_default_configuration() + + # Apply the user provided configuration in the class constructor + args = locals() + for key in configuration.keys(): + if args[key] is not None: + configuration[key]["value"] = args[key] + + # Check if all fields marked as 'required' in config are present with values, if not raise an exception + for key, value in configuration.items(): + if value["value"] is None and value.get("required", True): + raise ValueError(f"Missing required configuration field: {key}") + + super().__init__(configuration) + + self.data_source = data_source + self.username = username + self.password = password + self.data_center_username = data_center_username + self.data_center_password = data_center_password + self.account_email = account_email + self.api_token = api_token + self.jira_url = jira_url + self.projects = projects + self.ssl_enabled = ssl_enabled + self.ssl_ca = ssl_ca + self.retry_count = retry_count + self.concurrent_downloads = concurrent_downloads + self.use_document_level_security = use_document_level_security + self.use_text_extraction_service = use_text_extraction_service diff --git a/package/generated/microsoft_teams.py b/package/generated/microsoft_teams.py new file mode 100644 index 000000000..d735fa46d --- /dev/null +++ b/package/generated/microsoft_teams.py @@ -0,0 +1,49 @@ +from application.base import BaseDataSource + + +class MicrosoftTeamsDataSource(MicrosoftTeamsDataSource): + """ + MicrosoftTeamsDataSource class generated for connecting to the data source. 
+ + Args: + + tenant_id (str): Tenant ID + + client_id (str): Client ID + + secret_value (str): Secret value + + username (str): Username + + password (str): Password + + """ + + def __init__( + self, + tenant_id=None, + client_id=None, + secret_value=None, + username=None, + password=None, + ): + configuration = self.get_default_configuration() + + # Apply the user provided configuration in the class constructor + args = locals() + for key in configuration.keys(): + if args[key] is not None: + configuration[key]["value"] = args[key] + + # Check if all fields marked as 'required' in config are present with values, if not raise an exception + for key, value in configuration.items(): + if value["value"] is None and value.get("required", True): + raise ValueError(f"Missing required configuration field: {key}") + + super().__init__(configuration) + + self.tenant_id = tenant_id + self.client_id = client_id + self.secret_value = secret_value + self.username = username + self.password = password diff --git a/package/generated/mongodb.py b/package/generated/mongodb.py new file mode 100644 index 000000000..af5796799 --- /dev/null +++ b/package/generated/mongodb.py @@ -0,0 +1,68 @@ +from application.base import BaseDataSource + + +class MongoDataSource(MongoDataSource): + """ + MongoDataSource class generated for connecting to the data source. + + Args: + + host (str): Server hostname + + user (str): Username + + password (str): Password + + database (str): Database + + collection (str): Collection + + direct_connection (bool): Direct connection + + ssl_enabled (bool): SSL/TLS Connection + - This option establishes a secure connection to the MongoDB server using SSL/TLS encryption. Ensure that your MongoDB deployment supports SSL/TLS connections. Enable if MongoDB cluster uses DNS SRV records. + + ssl_ca (str): Certificate Authority (.pem) + - Specifies the root certificate from the Certificate Authority. The value of the certificate is used to validate the certificate presented by the MongoDB instance. + + tls_insecure (bool): Skip certificate verification + - This option skips certificate validation for TLS/SSL connections to your MongoDB server. We strongly recommend setting this option to 'disable'. 
+ + """ + + def __init__( + self, + host=None, + user=None, + password=None, + database=None, + collection=None, + direct_connection=False, + ssl_enabled=False, + ssl_ca=None, + tls_insecure=False, + ): + configuration = self.get_default_configuration() + + # Apply the user provided configuration in the class constructor + args = locals() + for key in configuration.keys(): + if args[key] is not None: + configuration[key]["value"] = args[key] + + # Check if all fields marked as 'required' in config are present with values, if not raise an exception + for key, value in configuration.items(): + if value["value"] is None and value.get("required", True): + raise ValueError(f"Missing required configuration field: {key}") + + super().__init__(configuration) + + self.host = host + self.user = user + self.password = password + self.database = database + self.collection = collection + self.direct_connection = direct_connection + self.ssl_enabled = ssl_enabled + self.ssl_ca = ssl_ca + self.tls_insecure = tls_insecure diff --git a/package/generated/mssql.py b/package/generated/mssql.py new file mode 100644 index 000000000..371d9a272 --- /dev/null +++ b/package/generated/mssql.py @@ -0,0 +1,78 @@ +from application.base import BaseDataSource + + +class MSSQLDataSource(MSSQLDataSource): + """ + MSSQLDataSource class generated for connecting to the data source. + + Args: + + host (str): Host + + port (int): Port + + username (str): Username + + password (str): Password + + database (str): Database + + tables (list): Comma-separated list of tables + - This configurable field is ignored when Advanced Sync Rules are used. + + fetch_size (int): Rows fetched per request + + retry_count (int): Retries per request + + schema (str): Schema + + ssl_enabled (bool): Enable SSL verification + + ssl_ca (str): SSL certificate + + validate_host (bool): Validate host + + """ + + def __init__( + self, + host=None, + port=None, + username=None, + password=None, + database=None, + tables="*", + fetch_size=None, + retry_count=None, + schema=None, + ssl_enabled=False, + ssl_ca=None, + validate_host=False, + ): + configuration = self.get_default_configuration() + + # Apply the user provided configuration in the class constructor + args = locals() + for key in configuration.keys(): + if args[key] is not None: + configuration[key]["value"] = args[key] + + # Check if all fields marked as 'required' in config are present with values, if not raise an exception + for key, value in configuration.items(): + if value["value"] is None and value.get("required", True): + raise ValueError(f"Missing required configuration field: {key}") + + super().__init__(configuration) + + self.host = host + self.port = port + self.username = username + self.password = password + self.database = database + self.tables = tables + self.fetch_size = fetch_size + self.retry_count = retry_count + self.schema = schema + self.ssl_enabled = ssl_enabled + self.ssl_ca = ssl_ca + self.validate_host = validate_host diff --git a/package/generated/mysql.py b/package/generated/mysql.py new file mode 100644 index 000000000..b50f96c15 --- /dev/null +++ b/package/generated/mysql.py @@ -0,0 +1,69 @@ +from application.base import BaseDataSource + + +class MySqlDataSource(MySqlDataSource): + """ + MySqlDataSource class generated for connecting to the data source. 
+ + Args: + + host (str): Host + + port (int): Port + + user (str): Username + + password (str): Password + + database (str): Database + + tables (list): Comma-separated list of tables + + ssl_enabled (bool): Enable SSL + + ssl_ca (str): SSL certificate + + fetch_size (int): Rows fetched per request + + retry_count (int): Retries per request + + """ + + def __init__( + self, + host=None, + port=None, + user=None, + password=None, + database=None, + tables="*", + ssl_enabled=False, + ssl_ca=None, + fetch_size=None, + retry_count=None, + ): + configuration = self.get_default_configuration() + + # Apply the user provided configuration in the class constructor + args = locals() + for key in configuration.keys(): + if args[key] is not None: + configuration[key]["value"] = args[key] + + # Check if all fields marked as 'required' in config are present with values, if not raise an exception + for key, value in configuration.items(): + if value["value"] is None and value.get("required", True): + raise ValueError(f"Missing required configuration field: {key}") + + super().__init__(configuration) + + self.host = host + self.port = port + self.user = user + self.password = password + self.database = database + self.tables = tables + self.ssl_enabled = ssl_enabled + self.ssl_ca = ssl_ca + self.fetch_size = fetch_size + self.retry_count = retry_count diff --git a/package/generated/network_drive.py b/package/generated/network_drive.py new file mode 100644 index 000000000..c01a645ff --- /dev/null +++ b/package/generated/network_drive.py @@ -0,0 +1,67 @@ +from application.base import BaseDataSource + + +class NASDataSource(NASDataSource): + """ + NASDataSource class generated for connecting to the data source. + + Args: + + username (str): Username + + password (str): Password + + server_ip (str): SMB IP + + server_port (int): SMB port + + drive_path (str): SMB path + + use_document_level_security (bool): Enable document level security + - Document level security ensures identities and permissions set in your network drive are mirrored in Elasticsearch. This enables you to restrict and personalize read-access users and groups have to documents in this index. Access control syncs ensure this metadata is kept up to date in your Elasticsearch documents. + + drive_type (str): Drive type + + identity_mappings (str): Path of CSV file containing users and groups SID (For Linux Network Drive) + + use_text_extraction_service (bool): Use text extraction service + - Requires a separate deployment of the Elastic Text Extraction Service. Requires that pipeline settings disable text extraction. 
+ + """ + + def __init__( + self, + username=None, + password=None, + server_ip=None, + server_port=None, + drive_path=None, + use_document_level_security=False, + drive_type="windows", + identity_mappings=None, + use_text_extraction_service=False, + ): + configuration = self.get_default_configuration() + + # Apply the user provided configuration in the class constructor + args = locals() + for key in configuration.keys(): + if args[key] is not None: + configuration[key]["value"] = args[key] + + # Check if all fields marked as 'required' in config are present with values, if not raise an exception + for key, value in configuration.items(): + if value["value"] is None and value.get("required", True): + raise ValueError(f"Missing required configuration field: {key}") + + super().__init__(configuration) + + self.username = username + self.password = password + self.server_ip = server_ip + self.server_port = server_port + self.drive_path = drive_path + self.use_document_level_security = use_document_level_security + self.drive_type = drive_type + self.identity_mappings = identity_mappings + self.use_text_extraction_service = use_text_extraction_service diff --git a/package/generated/notion.py b/package/generated/notion.py new file mode 100644 index 000000000..137d3445d --- /dev/null +++ b/package/generated/notion.py @@ -0,0 +1,50 @@ +from application.base import BaseDataSource + + +class NotionDataSource(NotionDataSource): + """ + NotionDataSource class generated for connecting to the data source. + + Args: + + notion_secret_key (str): Notion Secret Key + + databases (list): List of Databases + + pages (list): List of Pages + + index_comments (bool): Enable indexing comments + - Enabling this will increase the amount of network calls to the source, and may decrease performance + + concurrent_downloads (int): Maximum concurrent downloads + + """ + + def __init__( + self, + notion_secret_key=None, + databases=None, + pages=None, + index_comments=False, + concurrent_downloads=None, + ): + configuration = self.get_default_configuration() + + # Apply the user provided configuration in the class constructor + args = locals() + for key in configuration.keys(): + if args[key] is not None: + configuration[key]["value"] = args[key] + + # Check if all fields marked as 'required' in config are present with values, if not raise an exception + for key, value in configuration.items(): + if value["value"] is None and value.get("required", True): + raise ValueError(f"Missing required configuration field: {key}") + + super().__init__(configuration) + + self.notion_secret_key = notion_secret_key + self.databases = databases + self.pages = pages + self.index_comments = index_comments + self.concurrent_downloads = concurrent_downloads diff --git a/package/generated/onedrive.py b/package/generated/onedrive.py new file mode 100644 index 000000000..9a2b889e6 --- /dev/null +++ b/package/generated/onedrive.py @@ -0,0 +1,59 @@ +from application.base import BaseDataSource + + +class OneDriveDataSource(OneDriveDataSource): + """ + OneDriveDataSource class generated for connecting to the data source. 
+ + Args: + + client_id (str): Azure application Client ID + + client_secret (str): Azure application Client Secret + + tenant_id (str): Azure application Tenant ID + + retry_count (int): Maximum retries per request + + concurrent_downloads (int): Maximum concurrent downloads + + use_document_level_security (bool): Enable document level security + - Document level security ensures identities and permissions set in OneDrive are maintained in Elasticsearch. This enables you to restrict and personalize read-access users and groups have to documents in this index. Access control syncs ensure this metadata is kept up to date in your Elasticsearch documents. + + use_text_extraction_service (bool): Use text extraction service + - Requires a separate deployment of the Elastic Text Extraction Service. Requires that pipeline settings disable text extraction. + + """ + + def __init__( + self, + client_id=None, + client_secret=None, + tenant_id=None, + retry_count=None, + concurrent_downloads=None, + use_document_level_security=False, + use_text_extraction_service=False, + ): + configuration = self.get_default_configuration() + + # Apply the user provided configuration in the class constructor + args = locals() + for key in configuration.keys(): + if args[key] is not None: + configuration[key]["value"] = args[key] + + # Check if all fields marked as 'required' in config are present with values, if not raise an exception + for key, value in configuration.items(): + if value["value"] is None and value.get("required", True): + raise ValueError(f"Missing required configuration field: {key}") + + super().__init__(configuration) + + self.client_id = client_id + self.client_secret = client_secret + self.tenant_id = tenant_id + self.retry_count = retry_count + self.concurrent_downloads = concurrent_downloads + self.use_document_level_security = use_document_level_security + self.use_text_extraction_service = use_text_extraction_service diff --git a/package/generated/oracle.py b/package/generated/oracle.py new file mode 100644 index 000000000..fb5c8ae62 --- /dev/null +++ b/package/generated/oracle.py @@ -0,0 +1,82 @@ +from application.base import BaseDataSource + + +class OracleDataSource(OracleDataSource): + """ + OracleDataSource class generated for connecting to the data source. 
+ + Args: + + host (str): Host + + port (int): Port + + username (str): Username + + password (str): Password + + connection_source (str): Connection Source + - Select 'Service Name' option if connecting to a pluggable database + + sid (str): SID + + service_name (str): Service Name + + tables (list): Comma-separated list of tables + + fetch_size (int): Rows fetched per request + + retry_count (int): Retries per request + + oracle_protocol (str): Oracle connection protocol + + oracle_home (str): Path to Oracle Home + + wallet_configuration_path (str): Path to SSL Wallet configuration files + + """ + + def __init__( + self, + host=None, + port=None, + username=None, + password=None, + connection_source="sid", + sid=None, + service_name=None, + tables="*", + fetch_size=None, + retry_count=None, + oracle_protocol="TCP", + oracle_home="", + wallet_configuration_path=None, + ): + configuration = self.get_default_configuration() + + # Apply the user provided configuration in the class constructor + args = locals() + for key in configuration.keys(): + if args[key] is not None: + configuration[key]["value"] = args[key] + + # Check if all fields marked as 'required' in config are present with values, if not raise an exception + for key, value in configuration.items(): + if value["value"] is None and value.get("required", True): + raise ValueError(f"Missing required configuration field: {key}") + + super().__init__(configuration) + + self.host = host + self.port = port + self.username = username + self.password = password + self.connection_source = connection_source + self.sid = sid + self.service_name = service_name + self.tables = tables + self.fetch_size = fetch_size + self.retry_count = retry_count + self.oracle_protocol = oracle_protocol + self.oracle_home = oracle_home + self.wallet_configuration_path = wallet_configuration_path diff --git a/package/generated/outlook.py b/package/generated/outlook.py new file mode 100644 index 000000000..2e55e62e4 --- /dev/null +++ b/package/generated/outlook.py @@ -0,0 +1,86 @@ +from application.base import BaseDataSource + + +class OutlookDataSource(OutlookDataSource): + """ + OutlookDataSource class generated for connecting to the data source. + + Args: + + data_source (str): Outlook data source + + tenant_id (str): Tenant ID + + client_id (str): Client ID + + client_secret (str): Client Secret Value + + exchange_server (str): Exchange Server + - Exchange server's IP address. E.g. 127.0.0.1 + + active_directory_server (str): Active Directory Server + - Active Directory server's IP address. E.g. 127.0.0.1 + + username (str): Exchange server username + + password (str): Exchange server password + + domain (str): Exchange server domain name + - Domain name such as gmail.com, outlook.com + + ssl_enabled (bool): Enable SSL + + ssl_ca (str): SSL certificate + + use_text_extraction_service (bool): Use text extraction service + - Requires a separate deployment of the Elastic Text Extraction Service. Requires that pipeline settings disable text extraction. + + use_document_level_security (bool): Enable document level security + - Document level security ensures identities and permissions set in Outlook are maintained in Elasticsearch. This enables you to restrict and personalize read-access users and groups have to documents in this index. Access control syncs ensure this metadata is kept up to date in your Elasticsearch documents. 
+ + """ + + def __init__( + self, + data_source="outlook_cloud", + tenant_id=None, + client_id=None, + client_secret=None, + exchange_server=None, + active_directory_server=None, + username=None, + password=None, + domain=None, + ssl_enabled=False, + ssl_ca=None, + use_text_extraction_service=False, + use_document_level_security=False, + ): + configuration = self.get_default_configuration() + + # Apply the user provided configuration in the class constructor + args = locals() + for key in configuration.keys(): + if args[key] is not None: + configuration[key]["value"] = args[key] + + # Check if all fields marked as 'required' in config are present with values, if not raise an exception + for key, value in configuration.items(): + if value["value"] is None and value.get("required", True): + raise ValueError(f"Missing required configuration field: {key}") + + super().__init__(configuration) + + self.data_source = data_source + self.tenant_id = tenant_id + self.client_id = client_id + self.client_secret = client_secret + self.exchange_server = exchange_server + self.active_directory_server = active_directory_server + self.username = username + self.password = password + self.domain = domain + self.ssl_enabled = ssl_enabled + self.ssl_ca = ssl_ca + self.use_text_extraction_service = use_text_extraction_service + self.use_document_level_security = use_document_level_security diff --git a/package/generated/postgresql.py b/package/generated/postgresql.py new file mode 100644 index 000000000..1bd442e98 --- /dev/null +++ b/package/generated/postgresql.py @@ -0,0 +1,74 @@ +from application.base import BaseDataSource + + +class PostgreSQLDataSource(PostgreSQLDataSource): + """ + PostgreSQLDataSource class generated for connecting to the data source. + + Args: + + host (str): Host + + port (int): Port + + username (str): Username + + password (str): Password + + database (str): Database + + schema (str): Schema + + tables (list): Comma-separated list of tables + - This configurable field is ignored when Advanced Sync Rules are used. 
+ + fetch_size (int): Rows fetched per request + + retry_count (int): Retries per request + + ssl_enabled (bool): Enable SSL verification + + ssl_ca (str): SSL certificate + + """ + + def __init__( + self, + host=None, + port=None, + username=None, + password=None, + database=None, + schema=None, + tables="*", + fetch_size=None, + retry_count=None, + ssl_enabled=False, + ssl_ca=None, + ): + configuration = self.get_default_configuration() + + # Apply the user provided configuration in the class constructor + args = locals() + for key in configuration.keys(): + if args[key] is not None: + configuration[key]["value"] = args[key] + + # Check if all fields marked as 'required' in config are present with values, if not raise an exception + for key, value in configuration.items(): + if value["value"] is None and value.get("required", True): + raise ValueError(f"Missing required configuration field: {key}") + + super().__init__(configuration) + + self.host = host + self.port = port + self.username = username + self.password = password + self.database = database + self.schema = schema + self.tables = tables + self.fetch_size = fetch_size + self.retry_count = retry_count + self.ssl_enabled = ssl_enabled + self.ssl_ca = ssl_ca diff --git a/package/generated/redis.py b/package/generated/redis.py new file mode 100644 index 000000000..e42d3393a --- /dev/null +++ b/package/generated/redis.py @@ -0,0 +1,70 @@ +from application.base import BaseDataSource + + +class RedisDataSource(RedisDataSource): + """ + RedisDataSource class generated for connecting to the data source. + + Args: + + host (str): Host + + port (int): Port + + username (str): Username + + password (str): Password + + database (list): Comma-separated list of databases + - Databases are ignored when Advanced Sync Rules are used. + + ssl_enabled (bool): SSL/TLS Connection + - This option establishes a secure connection to Redis using SSL/TLS encryption. Ensure that your Redis deployment supports SSL/TLS connections. + + mutual_tls_enabled (bool): Mutual SSL/TLS Connection + - This option establishes a secure connection to Redis using mutual SSL/TLS encryption. Ensure that your Redis deployment supports mutual SSL/TLS connections. + + tls_certfile (str): client certificate file for SSL/TLS + - Specifies the client certificate from the Certificate Authority. The value of the certificate is used to validate the certificate presented by the Redis instance. + + tls_keyfile (str): client private key file for SSL/TLS + - Specifies the client private key from the Certificate Authority. The value of the key is used to validate the connection in the Redis instance. 
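+
+    Example:
+        A minimal usage sketch for a plain (non-TLS) connection. The host,
+        port, and credential values below are placeholders:
+
+            source = RedisDataSource(
+                host="localhost",
+                port=6379,
+                username="<username>",
+                password="<password>",
+                database="0",
+            )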
+ + """ + + def __init__( + self, + host=None, + port=None, + username=None, + password=None, + database="*", + ssl_enabled=False, + mutual_tls_enabled=False, + tls_certfile=None, + tls_keyfile=None, + ): + configuration = self.get_default_configuration() + + # Apply the user provided configuration in the class constructor + args = locals() + for key in configuration.keys(): + if args[key] is not None: + configuration[key]["value"] = args[key] + + # Check if all fields marked as 'required' in config are present with values, if not raise an exception + for key, value in configuration.items(): + if value["value"] is None and value.get("required", True): + raise ValueError(f"Missing required configuration field: {key}") + + super().__init__(configuration) + + self.host = host + self.port = port + self.username = username + self.password = password + self.database = database + self.ssl_enabled = ssl_enabled + self.mutual_tls_enabled = mutual_tls_enabled + self.tls_certfile = tls_certfile + self.tls_keyfile = tls_keyfile diff --git a/package/generated/s3.py b/package/generated/s3.py new file mode 100644 index 000000000..bec9f023e --- /dev/null +++ b/package/generated/s3.py @@ -0,0 +1,63 @@ +from application.base import BaseDataSource + + +class S3DataSource(S3DataSource): + """ + S3DataSource class generated for connecting to the data source. + + Args: + + buckets (list): AWS Buckets + - AWS Buckets are ignored when Advanced Sync Rules are used. + + aws_access_key_id (str): AWS Access Key Id + + aws_secret_access_key (str): AWS Secret Key + + read_timeout (int): Read timeout + + connect_timeout (int): Connection timeout + + max_attempts (int): Maximum retry attempts + + page_size (int): Maximum size of page + + use_text_extraction_service (bool): Use text extraction service + - Requires a separate deployment of the Elastic Text Extraction Service. Requires that pipeline settings disable text extraction. + + """ + + def __init__( + self, + buckets=None, + aws_access_key_id=None, + aws_secret_access_key=None, + read_timeout=None, + connect_timeout=None, + max_attempts=None, + page_size=None, + use_text_extraction_service=False, + ): + configuration = self.get_default_configuration() + + # Apply the user provided configuration in the class constructor + args = locals() + for key in configuration.keys(): + if args[key] is not None: + configuration[key]["value"] = args[key] + + # Check if all fields marked as 'required' in config are present with values, if not raise an exception + for key, value in configuration.items(): + if value["value"] is None and value.get("required", True): + raise ValueError(f"Missing required configuration field: {key}") + + super().__init__(configuration) + + self.buckets = buckets + self.aws_access_key_id = aws_access_key_id + self.aws_secret_access_key = aws_secret_access_key + self.read_timeout = read_timeout + self.connect_timeout = connect_timeout + self.max_attempts = max_attempts + self.page_size = page_size + self.use_text_extraction_service = use_text_extraction_service diff --git a/package/generated/salesforce.py b/package/generated/salesforce.py new file mode 100644 index 000000000..79959004f --- /dev/null +++ b/package/generated/salesforce.py @@ -0,0 +1,54 @@ +from application.base import BaseDataSource + + +class SalesforceDataSource(SalesforceDataSource): + """ + SalesforceDataSource class generated for connecting to the data source. + + Args: + + domain (str): Domain + - The domain for your Salesforce instance. 
If your Salesforce URL is 'foo.my.salesforce.com', the domain would be 'foo'. + + client_id (str): Client ID + - The client id for your OAuth2-enabled connected app. Also called 'consumer key' + + client_secret (str): Client Secret + - The client secret for your OAuth2-enabled connected app. Also called 'consumer secret' + + use_text_extraction_service (bool): Use text extraction service + - Requires a separate deployment of the Elastic Text Extraction Service. Requires that pipeline settings disable text extraction. + + use_document_level_security (bool): Enable document level security + - Document level security ensures identities and permissions set in Salesforce are maintained in Elasticsearch. This enables you to restrict and personalize read-access users and groups have to documents in this index. Access control syncs ensure this metadata is kept up to date in your Elasticsearch documents. + + """ + + def __init__( + self, + domain=None, + client_id=None, + client_secret=None, + use_text_extraction_service=False, + use_document_level_security=False, + ): + configuration = self.get_default_configuration() + + # Apply the user provided configuration in the class constructor + args = locals() + for key in configuration.keys(): + if args[key] is not None: + configuration[key]["value"] = args[key] + + # Check if all fields marked as 'required' in config are present with values, if not raise an exception + for key, value in configuration.items(): + if value["value"] is None and value.get("required", True): + raise ValueError(f"Missing required configuration field: {key}") + + super().__init__(configuration) + + self.domain = domain + self.client_id = client_id + self.client_secret = client_secret + self.use_text_extraction_service = use_text_extraction_service + self.use_document_level_security = use_document_level_security diff --git a/package/generated/servicenow.py b/package/generated/servicenow.py new file mode 100644 index 000000000..eab5c81fe --- /dev/null +++ b/package/generated/servicenow.py @@ -0,0 +1,64 @@ +from application.base import BaseDataSource + + +class ServiceNowDataSource(ServiceNowDataSource): + """ + ServiceNowDataSource class generated for connecting to the data source. + + Args: + + url (str): Service URL + + username (str): Username + + password (str): Password + + services (list): Comma-separated list of services + - List of services is ignored when Advanced Sync Rules are used. + + retry_count (int): Retries per request + + concurrent_downloads (int): Maximum concurrent downloads + + use_text_extraction_service (bool): Use text extraction service + - Requires a separate deployment of the Elastic Text Extraction Service. Requires that pipeline settings disable text extraction. + + use_document_level_security (bool): Enable document level security + - Document level security ensures identities and permissions set in ServiceNow are maintained in Elasticsearch. This enables you to restrict and personalize read-access users and groups have to documents in this index. Access control syncs ensure this metadata is kept up to date in your Elasticsearch documents. 
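+
+    Example:
+        A minimal usage sketch. The instance URL, credentials, and service
+        names below are placeholders:
+
+            source = ServiceNowDataSource(
+                url="https://<instance>.service-now.com",
+                username="<username>",
+                password="<password>",
+                services="incident,problem",
+            )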
+ + """ + + def __init__( + self, + url=None, + username=None, + password=None, + services="*", + retry_count=None, + concurrent_downloads=None, + use_text_extraction_service=False, + use_document_level_security=False, + ): + configuration = self.get_default_configuration() + + # Apply the user provided configuration in the class constructor + args = locals() + for key in configuration.keys(): + if args[key] is not None: + configuration[key]["value"] = args[key] + + # Check if all fields marked as 'required' in config are present with values, if not raise an exception + for key, value in configuration.items(): + if value["value"] is None and value.get("required", True): + raise ValueError(f"Missing required configuration field: {key}") + + super().__init__(configuration) + + self.url = url + self.username = username + self.password = password + self.services = services + self.retry_count = retry_count + self.concurrent_downloads = concurrent_downloads + self.use_text_extraction_service = use_text_extraction_service + self.use_document_level_security = use_document_level_security diff --git a/package/generated/sharepoint_online.py b/package/generated/sharepoint_online.py new file mode 100644 index 000000000..2f703c4c1 --- /dev/null +++ b/package/generated/sharepoint_online.py @@ -0,0 +1,90 @@ +from application.base import BaseDataSource + + +class SharepointOnlineDataSource(SharepointOnlineDataSource): + """ + SharepointOnlineDataSource class generated for connecting to the data source. + + Args: + + tenant_id (str): Tenant ID + + tenant_name (str): Tenant name + + client_id (str): Client ID + + secret_value (str): Secret value + + site_collections (list): Comma-separated list of sites + - A comma-separated list of sites to ingest data from. If enumerating all sites, use * to include all available sites, or specify a list of site names. Otherwise, specify a list of site paths. + + enumerate_all_sites (bool): Enumerate all sites? + - If enabled, sites will be fetched in bulk, then filtered down to the configured list of sites. This is efficient when syncing many sites. If disabled, each configured site will be fetched with an individual request. This is efficient when syncing fewer sites. + + fetch_subsites (bool): Fetch sub-sites of configured sites? + - Whether subsites of the configured site(s) should be automatically fetched. + + use_text_extraction_service (bool): Use text extraction service + - Requires a separate deployment of the Elastic Text Extraction Service. Requires that pipeline settings disable text extraction. + + use_document_level_security (bool): Enable document level security + - Document level security ensures identities and permissions set in Sharepoint Online are maintained in Elasticsearch. This enables you to restrict and personalize read-access users and groups have to documents in this index. Access control syncs ensure this metadata is kept up to date in your Elasticsearch documents. + + fetch_drive_item_permissions (bool): Fetch drive item permissions + - Enable this option to fetch drive item specific permissions. This setting can increase sync time. + + fetch_unique_page_permissions (bool): Fetch unique page permissions + - Enable this option to fetch unique page permissions. This setting can increase sync time. If this setting is disabled a page will inherit permissions from its parent site. + + fetch_unique_list_permissions (bool): Fetch unique list permissions + - Enable this option to fetch unique list permissions. This setting can increase sync time. 
If this setting is disabled a list will inherit permissions from its parent site. + + fetch_unique_list_item_permissions (bool): Fetch unique list item permissions + - Enable this option to fetch unique list item permissions. This setting can increase sync time. If this setting is disabled a list item will inherit permissions from its parent site. + + """ + + def __init__( + self, + tenant_id=None, + tenant_name=None, + client_id=None, + secret_value=None, + site_collections="*", + enumerate_all_sites=True, + fetch_subsites=True, + use_text_extraction_service=False, + use_document_level_security=False, + fetch_drive_item_permissions=True, + fetch_unique_page_permissions=True, + fetch_unique_list_permissions=True, + fetch_unique_list_item_permissions=True, + ): + configuration = self.get_default_configuration() + + # Apply the user provided configuration in the class constructor + args = locals() + for key in configuration.keys(): + if args[key] is not None: + configuration[key]["value"] = args[key] + + # Check if all fields marked as 'required' in config are present with values, if not raise an exception + for key, value in configuration.items(): + if value["value"] is None and value.get("required", True): + raise ValueError(f"Missing required configuration field: {key}") + + super().__init__(configuration) + + self.tenant_id = tenant_id + self.tenant_name = tenant_name + self.client_id = client_id + self.secret_value = secret_value + self.site_collections = site_collections + self.enumerate_all_sites = enumerate_all_sites + self.fetch_subsites = fetch_subsites + self.use_text_extraction_service = use_text_extraction_service + self.use_document_level_security = use_document_level_security + self.fetch_drive_item_permissions = fetch_drive_item_permissions + self.fetch_unique_page_permissions = fetch_unique_page_permissions + self.fetch_unique_list_permissions = fetch_unique_list_permissions + self.fetch_unique_list_item_permissions = fetch_unique_list_item_permissions diff --git a/package/generated/sharepoint_server.py b/package/generated/sharepoint_server.py new file mode 100644 index 000000000..b89b83a37 --- /dev/null +++ b/package/generated/sharepoint_server.py @@ -0,0 +1,77 @@ +from application.base import BaseDataSource + + +class SharepointServerDataSource(SharepointServerDataSource): + """ + SharepointServerDataSource class generated for connecting to the data source. + + Args: + + username (str): SharePoint Server username + + password (str): SharePoint Server password + + host_url (str): SharePoint host + + site_collections (list): Comma-separated list of SharePoint site collections to index + + ssl_enabled (bool): Enable SSL + + ssl_ca (str): SSL certificate + + retry_count (int): Retries per request + + use_text_extraction_service (bool): Use text extraction service + - Requires a separate deployment of the Elastic Text Extraction Service. Requires that pipeline settings disable text extraction. + + use_document_level_security (bool): Enable document level security + - Document level security ensures identities and permissions set in your SharePoint Server are mirrored in Elasticsearch. This enables you to restrict and personalize read-access users and groups have to documents in this index. Access control syncs ensure this metadata is kept up to date in your Elasticsearch documents. + + fetch_unique_list_permissions (bool): Fetch unique list permissions + - Enable this option to fetch unique list permissions. This setting can increase sync time. 
If this setting is disabled a list will inherit permissions from its parent site.
+
+        fetch_unique_list_item_permissions (bool): Fetch unique list item permissions
+        - Enable this option to fetch unique list item permissions. This setting can increase sync time. If this setting is disabled a list item will inherit permissions from its parent site.
+
+    """
+
+    def __init__(
+        self,
+        username=None,
+        password=None,
+        host_url=None,
+        site_collections=None,
+        ssl_enabled=False,
+        ssl_ca=None,
+        retry_count=None,
+        use_text_extraction_service=False,
+        use_document_level_security=False,
+        fetch_unique_list_permissions=True,
+        fetch_unique_list_item_permissions=True,
+    ):
+        configuration = self.get_default_configuration()
+
+        # Apply the user provided configuration in the class constructor
+        args = locals()
+        for key in configuration.keys():
+            if args[key] is not None:
+                configuration[key]["value"] = args[key]
+
+        # Check if all fields marked as 'required' in config are present with values, if not raise an exception
+        for key, value in configuration.items():
+            if value["value"] is None and value.get("required", True):
+                raise ValueError(f"Missing required configuration field: {key}")
+
+        super().__init__(configuration)
+
+        self.username = username
+        self.password = password
+        self.host_url = host_url
+        self.site_collections = site_collections
+        self.ssl_enabled = ssl_enabled
+        self.ssl_ca = ssl_ca
+        self.retry_count = retry_count
+        self.use_text_extraction_service = use_text_extraction_service
+        self.use_document_level_security = use_document_level_security
+        self.fetch_unique_list_permissions = fetch_unique_list_permissions
+        self.fetch_unique_list_item_permissions = fetch_unique_list_item_permissions
diff --git a/package/generated/slack.py b/package/generated/slack.py
new file mode 100644
index 000000000..1bd6a2dfd
--- /dev/null
+++ b/package/generated/slack.py
@@ -0,0 +1,49 @@
+from application.base import BaseDataSource
+
+
+class SlackDataSource(SlackDataSource):
+    """
+    SlackDataSource class generated for connecting to the data source.
+
+    Args:
+
+        token (str): Authentication Token
+        - The Slack Authentication Token for the Slack application you created. See the docs for details.
+
+        fetch_last_n_days (int): Days of message history to fetch
+        - How far back in time to request message history from Slack. Messages older than this will not be indexed.
+
+        auto_join_channels (bool): Automatically join channels
+        - The Slack application bot will only be able to read conversation history from channels it has joined. The default requires it to be manually invited to channels. Enabling this allows it to automatically invite itself into all public channels.
+
+        sync_users (bool): Sync users
+        - Whether or not Slack users should be indexed as documents in Elasticsearch.
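+
+    Example:
+        A minimal usage sketch. The token below is a placeholder for a real
+        Slack bot token:
+
+            source = SlackDataSource(
+                token="<slack-auth-token>",
+                fetch_last_n_days=90,
+            )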
+ + """ + + def __init__( + self, + token=None, + fetch_last_n_days=None, + auto_join_channels=False, + sync_users=True, + ): + configuration = self.get_default_configuration() + + # Apply the user provided configuration in the class constructor + args = locals() + for key in configuration.keys(): + if args[key] is not None: + configuration[key]["value"] = args[key] + + # Check if all fields marked as 'required' in config are present with values, if not raise an exception + for key, value in configuration.items(): + if value["value"] is None and value.get("required", True): + raise ValueError(f"Missing required configuration field: {key}") + + super().__init__(configuration) + + self.token = token + self.fetch_last_n_days = fetch_last_n_days + self.auto_join_channels = auto_join_channels + self.sync_users = sync_users diff --git a/package/generated/zoom.py b/package/generated/zoom.py new file mode 100644 index 000000000..0817c1939 --- /dev/null +++ b/package/generated/zoom.py @@ -0,0 +1,56 @@ +from application.base import BaseDataSource + + +class ZoomDataSource(ZoomDataSource): + """ + ZoomDataSource class generated for connecting to the data source. + + Args: + + account_id (str): Account ID + + client_id (str): Client ID + + client_secret (str): Client secret + + fetch_past_meeting_details (bool): Fetch past meeting details + - Enable this option to fetch past past meeting details. This setting can increase sync time. + + recording_age (int): Recording Age Limit (Months) + - How far back in time to request recordings from zoom. Recordings older than this will not be indexed. + + use_text_extraction_service (bool): Use text extraction service + - Requires a separate deployment of the Elastic Text Extraction Service. Requires that pipeline settings disable text extraction. 
+ + """ + + def __init__( + self, + account_id=None, + client_id=None, + client_secret=None, + fetch_past_meeting_details=False, + recording_age=None, + use_text_extraction_service=False, + ): + configuration = self.get_default_configuration() + + # Apply the user provided configuration in the class constructor + args = locals() + for key in configuration.keys(): + if args[key] is not None: + configuration[key]["value"] = args[key] + + # Check if all fields marked as 'required' in config are present with values, if not raise an exception + for key, value in configuration.items(): + if value["value"] is None and value.get("required", True): + raise ValueError(f"Missing required configuration field: {key}") + + super().__init__(configuration) + + self.account_id = account_id + self.client_id = client_id + self.client_secret = client_secret + self.fetch_past_meeting_details = fetch_past_meeting_details + self.recording_age = recording_age + self.use_text_extraction_service = use_text_extraction_service diff --git a/requirements/package-dev.txt b/requirements/package-dev.txt new file mode 100644 index 000000000..97231eaa3 --- /dev/null +++ b/requirements/package-dev.txt @@ -0,0 +1,2 @@ +Jinja2==3.1.4 +black==24.4.2 diff --git a/scripts/codegen/generate_wrappers.py b/scripts/codegen/generate_wrappers.py new file mode 100644 index 000000000..efc338e82 --- /dev/null +++ b/scripts/codegen/generate_wrappers.py @@ -0,0 +1,51 @@ +import os +import importlib +from jinja2 import Environment, FileSystemLoader +from black import format_file_in_place, FileMode, WriteBack +from pathlib import Path + +from connectors.config import _default_config + + +def generate_wrapper_class_code(template_env, data_source_class, class_name): + config = data_source_class.get_default_configuration() + base_class_name = data_source_class.__name__ + + params = [(key, value.get("value", None)) for key, value in config.items()] + + template = template_env.get_template("datasource_wrapper.jinja2") + class_code = template.render( + class_name=class_name, + base_class_name=base_class_name, + params=params, + config=config, + ) + return class_code + + +def write_class_to_file(class_code, class_name, output_dir): + file_path = os.path.join(output_dir, f"{class_name.lower()}.py") + with open(file_path, "w") as file: + file.write(class_code) + format_file_in_place(Path(file_path), fast=False, mode=FileMode(), write_back=WriteBack.YES) + + +def generate_and_write_wrapper_classes(sources, output_dir): + if not os.path.exists(output_dir): + os.makedirs(output_dir) + + env = Environment(loader=FileSystemLoader("scripts/codegen/templates")) + + for key, value in sources.items(): + module_name, class_name = value.split(":") + module = importlib.import_module(module_name) + data_source_class = getattr(module, class_name) + class_code = generate_wrapper_class_code(env, data_source_class, class_name) + write_class_to_file(class_code, key, output_dir) + + +# Example usage +connectors_config = _default_config() +data_source_classes = connectors_config["sources"] +output_dir = os.path.join("package", "generated") +generate_and_write_wrapper_classes(data_source_classes, output_dir) diff --git a/scripts/codegen/templates/datasource_wrapper.jinja2 b/scripts/codegen/templates/datasource_wrapper.jinja2 new file mode 100644 index 000000000..b63ff727a --- /dev/null +++ b/scripts/codegen/templates/datasource_wrapper.jinja2 @@ -0,0 +1,30 @@ +from application.base import BaseDataSource + +class {{ class_name }}({{ base_class_name }}): + """ + {{ 
class_name }} class generated for connecting to the data source. + + Args: + {% for param, value in params %} + {{ param }} ({{ config[param]['type'] }}): {{ config[param].label }}{% if config[param].tooltip %} + - {{ config[param].tooltip }}{% endif %} + {% endfor %} + """ + def __init__(self, {% for param, value in params %}{{ param }}={% if value is none %}None{% elif value is boolean %}{{ value }}{% else %}'{{ value }}'{% endif %}{% if not loop.last %}, {% endif %}{% endfor %}): + configuration = self.get_default_configuration() + + # Apply the user provided configuration in the class constructor + args = locals() + for key in configuration.keys(): + if args[key] is not None: + configuration[key]['value'] = args[key] + + # Check if all fields marked as 'required' in config are present with values, if not raise an exception + for key, value in configuration.items(): + if value['value'] is None and value.get('required', True): + raise ValueError(f"Missing required configuration field: {key}") + + super().__init__(configuration) + + {% for key in config.keys() %}self.{{ key }} = {{ key }} + {% endfor %} From 4395ad0843eee096f7d29329476bf7ab31d06291 Mon Sep 17 00:00:00 2001 From: Jedr Blaszyk Date: Mon, 8 Jul 2024 17:23:46 +0200 Subject: [PATCH 2/8] Progress with exposing connectors in package --- Makefile | 5 +- package/connectors/__init__.py | 0 package/connectors/connector_base.py | 49 ++++++++++++ package/connectors/generated/__init__.py | 0 .../generated/azure_blob_storage.py | 63 +++++++++++++++ package/{ => connectors}/generated/box.py | 30 +++++--- .../{ => connectors}/generated/confluence.py | 42 +++++----- package/connectors/generated/dir.py | 43 +++++++++++ package/connectors/generated/dropbox.py | 67 ++++++++++++++++ package/{ => connectors}/generated/github.py | 40 +++++----- package/connectors/generated/gmail.py | 56 ++++++++++++++ .../generated/google_cloud_storage.py | 43 +++++++++++ .../generated/google_drive.py | 40 +++++----- package/{ => connectors}/generated/graphql.py | 30 +++++--- package/{ => connectors}/generated/jira.py | 42 +++++----- .../connectors/generated/microsoft_teams.py | 59 ++++++++++++++ package/{ => connectors}/generated/mongodb.py | 28 ++++--- package/{ => connectors}/generated/mssql.py | 32 +++++--- package/{ => connectors}/generated/mysql.py | 32 +++++--- package/connectors/generated/network_drive.py | 65 ++++++++++++++++ package/{ => connectors}/generated/notion.py | 30 +++++--- package/connectors/generated/onedrive.py | 57 ++++++++++++++ package/{ => connectors}/generated/oracle.py | 34 ++++---- package/{ => connectors}/generated/outlook.py | 38 +++++---- .../{ => connectors}/generated/postgresql.py | 32 +++++--- package/{ => connectors}/generated/redis.py | 28 ++++--- package/{ => connectors}/generated/s3.py | 41 +++++----- package/connectors/generated/salesforce.py | 47 +++++++++++ package/connectors/generated/servicenow.py | 62 +++++++++++++++ .../generated/sharepoint_online.py | 40 +++++----- .../generated/sharepoint_server.py | 42 +++++----- package/{ => connectors}/generated/slack.py | 28 ++++--- package/{ => connectors}/generated/zoom.py | 33 ++++---- package/generated/azure_blob_storage.py | 58 -------------- package/generated/dir.py | 35 --------- package/generated/dropbox.py | 69 ----------------- package/generated/gmail.py | 53 ------------- package/generated/google_cloud_storage.py | 42 ---------- package/generated/googledrivedatasource.py | 73 ------------------ package/generated/microsoft_teams.py | 49 ------------ 
package/generated/network_drive.py | 67 ---------------- package/generated/onedrive.py | 59 -------------- package/generated/salesforce.py | 54 ------------- package/generated/servicenow.py | 64 --------------- scripts/codegen/generate_connectors.py | 77 +++++++++++++++++++ scripts/codegen/generate_connectors_init.py | 29 +++++++ scripts/codegen/generate_wrappers.py | 51 ------------ .../templates/connector_template.jinja2 | 38 +++++++++ .../templates/datasource_wrapper.jinja2 | 30 -------- .../codegen/templates/init_template.jinja2 | 11 +++ 50 files changed, 1138 insertions(+), 999 deletions(-) create mode 100644 package/connectors/__init__.py create mode 100644 package/connectors/connector_base.py create mode 100644 package/connectors/generated/__init__.py create mode 100644 package/connectors/generated/azure_blob_storage.py rename package/{ => connectors}/generated/box.py (51%) rename package/{ => connectors}/generated/confluence.py (58%) create mode 100644 package/connectors/generated/dir.py create mode 100644 package/connectors/generated/dropbox.py rename package/{ => connectors}/generated/github.py (52%) create mode 100644 package/connectors/generated/gmail.py create mode 100644 package/connectors/generated/google_cloud_storage.py rename package/{ => connectors}/generated/google_drive.py (69%) rename package/{ => connectors}/generated/graphql.py (75%) rename package/{ => connectors}/generated/jira.py (55%) create mode 100644 package/connectors/generated/microsoft_teams.py rename package/{ => connectors}/generated/mongodb.py (66%) rename package/{ => connectors}/generated/mssql.py (60%) rename package/{ => connectors}/generated/mysql.py (54%) create mode 100644 package/connectors/generated/network_drive.py rename package/{ => connectors}/generated/notion.py (50%) create mode 100644 package/connectors/generated/onedrive.py rename package/{ => connectors}/generated/oracle.py (62%) rename package/{ => connectors}/generated/outlook.py (53%) rename package/{ => connectors}/generated/postgresql.py (58%) rename package/{ => connectors}/generated/redis.py (68%) rename package/{ => connectors}/generated/s3.py (50%) create mode 100644 package/connectors/generated/salesforce.py create mode 100644 package/connectors/generated/servicenow.py rename package/{ => connectors}/generated/sharepoint_online.py (68%) rename package/{ => connectors}/generated/sharepoint_server.py (55%) rename package/{ => connectors}/generated/slack.py (60%) rename package/{ => connectors}/generated/zoom.py (54%) delete mode 100644 package/generated/azure_blob_storage.py delete mode 100644 package/generated/dir.py delete mode 100644 package/generated/dropbox.py delete mode 100644 package/generated/gmail.py delete mode 100644 package/generated/google_cloud_storage.py delete mode 100644 package/generated/googledrivedatasource.py delete mode 100644 package/generated/microsoft_teams.py delete mode 100644 package/generated/network_drive.py delete mode 100644 package/generated/onedrive.py delete mode 100644 package/generated/salesforce.py delete mode 100644 package/generated/servicenow.py create mode 100644 scripts/codegen/generate_connectors.py create mode 100644 scripts/codegen/generate_connectors_init.py delete mode 100644 scripts/codegen/generate_wrappers.py create mode 100644 scripts/codegen/templates/connector_template.jinja2 delete mode 100644 scripts/codegen/templates/datasource_wrapper.jinja2 create mode 100644 scripts/codegen/templates/init_template.jinja2 diff --git a/Makefile b/Makefile index 7108a3ac7..5a70d8a41 
100644 --- a/Makefile +++ b/Makefile @@ -89,5 +89,6 @@ bin/package-dev: requirements/package-dev.txt bin/pip install -r requirements/$(ARCH).txt bin/pip install -r requirements/package-dev.txt -generate_wrappers: bin/package-dev - bin/python scripts/codegen/generate_wrappers.py +generate_connector_package: bin/package-dev + bin/python scripts/codegen/generate_connectors.py + bin/python scripts/codegen/generate_connectors_init.py diff --git a/package/connectors/__init__.py b/package/connectors/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/package/connectors/connector_base.py b/package/connectors/connector_base.py new file mode 100644 index 000000000..ea320fa79 --- /dev/null +++ b/package/connectors/connector_base.py @@ -0,0 +1,49 @@ +# +# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +# or more contributor license agreements. Licensed under the Elastic License 2.0; +# you may not use this file except in compliance with the Elastic License 2.0. +# +import asyncio +from typing import AsyncIterator, Iterator + + +class ConnectorBase: + def __init__(self, data_provider): + + # Check if all fields marked as 'required' in config are present with values, if not raise an exception + for key, value in data_provider.configuration.items(): + if value.get("value") is None and value.get("required", True): + raise ValueError(f"Missing required configuration field: {key}") + + self.data_provider = data_provider + + def get_configuration(self): + return self.data_provider.configuration + + def lazy_load(self) -> Iterator[dict]: + async_gen = self.alazy_load() + loop = asyncio.get_event_loop() + + try: + while True: + item = loop.run_until_complete(self._next_item(async_gen)) + if item is None: + break + yield item + except StopAsyncIteration: + return + + async def _next_item(self, async_gen): + try: + return await async_gen.__anext__() + except StopAsyncIteration: + return None + + async def alazy_load( + self, + ) -> AsyncIterator[dict]: + async for doc, lazy_download in self.data_provider.get_docs(filtering=None): + # TODO: not all sources have timestamp field and support downloads + # data = await lazy_download(doit=True, timestamp=doc[TIMESTAMP_FIELD]) + # doc.update(data) + yield doc diff --git a/package/connectors/generated/__init__.py b/package/connectors/generated/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/package/connectors/generated/azure_blob_storage.py b/package/connectors/generated/azure_blob_storage.py new file mode 100644 index 000000000..dae06913a --- /dev/null +++ b/package/connectors/generated/azure_blob_storage.py @@ -0,0 +1,63 @@ +# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +# or more contributor license agreements. Licensed under the Elastic License 2.0; +# you may not use this file except in compliance with the Elastic License 2.0. +# +# +# This is a generated code. Do not modify directly. +# Run `make generate_connector_package` to update. + +from connectors.source import DataSourceConfiguration +from connectors.sources.azure_blob_storage import AzureBlobStorageDataSource +from package.connectors.connector_base import ConnectorBase + + +class AzureBlobStorageConnector(ConnectorBase): + """ + AzureBlobStorageConnector class generated for connecting to the data source. 
+ + Args: + + account_name (str): Azure Blob Storage account name + + account_key (str): Azure Blob Storage account key + + blob_endpoint (str): Azure Blob Storage blob endpoint + + containers (list): Azure Blob Storage containers + + retry_count (int): Retries per request + + concurrent_downloads (int): Maximum concurrent downloads + + """ + + def __init__( + self, + account_name=None, + account_key=None, + blob_endpoint=None, + containers=None, + retry_count=3, + concurrent_downloads=100, + ): + + configuration = AzureBlobStorageDataSource.get_default_configuration() + + # Apply the user provided configuration in the class constructor + args = locals() + for key in configuration.keys(): + if args.get(key) is not None: + configuration[key]["value"] = args[key] + + connector_configuration = DataSourceConfiguration(configuration) + + super().__init__( + data_provider=AzureBlobStorageDataSource(connector_configuration) + ) + + self.account_name = account_name + self.account_key = account_key + self.blob_endpoint = blob_endpoint + self.containers = containers + self.retry_count = retry_count + self.concurrent_downloads = concurrent_downloads diff --git a/package/generated/box.py b/package/connectors/generated/box.py similarity index 51% rename from package/generated/box.py rename to package/connectors/generated/box.py index 69371f074..f3b420991 100644 --- a/package/generated/box.py +++ b/package/connectors/generated/box.py @@ -1,9 +1,19 @@ -from application.base import BaseDataSource +# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +# or more contributor license agreements. Licensed under the Elastic License 2.0; +# you may not use this file except in compliance with the Elastic License 2.0. +# +# +# This is a generated code. Do not modify directly. +# Run `make generate_connector_package` to update. +from connectors.source import DataSourceConfiguration +from connectors.sources.box import BoxDataSource +from package.connectors.connector_base import ConnectorBase -class BoxDataSource(BoxDataSource): + +class BoxConnector(ConnectorBase): """ - BoxDataSource class generated for connecting to the data source. + BoxConnector class generated for connecting to the data source. 
Args: @@ -28,22 +38,20 @@ def __init__( client_secret=None, refresh_token=None, enterprise_id=None, - concurrent_downloads=None, + concurrent_downloads=15, ): - configuration = self.get_default_configuration() + + configuration = BoxDataSource.get_default_configuration() # Apply the user provided configuration in the class constructor args = locals() for key in configuration.keys(): - if args[key] is not None: + if args.get(key) is not None: configuration[key]["value"] = args[key] - # Check if all fields marked as 'required' in config are present with values, if not raise an exception - for key, value in configuration.items(): - if value["value"] is None and value.get("required", True): - raise ValueError(f"Missing required configuration field: {key}") + connector_configuration = DataSourceConfiguration(configuration) - super().__init__(configuration) + super().__init__(data_provider=BoxDataSource(connector_configuration)) self.is_enterprise = is_enterprise self.client_id = client_id diff --git a/package/generated/confluence.py b/package/connectors/generated/confluence.py similarity index 58% rename from package/generated/confluence.py rename to package/connectors/generated/confluence.py index e0f27e394..0810c24db 100644 --- a/package/generated/confluence.py +++ b/package/connectors/generated/confluence.py @@ -1,9 +1,19 @@ -from application.base import BaseDataSource +# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +# or more contributor license agreements. Licensed under the Elastic License 2.0; +# you may not use this file except in compliance with the Elastic License 2.0. +# +# +# This is a generated code. Do not modify directly. +# Run `make generate_connector_package` to update. +from connectors.source import DataSourceConfiguration +from connectors.sources.confluence import ConfluenceDataSource +from package.connectors.connector_base import ConnectorBase -class ConfluenceDataSource(ConfluenceDataSource): + +class ConfluenceConnector(ConnectorBase): """ - ConfluenceDataSource class generated for connecting to the data source. + ConfluenceConnector class generated for connecting to the data source. Args: @@ -37,12 +47,6 @@ class ConfluenceDataSource(ConfluenceDataSource): concurrent_downloads (int): Maximum concurrent downloads - use_document_level_security (bool): Enable document level security - - Document level security ensures identities and permissions set in confluence are maintained in Elasticsearch. This enables you to restrict and personalize read-access users have to documents in this index. Access control syncs ensure this metadata is kept up to date in your Elasticsearch documents. - - use_text_extraction_service (bool): Use text extraction service - - Requires a separate deployment of the Elastic Text Extraction Service. Requires that pipeline settings disable text extraction. 
- """ def __init__( @@ -59,25 +63,21 @@ def __init__( index_labels=False, ssl_enabled=False, ssl_ca=None, - retry_count=None, - concurrent_downloads=None, - use_document_level_security=False, - use_text_extraction_service=False, + retry_count=3, + concurrent_downloads=50, ): - configuration = self.get_default_configuration() + + configuration = ConfluenceDataSource.get_default_configuration() # Apply the user provided configuration in the class constructor args = locals() for key in configuration.keys(): - if args[key] is not None: + if args.get(key) is not None: configuration[key]["value"] = args[key] - # Check if all fields marked as 'required' in config are present with values, if not raise an exception - for key, value in configuration.items(): - if value["value"] is None and value.get("required", True): - raise ValueError(f"Missing required configuration field: {key}") + connector_configuration = DataSourceConfiguration(configuration) - super().__init__(configuration) + super().__init__(data_provider=ConfluenceDataSource(connector_configuration)) self.data_source = data_source self.username = username @@ -93,5 +93,3 @@ def __init__( self.ssl_ca = ssl_ca self.retry_count = retry_count self.concurrent_downloads = concurrent_downloads - self.use_document_level_security = use_document_level_security - self.use_text_extraction_service = use_text_extraction_service diff --git a/package/connectors/generated/dir.py b/package/connectors/generated/dir.py new file mode 100644 index 000000000..bffb760f8 --- /dev/null +++ b/package/connectors/generated/dir.py @@ -0,0 +1,43 @@ +# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +# or more contributor license agreements. Licensed under the Elastic License 2.0; +# you may not use this file except in compliance with the Elastic License 2.0. +# +# +# This is a generated code. Do not modify directly. +# Run `make generate_connector_package` to update. + +from connectors.source import DataSourceConfiguration +from connectors.sources.directory import DirectoryDataSource +from package.connectors.connector_base import ConnectorBase + + +class DirectoryConnector(ConnectorBase): + """ + DirectoryConnector class generated for connecting to the data source. + + Args: + + directory (str): Directory path + + pattern (str): File glob-like pattern + + """ + + def __init__( + self, directory="/Users/jedr/connectors/connectors/sources", pattern="**/*.*" + ): + + configuration = DirectoryDataSource.get_default_configuration() + + # Apply the user provided configuration in the class constructor + args = locals() + for key in configuration.keys(): + if args.get(key) is not None: + configuration[key]["value"] = args[key] + + connector_configuration = DataSourceConfiguration(configuration) + + super().__init__(data_provider=DirectoryDataSource(connector_configuration)) + + self.directory = directory + self.pattern = pattern diff --git a/package/connectors/generated/dropbox.py b/package/connectors/generated/dropbox.py new file mode 100644 index 000000000..48f38d2a4 --- /dev/null +++ b/package/connectors/generated/dropbox.py @@ -0,0 +1,67 @@ +# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +# or more contributor license agreements. Licensed under the Elastic License 2.0; +# you may not use this file except in compliance with the Elastic License 2.0. +# +# +# This is a generated code. Do not modify directly. +# Run `make generate_connector_package` to update. 
+ +from connectors.source import DataSourceConfiguration +from connectors.sources.dropbox import DropboxDataSource +from package.connectors.connector_base import ConnectorBase + + +class DropboxConnector(ConnectorBase): + """ + DropboxConnector class generated for connecting to the data source. + + Args: + + path (str): Path to fetch files/folders + - Path is ignored when Advanced Sync Rules are used. + + app_key (str): App Key + + app_secret (str): App secret + + refresh_token (str): Refresh token + + retry_count (int): Retries per request + + concurrent_downloads (int): Maximum concurrent downloads + + include_inherited_users_and_groups (bool): Include groups and inherited users + - Include groups and inherited users when indexing permissions. Enabling this configurable field will cause a significant performance degradation. + + """ + + def __init__( + self, + path=None, + app_key=None, + app_secret=None, + refresh_token=None, + retry_count=3, + concurrent_downloads=100, + include_inherited_users_and_groups=False, + ): + + configuration = DropboxDataSource.get_default_configuration() + + # Apply the user provided configuration in the class constructor + args = locals() + for key in configuration.keys(): + if args.get(key) is not None: + configuration[key]["value"] = args[key] + + connector_configuration = DataSourceConfiguration(configuration) + + super().__init__(data_provider=DropboxDataSource(connector_configuration)) + + self.path = path + self.app_key = app_key + self.app_secret = app_secret + self.refresh_token = refresh_token + self.retry_count = retry_count + self.concurrent_downloads = concurrent_downloads + self.include_inherited_users_and_groups = include_inherited_users_and_groups diff --git a/package/generated/github.py b/package/connectors/generated/github.py similarity index 52% rename from package/generated/github.py rename to package/connectors/generated/github.py index 601219390..a8dc61315 100644 --- a/package/generated/github.py +++ b/package/connectors/generated/github.py @@ -1,9 +1,19 @@ -from application.base import BaseDataSource +# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +# or more contributor license agreements. Licensed under the Elastic License 2.0; +# you may not use this file except in compliance with the Elastic License 2.0. +# +# +# This is a generated code. Do not modify directly. +# Run `make generate_connector_package` to update. +from connectors.source import DataSourceConfiguration +from connectors.sources.github import GitHubDataSource +from package.connectors.connector_base import ConnectorBase -class GitHubDataSource(GitHubDataSource): + +class GitHubConnector(ConnectorBase): """ - GitHubDataSource class generated for connecting to the data source. + GitHubConnector class generated for connecting to the data source. Args: @@ -33,12 +43,6 @@ class GitHubDataSource(GitHubDataSource): retry_count (int): Maximum retries per request - use_text_extraction_service (bool): Use text extraction service - - Requires a separate deployment of the Elastic Text Extraction Service. Requires that pipeline settings disable text extraction. - - use_document_level_security (bool): Enable document level security - - Document level security ensures identities and permissions set in GitHub are maintained in Elasticsearch. This enables you to restrict and personalize read-access users and groups have to documents in this index. Access control syncs ensure this metadata is kept up to date in your Elasticsearch documents. 
- """ def __init__( @@ -54,24 +58,20 @@ def __init__( repositories=None, ssl_enabled=False, ssl_ca=None, - retry_count="3", - use_text_extraction_service=False, - use_document_level_security=False, + retry_count=3, ): - configuration = self.get_default_configuration() + + configuration = GitHubDataSource.get_default_configuration() # Apply the user provided configuration in the class constructor args = locals() for key in configuration.keys(): - if args[key] is not None: + if args.get(key) is not None: configuration[key]["value"] = args[key] - # Check if all fields marked as 'required' in config are present with values, if not raise an exception - for key, value in configuration.items(): - if value["value"] is None and value.get("required", True): - raise ValueError(f"Missing required configuration field: {key}") + connector_configuration = DataSourceConfiguration(configuration) - super().__init__(configuration) + super().__init__(data_provider=GitHubDataSource(connector_configuration)) self.data_source = data_source self.host = host @@ -85,5 +85,3 @@ def __init__( self.ssl_enabled = ssl_enabled self.ssl_ca = ssl_ca self.retry_count = retry_count - self.use_text_extraction_service = use_text_extraction_service - self.use_document_level_security = use_document_level_security diff --git a/package/connectors/generated/gmail.py b/package/connectors/generated/gmail.py new file mode 100644 index 000000000..384f669f3 --- /dev/null +++ b/package/connectors/generated/gmail.py @@ -0,0 +1,56 @@ +# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +# or more contributor license agreements. Licensed under the Elastic License 2.0; +# you may not use this file except in compliance with the Elastic License 2.0. +# +# +# This is a generated code. Do not modify directly. +# Run `make generate_connector_package` to update. + +from connectors.source import DataSourceConfiguration +from connectors.sources.gmail import GMailDataSource +from package.connectors.connector_base import ConnectorBase + + +class GMailConnector(ConnectorBase): + """ + GMailConnector class generated for connecting to the data source. + + Args: + + service_account_credentials (str): GMail service account JSON + + subject (str): Google Workspace admin email + - Admin account email address + + customer_id (str): Google customer id + - Google admin console -> Account -> Settings -> Customer Id + + include_spam_and_trash (bool): Include spam and trash emails + - Will include spam and trash emails, when set to true. + + """ + + def __init__( + self, + service_account_credentials=None, + subject=None, + customer_id=None, + include_spam_and_trash=False, + ): + + configuration = GMailDataSource.get_default_configuration() + + # Apply the user provided configuration in the class constructor + args = locals() + for key in configuration.keys(): + if args.get(key) is not None: + configuration[key]["value"] = args[key] + + connector_configuration = DataSourceConfiguration(configuration) + + super().__init__(data_provider=GMailDataSource(connector_configuration)) + + self.service_account_credentials = service_account_credentials + self.subject = subject + self.customer_id = customer_id + self.include_spam_and_trash = include_spam_and_trash diff --git a/package/connectors/generated/google_cloud_storage.py b/package/connectors/generated/google_cloud_storage.py new file mode 100644 index 000000000..b3014d3b3 --- /dev/null +++ b/package/connectors/generated/google_cloud_storage.py @@ -0,0 +1,43 @@ +# Copyright Elasticsearch B.V. 
and/or licensed to Elasticsearch B.V. under one +# or more contributor license agreements. Licensed under the Elastic License 2.0; +# you may not use this file except in compliance with the Elastic License 2.0. +# +# +# This is a generated code. Do not modify directly. +# Run `make generate_connector_package` to update. + +from connectors.source import DataSourceConfiguration +from connectors.sources.google_cloud_storage import GoogleCloudStorageDataSource +from package.connectors.connector_base import ConnectorBase + + +class GoogleCloudStorageConnector(ConnectorBase): + """ + GoogleCloudStorageConnector class generated for connecting to the data source. + + Args: + + buckets (list): Google Cloud Storage buckets + + service_account_credentials (str): Google Cloud service account JSON + + """ + + def __init__(self, buckets=None, service_account_credentials=None): + + configuration = GoogleCloudStorageDataSource.get_default_configuration() + + # Apply the user provided configuration in the class constructor + args = locals() + for key in configuration.keys(): + if args.get(key) is not None: + configuration[key]["value"] = args[key] + + connector_configuration = DataSourceConfiguration(configuration) + + super().__init__( + data_provider=GoogleCloudStorageDataSource(connector_configuration) + ) + + self.buckets = buckets + self.service_account_credentials = service_account_credentials diff --git a/package/generated/google_drive.py b/package/connectors/generated/google_drive.py similarity index 69% rename from package/generated/google_drive.py rename to package/connectors/generated/google_drive.py index f94301281..72cca2689 100644 --- a/package/generated/google_drive.py +++ b/package/connectors/generated/google_drive.py @@ -1,9 +1,19 @@ -from application.base import BaseDataSource +# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +# or more contributor license agreements. Licensed under the Elastic License 2.0; +# you may not use this file except in compliance with the Elastic License 2.0. +# +# +# This is a generated code. Do not modify directly. +# Run `make generate_connector_package` to update. +from connectors.source import DataSourceConfiguration +from connectors.sources.google_drive import GoogleDriveDataSource +from package.connectors.connector_base import ConnectorBase -class GoogleDriveDataSource(GoogleDriveDataSource): + +class GoogleDriveConnector(ConnectorBase): """ - GoogleDriveDataSource class generated for connecting to the data source. + GoogleDriveConnector class generated for connecting to the data source. Args: @@ -19,18 +29,12 @@ class GoogleDriveDataSource(GoogleDriveDataSource): google_workspace_email_for_shared_drives_sync (str): Google Workspace email for syncing shared drives - Provide the Google Workspace user email for discovery and syncing of shared drives. Only the shared drives this user has access to will be synced. - use_document_level_security (bool): Enable document level security - - Document level security ensures identities and permissions set in Google Drive are maintained in Elasticsearch. This enables you to restrict and personalize read-access users and groups have to documents in this index. Access control syncs ensure this metadata is kept up to date in your Elasticsearch documents. - google_workspace_admin_email (str): Google Workspace admin email - In order to use Document Level Security you need to enable Google Workspace domain-wide delegation of authority for your service account. 
A service account with delegated authority can impersonate admin user with sufficient permissions to fetch all users and their corresponding permissions. Please refer to the connector documentation to ensure domain-wide delegation is correctly configured and has the appropriate scopes. max_concurrency (int): Maximum concurrent HTTP requests - This setting determines the maximum number of concurrent HTTP requests sent to the Google API to fetch data. Increasing this value can improve data retrieval speed, but it may also place higher demands on system resources and network bandwidth. - use_text_extraction_service (bool): Use text extraction service - - Requires a separate deployment of the Elastic Text Extraction Service. Requires that pipeline settings disable text extraction. - """ def __init__( @@ -39,25 +43,21 @@ def __init__( use_domain_wide_delegation_for_sync=False, google_workspace_admin_email_for_data_sync=None, google_workspace_email_for_shared_drives_sync=None, - use_document_level_security=False, google_workspace_admin_email=None, - max_concurrency=None, - use_text_extraction_service=False, + max_concurrency=25, ): - configuration = self.get_default_configuration() + + configuration = GoogleDriveDataSource.get_default_configuration() # Apply the user provided configuration in the class constructor args = locals() for key in configuration.keys(): - if args[key] is not None: + if args.get(key) is not None: configuration[key]["value"] = args[key] - # Check if all fields marked as 'required' in config are present with values, if not raise an exception - for key, value in configuration.items(): - if value["value"] is None and value.get("required", True): - raise ValueError(f"Missing required configuration field: {key}") + connector_configuration = DataSourceConfiguration(configuration) - super().__init__(configuration) + super().__init__(data_provider=GoogleDriveDataSource(connector_configuration)) self.service_account_credentials = service_account_credentials self.use_domain_wide_delegation_for_sync = use_domain_wide_delegation_for_sync @@ -67,7 +67,5 @@ def __init__( self.google_workspace_email_for_shared_drives_sync = ( google_workspace_email_for_shared_drives_sync ) - self.use_document_level_security = use_document_level_security self.google_workspace_admin_email = google_workspace_admin_email self.max_concurrency = max_concurrency - self.use_text_extraction_service = use_text_extraction_service diff --git a/package/generated/graphql.py b/package/connectors/generated/graphql.py similarity index 75% rename from package/generated/graphql.py rename to package/connectors/generated/graphql.py index a56d64849..f346e60e1 100644 --- a/package/generated/graphql.py +++ b/package/connectors/generated/graphql.py @@ -1,9 +1,19 @@ -from application.base import BaseDataSource +# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +# or more contributor license agreements. Licensed under the Elastic License 2.0; +# you may not use this file except in compliance with the Elastic License 2.0. +# +# +# This is a generated code. Do not modify directly. +# Run `make generate_connector_package` to update. +from connectors.source import DataSourceConfiguration +from connectors.sources.graphql import GraphQLDataSource +from package.connectors.connector_base import ConnectorBase -class GraphQLDataSource(GraphQLDataSource): + +class GraphQLConnector(ConnectorBase): """ - GraphQLDataSource class generated for connecting to the data source. 
+    GraphQLConnector class generated for connecting to the data source.
 
     Args:
 
@@ -52,22 +62,20 @@ def __init__(
         headers=None,
         pagination_model="no_pagination",
         pagination_key=None,
-        connection_timeout=None,
+        connection_timeout=300,
     ):
-        configuration = self.get_default_configuration()
+
+        configuration = GraphQLDataSource.get_default_configuration()
 
         # Apply the user provided configuration in the class constructor
         args = locals()
         for key in configuration.keys():
-            if args[key] is not None:
+            if args.get(key) is not None:
                 configuration[key]["value"] = args[key]
 
-        # Check if all fields marked as 'required' in config are present with values, if not raise an exception
-        for key, value in configuration.items():
-            if value["value"] is None and value.get("required", True):
-                raise ValueError(f"Missing required configuration field: {key}")
+        connector_configuration = DataSourceConfiguration(configuration)
 
-        super().__init__(configuration)
+        super().__init__(data_provider=GraphQLDataSource(connector_configuration))
 
         self.http_endpoint = http_endpoint
         self.http_method = http_method
diff --git a/package/generated/jira.py b/package/connectors/generated/jira.py
similarity index 55%
rename from package/generated/jira.py
rename to package/connectors/generated/jira.py
index fca5b788d..44c17aaac 100644
--- a/package/generated/jira.py
+++ b/package/connectors/generated/jira.py
@@ -1,9 +1,19 @@
-from application.base import BaseDataSource
+# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+# or more contributor license agreements. Licensed under the Elastic License 2.0;
+# you may not use this file except in compliance with the Elastic License 2.0.
+#
+#
+# This is generated code. Do not modify directly.
+# Run `make generate_connector_package` to update.
+from connectors.source import DataSourceConfiguration
+from connectors.sources.jira import JiraDataSource
+from package.connectors.connector_base import ConnectorBase
 
 
-class JiraDataSource(JiraDataSource):
+
+class JiraConnector(ConnectorBase):
     """
-    JiraDataSource class generated for connecting to the data source.
+    JiraConnector class generated for connecting to the data source.
 
     Args:
 
@@ -35,12 +45,6 @@ class JiraDataSource(JiraDataSource):
 
         concurrent_downloads (int): Maximum concurrent downloads
 
-        use_document_level_security (bool): Enable document level security
-        - Document level security ensures identities and permissions set in Jira are maintained in Elasticsearch. This enables you to restrict and personalize read-access users and groups have to documents in this index. Access control syncs ensure this metadata is kept up to date in your Elasticsearch documents. Only 1000 users can be fetched for Jira Data Center.
-
-        use_text_extraction_service (bool): Use text extraction service
-        - Requires a separate deployment of the Elastic Text Extraction Service. Requires that pipeline settings disable text extraction.
-
     """
 
     def __init__(
@@ -56,25 +60,21 @@ def __init__(
         projects=None,
         ssl_enabled=False,
         ssl_ca=None,
-        retry_count=None,
-        concurrent_downloads=None,
-        use_document_level_security=False,
-        use_text_extraction_service=False,
+        retry_count=3,
+        concurrent_downloads=100,
    ):
-        configuration = self.get_default_configuration()
+
+        configuration = JiraDataSource.get_default_configuration()
 
         # Apply the user provided configuration in the class constructor
         args = locals()
         for key in configuration.keys():
-            if args[key] is not None:
+            if args.get(key) is not None:
                 configuration[key]["value"] = args[key]
 
-        # Check if all fields marked as 'required' in config are present with values, if not raise an exception
-        for key, value in configuration.items():
-            if value["value"] is None and value.get("required", True):
-                raise ValueError(f"Missing required configuration field: {key}")
+        connector_configuration = DataSourceConfiguration(configuration)
 
-        super().__init__(configuration)
+        super().__init__(data_provider=JiraDataSource(connector_configuration))
 
         self.data_source = data_source
         self.username = username
@@ -89,5 +89,3 @@ def __init__(
         self.ssl_ca = ssl_ca
         self.retry_count = retry_count
         self.concurrent_downloads = concurrent_downloads
-        self.use_document_level_security = use_document_level_security
-        self.use_text_extraction_service = use_text_extraction_service
diff --git a/package/connectors/generated/microsoft_teams.py b/package/connectors/generated/microsoft_teams.py
new file mode 100644
index 000000000..da81fbc83
--- /dev/null
+++ b/package/connectors/generated/microsoft_teams.py
@@ -0,0 +1,59 @@
+# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+# or more contributor license agreements. Licensed under the Elastic License 2.0;
+# you may not use this file except in compliance with the Elastic License 2.0.
+#
+#
+# This is generated code. Do not modify directly.
+# Run `make generate_connector_package` to update.
+
+from connectors.source import DataSourceConfiguration
+from connectors.sources.microsoft_teams import MicrosoftTeamsDataSource
+from package.connectors.connector_base import ConnectorBase
+
+
+class MicrosoftTeamsConnector(ConnectorBase):
+    """
+    MicrosoftTeamsConnector class generated for connecting to the data source.
+
+    Args:
+
+        tenant_id (str): Tenant ID
+
+        client_id (str): Client ID
+
+        secret_value (str): Secret value
+
+        username (str): Username
+
+        password (str): Password
+
+    """
+
+    def __init__(
+        self,
+        tenant_id=None,
+        client_id=None,
+        secret_value=None,
+        username=None,
+        password=None,
+    ):
+
+        configuration = MicrosoftTeamsDataSource.get_default_configuration()
+
+        # Apply the user provided configuration in the class constructor
+        args = locals()
+        for key in configuration.keys():
+            if args.get(key) is not None:
+                configuration[key]["value"] = args[key]
+
+        connector_configuration = DataSourceConfiguration(configuration)
+
+        super().__init__(
+            data_provider=MicrosoftTeamsDataSource(connector_configuration)
+        )
+
+        self.tenant_id = tenant_id
+        self.client_id = client_id
+        self.secret_value = secret_value
+        self.username = username
+        self.password = password
diff --git a/package/generated/mongodb.py b/package/connectors/generated/mongodb.py
similarity index 66%
rename from package/generated/mongodb.py
rename to package/connectors/generated/mongodb.py
index af5796799..d62cf2b74 100644
--- a/package/generated/mongodb.py
+++ b/package/connectors/generated/mongodb.py
@@ -1,9 +1,19 @@
-from application.base import BaseDataSource
+# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+# or more contributor license agreements. Licensed under the Elastic License 2.0;
+# you may not use this file except in compliance with the Elastic License 2.0.
+#
+#
+# This is generated code. Do not modify directly.
+# Run `make generate_connector_package` to update.
+from connectors.source import DataSourceConfiguration
+from connectors.sources.mongo import MongoDataSource
+from package.connectors.connector_base import ConnectorBase
 
 
-class MongoDataSource(MongoDataSource):
+
+class MongoConnector(ConnectorBase):
     """
-    MongoDataSource class generated for connecting to the data source.
+    MongoConnector class generated for connecting to the data source.
 
     Args:
 
@@ -42,20 +52,18 @@ def __init__(
         ssl_ca=None,
         tls_insecure=False,
     ):
-        configuration = self.get_default_configuration()
+
+        configuration = MongoDataSource.get_default_configuration()
 
         # Apply the user provided configuration in the class constructor
         args = locals()
         for key in configuration.keys():
-            if args[key] is not None:
+            if args.get(key) is not None:
                 configuration[key]["value"] = args[key]
 
-        # Check if all fields marked as 'required' in config are present with values, if not raise an exception
-        for key, value in configuration.items():
-            if value["value"] is None and value.get("required", True):
-                raise ValueError(f"Missing required configuration field: {key}")
+        connector_configuration = DataSourceConfiguration(configuration)
 
-        super().__init__(configuration)
+        super().__init__(data_provider=MongoDataSource(connector_configuration))
 
         self.host = host
         self.user = user
diff --git a/package/generated/mssql.py b/package/connectors/generated/mssql.py
similarity index 60%
rename from package/generated/mssql.py
rename to package/connectors/generated/mssql.py
index 371d9a272..5374a1e86 100644
--- a/package/generated/mssql.py
+++ b/package/connectors/generated/mssql.py
@@ -1,9 +1,19 @@
-from application.base import BaseDataSource
+# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+# or more contributor license agreements. Licensed under the Elastic License 2.0;
+# you may not use this file except in compliance with the Elastic License 2.0.
+#
+#
+# This is generated code. Do not modify directly.
+# Run `make generate_connector_package` to update.
+from connectors.source import DataSourceConfiguration
+from connectors.sources.mssql import MSSQLDataSource
+from package.connectors.connector_base import ConnectorBase
 
 
-class MSSQLDataSource(MSSQLDataSource):
+
+class MSSQLConnector(ConnectorBase):
     """
-    MSSQLDataSource class generated for connecting to the data source.
+    MSSQLConnector class generated for connecting to the data source.
 
     Args:
 
@@ -42,27 +52,25 @@ def __init__(
         password=None,
         database=None,
         tables="*",
-        fetch_size=None,
-        retry_count=None,
+        fetch_size=50,
+        retry_count=3,
         schema=None,
         ssl_enabled=False,
         ssl_ca=None,
         validate_host=False,
     ):
-        configuration = self.get_default_configuration()
+
+        configuration = MSSQLDataSource.get_default_configuration()
 
         # Apply the user provided configuration in the class constructor
         args = locals()
         for key in configuration.keys():
-            if args[key] is not None:
+            if args.get(key) is not None:
                 configuration[key]["value"] = args[key]
 
-        # Check if all fields marked as 'required' in config are present with values, if not raise an exception
-        for key, value in configuration.items():
-            if value["value"] is None and value.get("required", True):
-                raise ValueError(f"Missing required configuration field: {key}")
+        connector_configuration = DataSourceConfiguration(configuration)
 
-        super().__init__(configuration)
+        super().__init__(data_provider=MSSQLDataSource(connector_configuration))
 
         self.host = host
         self.port = port
diff --git a/package/generated/mysql.py b/package/connectors/generated/mysql.py
similarity index 54%
rename from package/generated/mysql.py
rename to package/connectors/generated/mysql.py
index b50f96c15..5552f6407 100644
--- a/package/generated/mysql.py
+++ b/package/connectors/generated/mysql.py
@@ -1,9 +1,19 @@
-from application.base import BaseDataSource
+# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+# or more contributor license agreements. Licensed under the Elastic License 2.0;
+# you may not use this file except in compliance with the Elastic License 2.0.
+#
+#
+# This is generated code. Do not modify directly.
+# Run `make generate_connector_package` to update.
+from connectors.source import DataSourceConfiguration
+from connectors.sources.mysql import MySqlDataSource
+from package.connectors.connector_base import ConnectorBase
 
 
-class MySqlDataSource(MySqlDataSource):
+
+class MySqlConnector(ConnectorBase):
     """
-    MySqlDataSource class generated for connecting to the data source.
+    MySqlConnector class generated for connecting to the data source.
 
     Args:
 
@@ -39,23 +49,21 @@ def __init__(
         tables="*",
         ssl_enabled=False,
         ssl_ca=None,
-        fetch_size=None,
-        retry_count=None,
+        fetch_size=50,
+        retry_count=3,
     ):
-        configuration = self.get_default_configuration()
+
+        configuration = MySqlDataSource.get_default_configuration()
 
         # Apply the user provided configuration in the class constructor
         args = locals()
         for key in configuration.keys():
-            if args[key] is not None:
+            if args.get(key) is not None:
                 configuration[key]["value"] = args[key]
 
-        # Check if all fields marked as 'required' in config are present with values, if not raise an exception
-        for key, value in configuration.items():
-            if value["value"] is None and value.get("required", True):
-                raise ValueError(f"Missing required configuration field: {key}")
+        connector_configuration = DataSourceConfiguration(configuration)
 
-        super().__init__(configuration)
+        super().__init__(data_provider=MySqlDataSource(connector_configuration))
 
         self.host = host
         self.port = port
diff --git a/package/connectors/generated/network_drive.py b/package/connectors/generated/network_drive.py
new file mode 100644
index 000000000..37f3b97e3
--- /dev/null
+++ b/package/connectors/generated/network_drive.py
@@ -0,0 +1,65 @@
+# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+# or more contributor license agreements. Licensed under the Elastic License 2.0;
+# you may not use this file except in compliance with the Elastic License 2.0.
+#
+#
+# This is generated code. Do not modify directly.
+# Run `make generate_connector_package` to update.
+
+from connectors.source import DataSourceConfiguration
+from connectors.sources.network_drive import NASDataSource
+from package.connectors.connector_base import ConnectorBase
+
+
+class NASConnector(ConnectorBase):
+    """
+    NASConnector class generated for connecting to the data source.
+
+    Args:
+
+        username (str): Username
+
+        password (str): Password
+
+        server_ip (str): SMB IP
+
+        server_port (int): SMB port
+
+        drive_path (str): SMB path
+
+        drive_type (str): Drive type
+
+        identity_mappings (str): Path of CSV file containing users and groups SID (For Linux Network Drive)
+
+    """
+
+    def __init__(
+        self,
+        username=None,
+        password=None,
+        server_ip=None,
+        server_port=None,
+        drive_path=None,
+        drive_type="windows",
+        identity_mappings=None,
+    ):
+
+        configuration = NASDataSource.get_default_configuration()
+
+        # Apply the user provided configuration in the class constructor
+        args = locals()
+        for key in configuration.keys():
+            if args.get(key) is not None:
+                configuration[key]["value"] = args[key]
+
+        connector_configuration = DataSourceConfiguration(configuration)
+
+        super().__init__(data_provider=NASDataSource(connector_configuration))
+
+        self.username = username
+        self.password = password
+        self.server_ip = server_ip
+        self.server_port = server_port
+        self.drive_path = drive_path
+        self.drive_type = drive_type
+        self.identity_mappings = identity_mappings
diff --git a/package/generated/notion.py b/package/connectors/generated/notion.py
similarity index 50%
rename from package/generated/notion.py
rename to package/connectors/generated/notion.py
index 137d3445d..e477e6596 100644
--- a/package/generated/notion.py
+++ b/package/connectors/generated/notion.py
@@ -1,9 +1,19 @@
-from application.base import BaseDataSource
+# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+# or more contributor license agreements. Licensed under the Elastic License 2.0;
+# you may not use this file except in compliance with the Elastic License 2.0.
+#
+#
+# This is generated code. Do not modify directly.
+# Run `make generate_connector_package` to update.
+from connectors.source import DataSourceConfiguration
+from connectors.sources.notion import NotionDataSource
+from package.connectors.connector_base import ConnectorBase
 
 
-class NotionDataSource(NotionDataSource):
+
+class NotionConnector(ConnectorBase):
     """
-    NotionDataSource class generated for connecting to the data source.
+    NotionConnector class generated for connecting to the data source.
 
     Args:
 
@@ -26,22 +36,20 @@ def __init__(
         databases=None,
         pages=None,
         index_comments=False,
-        concurrent_downloads=None,
+        concurrent_downloads=30,
     ):
-        configuration = self.get_default_configuration()
+
+        configuration = NotionDataSource.get_default_configuration()
 
         # Apply the user provided configuration in the class constructor
         args = locals()
         for key in configuration.keys():
-            if args[key] is not None:
+            if args.get(key) is not None:
                 configuration[key]["value"] = args[key]
 
-        # Check if all fields marked as 'required' in config are present with values, if not raise an exception
-        for key, value in configuration.items():
-            if value["value"] is None and value.get("required", True):
-                raise ValueError(f"Missing required configuration field: {key}")
+        connector_configuration = DataSourceConfiguration(configuration)
 
-        super().__init__(configuration)
+        super().__init__(data_provider=NotionDataSource(connector_configuration))
 
         self.notion_secret_key = notion_secret_key
         self.databases = databases
diff --git a/package/connectors/generated/onedrive.py b/package/connectors/generated/onedrive.py
new file mode 100644
index 000000000..3ef95dc94
--- /dev/null
+++ b/package/connectors/generated/onedrive.py
@@ -0,0 +1,57 @@
+# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+# or more contributor license agreements. Licensed under the Elastic License 2.0;
+# you may not use this file except in compliance with the Elastic License 2.0.
+#
+#
+# This is generated code. Do not modify directly.
+# Run `make generate_connector_package` to update.
+
+from connectors.source import DataSourceConfiguration
+from connectors.sources.onedrive import OneDriveDataSource
+from package.connectors.connector_base import ConnectorBase
+
+
+class OneDriveConnector(ConnectorBase):
+    """
+    OneDriveConnector class generated for connecting to the data source.
+
+    Args:
+
+        client_id (str): Azure application Client ID
+
+        client_secret (str): Azure application Client Secret
+
+        tenant_id (str): Azure application Tenant ID
+
+        retry_count (int): Maximum retries per request
+
+        concurrent_downloads (int): Maximum concurrent downloads
+
+    """
+
+    def __init__(
+        self,
+        client_id=None,
+        client_secret=None,
+        tenant_id=None,
+        retry_count=3,
+        concurrent_downloads=15,
+    ):
+
+        configuration = OneDriveDataSource.get_default_configuration()
+
+        # Apply the user provided configuration in the class constructor
+        args = locals()
+        for key in configuration.keys():
+            if args.get(key) is not None:
+                configuration[key]["value"] = args[key]
+
+        connector_configuration = DataSourceConfiguration(configuration)
+
+        super().__init__(data_provider=OneDriveDataSource(connector_configuration))
+
+        self.client_id = client_id
+        self.client_secret = client_secret
+        self.tenant_id = tenant_id
+        self.retry_count = retry_count
+        self.concurrent_downloads = concurrent_downloads
diff --git a/package/generated/oracle.py b/package/connectors/generated/oracle.py
similarity index 62%
rename from package/generated/oracle.py
rename to package/connectors/generated/oracle.py
index fb5c8ae62..f6b5e8553 100644
--- a/package/generated/oracle.py
+++ b/package/connectors/generated/oracle.py
@@ -1,9 +1,19 @@
-from application.base import BaseDataSource
+# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+# or more contributor license agreements. Licensed under the Elastic License 2.0;
+# you may not use this file except in compliance with the Elastic License 2.0.
+#
+#
+# This is generated code. Do not modify directly.
+# Run `make generate_connector_package` to update.
+from connectors.source import DataSourceConfiguration
+from connectors.sources.oracle import OracleDataSource
+from package.connectors.connector_base import ConnectorBase
 
 
-class OracleDataSource(OracleDataSource):
+
+class OracleConnector(ConnectorBase):
     """
-    OracleDataSource class generated for connecting to the data source.
+    OracleConnector class generated for connecting to the data source.
 
     Args:
 
@@ -46,26 +56,24 @@ def __init__(
         sid=None,
         service_name=None,
         tables="*",
-        fetch_size=None,
-        retry_count=None,
+        fetch_size=50,
+        retry_count=3,
         oracle_protocol="TCP",
         oracle_home="",
-        wallet_configuration_path=None,
+        wallet_configuration_path="",
     ):
-        configuration = self.get_default_configuration()
+
+        configuration = OracleDataSource.get_default_configuration()
 
         # Apply the user provided configuration in the class constructor
         args = locals()
         for key in configuration.keys():
-            if args[key] is not None:
+            if args.get(key) is not None:
                 configuration[key]["value"] = args[key]
 
-        # Check if all fields marked as 'required' in config are present with values, if not raise an exception
-        for key, value in configuration.items():
-            if value["value"] is None and value.get("required", True):
-                raise ValueError(f"Missing required configuration field: {key}")
+        connector_configuration = DataSourceConfiguration(configuration)
 
-        super().__init__(configuration)
+        super().__init__(data_provider=OracleDataSource(connector_configuration))
 
         self.host = host
         self.port = port
diff --git a/package/generated/outlook.py b/package/connectors/generated/outlook.py
similarity index 53%
rename from package/generated/outlook.py
rename to package/connectors/generated/outlook.py
index 2e55e62e4..ff9fc09bd 100644
--- a/package/generated/outlook.py
+++ b/package/connectors/generated/outlook.py
@@ -1,9 +1,19 @@
-from application.base import BaseDataSource
+# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+# or more contributor license agreements. Licensed under the Elastic License 2.0;
+# you may not use this file except in compliance with the Elastic License 2.0.
+#
+#
+# This is generated code. Do not modify directly.
+# Run `make generate_connector_package` to update.
+from connectors.source import DataSourceConfiguration
+from connectors.sources.outlook import OutlookDataSource
+from package.connectors.connector_base import ConnectorBase
 
 
-class OutlookDataSource(OutlookDataSource):
+
+class OutlookConnector(ConnectorBase):
     """
-    OutlookDataSource class generated for connecting to the data source.
+    OutlookConnector class generated for connecting to the data source.
 
     Args:
 
@@ -32,12 +42,6 @@ class OutlookDataSource(OutlookDataSource):
 
         ssl_ca (str): SSL certificate
 
-        use_text_extraction_service (bool): Use text extraction service
-        - Requires a separate deployment of the Elastic Text Extraction Service. Requires that pipeline settings disable text extraction.
-
-        use_document_level_security (bool): Enable document level security
-        - Document level security ensures identities and permissions set in Outlook are maintained in Elasticsearch. This enables you to restrict and personalize read-access users and groups have to documents in this index. Access control syncs ensure this metadata is kept up to date in your Elasticsearch documents.
-
     """
 
     def __init__(
@@ -53,23 +57,19 @@ def __init__(
         domain=None,
         ssl_enabled=False,
         ssl_ca=None,
-        use_text_extraction_service=False,
-        use_document_level_security=False,
     ):
-        configuration = self.get_default_configuration()
+
+        configuration = OutlookDataSource.get_default_configuration()
 
         # Apply the user provided configuration in the class constructor
         args = locals()
         for key in configuration.keys():
-            if args[key] is not None:
+            if args.get(key) is not None:
                 configuration[key]["value"] = args[key]
 
-        # Check if all fields marked as 'required' in config are present with values, if not raise an exception
-        for key, value in configuration.items():
-            if value["value"] is None and value.get("required", True):
-                raise ValueError(f"Missing required configuration field: {key}")
+        connector_configuration = DataSourceConfiguration(configuration)
 
-        super().__init__(configuration)
+        super().__init__(data_provider=OutlookDataSource(connector_configuration))
 
         self.data_source = data_source
         self.tenant_id = tenant_id
@@ -82,5 +82,3 @@ def __init__(
         self.domain = domain
         self.ssl_enabled = ssl_enabled
         self.ssl_ca = ssl_ca
-        self.use_text_extraction_service = use_text_extraction_service
-        self.use_document_level_security = use_document_level_security
diff --git a/package/generated/postgresql.py b/package/connectors/generated/postgresql.py
similarity index 58%
rename from package/generated/postgresql.py
rename to package/connectors/generated/postgresql.py
index 1bd442e98..e080a6b21 100644
--- a/package/generated/postgresql.py
+++ b/package/connectors/generated/postgresql.py
@@ -1,9 +1,19 @@
-from application.base import BaseDataSource
+# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+# or more contributor license agreements. Licensed under the Elastic License 2.0;
+# you may not use this file except in compliance with the Elastic License 2.0.
+#
+#
+# This is generated code. Do not modify directly.
+# Run `make generate_connector_package` to update.
+from connectors.source import DataSourceConfiguration
+from connectors.sources.postgresql import PostgreSQLDataSource
+from package.connectors.connector_base import ConnectorBase
 
 
-class PostgreSQLDataSource(PostgreSQLDataSource):
+
+class PostgreSQLConnector(ConnectorBase):
     """
-    PostgreSQLDataSource class generated for connecting to the data source.
+    PostgreSQLConnector class generated for connecting to the data source.
 
     Args:
 
@@ -41,25 +51,23 @@ def __init__(
         database=None,
         schema=None,
         tables="*",
-        fetch_size=None,
-        retry_count=None,
+        fetch_size=50,
+        retry_count=3,
         ssl_enabled=False,
         ssl_ca=None,
     ):
-        configuration = self.get_default_configuration()
+
+        configuration = PostgreSQLDataSource.get_default_configuration()
 
         # Apply the user provided configuration in the class constructor
         args = locals()
         for key in configuration.keys():
-            if args[key] is not None:
+            if args.get(key) is not None:
                 configuration[key]["value"] = args[key]
 
-        # Check if all fields marked as 'required' in config are present with values, if not raise an exception
-        for key, value in configuration.items():
-            if value["value"] is None and value.get("required", True):
-                raise ValueError(f"Missing required configuration field: {key}")
+        connector_configuration = DataSourceConfiguration(configuration)
 
-        super().__init__(configuration)
+        super().__init__(data_provider=PostgreSQLDataSource(connector_configuration))
 
         self.host = host
         self.port = port
diff --git a/package/generated/redis.py b/package/connectors/generated/redis.py
similarity index 68%
rename from package/generated/redis.py
rename to package/connectors/generated/redis.py
index e42d3393a..c34337dc9 100644
--- a/package/generated/redis.py
+++ b/package/connectors/generated/redis.py
@@ -1,9 +1,19 @@
-from application.base import BaseDataSource
+# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+# or more contributor license agreements. Licensed under the Elastic License 2.0;
+# you may not use this file except in compliance with the Elastic License 2.0.
+#
+#
+# This is generated code. Do not modify directly.
+# Run `make generate_connector_package` to update.
+from connectors.source import DataSourceConfiguration
+from connectors.sources.redis import RedisDataSource
+from package.connectors.connector_base import ConnectorBase
 
 
-class RedisDataSource(RedisDataSource):
+
+class RedisConnector(ConnectorBase):
     """
-    RedisDataSource class generated for connecting to the data source.
+    RedisConnector class generated for connecting to the data source.
 
     Args:
 
@@ -44,20 +54,18 @@ def __init__(
         tls_certfile=None,
         tls_keyfile=None,
     ):
-        configuration = self.get_default_configuration()
+
+        configuration = RedisDataSource.get_default_configuration()
 
         # Apply the user provided configuration in the class constructor
         args = locals()
         for key in configuration.keys():
-            if args[key] is not None:
+            if args.get(key) is not None:
                 configuration[key]["value"] = args[key]
 
-        # Check if all fields marked as 'required' in config are present with values, if not raise an exception
-        for key, value in configuration.items():
-            if value["value"] is None and value.get("required", True):
-                raise ValueError(f"Missing required configuration field: {key}")
+        connector_configuration = DataSourceConfiguration(configuration)
 
-        super().__init__(configuration)
+        super().__init__(data_provider=RedisDataSource(connector_configuration))
 
         self.host = host
         self.port = port
diff --git a/package/generated/s3.py b/package/connectors/generated/s3.py
similarity index 50%
rename from package/generated/s3.py
rename to package/connectors/generated/s3.py
index bec9f023e..f6cabcace 100644
--- a/package/generated/s3.py
+++ b/package/connectors/generated/s3.py
@@ -1,9 +1,19 @@
-from application.base import BaseDataSource
+# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+# or more contributor license agreements. Licensed under the Elastic License 2.0;
+# you may not use this file except in compliance with the Elastic License 2.0.
+#
+#
+# This is generated code. Do not modify directly.
+# Run `make generate_connector_package` to update.
+from connectors.source import DataSourceConfiguration
+from connectors.sources.s3 import S3DataSource
+from package.connectors.connector_base import ConnectorBase
 
 
-class S3DataSource(S3DataSource):
+
+class S3Connector(ConnectorBase):
     """
-    S3DataSource class generated for connecting to the data source.
+    S3Connector class generated for connecting to the data source.
 
     Args:
 
@@ -22,9 +32,6 @@ class S3DataSource(S3DataSource):
 
         page_size (int): Maximum size of page
 
-        use_text_extraction_service (bool): Use text extraction service
-        - Requires a separate deployment of the Elastic Text Extraction Service. Requires that pipeline settings disable text extraction.
-
     """
 
     def __init__(
@@ -32,26 +39,23 @@ def __init__(
         buckets=None,
         aws_access_key_id=None,
         aws_secret_access_key=None,
-        read_timeout=None,
-        connect_timeout=None,
-        max_attempts=None,
-        page_size=None,
-        use_text_extraction_service=False,
+        read_timeout=90,
+        connect_timeout=90,
+        max_attempts=5,
+        page_size=100,
     ):
-        configuration = self.get_default_configuration()
+
+        configuration = S3DataSource.get_default_configuration()
 
         # Apply the user provided configuration in the class constructor
         args = locals()
         for key in configuration.keys():
-            if args[key] is not None:
+            if args.get(key) is not None:
                 configuration[key]["value"] = args[key]
 
-        # Check if all fields marked as 'required' in config are present with values, if not raise an exception
-        for key, value in configuration.items():
-            if value["value"] is None and value.get("required", True):
-                raise ValueError(f"Missing required configuration field: {key}")
+        connector_configuration = DataSourceConfiguration(configuration)
 
-        super().__init__(configuration)
+        super().__init__(data_provider=S3DataSource(connector_configuration))
 
         self.buckets = buckets
         self.aws_access_key_id = aws_access_key_id
@@ -60,4 +64,3 @@ def __init__(
         self.connect_timeout = connect_timeout
         self.max_attempts = max_attempts
         self.page_size = page_size
-        self.use_text_extraction_service = use_text_extraction_service
diff --git a/package/connectors/generated/salesforce.py b/package/connectors/generated/salesforce.py
new file mode 100644
index 000000000..5bd71e8b9
--- /dev/null
+++ b/package/connectors/generated/salesforce.py
@@ -0,0 +1,47 @@
+# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+# or more contributor license agreements. Licensed under the Elastic License 2.0;
+# you may not use this file except in compliance with the Elastic License 2.0.
+#
+#
+# This is generated code. Do not modify directly.
+# Run `make generate_connector_package` to update.
+
+from connectors.source import DataSourceConfiguration
+from connectors.sources.salesforce import SalesforceDataSource
+from package.connectors.connector_base import ConnectorBase
+
+
+class SalesforceConnector(ConnectorBase):
+    """
+    SalesforceConnector class generated for connecting to the data source.
+
+    Args:
+
+        domain (str): Domain
+        - The domain for your Salesforce instance. If your Salesforce URL is 'foo.my.salesforce.com', the domain would be 'foo'.
+
+        client_id (str): Client ID
+        - The client id for your OAuth2-enabled connected app. Also called 'consumer key'
+
+        client_secret (str): Client Secret
+        - The client secret for your OAuth2-enabled connected app. Also called 'consumer secret'
+
+    """
+
+    def __init__(self, domain=None, client_id=None, client_secret=None):
+
+        configuration = SalesforceDataSource.get_default_configuration()
+
+        # Apply the user provided configuration in the class constructor
+        args = locals()
+        for key in configuration.keys():
+            if args.get(key) is not None:
+                configuration[key]["value"] = args[key]
+
+        connector_configuration = DataSourceConfiguration(configuration)
+
+        super().__init__(data_provider=SalesforceDataSource(connector_configuration))
+
+        self.domain = domain
+        self.client_id = client_id
+        self.client_secret = client_secret
diff --git a/package/connectors/generated/servicenow.py b/package/connectors/generated/servicenow.py
new file mode 100644
index 000000000..7078e15d6
--- /dev/null
+++ b/package/connectors/generated/servicenow.py
@@ -0,0 +1,62 @@
+# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+# or more contributor license agreements. Licensed under the Elastic License 2.0;
+# you may not use this file except in compliance with the Elastic License 2.0.
+#
+#
+# This is generated code. Do not modify directly.
+# Run `make generate_connector_package` to update.
+
+from connectors.source import DataSourceConfiguration
+from connectors.sources.servicenow import ServiceNowDataSource
+from package.connectors.connector_base import ConnectorBase
+
+
+class ServiceNowConnector(ConnectorBase):
+    """
+    ServiceNowConnector class generated for connecting to the data source.
+
+    Args:
+
+        url (str): Service URL
+
+        username (str): Username
+
+        password (str): Password
+
+        services (list): Comma-separated list of services
+        - List of services is ignored when Advanced Sync Rules are used.
+
+        retry_count (int): Retries per request
+
+        concurrent_downloads (int): Maximum concurrent downloads
+
+    """
+
+    def __init__(
+        self,
+        url=None,
+        username=None,
+        password=None,
+        services="*",
+        retry_count=3,
+        concurrent_downloads=10,
+    ):
+
+        configuration = ServiceNowDataSource.get_default_configuration()
+
+        # Apply the user provided configuration in the class constructor
+        args = locals()
+        for key in configuration.keys():
+            if args.get(key) is not None:
+                configuration[key]["value"] = args[key]
+
+        connector_configuration = DataSourceConfiguration(configuration)
+
+        super().__init__(data_provider=ServiceNowDataSource(connector_configuration))
+
+        self.url = url
+        self.username = username
+        self.password = password
+        self.services = services
+        self.retry_count = retry_count
+        self.concurrent_downloads = concurrent_downloads
diff --git a/package/generated/sharepoint_online.py b/package/connectors/generated/sharepoint_online.py
similarity index 68%
rename from package/generated/sharepoint_online.py
rename to package/connectors/generated/sharepoint_online.py
index 2f703c4c1..fce44b209 100644
--- a/package/generated/sharepoint_online.py
+++ b/package/connectors/generated/sharepoint_online.py
@@ -1,9 +1,19 @@
-from application.base import BaseDataSource
+# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+# or more contributor license agreements. Licensed under the Elastic License 2.0;
+# you may not use this file except in compliance with the Elastic License 2.0.
+#
+#
+# This is generated code. Do not modify directly.
+# Run `make generate_connector_package` to update.
+from connectors.source import DataSourceConfiguration
+from connectors.sources.sharepoint_online import SharepointOnlineDataSource
+from package.connectors.connector_base import ConnectorBase
 
 
-class SharepointOnlineDataSource(SharepointOnlineDataSource):
+
+class SharepointOnlineConnector(ConnectorBase):
     """
-    SharepointOnlineDataSource class generated for connecting to the data source.
+    SharepointOnlineConnector class generated for connecting to the data source.
 
     Args:
 
@@ -24,12 +34,6 @@ class SharepointOnlineDataSource(SharepointOnlineDataSource):
         fetch_subsites (bool): Fetch sub-sites of configured sites?
         - Whether subsites of the configured site(s) should be automatically fetched.
 
-        use_text_extraction_service (bool): Use text extraction service
-        - Requires a separate deployment of the Elastic Text Extraction Service. Requires that pipeline settings disable text extraction.
-
-        use_document_level_security (bool): Enable document level security
-        - Document level security ensures identities and permissions set in Sharepoint Online are maintained in Elasticsearch. This enables you to restrict and personalize read-access users and groups have to documents in this index. Access control syncs ensure this metadata is kept up to date in your Elasticsearch documents.
-
         fetch_drive_item_permissions (bool): Fetch drive item permissions
         - Enable this option to fetch drive item specific permissions. This setting can increase sync time.
@@ -53,27 +57,25 @@ def __init__(
         site_collections="*",
         enumerate_all_sites=True,
         fetch_subsites=True,
-        use_text_extraction_service=False,
-        use_document_level_security=False,
         fetch_drive_item_permissions=True,
         fetch_unique_page_permissions=True,
         fetch_unique_list_permissions=True,
         fetch_unique_list_item_permissions=True,
     ):
-        configuration = self.get_default_configuration()
+
+        configuration = SharepointOnlineDataSource.get_default_configuration()
 
         # Apply the user provided configuration in the class constructor
         args = locals()
         for key in configuration.keys():
-            if args[key] is not None:
+            if args.get(key) is not None:
                 configuration[key]["value"] = args[key]
 
-        # Check if all fields marked as 'required' in config are present with values, if not raise an exception
-        for key, value in configuration.items():
-            if value["value"] is None and value.get("required", True):
-                raise ValueError(f"Missing required configuration field: {key}")
+        connector_configuration = DataSourceConfiguration(configuration)
 
-        super().__init__(configuration)
+        super().__init__(
+            data_provider=SharepointOnlineDataSource(connector_configuration)
+        )
 
         self.tenant_id = tenant_id
         self.tenant_name = tenant_name
@@ -82,8 +84,6 @@ def __init__(
         self.site_collections = site_collections
         self.enumerate_all_sites = enumerate_all_sites
         self.fetch_subsites = fetch_subsites
-        self.use_text_extraction_service = use_text_extraction_service
-        self.use_document_level_security = use_document_level_security
         self.fetch_drive_item_permissions = fetch_drive_item_permissions
         self.fetch_unique_page_permissions = fetch_unique_page_permissions
         self.fetch_unique_list_permissions = fetch_unique_list_permissions
diff --git a/package/generated/sharepoint_server.py b/package/connectors/generated/sharepoint_server.py
similarity index 55%
rename from package/generated/sharepoint_server.py
rename to package/connectors/generated/sharepoint_server.py
index b89b83a37..8f2f41ac1 100644
--- a/package/generated/sharepoint_server.py
+++ b/package/connectors/generated/sharepoint_server.py
@@ -1,9 +1,19 @@
-from application.base import BaseDataSource
+# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+# or more contributor license agreements. Licensed under the Elastic License 2.0;
+# you may not use this file except in compliance with the Elastic License 2.0.
+#
+#
+# This is generated code. Do not modify directly.
+# Run `make generate_connector_package` to update.
+from connectors.source import DataSourceConfiguration
+from connectors.sources.sharepoint_server import SharepointServerDataSource
+from package.connectors.connector_base import ConnectorBase
 
 
-class SharepointServerDataSource(SharepointServerDataSource):
+
+class SharepointServerConnector(ConnectorBase):
     """
-    SharepointServerDataSource class generated for connecting to the data source.
+    SharepointServerConnector class generated for connecting to the data source.
 
     Args:
 
@@ -21,12 +31,6 @@ class SharepointServerDataSource(SharepointServerDataSource):
 
         retry_count (int): Retries per request
 
-        use_text_extraction_service (bool): Use text extraction service
-        - Requires a separate deployment of the Elastic Text Extraction Service. Requires that pipeline settings disable text extraction.
-
-        use_document_level_security (bool): Enable document level security
-        - Document level security ensures identities and permissions set in your SharePoint Server are mirrored in Elasticsearch. This enables you to restrict and personalize read-access users and groups have to documents in this index. Access control syncs ensure this metadata is kept up to date in your Elasticsearch documents.
-
         fetch_unique_list_permissions (bool): Fetch unique list permissions
         - Enable this option to fetch unique list permissions. This setting can increase sync time. If this setting is disabled a list will inherit permissions from its parent site.
@@ -43,26 +47,24 @@ def __init__(
         site_collections=None,
         ssl_enabled=False,
         ssl_ca=None,
-        retry_count=None,
-        use_text_extraction_service=False,
-        use_document_level_security=False,
+        retry_count=3,
         fetch_unique_list_permissions=True,
         fetch_unique_list_item_permissions=True,
     ):
-        configuration = self.get_default_configuration()
+
+        configuration = SharepointServerDataSource.get_default_configuration()
 
         # Apply the user provided configuration in the class constructor
         args = locals()
         for key in configuration.keys():
-            if args[key] is not None:
+            if args.get(key) is not None:
                 configuration[key]["value"] = args[key]
 
-        # Check if all fields marked as 'required' in config are present with values, if not raise an exception
-        for key, value in configuration.items():
-            if value["value"] is None and value.get("required", True):
-                raise ValueError(f"Missing required configuration field: {key}")
+        connector_configuration = DataSourceConfiguration(configuration)
 
-        super().__init__(configuration)
+        super().__init__(
+            data_provider=SharepointServerDataSource(connector_configuration)
+        )
 
         self.username = username
         self.password = password
@@ -71,7 +73,5 @@ def __init__(
         self.ssl_enabled = ssl_enabled
         self.ssl_ca = ssl_ca
         self.retry_count = retry_count
-        self.use_text_extraction_service = use_text_extraction_service
-        self.use_document_level_security = use_document_level_security
         self.fetch_unique_list_permissions = fetch_unique_list_permissions
         self.fetch_unique_list_item_permissions = fetch_unique_list_item_permissions
diff --git a/package/generated/slack.py b/package/connectors/generated/slack.py
similarity index 60%
rename from package/generated/slack.py
rename to package/connectors/generated/slack.py
index 1bd6a2dfd..8e7d4fec1 100644
--- a/package/generated/slack.py
+++ b/package/connectors/generated/slack.py
@@ -1,9 +1,19 @@
-from application.base import BaseDataSource
+# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+# or more contributor license agreements. Licensed under the Elastic License 2.0;
+# you may not use this file except in compliance with the Elastic License 2.0.
+#
+#
+# This is generated code. Do not modify directly.
+# Run `make generate_connector_package` to update.
+from connectors.source import DataSourceConfiguration
+from connectors.sources.slack import SlackDataSource
+from package.connectors.connector_base import ConnectorBase
 
 
-class SlackDataSource(SlackDataSource):
+
+class SlackConnector(ConnectorBase):
     """
-    SlackDataSource class generated for connecting to the data source.
+    SlackConnector class generated for connecting to the data source.
 
     Args:
 
@@ -28,20 +38,18 @@ def __init__(
         auto_join_channels=False,
         sync_users=True,
     ):
-        configuration = self.get_default_configuration()
+
+        configuration = SlackDataSource.get_default_configuration()
 
         # Apply the user provided configuration in the class constructor
         args = locals()
         for key in configuration.keys():
-            if args[key] is not None:
+            if args.get(key) is not None:
                 configuration[key]["value"] = args[key]
 
-        # Check if all fields marked as 'required' in config are present with values, if not raise an exception
-        for key, value in configuration.items():
-            if value["value"] is None and value.get("required", True):
-                raise ValueError(f"Missing required configuration field: {key}")
+        connector_configuration = DataSourceConfiguration(configuration)
 
-        super().__init__(configuration)
+        super().__init__(data_provider=SlackDataSource(connector_configuration))
 
         self.token = token
         self.fetch_last_n_days = fetch_last_n_days
diff --git a/package/generated/zoom.py b/package/connectors/generated/zoom.py
similarity index 54%
rename from package/generated/zoom.py
rename to package/connectors/generated/zoom.py
index 0817c1939..69700839f 100644
--- a/package/generated/zoom.py
+++ b/package/connectors/generated/zoom.py
@@ -1,9 +1,19 @@
-from application.base import BaseDataSource
+# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+# or more contributor license agreements. Licensed under the Elastic License 2.0;
+# you may not use this file except in compliance with the Elastic License 2.0.
+#
+#
+# This is generated code. Do not modify directly.
+# Run `make generate_connector_package` to update.
+from connectors.source import DataSourceConfiguration
+from connectors.sources.zoom import ZoomDataSource
+from package.connectors.connector_base import ConnectorBase
 
 
-class ZoomDataSource(ZoomDataSource):
+
+class ZoomConnector(ConnectorBase):
     """
-    ZoomDataSource class generated for connecting to the data source.
+    ZoomConnector class generated for connecting to the data source.
 
     Args:
 
@@ -19,9 +29,6 @@ class ZoomDataSource(ZoomDataSource):
         recording_age (int): Recording Age Limit (Months)
         - How far back in time to request recordings from zoom. Recordings older than this will not be indexed.
 
-        use_text_extraction_service (bool): Use text extraction service
-        - Requires a separate deployment of the Elastic Text Extraction Service. Requires that pipeline settings disable text extraction.
-
     """
 
     def __init__(
@@ -31,26 +38,22 @@ def __init__(
         client_secret=None,
         fetch_past_meeting_details=False,
         recording_age=None,
-        use_text_extraction_service=False,
    ):
-        configuration = self.get_default_configuration()
+
+        configuration = ZoomDataSource.get_default_configuration()
 
         # Apply the user provided configuration in the class constructor
         args = locals()
         for key in configuration.keys():
-            if args[key] is not None:
+            if args.get(key) is not None:
                 configuration[key]["value"] = args[key]
 
-        # Check if all fields marked as 'required' in config are present with values, if not raise an exception
-        for key, value in configuration.items():
-            if value["value"] is None and value.get("required", True):
-                raise ValueError(f"Missing required configuration field: {key}")
+        connector_configuration = DataSourceConfiguration(configuration)
 
-        super().__init__(configuration)
+        super().__init__(data_provider=ZoomDataSource(connector_configuration))
 
         self.account_id = account_id
         self.client_id = client_id
         self.client_secret = client_secret
         self.fetch_past_meeting_details = fetch_past_meeting_details
         self.recording_age = recording_age
-        self.use_text_extraction_service = use_text_extraction_service
diff --git a/package/generated/azure_blob_storage.py b/package/generated/azure_blob_storage.py
deleted file mode 100644
index 81adb6c39..000000000
--- a/package/generated/azure_blob_storage.py
+++ /dev/null
@@ -1,58 +0,0 @@
-from application.base import BaseDataSource
-
-
-class AzureBlobStorageDataSource(AzureBlobStorageDataSource):
-    """
-    AzureBlobStorageDataSource class generated for connecting to the data source.
-
-    Args:
-
-        account_name (str): Azure Blob Storage account name
-
-        account_key (str): Azure Blob Storage account key
-
-        blob_endpoint (str): Azure Blob Storage blob endpoint
-
-        containers (list): Azure Blob Storage containers
-
-        retry_count (int): Retries per request
-
-        concurrent_downloads (int): Maximum concurrent downloads
-
-        use_text_extraction_service (bool): Use text extraction service
-        - Requires a separate deployment of the Elastic Text Extraction Service. Requires that pipeline settings disable text extraction.
-
-    """
-
-    def __init__(
-        self,
-        account_name=None,
-        account_key=None,
-        blob_endpoint=None,
-        containers=None,
-        retry_count=None,
-        concurrent_downloads=None,
-        use_text_extraction_service=False,
-    ):
-        configuration = self.get_default_configuration()
-
-        # Apply the user provided configuration in the class constructor
-        args = locals()
-        for key in configuration.keys():
-            if args[key] is not None:
-                configuration[key]["value"] = args[key]
-
-        # Check if all fields marked as 'required' in config are present with values, if not raise an exception
-        for key, value in configuration.items():
-            if value["value"] is None and value.get("required", True):
-                raise ValueError(f"Missing required configuration field: {key}")
-
-        super().__init__(configuration)
-
-        self.account_name = account_name
-        self.account_key = account_key
-        self.blob_endpoint = blob_endpoint
-        self.containers = containers
-        self.retry_count = retry_count
-        self.concurrent_downloads = concurrent_downloads
-        self.use_text_extraction_service = use_text_extraction_service
diff --git a/package/generated/dir.py b/package/generated/dir.py
deleted file mode 100644
index b1ca6bff3..000000000
--- a/package/generated/dir.py
+++ /dev/null
@@ -1,35 +0,0 @@
-from application.base import BaseDataSource
-
-
-class DirectoryDataSource(DirectoryDataSource):
-    """
-    DirectoryDataSource class generated for connecting to the data source.
-
-    Args:
-
-        directory (str): Directory path
-
-        pattern (str): File glob-like pattern
-
-    """
-
-    def __init__(
-        self, directory="/Users/jedr/connectors/connectors/sources", pattern="**/*.*"
-    ):
-        configuration = self.get_default_configuration()
-
-        # Apply the user provided configuration in the class constructor
-        args = locals()
-        for key in configuration.keys():
-            if args[key] is not None:
-                configuration[key]["value"] = args[key]
-
-        # Check if all fields marked as 'required' in config are present with values, if not raise an exception
-        for key, value in configuration.items():
-            if value["value"] is None and value.get("required", True):
-                raise ValueError(f"Missing required configuration field: {key}")
-
-        super().__init__(configuration)
-
-        self.directory = directory
-        self.pattern = pattern
diff --git a/package/generated/dropbox.py b/package/generated/dropbox.py
deleted file mode 100644
index 49922a2a6..000000000
--- a/package/generated/dropbox.py
+++ /dev/null
@@ -1,69 +0,0 @@
-from application.base import BaseDataSource
-
-
-class DropboxDataSource(DropboxDataSource):
-    """
-    DropboxDataSource class generated for connecting to the data source.
-
-    Args:
-
-        path (str): Path to fetch files/folders
-        - Path is ignored when Advanced Sync Rules are used.
-
-        app_key (str): App Key
-
-        app_secret (str): App secret
-
-        refresh_token (str): Refresh token
-
-        retry_count (int): Retries per request
-
-        concurrent_downloads (int): Maximum concurrent downloads
-
-        use_text_extraction_service (bool): Use text extraction service
-        - Requires a separate deployment of the Elastic Text Extraction Service. Requires that pipeline settings disable text extraction.
-
-        use_document_level_security (bool): Enable document level security
-        - Document level security ensures identities and permissions set in Dropbox are maintained in Elasticsearch. This enables you to restrict and personalize read-access users and groups have to documents in this index. Access control syncs ensure this metadata is kept up to date in your Elasticsearch documents.
-
-        include_inherited_users_and_groups (bool): Include groups and inherited users
-        - Include groups and inherited users when indexing permissions. Enabling this configurable field will cause a significant performance degradation.
-
-    """
-
-    def __init__(
-        self,
-        path=None,
-        app_key=None,
-        app_secret=None,
-        refresh_token=None,
-        retry_count=None,
-        concurrent_downloads=None,
-        use_text_extraction_service=False,
-        use_document_level_security=False,
-        include_inherited_users_and_groups=False,
-    ):
-        configuration = self.get_default_configuration()
-
-        # Apply the user provided configuration in the class constructor
-        args = locals()
-        for key in configuration.keys():
-            if args[key] is not None:
-                configuration[key]["value"] = args[key]
-
-        # Check if all fields marked as 'required' in config are present with values, if not raise an exception
-        for key, value in configuration.items():
-            if value["value"] is None and value.get("required", True):
-                raise ValueError(f"Missing required configuration field: {key}")
-
-        super().__init__(configuration)
-
-        self.path = path
-        self.app_key = app_key
-        self.app_secret = app_secret
-        self.refresh_token = refresh_token
-        self.retry_count = retry_count
-        self.concurrent_downloads = concurrent_downloads
-        self.use_text_extraction_service = use_text_extraction_service
-        self.use_document_level_security = use_document_level_security
-        self.include_inherited_users_and_groups = include_inherited_users_and_groups
diff --git a/package/generated/gmail.py b/package/generated/gmail.py
deleted file mode 100644
index 52429c26f..000000000
--- a/package/generated/gmail.py
+++ /dev/null
@@ -1,53 +0,0 @@
-from application.base import BaseDataSource
-
-
-class GMailDataSource(GMailDataSource):
-    """
-    GMailDataSource class generated for connecting to the data source.
-
-    Args:
-
-        service_account_credentials (str): GMail service account JSON
-
-        subject (str): Google Workspace admin email
-        - Admin account email address
-
-        customer_id (str): Google customer id
-        - Google admin console -> Account -> Settings -> Customer Id
-
-        include_spam_and_trash (bool): Include spam and trash emails
-        - Will include spam and trash emails, when set to true.
-
-        use_document_level_security (bool): Enable document level security
-        - Document level security ensures identities and permissions set in GMail are maintained in Elasticsearch. This enables you to restrict and personalize read-access users have to documents in this index. Access control syncs ensure this metadata is kept up to date in your Elasticsearch documents.
- - """ - - def __init__( - self, - service_account_credentials=None, - subject=None, - customer_id=None, - include_spam_and_trash=False, - use_document_level_security=True, - ): - configuration = self.get_default_configuration() - - # Apply the user provided configuration in the class constructor - args = locals() - for key in configuration.keys(): - if args[key] is not None: - configuration[key]["value"] = args[key] - - # Check if all fields marked as 'required' in config are present with values, if not raise an exception - for key, value in configuration.items(): - if value["value"] is None and value.get("required", True): - raise ValueError(f"Missing required configuration field: {key}") - - super().__init__(configuration) - - self.service_account_credentials = service_account_credentials - self.subject = subject - self.customer_id = customer_id - self.include_spam_and_trash = include_spam_and_trash - self.use_document_level_security = use_document_level_security diff --git a/package/generated/google_cloud_storage.py b/package/generated/google_cloud_storage.py deleted file mode 100644 index aa2934cae..000000000 --- a/package/generated/google_cloud_storage.py +++ /dev/null @@ -1,42 +0,0 @@ -from application.base import BaseDataSource - - -class GoogleCloudStorageDataSource(GoogleCloudStorageDataSource): - """ - GoogleCloudStorageDataSource class generated for connecting to the data source. - - Args: - - buckets (list): Google Cloud Storage buckets - - service_account_credentials (str): Google Cloud service account JSON - - use_text_extraction_service (bool): Use text extraction service - - Requires a separate deployment of the Elastic Text Extraction Service. Requires that pipeline settings disable text extraction. - - """ - - def __init__( - self, - buckets=None, - service_account_credentials=None, - use_text_extraction_service=False, - ): - configuration = self.get_default_configuration() - - # Apply the user provided configuration in the class constructor - args = locals() - for key in configuration.keys(): - if args[key] is not None: - configuration[key]["value"] = args[key] - - # Check if all fields marked as 'required' in config are present with values, if not raise an exception - for key, value in configuration.items(): - if value["value"] is None and value.get("required", True): - raise ValueError(f"Missing required configuration field: {key}") - - super().__init__(configuration) - - self.buckets = buckets - self.service_account_credentials = service_account_credentials - self.use_text_extraction_service = use_text_extraction_service diff --git a/package/generated/googledrivedatasource.py b/package/generated/googledrivedatasource.py deleted file mode 100644 index 2177e7121..000000000 --- a/package/generated/googledrivedatasource.py +++ /dev/null @@ -1,73 +0,0 @@ -from application.base import BaseDataSource - - -class GoogleDriveDataSource(GoogleDriveDataSource): - """ - GoogleDriveDataSource class generated for connecting to the data source. - - Args: - - service_account_credentials (): Google Drive service account JSON - - This connectors authenticates as a service account to synchronize content from Google Drive. - - use_domain_wide_delegation_for_sync (): Use domain-wide delegation for data sync - - Enable domain-wide delegation to automatically sync content from all shared and personal drives in the Google workspace. This eliminates the need to manually share Google Drive data with your service account, though it may increase sync time. 
If disabled, only items and folders manually shared with the service account will be synced. Please refer to the connector documentation to ensure domain-wide delegation is correctly configured and has the appropriate scopes. - - google_workspace_admin_email_for_data_sync (): Google Workspace admin email - - Provide the admin email to be used with domain-wide delegation for data sync. This email enables the connector to utilize the Admin Directory API for listing organization users. Please refer to the connector documentation to ensure domain-wide delegation is correctly configured and has the appropriate scopes. - - google_workspace_email_for_shared_drives_sync (): Google Workspace email for syncing shared drives - - Provide the Google Workspace user email for discovery and syncing of shared drives. Only the shared drives this user has access to will be synced. - - use_document_level_security (): Enable document level security - - Document level security ensures identities and permissions set in Google Drive are maintained in Elasticsearch. This enables you to restrict and personalize read-access users and groups have to documents in this index. Access control syncs ensure this metadata is kept up to date in your Elasticsearch documents. - - google_workspace_admin_email (): Google Workspace admin email - - In order to use Document Level Security you need to enable Google Workspace domain-wide delegation of authority for your service account. A service account with delegated authority can impersonate admin user with sufficient permissions to fetch all users and their corresponding permissions. Please refer to the connector documentation to ensure domain-wide delegation is correctly configured and has the appropriate scopes. - - max_concurrency (): Maximum concurrent HTTP requests - - This setting determines the maximum number of concurrent HTTP requests sent to the Google API to fetch data. Increasing this value can improve data retrieval speed, but it may also place higher demands on system resources and network bandwidth. - - use_text_extraction_service (): Use text extraction service - - Requires a separate deployment of the Elastic Text Extraction Service. Requires that pipeline settings disable text extraction. 
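Taken together, the domain-wide delegation fields above would combine roughly like this; a hypothetical instantiation of the wrapper being removed here, with every value a placeholder:

    connector = GoogleDriveDataSource(
        service_account_credentials='{"type": "service_account"}',  # placeholder JSON
        use_domain_wide_delegation_for_sync=True,
        google_workspace_admin_email_for_data_sync="admin@example.com",  # placeholder
        max_concurrency=10,
    )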
- - """ - - def __init__( - self, - service_account_credentials=None, - use_domain_wide_delegation_for_sync=False, - google_workspace_admin_email_for_data_sync=None, - google_workspace_email_for_shared_drives_sync=None, - use_document_level_security=False, - google_workspace_admin_email=None, - max_concurrency=None, - use_text_extraction_service=False, - ): - configuration = self.get_default_configuration() - - # Apply the user provided configuration in the class constructor - args = locals() - for key in configuration.keys(): - if args[key] is not None: - configuration[key]["value"] = args[key] - - # Check if all fields marked as 'required' in config are present with values, if not raise an exception - for key, value in configuration.items(): - if value["value"] is None and value.get("required", True): - raise ValueError(f"Missing required configuration field: {key}") - - super().__init__(configuration) - - self.service_account_credentials = service_account_credentials - self.use_domain_wide_delegation_for_sync = use_domain_wide_delegation_for_sync - self.google_workspace_admin_email_for_data_sync = ( - google_workspace_admin_email_for_data_sync - ) - self.google_workspace_email_for_shared_drives_sync = ( - google_workspace_email_for_shared_drives_sync - ) - self.use_document_level_security = use_document_level_security - self.google_workspace_admin_email = google_workspace_admin_email - self.max_concurrency = max_concurrency - self.use_text_extraction_service = use_text_extraction_service diff --git a/package/generated/microsoft_teams.py b/package/generated/microsoft_teams.py deleted file mode 100644 index d735fa46d..000000000 --- a/package/generated/microsoft_teams.py +++ /dev/null @@ -1,49 +0,0 @@ -from application.base import BaseDataSource - - -class MicrosoftTeamsDataSource(MicrosoftTeamsDataSource): - """ - MicrosoftTeamsDataSource class generated for connecting to the data source. - - Args: - - tenant_id (str): Tenant ID - - client_id (str): Client ID - - secret_value (str): Secret value - - username (str): Username - - password (str): Password - - """ - - def __init__( - self, - tenant_id=None, - client_id=None, - secret_value=None, - username=None, - password=None, - ): - configuration = self.get_default_configuration() - - # Apply the user provided configuration in the class constructor - args = locals() - for key in configuration.keys(): - if args[key] is not None: - configuration[key]["value"] = args[key] - - # Check if all fields marked as 'required' in config are present with values, if not raise an exception - for key, value in configuration.items(): - if value["value"] is None and value.get("required", True): - raise ValueError(f"Missing required configuration field: {key}") - - super().__init__(configuration) - - self.tenant_id = tenant_id - self.client_id = client_id - self.secret_value = secret_value - self.username = username - self.password = password diff --git a/package/generated/network_drive.py b/package/generated/network_drive.py deleted file mode 100644 index c01a645ff..000000000 --- a/package/generated/network_drive.py +++ /dev/null @@ -1,67 +0,0 @@ -from application.base import BaseDataSource - - -class NASDataSource(NASDataSource): - """ - NASDataSource class generated for connecting to the data source. 
- - Args: - - username (str): Username - - password (str): Password - - server_ip (str): SMB IP - - server_port (int): SMB port - - drive_path (str): SMB path - - use_document_level_security (bool): Enable document level security - - Document level security ensures identities and permissions set in your network drive are mirrored in Elasticsearch. This enables you to restrict and personalize read-access users and groups have to documents in this index. Access control syncs ensure this metadata is kept up to date in your Elasticsearch documents. - - drive_type (str): Drive type - - identity_mappings (str): Path of CSV file containing users and groups SID (For Linux Network Drive) - - use_text_extraction_service (bool): Use text extraction service - - Requires a separate deployment of the Elastic Text Extraction Service. Requires that pipeline settings disable text extraction. - - """ - - def __init__( - self, - username=None, - password=None, - server_ip=None, - server_port=None, - drive_path=None, - use_document_level_security=False, - drive_type="windows", - identity_mappings=None, - use_text_extraction_service=False, - ): - configuration = self.get_default_configuration() - - # Apply the user provided configuration in the class constructor - args = locals() - for key in configuration.keys(): - if args[key] is not None: - configuration[key]["value"] = args[key] - - # Check if all fields marked as 'required' in config are present with values, if not raise an exception - for key, value in configuration.items(): - if value["value"] is None and value.get("required", True): - raise ValueError(f"Missing required configuration field: {key}") - - super().__init__(configuration) - - self.username = username - self.password = password - self.server_ip = server_ip - self.server_port = server_port - self.drive_path = drive_path - self.use_document_level_security = use_document_level_security - self.drive_type = drive_type - self.identity_mappings = identity_mappings - self.use_text_extraction_service = use_text_extraction_service diff --git a/package/generated/onedrive.py b/package/generated/onedrive.py deleted file mode 100644 index 9a2b889e6..000000000 --- a/package/generated/onedrive.py +++ /dev/null @@ -1,59 +0,0 @@ -from application.base import BaseDataSource - - -class OneDriveDataSource(OneDriveDataSource): - """ - OneDriveDataSource class generated for connecting to the data source. - - Args: - - client_id (str): Azure application Client ID - - client_secret (str): Azure application Client Secret - - tenant_id (str): Azure application Tenant ID - - retry_count (int): Maximum retries per request - - concurrent_downloads (int): Maximum concurrent downloads - - use_document_level_security (bool): Enable document level security - - Document level security ensures identities and permissions set in OneDrive are maintained in Elasticsearch. This enables you to restrict and personalize read-access users and groups have to documents in this index. Access control syncs ensure this metadata is kept up to date in your Elasticsearch documents. - - use_text_extraction_service (bool): Use text extraction service - - Requires a separate deployment of the Elastic Text Extraction Service. Requires that pipeline settings disable text extraction. 
- - """ - - def __init__( - self, - client_id=None, - client_secret=None, - tenant_id=None, - retry_count=None, - concurrent_downloads=None, - use_document_level_security=False, - use_text_extraction_service=False, - ): - configuration = self.get_default_configuration() - - # Apply the user provided configuration in the class constructor - args = locals() - for key in configuration.keys(): - if args[key] is not None: - configuration[key]["value"] = args[key] - - # Check if all fields marked as 'required' in config are present with values, if not raise an exception - for key, value in configuration.items(): - if value["value"] is None and value.get("required", True): - raise ValueError(f"Missing required configuration field: {key}") - - super().__init__(configuration) - - self.client_id = client_id - self.client_secret = client_secret - self.tenant_id = tenant_id - self.retry_count = retry_count - self.concurrent_downloads = concurrent_downloads - self.use_document_level_security = use_document_level_security - self.use_text_extraction_service = use_text_extraction_service diff --git a/package/generated/salesforce.py b/package/generated/salesforce.py deleted file mode 100644 index 79959004f..000000000 --- a/package/generated/salesforce.py +++ /dev/null @@ -1,54 +0,0 @@ -from application.base import BaseDataSource - - -class SalesforceDataSource(SalesforceDataSource): - """ - SalesforceDataSource class generated for connecting to the data source. - - Args: - - domain (str): Domain - - The domain for your Salesforce instance. If your Salesforce URL is 'foo.my.salesforce.com', the domain would be 'foo'. - - client_id (str): Client ID - - The client id for your OAuth2-enabled connected app. Also called 'consumer key' - - client_secret (str): Client Secret - - The client secret for your OAuth2-enabled connected app. Also called 'consumer secret' - - use_text_extraction_service (bool): Use text extraction service - - Requires a separate deployment of the Elastic Text Extraction Service. Requires that pipeline settings disable text extraction. - - use_document_level_security (bool): Enable document level security - - Document level security ensures identities and permissions set in Salesforce are maintained in Elasticsearch. This enables you to restrict and personalize read-access users and groups have to documents in this index. Access control syncs ensure this metadata is kept up to date in your Elasticsearch documents. 
- - """ - - def __init__( - self, - domain=None, - client_id=None, - client_secret=None, - use_text_extraction_service=False, - use_document_level_security=False, - ): - configuration = self.get_default_configuration() - - # Apply the user provided configuration in the class constructor - args = locals() - for key in configuration.keys(): - if args[key] is not None: - configuration[key]["value"] = args[key] - - # Check if all fields marked as 'required' in config are present with values, if not raise an exception - for key, value in configuration.items(): - if value["value"] is None and value.get("required", True): - raise ValueError(f"Missing required configuration field: {key}") - - super().__init__(configuration) - - self.domain = domain - self.client_id = client_id - self.client_secret = client_secret - self.use_text_extraction_service = use_text_extraction_service - self.use_document_level_security = use_document_level_security diff --git a/package/generated/servicenow.py b/package/generated/servicenow.py deleted file mode 100644 index eab5c81fe..000000000 --- a/package/generated/servicenow.py +++ /dev/null @@ -1,64 +0,0 @@ -from application.base import BaseDataSource - - -class ServiceNowDataSource(ServiceNowDataSource): - """ - ServiceNowDataSource class generated for connecting to the data source. - - Args: - - url (str): Service URL - - username (str): Username - - password (str): Password - - services (list): Comma-separated list of services - - List of services is ignored when Advanced Sync Rules are used. - - retry_count (int): Retries per request - - concurrent_downloads (int): Maximum concurrent downloads - - use_text_extraction_service (bool): Use text extraction service - - Requires a separate deployment of the Elastic Text Extraction Service. Requires that pipeline settings disable text extraction. - - use_document_level_security (bool): Enable document level security - - Document level security ensures identities and permissions set in ServiceNow are maintained in Elasticsearch. This enables you to restrict and personalize read-access users and groups have to documents in this index. Access control syncs ensure this metadata is kept up to date in your Elasticsearch documents. 
- - """ - - def __init__( - self, - url=None, - username=None, - password=None, - services="*", - retry_count=None, - concurrent_downloads=None, - use_text_extraction_service=False, - use_document_level_security=False, - ): - configuration = self.get_default_configuration() - - # Apply the user provided configuration in the class constructor - args = locals() - for key in configuration.keys(): - if args[key] is not None: - configuration[key]["value"] = args[key] - - # Check if all fields marked as 'required' in config are present with values, if not raise an exception - for key, value in configuration.items(): - if value["value"] is None and value.get("required", True): - raise ValueError(f"Missing required configuration field: {key}") - - super().__init__(configuration) - - self.url = url - self.username = username - self.password = password - self.services = services - self.retry_count = retry_count - self.concurrent_downloads = concurrent_downloads - self.use_text_extraction_service = use_text_extraction_service - self.use_document_level_security = use_document_level_security diff --git a/scripts/codegen/generate_connectors.py b/scripts/codegen/generate_connectors.py new file mode 100644 index 000000000..f3bf7b977 --- /dev/null +++ b/scripts/codegen/generate_connectors.py @@ -0,0 +1,77 @@ +# +# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +# or more contributor license agreements. Licensed under the Elastic License 2.0; +# you may not use this file except in compliance with the Elastic License 2.0. +# + +import os +import importlib +from jinja2 import Environment, FileSystemLoader +from black import format_file_in_place, FileMode, WriteBack +from pathlib import Path + +from connectors.config import _default_config + +# service-specific configuration keys that should not be exposed in the packaged class +CONFIG_KEYS_TO_SKIP = {"use_document_level_security", "use_text_extraction_service"} + + +def generate_wrapper_class_code( + template_env, data_source_class, data_source_module, class_name +): + config = data_source_class.get_default_configuration() + + # remove keys that should not be exposed in the packaged class + connector_config = { + key: value for key, value in config.items() if key not in CONFIG_KEYS_TO_SKIP + } + + constructor_args = [ + (key, value.get("value", value.get("default_value", None))) + for key, value in connector_config.items() + ] + + template = template_env.get_template("connector_template.jinja2") + class_code = template.render( + class_name=class_name.replace('DataSource', 'Connector'), + data_source_class=data_source_class.__name__, + data_source_module=data_source_module, + params=constructor_args, + config=connector_config, + ) + return class_code + + +def write_class_to_file(class_code, class_name, output_dir): + file_path = os.path.join(output_dir, f"{class_name.lower()}.py") + with open(file_path, "w") as file: + file.write(class_code) + format_file_in_place( + Path(file_path), fast=False, mode=FileMode(), write_back=WriteBack.YES + ) + + +def generate_and_write_wrapper_classes(sources, output_dir): + if not os.path.exists(output_dir): + os.makedirs(output_dir) + + env = Environment(loader=FileSystemLoader("scripts/codegen/templates")) + + for key, value in sources.items(): + module_name, class_name = value.split(":") + module = importlib.import_module(module_name) + data_source_class = getattr(module, class_name) + class_code = generate_wrapper_class_code( + template_env=env, + data_source_class=data_source_class, + 
data_source_module=module_name, + class_name=class_name, + ) + write_class_to_file(class_code, key, output_dir) + + +if __name__ == "__main__": + connectors_config = _default_config() + data_source_classes = connectors_config["sources"] + output_dir = os.path.join("package", "connectors", "generated") + generate_and_write_wrapper_classes(data_source_classes, output_dir) diff --git a/scripts/codegen/generate_connectors_init.py b/scripts/codegen/generate_connectors_init.py new file mode 100644 index 000000000..697d2a71b --- /dev/null +++ b/scripts/codegen/generate_connectors_init.py @@ -0,0 +1,29 @@ +import os +from jinja2 import Environment, FileSystemLoader +from black import format_file_in_place, FileMode, WriteBack, NothingChanged +from pathlib import Path + +def generate_init_file(output_dir, generated_dir, template_env): + init_file_path = os.path.join(output_dir, '__init__.py') + imports = [] + + for filename in os.listdir(generated_dir): + if filename.endswith('.py') and filename != '__init__.py': + module_name = filename[:-3] + class_name = ''.join(word.title() for word in module_name.split('_')) + 'Connector' + imports.append((module_name, class_name)) + + template = template_env.get_template("init_template.jinja2") + init_code = template.render(imports=imports) + + with open(init_file_path, 'w') as init_file: + init_file.write(init_code) + + format_file_in_place( Path(init_file_path), fast=False, mode=FileMode(), write_back=WriteBack.YES) + + +if __name__ == "__main__": + connectors_dir = os.path.join("package", "connectors") + generated_dir = os.path.join(connectors_dir, "generated") + env = Environment(loader=FileSystemLoader("scripts/codegen/templates")) + generate_init_file(connectors_dir, generated_dir, env) diff --git a/scripts/codegen/generate_wrappers.py b/scripts/codegen/generate_wrappers.py deleted file mode 100644 index efc338e82..000000000 --- a/scripts/codegen/generate_wrappers.py +++ /dev/null @@ -1,51 +0,0 @@ -import os -import importlib -from jinja2 import Environment, FileSystemLoader -from black import format_file_in_place, FileMode, WriteBack -from pathlib import Path - -from connectors.config import _default_config - - -def generate_wrapper_class_code(template_env, data_source_class, class_name): - config = data_source_class.get_default_configuration() - base_class_name = data_source_class.__name__ - - params = [(key, value.get("value", None)) for key, value in config.items()] - - template = template_env.get_template("datasource_wrapper.jinja2") - class_code = template.render( - class_name=class_name, - base_class_name=base_class_name, - params=params, - config=config, - ) - return class_code - - -def write_class_to_file(class_code, class_name, output_dir): - file_path = os.path.join(output_dir, f"{class_name.lower()}.py") - with open(file_path, "w") as file: - file.write(class_code) - format_file_in_place(Path(file_path), fast=False, mode=FileMode(), write_back=WriteBack.YES) - - -def generate_and_write_wrapper_classes(sources, output_dir): - if not os.path.exists(output_dir): - os.makedirs(output_dir) - - env = Environment(loader=FileSystemLoader("scripts/codegen/templates")) - - for key, value in sources.items(): - module_name, class_name = value.split(":") - module = importlib.import_module(module_name) - data_source_class = getattr(module, class_name) - class_code = generate_wrapper_class_code(env, data_source_class, class_name) - write_class_to_file(class_code, key, output_dir) - - -# Example usage -connectors_config = _default_config() 
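Both the old and the new generator resolve each entry of the `sources` mapping, a "module:ClassName" string, via importlib. A sketch of that resolution step; the module path below is an assumed example, the real mapping comes from connectors.config._default_config():

    import importlib

    sources = {"gmail": "connectors.sources.gmail:GMailDataSource"}  # assumed entry

    for key, value in sources.items():
        module_name, class_name = value.split(":")
        module = importlib.import_module(module_name)    # e.g. connectors.sources.gmail
        data_source_class = getattr(module, class_name)  # e.g. GMailDataSource
        # the packaged class is named by swapping the suffix
        print(key, class_name.replace("DataSource", "Connector"))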
-data_source_classes = connectors_config["sources"] -output_dir = os.path.join("package", "generated") -generate_and_write_wrapper_classes(data_source_classes, output_dir) diff --git a/scripts/codegen/templates/connector_template.jinja2 b/scripts/codegen/templates/connector_template.jinja2 new file mode 100644 index 000000000..494536efc --- /dev/null +++ b/scripts/codegen/templates/connector_template.jinja2 @@ -0,0 +1,38 @@ +# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +# or more contributor license agreements. Licensed under the Elastic License 2.0; +# you may not use this file except in compliance with the Elastic License 2.0. +# +# +# This is a generated code. Do not modify directly. +# Run `make generate_connector_package` to update. + +from connectors.source import DataSourceConfiguration +from {{ data_source_module }} import {{ data_source_class }} +from package.connectors.connector_base import ConnectorBase + +class {{ class_name }}(ConnectorBase): + """ + {{ class_name }} class generated for connecting to the data source. + + Args: + {% for param, value in params %} + {{ param }} ({{ config[param]['type'] }}): {{ config[param].label }}{% if config[param].tooltip %} + - {{ config[param].tooltip }}{% endif %} + {% endfor %} + """ + def __init__(self, {% for param, value in params %}{{ param }}={% if value is none %}None{% elif value is string %}'{{ value }}'{% else %}{{ value }}{% endif %}{% if not loop.last %}, {% endif %}{% endfor %}): + + configuration = {{ data_source_class }}.get_default_configuration() + + # Apply the user provided configuration in the class constructor + args = locals() + for key in configuration.keys(): + if args.get(key) is not None: + configuration[key]['value'] = args[key] + + connector_configuration = DataSourceConfiguration(configuration) + + super().__init__(data_provider={{ data_source_class }}(connector_configuration)) + + {% for key in config.keys() %}self.{{ key }} = {{ key }} + {% endfor %} diff --git a/scripts/codegen/templates/datasource_wrapper.jinja2 b/scripts/codegen/templates/datasource_wrapper.jinja2 deleted file mode 100644 index b63ff727a..000000000 --- a/scripts/codegen/templates/datasource_wrapper.jinja2 +++ /dev/null @@ -1,30 +0,0 @@ -from application.base import BaseDataSource - -class {{ class_name }}({{ base_class_name }}): - """ - {{ class_name }} class generated for connecting to the data source. 
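Both the new connector template above and the legacy wrapper template being removed here render constructor defaults inline, quoting strings and passing other values through. A toy reproduction of the new template's branch logic (not the template file itself):

    from jinja2 import Template

    sig = Template(
        "def __init__(self, "
        "{% for p, v in params %}{{ p }}="
        "{% if v is none %}None{% elif v is string %}'{{ v }}'{% else %}{{ v }}{% endif %}"
        "{% if not loop.last %}, {% endif %}{% endfor %}): ..."
    )
    print(sig.render(params=[("host", None), ("port", 9600), ("protocol", "TCP")]))
    # -> def __init__(self, host=None, port=9600, protocol='TCP'): ...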
- - Args: - {% for param, value in params %} - {{ param }} ({{ config[param]['type'] }}): {{ config[param].label }}{% if config[param].tooltip %} - - {{ config[param].tooltip }}{% endif %} - {% endfor %} - """ - def __init__(self, {% for param, value in params %}{{ param }}={% if value is none %}None{% elif value is boolean %}{{ value }}{% else %}'{{ value }}'{% endif %}{% if not loop.last %}, {% endif %}{% endfor %}): - configuration = self.get_default_configuration() - - # Apply the user provided configuration in the class constructor - args = locals() - for key in configuration.keys(): - if args[key] is not None: - configuration[key]['value'] = args[key] - - # Check if all fields marked as 'required' in config are present with values, if not raise an exception - for key, value in configuration.items(): - if value['value'] is None and value.get('required', True): - raise ValueError(f"Missing required configuration field: {key}") - - super().__init__(configuration) - - {% for key in config.keys() %}self.{{ key }} = {{ key }} - {% endfor %} diff --git a/scripts/codegen/templates/init_template.jinja2 b/scripts/codegen/templates/init_template.jinja2 new file mode 100644 index 000000000..ba58662c8 --- /dev/null +++ b/scripts/codegen/templates/init_template.jinja2 @@ -0,0 +1,11 @@ +# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +# or more contributor license agreements. Licensed under the Elastic License 2.0; +# you may not use this file except in compliance with the Elastic License 2.0. +# +# +# This is a generated code. Do not modify directly. +# Run `make generate_connector_package` to update. + +{% for module, class_name in imports %} +from .generated.{{ module }} import {{ class_name }} +{% endfor %} From 24c7064a3b77871f03a4a1d190ddfce5f0254ca6 Mon Sep 17 00:00:00 2001 From: Jedr Blaszyk Date: Wed, 10 Jul 2024 13:19:02 +0200 Subject: [PATCH 3/8] [WIP] work on packaging --- package/connectors/__init__.py | 66 ++++++++ package/connectors/connector_base.py | 155 ++++++++++++++---- .../generated/azure_blob_storage.py | 3 +- package/connectors/generated/box.py | 3 +- package/connectors/generated/confluence.py | 5 +- package/connectors/generated/dir.py | 9 +- package/connectors/generated/dropbox.py | 5 +- package/connectors/generated/github.py | 5 +- package/connectors/generated/gmail.py | 5 +- .../generated/google_cloud_storage.py | 5 +- package/connectors/generated/google_drive.py | 5 +- package/connectors/generated/graphql.py | 5 +- package/connectors/generated/jira.py | 5 +- .../connectors/generated/microsoft_teams.py | 3 +- package/connectors/generated/mongodb.py | 5 +- package/connectors/generated/mssql.py | 5 +- package/connectors/generated/mysql.py | 5 +- package/connectors/generated/network_drive.py | 3 +- package/connectors/generated/notion.py | 5 +- package/connectors/generated/onedrive.py | 5 +- package/connectors/generated/oracle.py | 5 +- package/connectors/generated/outlook.py | 5 +- package/connectors/generated/postgresql.py | 5 +- package/connectors/generated/redis.py | 5 +- package/connectors/generated/s3.py | 3 +- package/connectors/generated/salesforce.py | 6 +- package/connectors/generated/servicenow.py | 5 +- .../connectors/generated/sharepoint_online.py | 3 +- .../connectors/generated/sharepoint_server.py | 3 +- package/connectors/generated/slack.py | 5 +- package/connectors/generated/zoom.py | 5 +- package/setup.py | 44 +++++ requirements/package-dev.txt | 1 + scripts/codegen/generate_connectors_init.py | 28 ++-- 
.../templates/connector_template.jinja2 | 4 +- 35 files changed, 355 insertions(+), 79 deletions(-) create mode 100644 package/setup.py diff --git a/package/connectors/__init__.py b/package/connectors/__init__.py index e69de29bb..bfb84ca38 100644 --- a/package/connectors/__init__.py +++ b/package/connectors/__init__.py @@ -0,0 +1,66 @@ +# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +# or more contributor license agreements. Licensed under the Elastic License 2.0; +# you may not use this file except in compliance with the Elastic License 2.0. +# +# +# This is a generated code. Do not modify directly. +# Run `make generate_connector_package` to update. + + +from .generated.azure_blob_storage import AzureBlobStorageConnector + +from .generated.box import BoxConnector + +from .generated.confluence import ConfluenceConnector + +from .generated.dir import DirectoryConnector + +from .generated.dropbox import DropboxConnector + +from .generated.github import GitHubConnector + +from .generated.gmail import GMailConnector + +from .generated.google_cloud_storage import GoogleCloudStorageConnector + +from .generated.google_drive import GoogleDriveConnector + +from .generated.graphql import GraphQLConnector + +from .generated.jira import JiraConnector + +from .generated.microsoft_teams import MicrosoftTeamsConnector + +from .generated.mongodb import MongoConnector + +from .generated.mssql import MSSQLConnector + +from .generated.mysql import MySqlConnector + +from .generated.network_drive import NASConnector + +from .generated.notion import NotionConnector + +from .generated.onedrive import OneDriveConnector + +from .generated.oracle import OracleConnector + +from .generated.outlook import OutlookConnector + +from .generated.postgresql import PostgreSQLConnector + +from .generated.redis import RedisConnector + +from .generated.s3 import S3Connector + +from .generated.salesforce import SalesforceConnector + +from .generated.servicenow import ServiceNowConnector + +from .generated.sharepoint_online import SharepointOnlineConnector + +from .generated.sharepoint_server import SharepointServerConnector + +from .generated.slack import SlackConnector + +from .generated.zoom import ZoomConnector diff --git a/package/connectors/connector_base.py b/package/connectors/connector_base.py index ea320fa79..d63af6475 100644 --- a/package/connectors/connector_base.py +++ b/package/connectors/connector_base.py @@ -1,49 +1,136 @@ -# -# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -# or more contributor license agreements. Licensed under the Elastic License 2.0; -# you may not use this file except in compliance with the Elastic License 2.0. -# -import asyncio -from typing import AsyncIterator, Iterator +import base64 +import logging +from typing import AsyncIterator, Dict +from tika import parser -class ConnectorBase: - def __init__(self, data_provider): +from connectors.es.settings import TIMESTAMP_FIELD + + +def extract_content_with_tika(b64_content: str) -> str: + """ + Extracts text content from a base64-encoded binary content using Tika. + + Args: + b64_content (str): Base64 encoded content. + + Returns: + str: Extracted text content. 
+ """ + binary_data = base64.b64decode(b64_content) + + # Parse the binary data using Tika + parsed = parser.from_buffer(binary_data) + + # Extract text and metadata + text = parsed.get("content", "") - # Check if all fields marked as 'required' in config are present with values, if not raise an exception - for key, value in data_provider.configuration.items(): - if value.get("value") is None and value.get("required", True): - raise ValueError(f"Missing required configuration field: {key}") + return text + +class ConnectorBase: + def __init__(self, data_provider, logger=None, download_content=True): + """ + Initializes the ConnectorBase instance. + + Args: + data_provider: An instance of the data provider. + logger (logging.Logger, optional): Logger instance. Defaults to None. + download_content (bool, optional): Flag to determine if content should be downloaded. Defaults to True. + """ self.data_provider = data_provider + self.download_content = download_content + + if logger is None: + logger = logging.getLogger("elastic-connectors") + self.logger = logger + self.data_provider.set_logger(logger) def get_configuration(self): + """ + Gets the configuration from the data provider. + + Returns: + The configuration of the data provider. + """ return self.data_provider.configuration - def lazy_load(self) -> Iterator[dict]: - async_gen = self.alazy_load() - loop = asyncio.get_event_loop() + async def validate(self): + """ + Validates the data provider configuration and pings the data provider. + Raises: + Exception: If validation or ping fails. + """ try: - while True: - item = loop.run_until_complete(self._next_item(async_gen)) - if item is None: - break - yield item - except StopAsyncIteration: - return - - async def _next_item(self, async_gen): + await self.data_provider.validate_config() + await self.ping() + except Exception as e: + self.logger.error("Validation failed", exc_info=True) + raise e + + async def ping(self): + """ + Pings the data provider. + + Raises: + Exception: If ping fails. + """ try: - return await async_gen.__anext__() - except StopAsyncIteration: - return None + return await self.data_provider.ping() + except Exception as e: + self.logger.error("Ping failed", exc_info=True) + raise e + + async def async_get_docs(self) -> AsyncIterator[Dict]: + """ + Asynchronously retrieves documents from the data provider. + + Yields: + dict: A document from the data provider. - async def alazy_load( - self, - ) -> AsyncIterator[dict]: + Raises: + Exception: If an error occurs while extracting content. + """ async for doc, lazy_download in self.data_provider.get_docs(filtering=None): - # TODO: not all sources have timestamp field and support downloads - # data = await lazy_download(doit=True, timestamp=doc[TIMESTAMP_FIELD]) - # doc.update(data) + + doc["id"] = doc.pop("_id") + + if lazy_download is not None and self.download_content: + # content downloaded and represented in a binary format {'_attachment': } + try: + data = await lazy_download( + doit=True, timestamp=doc[TIMESTAMP_FIELD] + ) + # binary to string conversion + binary_data = data.get("_attachment", None) + + text = extract_content_with_tika(binary_data) + + doc.update({"body": text}) + except Exception as e: + print(f"Error extracting content: {e}") + yield doc + + async def close(self): + """ + Closes the data provider connection. + """ + await self.data_provider.close() + + async def __aenter__(self): + """ + Asynchronous context manager entry. Validates the configuration. 
+ + Returns: + ConnectorBase: The instance itself. + """ + await self.validate() + return self + + async def __aexit__(self, exc_type, exc_value, traceback): + """ + Asynchronous context manager exit. Closes the data provider connection. + """ + await self.close() diff --git a/package/connectors/generated/azure_blob_storage.py b/package/connectors/generated/azure_blob_storage.py index dae06913a..201fc272b 100644 --- a/package/connectors/generated/azure_blob_storage.py +++ b/package/connectors/generated/azure_blob_storage.py @@ -39,6 +39,7 @@ def __init__( containers=None, retry_count=3, concurrent_downloads=100, + **kwargs ): configuration = AzureBlobStorageDataSource.get_default_configuration() @@ -52,7 +53,7 @@ def __init__( connector_configuration = DataSourceConfiguration(configuration) super().__init__( - data_provider=AzureBlobStorageDataSource(connector_configuration) + data_provider=AzureBlobStorageDataSource(connector_configuration), **kwargs ) self.account_name = account_name diff --git a/package/connectors/generated/box.py b/package/connectors/generated/box.py index f3b420991..48a82aee4 100644 --- a/package/connectors/generated/box.py +++ b/package/connectors/generated/box.py @@ -39,6 +39,7 @@ def __init__( refresh_token=None, enterprise_id=None, concurrent_downloads=15, + **kwargs ): configuration = BoxDataSource.get_default_configuration() @@ -51,7 +52,7 @@ def __init__( connector_configuration = DataSourceConfiguration(configuration) - super().__init__(data_provider=BoxDataSource(connector_configuration)) + super().__init__(data_provider=BoxDataSource(connector_configuration), **kwargs) self.is_enterprise = is_enterprise self.client_id = client_id diff --git a/package/connectors/generated/confluence.py b/package/connectors/generated/confluence.py index 0810c24db..2ae5eca1a 100644 --- a/package/connectors/generated/confluence.py +++ b/package/connectors/generated/confluence.py @@ -65,6 +65,7 @@ def __init__( ssl_ca=None, retry_count=3, concurrent_downloads=50, + **kwargs ): configuration = ConfluenceDataSource.get_default_configuration() @@ -77,7 +78,9 @@ def __init__( connector_configuration = DataSourceConfiguration(configuration) - super().__init__(data_provider=ConfluenceDataSource(connector_configuration)) + super().__init__( + data_provider=ConfluenceDataSource(connector_configuration), **kwargs + ) self.data_source = data_source self.username = username diff --git a/package/connectors/generated/dir.py b/package/connectors/generated/dir.py index bffb760f8..1eaffa689 100644 --- a/package/connectors/generated/dir.py +++ b/package/connectors/generated/dir.py @@ -24,7 +24,10 @@ class DirectoryConnector(ConnectorBase): """ def __init__( - self, directory="/Users/jedr/connectors/connectors/sources", pattern="**/*.*" + self, + directory="/Users/jedr/connectors/connectors/sources", + pattern="**/*.*", + **kwargs ): configuration = DirectoryDataSource.get_default_configuration() @@ -37,7 +40,9 @@ def __init__( connector_configuration = DataSourceConfiguration(configuration) - super().__init__(data_provider=DirectoryDataSource(connector_configuration)) + super().__init__( + data_provider=DirectoryDataSource(connector_configuration), **kwargs + ) self.directory = directory self.pattern = pattern diff --git a/package/connectors/generated/dropbox.py b/package/connectors/generated/dropbox.py index 48f38d2a4..04eaf7329 100644 --- a/package/connectors/generated/dropbox.py +++ b/package/connectors/generated/dropbox.py @@ -44,6 +44,7 @@ def __init__( retry_count=3, 
concurrent_downloads=100, include_inherited_users_and_groups=False, + **kwargs ): configuration = DropboxDataSource.get_default_configuration() @@ -56,7 +57,9 @@ def __init__( connector_configuration = DataSourceConfiguration(configuration) - super().__init__(data_provider=DropboxDataSource(connector_configuration)) + super().__init__( + data_provider=DropboxDataSource(connector_configuration), **kwargs + ) self.path = path self.app_key = app_key diff --git a/package/connectors/generated/github.py b/package/connectors/generated/github.py index a8dc61315..2b2cb40e0 100644 --- a/package/connectors/generated/github.py +++ b/package/connectors/generated/github.py @@ -59,6 +59,7 @@ def __init__( ssl_enabled=False, ssl_ca=None, retry_count=3, + **kwargs ): configuration = GitHubDataSource.get_default_configuration() @@ -71,7 +72,9 @@ def __init__( connector_configuration = DataSourceConfiguration(configuration) - super().__init__(data_provider=GitHubDataSource(connector_configuration)) + super().__init__( + data_provider=GitHubDataSource(connector_configuration), **kwargs + ) self.data_source = data_source self.host = host diff --git a/package/connectors/generated/gmail.py b/package/connectors/generated/gmail.py index 384f669f3..3a25159e8 100644 --- a/package/connectors/generated/gmail.py +++ b/package/connectors/generated/gmail.py @@ -36,6 +36,7 @@ def __init__( subject=None, customer_id=None, include_spam_and_trash=False, + **kwargs ): configuration = GMailDataSource.get_default_configuration() @@ -48,7 +49,9 @@ def __init__( connector_configuration = DataSourceConfiguration(configuration) - super().__init__(data_provider=GMailDataSource(connector_configuration)) + super().__init__( + data_provider=GMailDataSource(connector_configuration), **kwargs + ) self.service_account_credentials = service_account_credentials self.subject = subject diff --git a/package/connectors/generated/google_cloud_storage.py b/package/connectors/generated/google_cloud_storage.py index b3014d3b3..609c3642b 100644 --- a/package/connectors/generated/google_cloud_storage.py +++ b/package/connectors/generated/google_cloud_storage.py @@ -23,7 +23,7 @@ class GoogleCloudStorageConnector(ConnectorBase): """ - def __init__(self, buckets=None, service_account_credentials=None): + def __init__(self, buckets=None, service_account_credentials=None, **kwargs): configuration = GoogleCloudStorageDataSource.get_default_configuration() @@ -36,7 +36,8 @@ def __init__(self, buckets=None, service_account_credentials=None): connector_configuration = DataSourceConfiguration(configuration) super().__init__( - data_provider=GoogleCloudStorageDataSource(connector_configuration) + data_provider=GoogleCloudStorageDataSource(connector_configuration), + **kwargs ) self.buckets = buckets diff --git a/package/connectors/generated/google_drive.py b/package/connectors/generated/google_drive.py index 72cca2689..96fcdd2df 100644 --- a/package/connectors/generated/google_drive.py +++ b/package/connectors/generated/google_drive.py @@ -45,6 +45,7 @@ def __init__( google_workspace_email_for_shared_drives_sync=None, google_workspace_admin_email=None, max_concurrency=25, + **kwargs ): configuration = GoogleDriveDataSource.get_default_configuration() @@ -57,7 +58,9 @@ def __init__( connector_configuration = DataSourceConfiguration(configuration) - super().__init__(data_provider=GoogleDriveDataSource(connector_configuration)) + super().__init__( + data_provider=GoogleDriveDataSource(connector_configuration), **kwargs + ) self.service_account_credentials = 
service_account_credentials self.use_domain_wide_delegation_for_sync = use_domain_wide_delegation_for_sync diff --git a/package/connectors/generated/graphql.py b/package/connectors/generated/graphql.py index f346e60e1..5679438d1 100644 --- a/package/connectors/generated/graphql.py +++ b/package/connectors/generated/graphql.py @@ -63,6 +63,7 @@ def __init__( pagination_model="no_pagination", pagination_key=None, connection_timeout=300, + **kwargs ): configuration = GraphQLDataSource.get_default_configuration() @@ -75,7 +76,9 @@ def __init__( connector_configuration = DataSourceConfiguration(configuration) - super().__init__(data_provider=GraphQLDataSource(connector_configuration)) + super().__init__( + data_provider=GraphQLDataSource(connector_configuration), **kwargs + ) self.http_endpoint = http_endpoint self.http_method = http_method diff --git a/package/connectors/generated/jira.py b/package/connectors/generated/jira.py index 44c17aaac..bfbab7d63 100644 --- a/package/connectors/generated/jira.py +++ b/package/connectors/generated/jira.py @@ -62,6 +62,7 @@ def __init__( ssl_ca=None, retry_count=3, concurrent_downloads=100, + **kwargs ): configuration = JiraDataSource.get_default_configuration() @@ -74,7 +75,9 @@ def __init__( connector_configuration = DataSourceConfiguration(configuration) - super().__init__(data_provider=JiraDataSource(connector_configuration)) + super().__init__( + data_provider=JiraDataSource(connector_configuration), **kwargs + ) self.data_source = data_source self.username = username diff --git a/package/connectors/generated/microsoft_teams.py b/package/connectors/generated/microsoft_teams.py index da81fbc83..f18af063f 100644 --- a/package/connectors/generated/microsoft_teams.py +++ b/package/connectors/generated/microsoft_teams.py @@ -36,6 +36,7 @@ def __init__( secret_value=None, username=None, password=None, + **kwargs ): configuration = MicrosoftTeamsDataSource.get_default_configuration() @@ -49,7 +50,7 @@ def __init__( connector_configuration = DataSourceConfiguration(configuration) super().__init__( - data_provider=MicrosoftTeamsDataSource(connector_configuration) + data_provider=MicrosoftTeamsDataSource(connector_configuration), **kwargs ) self.tenant_id = tenant_id diff --git a/package/connectors/generated/mongodb.py b/package/connectors/generated/mongodb.py index d62cf2b74..b22d63f1b 100644 --- a/package/connectors/generated/mongodb.py +++ b/package/connectors/generated/mongodb.py @@ -51,6 +51,7 @@ def __init__( ssl_enabled=False, ssl_ca=None, tls_insecure=False, + **kwargs ): configuration = MongoDataSource.get_default_configuration() @@ -63,7 +64,9 @@ def __init__( connector_configuration = DataSourceConfiguration(configuration) - super().__init__(data_provider=MongoDataSource(connector_configuration)) + super().__init__( + data_provider=MongoDataSource(connector_configuration), **kwargs + ) self.host = host self.user = user diff --git a/package/connectors/generated/mssql.py b/package/connectors/generated/mssql.py index 5374a1e86..01ac5e2b4 100644 --- a/package/connectors/generated/mssql.py +++ b/package/connectors/generated/mssql.py @@ -58,6 +58,7 @@ def __init__( ssl_enabled=False, ssl_ca=None, validate_host=False, + **kwargs ): configuration = MSSQLDataSource.get_default_configuration() @@ -70,7 +71,9 @@ def __init__( connector_configuration = DataSourceConfiguration(configuration) - super().__init__(data_provider=MSSQLDataSource(connector_configuration)) + super().__init__( + data_provider=MSSQLDataSource(connector_configuration), **kwargs + ) 
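The `**kwargs` added to every generated constructor in this patch are forwarded to ConnectorBase, so callers can inject the base-class options (logger, download_content) through any connector. A hedged usage sketch; the credentials are placeholders and the import path assumes this patch's package layout:

    import asyncio
    import logging

    from package.connectors import GMailConnector  # re-exported by this patch

    async def main():
        logger = logging.getLogger("my-sync-job")
        async with GMailConnector(
            service_account_credentials='{"type": "service_account"}',  # placeholder
            subject="admin@example.com",  # placeholder
            customer_id="C0123456",  # placeholder
            logger=logger,  # consumed by ConnectorBase
            download_content=False,  # skip attachment download and Tika extraction
        ) as connector:  # __aenter__ runs validate(): config check plus ping
            async for doc in connector.async_get_docs():
                print(doc["id"])

    asyncio.run(main())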
self.host = host self.port = port diff --git a/package/connectors/generated/mysql.py b/package/connectors/generated/mysql.py index 5552f6407..c18a27123 100644 --- a/package/connectors/generated/mysql.py +++ b/package/connectors/generated/mysql.py @@ -51,6 +51,7 @@ def __init__( ssl_ca=None, fetch_size=50, retry_count=3, + **kwargs ): configuration = MySqlDataSource.get_default_configuration() @@ -63,7 +64,9 @@ def __init__( connector_configuration = DataSourceConfiguration(configuration) - super().__init__(data_provider=MySqlDataSource(connector_configuration)) + super().__init__( + data_provider=MySqlDataSource(connector_configuration), **kwargs + ) self.host = host self.port = port diff --git a/package/connectors/generated/network_drive.py b/package/connectors/generated/network_drive.py index 37f3b97e3..3f46ad1cc 100644 --- a/package/connectors/generated/network_drive.py +++ b/package/connectors/generated/network_drive.py @@ -42,6 +42,7 @@ def __init__( drive_path=None, drive_type="windows", identity_mappings=None, + **kwargs ): configuration = NASDataSource.get_default_configuration() @@ -54,7 +55,7 @@ def __init__( connector_configuration = DataSourceConfiguration(configuration) - super().__init__(data_provider=NASDataSource(connector_configuration)) + super().__init__(data_provider=NASDataSource(connector_configuration), **kwargs) self.username = username self.password = password diff --git a/package/connectors/generated/notion.py b/package/connectors/generated/notion.py index e477e6596..a85418a18 100644 --- a/package/connectors/generated/notion.py +++ b/package/connectors/generated/notion.py @@ -37,6 +37,7 @@ def __init__( pages=None, index_comments=False, concurrent_downloads=30, + **kwargs ): configuration = NotionDataSource.get_default_configuration() @@ -49,7 +50,9 @@ def __init__( connector_configuration = DataSourceConfiguration(configuration) - super().__init__(data_provider=NotionDataSource(connector_configuration)) + super().__init__( + data_provider=NotionDataSource(connector_configuration), **kwargs + ) self.notion_secret_key = notion_secret_key self.databases = databases diff --git a/package/connectors/generated/onedrive.py b/package/connectors/generated/onedrive.py index 3ef95dc94..4c1a18d2d 100644 --- a/package/connectors/generated/onedrive.py +++ b/package/connectors/generated/onedrive.py @@ -36,6 +36,7 @@ def __init__( tenant_id=None, retry_count=3, concurrent_downloads=15, + **kwargs ): configuration = OneDriveDataSource.get_default_configuration() @@ -48,7 +49,9 @@ def __init__( connector_configuration = DataSourceConfiguration(configuration) - super().__init__(data_provider=OneDriveDataSource(connector_configuration)) + super().__init__( + data_provider=OneDriveDataSource(connector_configuration), **kwargs + ) self.client_id = client_id self.client_secret = client_secret diff --git a/package/connectors/generated/oracle.py b/package/connectors/generated/oracle.py index f6b5e8553..a7e884b5f 100644 --- a/package/connectors/generated/oracle.py +++ b/package/connectors/generated/oracle.py @@ -61,6 +61,7 @@ def __init__( oracle_protocol="TCP", oracle_home="", wallet_configuration_path="", + **kwargs ): configuration = OracleDataSource.get_default_configuration() @@ -73,7 +74,9 @@ def __init__( connector_configuration = DataSourceConfiguration(configuration) - super().__init__(data_provider=OracleDataSource(connector_configuration)) + super().__init__( + data_provider=OracleDataSource(connector_configuration), **kwargs + ) self.host = host self.port = port diff --git 
a/package/connectors/generated/outlook.py b/package/connectors/generated/outlook.py index ff9fc09bd..b5a62bb11 100644 --- a/package/connectors/generated/outlook.py +++ b/package/connectors/generated/outlook.py @@ -57,6 +57,7 @@ def __init__( domain=None, ssl_enabled=False, ssl_ca=None, + **kwargs ): configuration = OutlookDataSource.get_default_configuration() @@ -69,7 +70,9 @@ def __init__( connector_configuration = DataSourceConfiguration(configuration) - super().__init__(data_provider=OutlookDataSource(connector_configuration)) + super().__init__( + data_provider=OutlookDataSource(connector_configuration), **kwargs + ) self.data_source = data_source self.tenant_id = tenant_id diff --git a/package/connectors/generated/postgresql.py b/package/connectors/generated/postgresql.py index e080a6b21..fdfdeff99 100644 --- a/package/connectors/generated/postgresql.py +++ b/package/connectors/generated/postgresql.py @@ -55,6 +55,7 @@ def __init__( retry_count=3, ssl_enabled=False, ssl_ca=None, + **kwargs ): configuration = PostgreSQLDataSource.get_default_configuration() @@ -67,7 +68,9 @@ def __init__( connector_configuration = DataSourceConfiguration(configuration) - super().__init__(data_provider=PostgreSQLDataSource(connector_configuration)) + super().__init__( + data_provider=PostgreSQLDataSource(connector_configuration), **kwargs + ) self.host = host self.port = port diff --git a/package/connectors/generated/redis.py b/package/connectors/generated/redis.py index c34337dc9..8fa43914a 100644 --- a/package/connectors/generated/redis.py +++ b/package/connectors/generated/redis.py @@ -53,6 +53,7 @@ def __init__( mutual_tls_enabled=False, tls_certfile=None, tls_keyfile=None, + **kwargs ): configuration = RedisDataSource.get_default_configuration() @@ -65,7 +66,9 @@ def __init__( connector_configuration = DataSourceConfiguration(configuration) - super().__init__(data_provider=RedisDataSource(connector_configuration)) + super().__init__( + data_provider=RedisDataSource(connector_configuration), **kwargs + ) self.host = host self.port = port diff --git a/package/connectors/generated/s3.py b/package/connectors/generated/s3.py index f6cabcace..c7e9b94fa 100644 --- a/package/connectors/generated/s3.py +++ b/package/connectors/generated/s3.py @@ -43,6 +43,7 @@ def __init__( connect_timeout=90, max_attempts=5, page_size=100, + **kwargs ): configuration = S3DataSource.get_default_configuration() @@ -55,7 +56,7 @@ def __init__( connector_configuration = DataSourceConfiguration(configuration) - super().__init__(data_provider=S3DataSource(connector_configuration)) + super().__init__(data_provider=S3DataSource(connector_configuration), **kwargs) self.buckets = buckets self.aws_access_key_id = aws_access_key_id diff --git a/package/connectors/generated/salesforce.py b/package/connectors/generated/salesforce.py index 5bd71e8b9..6e8b01a3b 100644 --- a/package/connectors/generated/salesforce.py +++ b/package/connectors/generated/salesforce.py @@ -28,7 +28,7 @@ class SalesforceConnector(ConnectorBase): """ - def __init__(self, domain=None, client_id=None, client_secret=None): + def __init__(self, domain=None, client_id=None, client_secret=None, **kwargs): configuration = SalesforceDataSource.get_default_configuration() @@ -40,7 +40,9 @@ def __init__(self, domain=None, client_id=None, client_secret=None): connector_configuration = DataSourceConfiguration(configuration) - super().__init__(data_provider=SalesforceDataSource(connector_configuration)) + super().__init__( + 
data_provider=SalesforceDataSource(connector_configuration), **kwargs + ) self.domain = domain self.client_id = client_id diff --git a/package/connectors/generated/servicenow.py b/package/connectors/generated/servicenow.py index 7078e15d6..36b1887d2 100644 --- a/package/connectors/generated/servicenow.py +++ b/package/connectors/generated/servicenow.py @@ -40,6 +40,7 @@ def __init__( services="*", retry_count=3, concurrent_downloads=10, + **kwargs ): configuration = ServiceNowDataSource.get_default_configuration() @@ -52,7 +53,9 @@ def __init__( connector_configuration = DataSourceConfiguration(configuration) - super().__init__(data_provider=ServiceNowDataSource(connector_configuration)) + super().__init__( + data_provider=ServiceNowDataSource(connector_configuration), **kwargs + ) self.url = url self.username = username diff --git a/package/connectors/generated/sharepoint_online.py b/package/connectors/generated/sharepoint_online.py index fce44b209..fa8a02025 100644 --- a/package/connectors/generated/sharepoint_online.py +++ b/package/connectors/generated/sharepoint_online.py @@ -61,6 +61,7 @@ def __init__( fetch_unique_page_permissions=True, fetch_unique_list_permissions=True, fetch_unique_list_item_permissions=True, + **kwargs ): configuration = SharepointOnlineDataSource.get_default_configuration() @@ -74,7 +75,7 @@ def __init__( connector_configuration = DataSourceConfiguration(configuration) super().__init__( - data_provider=SharepointOnlineDataSource(connector_configuration) + data_provider=SharepointOnlineDataSource(connector_configuration), **kwargs ) self.tenant_id = tenant_id diff --git a/package/connectors/generated/sharepoint_server.py b/package/connectors/generated/sharepoint_server.py index 8f2f41ac1..3c8754cd9 100644 --- a/package/connectors/generated/sharepoint_server.py +++ b/package/connectors/generated/sharepoint_server.py @@ -50,6 +50,7 @@ def __init__( retry_count=3, fetch_unique_list_permissions=True, fetch_unique_list_item_permissions=True, + **kwargs ): configuration = SharepointServerDataSource.get_default_configuration() @@ -63,7 +64,7 @@ def __init__( connector_configuration = DataSourceConfiguration(configuration) super().__init__( - data_provider=SharepointServerDataSource(connector_configuration) + data_provider=SharepointServerDataSource(connector_configuration), **kwargs ) self.username = username diff --git a/package/connectors/generated/slack.py b/package/connectors/generated/slack.py index 8e7d4fec1..1876c9e3d 100644 --- a/package/connectors/generated/slack.py +++ b/package/connectors/generated/slack.py @@ -37,6 +37,7 @@ def __init__( fetch_last_n_days=None, auto_join_channels=False, sync_users=True, + **kwargs ): configuration = SlackDataSource.get_default_configuration() @@ -49,7 +50,9 @@ def __init__( connector_configuration = DataSourceConfiguration(configuration) - super().__init__(data_provider=SlackDataSource(connector_configuration)) + super().__init__( + data_provider=SlackDataSource(connector_configuration), **kwargs + ) self.token = token self.fetch_last_n_days = fetch_last_n_days diff --git a/package/connectors/generated/zoom.py b/package/connectors/generated/zoom.py index 69700839f..0cffe84e0 100644 --- a/package/connectors/generated/zoom.py +++ b/package/connectors/generated/zoom.py @@ -38,6 +38,7 @@ def __init__( client_secret=None, fetch_past_meeting_details=False, recording_age=None, + **kwargs ): configuration = ZoomDataSource.get_default_configuration() @@ -50,7 +51,9 @@ def __init__( connector_configuration = 
DataSourceConfiguration(configuration) - super().__init__(data_provider=ZoomDataSource(connector_configuration)) + super().__init__( + data_provider=ZoomDataSource(connector_configuration), **kwargs + ) self.account_id = account_id self.client_id = client_id diff --git a/package/setup.py b/package/setup.py new file mode 100644 index 000000000..c5a067c42 --- /dev/null +++ b/package/setup.py @@ -0,0 +1,44 @@ +import os +import sys +from setuptools import find_packages, setup + +if sys.version_info < (3, 10): + msg = "Requires Python 3.10 or higher." + raise ValueError(msg) + +__version__ = "0.1.0" # Define your version here or import from a version file + + +def read_reqs(req_file): + with open(req_file) as f: + return [line.strip() for line in f if line and not line.startswith("#")] + + +install_requires = read_reqs("../framework.txt") + +with open("../README.md") as f: + long_description = f.read() + +classifiers = [ + "Programming Language :: Python", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3 :: Only", +] + +setup( + name="elastic_connectors", + version=__version__, + packages=find_packages(where="../package"), + package_dir={"": "../package"}, + description=( + "Connectors developed by Elastic to sync data from 3rd party sources." + ), + long_description=long_description, + long_description_content_type="text/markdown", + author="Ingest Team", + author_email="your-email@elastic.co", + include_package_data=True, + zip_safe=False, + classifiers=classifiers, + install_requires=install_requires, +) diff --git a/requirements/package-dev.txt b/requirements/package-dev.txt index 97231eaa3..86bd7078f 100644 --- a/requirements/package-dev.txt +++ b/requirements/package-dev.txt @@ -1,2 +1,3 @@ Jinja2==3.1.4 black==24.4.2 +tika==2.6.0 diff --git a/scripts/codegen/generate_connectors_init.py b/scripts/codegen/generate_connectors_init.py index 697d2a71b..4eef933f3 100644 --- a/scripts/codegen/generate_connectors_init.py +++ b/scripts/codegen/generate_connectors_init.py @@ -3,27 +3,33 @@ from black import format_file_in_place, FileMode, WriteBack, NothingChanged from pathlib import Path -def generate_init_file(output_dir, generated_dir, template_env): - init_file_path = os.path.join(output_dir, '__init__.py') +from connectors.config import _default_config + + +def generate_init_file(output_dir, template_env): + init_file_path = os.path.join(output_dir, "__init__.py") imports = [] - for filename in os.listdir(generated_dir): - if filename.endswith('.py') and filename != '__init__.py': - module_name = filename[:-3] - class_name = ''.join(word.title() for word in module_name.split('_')) + 'Connector' - imports.append((module_name, class_name)) + connectors_config = _default_config() + data_source_classes = connectors_config["sources"] + + for module, module_path in data_source_classes.items(): + _, class_name = module_path.split(":") + class_name = class_name.replace("DataSource", "Connector") + imports.append((module, class_name)) template = template_env.get_template("init_template.jinja2") init_code = template.render(imports=imports) - with open(init_file_path, 'w') as init_file: + with open(init_file_path, "w") as init_file: init_file.write(init_code) - format_file_in_place( Path(init_file_path), fast=False, mode=FileMode(), write_back=WriteBack.YES) + format_file_in_place( + Path(init_file_path), fast=False, mode=FileMode(), write_back=WriteBack.YES + ) if __name__ == "__main__": connectors_dir = os.path.join("package", "connectors") - 
generated_dir = os.path.join(connectors_dir, "generated") env = Environment(loader=FileSystemLoader("scripts/codegen/templates")) - generate_init_file(connectors_dir, generated_dir, env) + generate_init_file(connectors_dir, env) diff --git a/scripts/codegen/templates/connector_template.jinja2 b/scripts/codegen/templates/connector_template.jinja2 index 494536efc..04bfbfaa6 100644 --- a/scripts/codegen/templates/connector_template.jinja2 +++ b/scripts/codegen/templates/connector_template.jinja2 @@ -20,7 +20,7 @@ class {{ class_name }}(ConnectorBase): - {{ config[param].tooltip }}{% endif %} {% endfor %} """ - def __init__(self, {% for param, value in params %}{{ param }}={% if value is none %}None{% elif value is string %}'{{ value }}'{% else %}{{ value }}{% endif %}{% if not loop.last %}, {% endif %}{% endfor %}): + def __init__(self, {% for param, value in params %}{{ param }}={% if value is none %}None{% elif value is string %}'{{ value }}'{% else %}{{ value }}{% endif %}{% if not loop.last %}, {% endif %}{% endfor %}, **kwargs): configuration = {{ data_source_class }}.get_default_configuration() @@ -32,7 +32,7 @@ class {{ class_name }}(ConnectorBase): connector_configuration = DataSourceConfiguration(configuration) - super().__init__(data_provider={{ data_source_class }}(connector_configuration)) + super().__init__(data_provider={{ data_source_class }}(connector_configuration), **kwargs) {% for key in config.keys() %}self.{{ key }} = {{ key }} {% endfor %} From 7c748d8367c1c97a58087369c860edcf29fbe74d Mon Sep 17 00:00:00 2001 From: Jedr Blaszyk Date: Wed, 10 Jul 2024 17:27:26 +0200 Subject: [PATCH 4/8] WIP --- package/__init__.py | 66 +++++++++++++ package/connectors/__init__.py | 66 ------------- package/connectors/generated/dir.py | 2 +- package/setup.py | 95 +++++++++++++------ scripts/codegen/generate_connectors_init.py | 4 +- .../codegen/templates/init_template.jinja2 | 2 +- 6 files changed, 135 insertions(+), 100 deletions(-) diff --git a/package/__init__.py b/package/__init__.py index e69de29bb..7a2106e12 100644 --- a/package/__init__.py +++ b/package/__init__.py @@ -0,0 +1,66 @@ +# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +# or more contributor license agreements. Licensed under the Elastic License 2.0; +# you may not use this file except in compliance with the Elastic License 2.0. +# +# +# This is a generated code. Do not modify directly. +# Run `make generate_connector_package` to update. 
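# For reference, a minimal sketch of how generate_connectors_init.py derives
# the imports below: each entry in the framework's source map is a
# "module.path:ClassName" string, and the DataSource suffix is swapped for
# Connector (the single "zoom" entry here is illustrative, not the full map):

sources = {"zoom": "connectors.sources.zoom:ZoomDataSource"}
imports = []
for module, module_path in sources.items():
    _, class_name = module_path.split(":")
    imports.append((module, class_name.replace("DataSource", "Connector")))
assert imports == [("zoom", "ZoomConnector")]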
+ + +from .connectors.generated.azure_blob_storage import AzureBlobStorageConnector + +from .connectors.generated.box import BoxConnector + +from .connectors.generated.confluence import ConfluenceConnector + +from .connectors.generated.dir import DirectoryConnector + +from .connectors.generated.dropbox import DropboxConnector + +from .connectors.generated.github import GitHubConnector + +from .connectors.generated.gmail import GMailConnector + +from .connectors.generated.google_cloud_storage import GoogleCloudStorageConnector + +from .connectors.generated.google_drive import GoogleDriveConnector + +from .connectors.generated.graphql import GraphQLConnector + +from .connectors.generated.jira import JiraConnector + +from .connectors.generated.microsoft_teams import MicrosoftTeamsConnector + +from .connectors.generated.mongodb import MongoConnector + +from .connectors.generated.mssql import MSSQLConnector + +from .connectors.generated.mysql import MySqlConnector + +from .connectors.generated.network_drive import NASConnector + +from .connectors.generated.notion import NotionConnector + +from .connectors.generated.onedrive import OneDriveConnector + +from .connectors.generated.oracle import OracleConnector + +from .connectors.generated.outlook import OutlookConnector + +from .connectors.generated.postgresql import PostgreSQLConnector + +from .connectors.generated.redis import RedisConnector + +from .connectors.generated.s3 import S3Connector + +from .connectors.generated.salesforce import SalesforceConnector + +from .connectors.generated.servicenow import ServiceNowConnector + +from .connectors.generated.sharepoint_online import SharepointOnlineConnector + +from .connectors.generated.sharepoint_server import SharepointServerConnector + +from .connectors.generated.slack import SlackConnector + +from .connectors.generated.zoom import ZoomConnector diff --git a/package/connectors/__init__.py b/package/connectors/__init__.py index bfb84ca38..e69de29bb 100644 --- a/package/connectors/__init__.py +++ b/package/connectors/__init__.py @@ -1,66 +0,0 @@ -# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -# or more contributor license agreements. Licensed under the Elastic License 2.0; -# you may not use this file except in compliance with the Elastic License 2.0. -# -# -# This is a generated code. Do not modify directly. -# Run `make generate_connector_package` to update. 
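# Hoisting these re-exports from package/connectors/__init__.py up to
# package/__init__.py puts every connector on the package root, so that once
# the package ships as elastic_connectors a consumer can simply write:

from elastic_connectors import ZoomConnector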
- - -from .generated.azure_blob_storage import AzureBlobStorageConnector - -from .generated.box import BoxConnector - -from .generated.confluence import ConfluenceConnector - -from .generated.dir import DirectoryConnector - -from .generated.dropbox import DropboxConnector - -from .generated.github import GitHubConnector - -from .generated.gmail import GMailConnector - -from .generated.google_cloud_storage import GoogleCloudStorageConnector - -from .generated.google_drive import GoogleDriveConnector - -from .generated.graphql import GraphQLConnector - -from .generated.jira import JiraConnector - -from .generated.microsoft_teams import MicrosoftTeamsConnector - -from .generated.mongodb import MongoConnector - -from .generated.mssql import MSSQLConnector - -from .generated.mysql import MySqlConnector - -from .generated.network_drive import NASConnector - -from .generated.notion import NotionConnector - -from .generated.onedrive import OneDriveConnector - -from .generated.oracle import OracleConnector - -from .generated.outlook import OutlookConnector - -from .generated.postgresql import PostgreSQLConnector - -from .generated.redis import RedisConnector - -from .generated.s3 import S3Connector - -from .generated.salesforce import SalesforceConnector - -from .generated.servicenow import ServiceNowConnector - -from .generated.sharepoint_online import SharepointOnlineConnector - -from .generated.sharepoint_server import SharepointServerConnector - -from .generated.slack import SlackConnector - -from .generated.zoom import ZoomConnector diff --git a/package/connectors/generated/dir.py b/package/connectors/generated/dir.py index 1eaffa689..fcc73c374 100644 --- a/package/connectors/generated/dir.py +++ b/package/connectors/generated/dir.py @@ -25,7 +25,7 @@ class DirectoryConnector(ConnectorBase): def __init__( self, - directory="/Users/jedr/connectors/connectors/sources", + directory="/Users/jedr/connectors/lib/python3.10/site-packages/connectors/sources", pattern="**/*.*", **kwargs ): diff --git a/package/setup.py b/package/setup.py index c5a067c42..2b6a7903f 100644 --- a/package/setup.py +++ b/package/setup.py @@ -1,44 +1,79 @@ import os -import sys -from setuptools import find_packages, setup +from setuptools import setup, find_packages -if sys.version_info < (3, 10): - msg = "Requires Python 3.10 or higher." - raise ValueError(msg) +from setuptools._vendor.packaging.markers import Marker -__version__ = "0.1.0" # Define your version here or import from a version file +try: + ARCH = os.uname().machine +except Exception as e: + ARCH = "x86_64" + print( # noqa: T201 + f"Defaulting to architecture '{ARCH}'. Unable to determine machine architecture due to error: {e}" + ) + + +# We feed install_requires with `requirements.txt` but we unpin versions so we +# don't enforce them and trap folks into dependency hell. 
(only works with `==` here) +# +# A proper production installation will do the following sequence: +# +# $ pip install -r requirements/`uname -n`.txt +# $ pip install elasticsearch-connectors +# +# Because the *pinned* dependencies is what we tested +# + + +def extract_req(req): + req = req.strip().split(";") + if len(req) > 1: + env_marker = req[-1].strip() + marker = Marker(env_marker) + if not marker.evaluate(): + return None + req = req[0] + req = req.split("=") + return req[0] def read_reqs(req_file): - with open(req_file) as f: - return [line.strip() for line in f if line and not line.startswith("#")] + deps = [] + reqs_dir, __ = os.path.split(req_file) + with open(req_file) as f: + reqs = f.readlines() + for req in reqs: + req = req.strip() + if req == "" or req.startswith("#"): + continue + if req.startswith("-r"): + subreq_file = req.split("-r")[-1].strip() + subreq_file = os.path.join(reqs_dir, subreq_file) + for subreq in read_reqs(subreq_file): + dep = extract_req(subreq) + if dep is not None and dep not in deps: + deps.append(dep) + else: + dep = extract_req(req) + if dep is not None and dep not in deps: + deps.append(dep) + return deps -install_requires = read_reqs("../framework.txt") -with open("../README.md") as f: - long_description = f.read() +install_requires = read_reqs( + os.path.join(os.path.dirname(__file__), f"../requirements/{ARCH}.txt") +) -classifiers = [ - "Programming Language :: Python", - "License :: OSI Approved :: Apache Software License", - "Programming Language :: Python :: 3 :: Only", -] +print(find_packages(include=["elastic_connectors", "elastic_connectors.*"])) setup( - name="elastic_connectors", - version=__version__, - packages=find_packages(where="../package"), - package_dir={"": "../package"}, - description=( - "Connectors developed by Elastic to sync data from 3rd party sources." - ), - long_description=long_description, - long_description_content_type="text/markdown", - author="Ingest Team", - author_email="your-email@elastic.co", - include_package_data=True, - zip_safe=False, - classifiers=classifiers, + name="test-elastic-connectors", + version="0.1.3", + packages=find_packages(), install_requires=install_requires, + include_package_data=True, + package_data={"elastic_connectors": ["../connectors/*"]}, + package_dir={ + "elastic_connectors": ".", + }, ) diff --git a/scripts/codegen/generate_connectors_init.py b/scripts/codegen/generate_connectors_init.py index 4eef933f3..2d26daa99 100644 --- a/scripts/codegen/generate_connectors_init.py +++ b/scripts/codegen/generate_connectors_init.py @@ -30,6 +30,6 @@ def generate_init_file(output_dir, template_env): if __name__ == "__main__": - connectors_dir = os.path.join("package", "connectors") + package_dir = os.path.join("package") env = Environment(loader=FileSystemLoader("scripts/codegen/templates")) - generate_init_file(connectors_dir, env) + generate_init_file(package_dir, env) diff --git a/scripts/codegen/templates/init_template.jinja2 b/scripts/codegen/templates/init_template.jinja2 index ba58662c8..d8f35f156 100644 --- a/scripts/codegen/templates/init_template.jinja2 +++ b/scripts/codegen/templates/init_template.jinja2 @@ -7,5 +7,5 @@ # Run `make generate_connector_package` to update. 
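# Stepping back to the setup.py helpers above: extract_req drops version pins
# and evaluates environment markers, so install_requires stays unpinned. A
# hedged illustration, assuming `==` pins (the only form the comment above
# claims to handle):

print(extract_req("aiohttp==3.9.5"))  # -> "aiohttp"
print(extract_req('uvloop==0.19.0; sys_platform != "win32"'))
# -> "uvloop" when the marker holds on this machine, None otherwise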
{% for module, class_name in imports %} -from .generated.{{ module }} import {{ class_name }} +from .connectors.generated.{{ module }} import {{ class_name }} {% endfor %} From ee0b3c2efd5bbe1cefa5147c367f790cd4325b83 Mon Sep 17 00:00:00 2001 From: Jedr Blaszyk Date: Thu, 11 Jul 2024 11:32:57 +0200 Subject: [PATCH 5/8] Publish pipeline works e2e --- package/README.md | 1 + package/__init__.py | 91 +++++++++++++------ package/{connectors => }/connector_base.py | 6 ++ package/connectors/generated/__init__.py | 0 package/{connectors => generated}/__init__.py | 0 .../generated/azure_blob_storage.py | 8 +- package/{connectors => }/generated/box.py | 6 +- .../{connectors => }/generated/confluence.py | 6 +- package/{connectors => }/generated/dir.py | 6 +- package/{connectors => }/generated/dropbox.py | 6 +- package/{connectors => }/generated/github.py | 6 +- package/{connectors => }/generated/gmail.py | 6 +- .../generated/google_cloud_storage.py | 8 +- .../generated/google_drive.py | 6 +- package/{connectors => }/generated/graphql.py | 6 +- package/{connectors => }/generated/jira.py | 6 +- .../generated/microsoft_teams.py | 8 +- package/{connectors => }/generated/mongodb.py | 6 +- package/{connectors => }/generated/mssql.py | 6 +- package/{connectors => }/generated/mysql.py | 6 +- .../generated/network_drive.py | 6 +- package/{connectors => }/generated/notion.py | 6 +- .../{connectors => }/generated/onedrive.py | 6 +- package/{connectors => }/generated/oracle.py | 6 +- package/{connectors => }/generated/outlook.py | 6 +- .../{connectors => }/generated/postgresql.py | 6 +- package/{connectors => }/generated/redis.py | 6 +- package/{connectors => }/generated/s3.py | 6 +- .../{connectors => }/generated/salesforce.py | 6 +- .../{connectors => }/generated/servicenow.py | 6 +- .../generated/sharepoint_online.py | 8 +- .../generated/sharepoint_server.py | 8 +- package/{connectors => }/generated/slack.py | 6 +- package/{connectors => }/generated/zoom.py | 6 +- package/setup.py | 55 ++++++----- requirements/package-dev.txt | 1 - requirements/package.txt | 1 + .../codegen/generate_connectors.py | 6 +- .../codegen/generate_connectors_init.py | 2 +- .../templates/connector_template.jinja2 | 6 +- .../codegen/templates/init_template.jinja2 | 4 +- scripts/package/manage_package.sh | 53 +++++++++++ scripts/package/update_imports.py | 37 ++++++++ 43 files changed, 300 insertions(+), 147 deletions(-) create mode 100644 package/README.md rename package/{connectors => }/connector_base.py (94%) delete mode 100644 package/connectors/generated/__init__.py rename package/{connectors => generated}/__init__.py (100%) rename package/{connectors => }/generated/azure_blob_storage.py (88%) rename package/{connectors => }/generated/box.py (89%) rename package/{connectors => }/generated/confluence.py (93%) rename package/{connectors => }/generated/dir.py (86%) rename package/{connectors => }/generated/dropbox.py (91%) rename package/{connectors => }/generated/github.py (92%) rename package/{connectors => }/generated/gmail.py (89%) rename package/{connectors => }/generated/google_cloud_storage.py (84%) rename package/{connectors => }/generated/google_drive.py (95%) rename package/{connectors => }/generated/graphql.py (94%) rename package/{connectors => }/generated/jira.py (93%) rename package/{connectors => }/generated/microsoft_teams.py (86%) rename package/{connectors => }/generated/mongodb.py (92%) rename package/{connectors => }/generated/mssql.py (91%) rename package/{connectors => }/generated/mysql.py (90%) rename 
package/{connectors => }/generated/network_drive.py (89%) rename package/{connectors => }/generated/notion.py (89%) rename package/{connectors => }/generated/onedrive.py (88%) rename package/{connectors => }/generated/oracle.py (92%) rename package/{connectors => }/generated/outlook.py (92%) rename package/{connectors => }/generated/postgresql.py (90%) rename package/{connectors => }/generated/redis.py (93%) rename package/{connectors => }/generated/s3.py (90%) rename package/{connectors => }/generated/salesforce.py (88%) rename package/{connectors => }/generated/servicenow.py (89%) rename package/{connectors => }/generated/sharepoint_online.py (94%) rename package/{connectors => }/generated/sharepoint_server.py (91%) rename package/{connectors => }/generated/slack.py (91%) rename package/{connectors => }/generated/zoom.py (90%) create mode 100644 requirements/package.txt rename scripts/{ => package}/codegen/generate_connectors.py (92%) rename scripts/{ => package}/codegen/generate_connectors_init.py (92%) rename scripts/{ => package}/codegen/templates/connector_template.jinja2 (87%) rename scripts/{ => package}/codegen/templates/init_template.jinja2 (70%) create mode 100755 scripts/package/manage_package.sh create mode 100644 scripts/package/update_imports.py diff --git a/package/README.md b/package/README.md new file mode 100644 index 000000000..2089e6211 --- /dev/null +++ b/package/README.md @@ -0,0 +1 @@ +# elastic-conn-packages diff --git a/package/__init__.py b/package/__init__.py index 7a2106e12..50f8441df 100644 --- a/package/__init__.py +++ b/package/__init__.py @@ -7,60 +7,93 @@ # Run `make generate_connector_package` to update. -from .connectors.generated.azure_blob_storage import AzureBlobStorageConnector +from .generated.azure_blob_storage import AzureBlobStorageConnector -from .connectors.generated.box import BoxConnector +from .generated.box import BoxConnector -from .connectors.generated.confluence import ConfluenceConnector +from .generated.confluence import ConfluenceConnector -from .connectors.generated.dir import DirectoryConnector +from .generated.dir import DirectoryConnector -from .connectors.generated.dropbox import DropboxConnector +from .generated.dropbox import DropboxConnector -from .connectors.generated.github import GitHubConnector +from .generated.github import GitHubConnector -from .connectors.generated.gmail import GMailConnector +from .generated.gmail import GMailConnector -from .connectors.generated.google_cloud_storage import GoogleCloudStorageConnector +from .generated.google_cloud_storage import GoogleCloudStorageConnector -from .connectors.generated.google_drive import GoogleDriveConnector +from .generated.google_drive import GoogleDriveConnector -from .connectors.generated.graphql import GraphQLConnector +from .generated.graphql import GraphQLConnector -from .connectors.generated.jira import JiraConnector +from .generated.jira import JiraConnector -from .connectors.generated.microsoft_teams import MicrosoftTeamsConnector +from .generated.microsoft_teams import MicrosoftTeamsConnector -from .connectors.generated.mongodb import MongoConnector +from .generated.mongodb import MongoConnector -from .connectors.generated.mssql import MSSQLConnector +from .generated.mssql import MSSQLConnector -from .connectors.generated.mysql import MySqlConnector +from .generated.mysql import MySqlConnector -from .connectors.generated.network_drive import NASConnector +from .generated.network_drive import NASConnector -from .connectors.generated.notion import 
NotionConnector +from .generated.notion import NotionConnector -from .connectors.generated.onedrive import OneDriveConnector +from .generated.onedrive import OneDriveConnector -from .connectors.generated.oracle import OracleConnector +from .generated.oracle import OracleConnector -from .connectors.generated.outlook import OutlookConnector +from .generated.outlook import OutlookConnector -from .connectors.generated.postgresql import PostgreSQLConnector +from .generated.postgresql import PostgreSQLConnector -from .connectors.generated.redis import RedisConnector +from .generated.redis import RedisConnector -from .connectors.generated.s3 import S3Connector +from .generated.s3 import S3Connector -from .connectors.generated.salesforce import SalesforceConnector +from .generated.salesforce import SalesforceConnector -from .connectors.generated.servicenow import ServiceNowConnector +from .generated.servicenow import ServiceNowConnector -from .connectors.generated.sharepoint_online import SharepointOnlineConnector +from .generated.sharepoint_online import SharepointOnlineConnector -from .connectors.generated.sharepoint_server import SharepointServerConnector +from .generated.sharepoint_server import SharepointServerConnector -from .connectors.generated.slack import SlackConnector +from .generated.slack import SlackConnector -from .connectors.generated.zoom import ZoomConnector +from .generated.zoom import ZoomConnector + + +__all__ = [ + "AzureBlobStorageConnector", + "BoxConnector", + "ConfluenceConnector", + "DirectoryConnector", + "DropboxConnector", + "GitHubConnector", + "GMailConnector", + "GoogleCloudStorageConnector", + "GoogleDriveConnector", + "GraphQLConnector", + "JiraConnector", + "MicrosoftTeamsConnector", + "MongoConnector", + "MSSQLConnector", + "MySqlConnector", + "NASConnector", + "NotionConnector", + "OneDriveConnector", + "OracleConnector", + "OutlookConnector", + "PostgreSQLConnector", + "RedisConnector", + "S3Connector", + "SalesforceConnector", + "ServiceNowConnector", + "SharepointOnlineConnector", + "SharepointServerConnector", + "SlackConnector", + "ZoomConnector", +] diff --git a/package/connectors/connector_base.py b/package/connector_base.py similarity index 94% rename from package/connectors/connector_base.py rename to package/connector_base.py index d63af6475..b0cc50255 100644 --- a/package/connectors/connector_base.py +++ b/package/connector_base.py @@ -1,3 +1,9 @@ +# +# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +# or more contributor license agreements. Licensed under the Elastic License 2.0; +# you may not use this file except in compliance with the Elastic License 2.0. +# + import base64 import logging from typing import AsyncIterator, Dict diff --git a/package/connectors/generated/__init__.py b/package/connectors/generated/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/package/connectors/__init__.py b/package/generated/__init__.py similarity index 100% rename from package/connectors/__init__.py rename to package/generated/__init__.py diff --git a/package/connectors/generated/azure_blob_storage.py b/package/generated/azure_blob_storage.py similarity index 88% rename from package/connectors/generated/azure_blob_storage.py rename to package/generated/azure_blob_storage.py index 201fc272b..427f305f7 100644 --- a/package/connectors/generated/azure_blob_storage.py +++ b/package/generated/azure_blob_storage.py @@ -6,9 +6,11 @@ # This is a generated code. Do not modify directly. 
# Run `make generate_connector_package` to update. -from connectors.source import DataSourceConfiguration -from connectors.sources.azure_blob_storage import AzureBlobStorageDataSource -from package.connectors.connector_base import ConnectorBase +from elastic_connectors.connectors.source import DataSourceConfiguration +from elastic_connectors.connectors.sources.azure_blob_storage import ( + AzureBlobStorageDataSource, +) +from elastic_connectors.connector_base import ConnectorBase class AzureBlobStorageConnector(ConnectorBase): diff --git a/package/connectors/generated/box.py b/package/generated/box.py similarity index 89% rename from package/connectors/generated/box.py rename to package/generated/box.py index 48a82aee4..84cb8c064 100644 --- a/package/connectors/generated/box.py +++ b/package/generated/box.py @@ -6,9 +6,9 @@ # This is a generated code. Do not modify directly. # Run `make generate_connector_package` to update. -from connectors.source import DataSourceConfiguration -from connectors.sources.box import BoxDataSource -from package.connectors.connector_base import ConnectorBase +from elastic_connectors.connectors.source import DataSourceConfiguration +from elastic_connectors.connectors.sources.box import BoxDataSource +from elastic_connectors.connector_base import ConnectorBase class BoxConnector(ConnectorBase): diff --git a/package/connectors/generated/confluence.py b/package/generated/confluence.py similarity index 93% rename from package/connectors/generated/confluence.py rename to package/generated/confluence.py index 2ae5eca1a..e3e4acdd2 100644 --- a/package/connectors/generated/confluence.py +++ b/package/generated/confluence.py @@ -6,9 +6,9 @@ # This is a generated code. Do not modify directly. # Run `make generate_connector_package` to update. -from connectors.source import DataSourceConfiguration -from connectors.sources.confluence import ConfluenceDataSource -from package.connectors.connector_base import ConnectorBase +from elastic_connectors.connectors.source import DataSourceConfiguration +from elastic_connectors.connectors.sources.confluence import ConfluenceDataSource +from elastic_connectors.connector_base import ConnectorBase class ConfluenceConnector(ConnectorBase): diff --git a/package/connectors/generated/dir.py b/package/generated/dir.py similarity index 86% rename from package/connectors/generated/dir.py rename to package/generated/dir.py index fcc73c374..d067f11e6 100644 --- a/package/connectors/generated/dir.py +++ b/package/generated/dir.py @@ -6,9 +6,9 @@ # This is a generated code. Do not modify directly. # Run `make generate_connector_package` to update. -from connectors.source import DataSourceConfiguration -from connectors.sources.directory import DirectoryDataSource -from package.connectors.connector_base import ConnectorBase +from elastic_connectors.connectors.source import DataSourceConfiguration +from elastic_connectors.connectors.sources.directory import DirectoryDataSource +from elastic_connectors.connector_base import ConnectorBase class DirectoryConnector(ConnectorBase): diff --git a/package/connectors/generated/dropbox.py b/package/generated/dropbox.py similarity index 91% rename from package/connectors/generated/dropbox.py rename to package/generated/dropbox.py index 04eaf7329..6a1381799 100644 --- a/package/connectors/generated/dropbox.py +++ b/package/generated/dropbox.py @@ -6,9 +6,9 @@ # This is a generated code. Do not modify directly. # Run `make generate_connector_package` to update. 
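# These wrapper imports now target the vendored elastic_connectors.connectors
# namespace (emitted by the connector_template.jinja2 change further down);
# the framework sources copied into the package get the equivalent rewrite at
# build time from scripts/package/update_imports.py, which boils down to one
# word-bounded substitution:

import re

content = "from connectors.source import DataSourceConfiguration"
updated = re.sub(
    r"\bfrom connectors\b", "from elastic_connectors.connectors", content
)
assert updated == "from elastic_connectors.connectors.source import DataSourceConfiguration"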
-from connectors.source import DataSourceConfiguration -from connectors.sources.dropbox import DropboxDataSource -from package.connectors.connector_base import ConnectorBase +from elastic_connectors.connectors.source import DataSourceConfiguration +from elastic_connectors.connectors.sources.dropbox import DropboxDataSource +from elastic_connectors.connector_base import ConnectorBase class DropboxConnector(ConnectorBase): diff --git a/package/connectors/generated/github.py b/package/generated/github.py similarity index 92% rename from package/connectors/generated/github.py rename to package/generated/github.py index 2b2cb40e0..95d176e02 100644 --- a/package/connectors/generated/github.py +++ b/package/generated/github.py @@ -6,9 +6,9 @@ # This is a generated code. Do not modify directly. # Run `make generate_connector_package` to update. -from connectors.source import DataSourceConfiguration -from connectors.sources.github import GitHubDataSource -from package.connectors.connector_base import ConnectorBase +from elastic_connectors.connectors.source import DataSourceConfiguration +from elastic_connectors.connectors.sources.github import GitHubDataSource +from elastic_connectors.connector_base import ConnectorBase class GitHubConnector(ConnectorBase): diff --git a/package/connectors/generated/gmail.py b/package/generated/gmail.py similarity index 89% rename from package/connectors/generated/gmail.py rename to package/generated/gmail.py index 3a25159e8..9b99c67b9 100644 --- a/package/connectors/generated/gmail.py +++ b/package/generated/gmail.py @@ -6,9 +6,9 @@ # This is a generated code. Do not modify directly. # Run `make generate_connector_package` to update. -from connectors.source import DataSourceConfiguration -from connectors.sources.gmail import GMailDataSource -from package.connectors.connector_base import ConnectorBase +from elastic_connectors.connectors.source import DataSourceConfiguration +from elastic_connectors.connectors.sources.gmail import GMailDataSource +from elastic_connectors.connector_base import ConnectorBase class GMailConnector(ConnectorBase): diff --git a/package/connectors/generated/google_cloud_storage.py b/package/generated/google_cloud_storage.py similarity index 84% rename from package/connectors/generated/google_cloud_storage.py rename to package/generated/google_cloud_storage.py index 609c3642b..04d65b6bf 100644 --- a/package/connectors/generated/google_cloud_storage.py +++ b/package/generated/google_cloud_storage.py @@ -6,9 +6,11 @@ # This is a generated code. Do not modify directly. # Run `make generate_connector_package` to update. -from connectors.source import DataSourceConfiguration -from connectors.sources.google_cloud_storage import GoogleCloudStorageDataSource -from package.connectors.connector_base import ConnectorBase +from elastic_connectors.connectors.source import DataSourceConfiguration +from elastic_connectors.connectors.sources.google_cloud_storage import ( + GoogleCloudStorageDataSource, +) +from elastic_connectors.connector_base import ConnectorBase class GoogleCloudStorageConnector(ConnectorBase): diff --git a/package/connectors/generated/google_drive.py b/package/generated/google_drive.py similarity index 95% rename from package/connectors/generated/google_drive.py rename to package/generated/google_drive.py index 96fcdd2df..34ff05ade 100644 --- a/package/connectors/generated/google_drive.py +++ b/package/generated/google_drive.py @@ -6,9 +6,9 @@ # This is a generated code. Do not modify directly. 
# Run `make generate_connector_package` to update. -from connectors.source import DataSourceConfiguration -from connectors.sources.google_drive import GoogleDriveDataSource -from package.connectors.connector_base import ConnectorBase +from elastic_connectors.connectors.source import DataSourceConfiguration +from elastic_connectors.connectors.sources.google_drive import GoogleDriveDataSource +from elastic_connectors.connector_base import ConnectorBase class GoogleDriveConnector(ConnectorBase): diff --git a/package/connectors/generated/graphql.py b/package/generated/graphql.py similarity index 94% rename from package/connectors/generated/graphql.py rename to package/generated/graphql.py index 5679438d1..d05ced99a 100644 --- a/package/connectors/generated/graphql.py +++ b/package/generated/graphql.py @@ -6,9 +6,9 @@ # This is a generated code. Do not modify directly. # Run `make generate_connector_package` to update. -from connectors.source import DataSourceConfiguration -from connectors.sources.graphql import GraphQLDataSource -from package.connectors.connector_base import ConnectorBase +from elastic_connectors.connectors.source import DataSourceConfiguration +from elastic_connectors.connectors.sources.graphql import GraphQLDataSource +from elastic_connectors.connector_base import ConnectorBase class GraphQLConnector(ConnectorBase): diff --git a/package/connectors/generated/jira.py b/package/generated/jira.py similarity index 93% rename from package/connectors/generated/jira.py rename to package/generated/jira.py index bfbab7d63..eb60c5eb6 100644 --- a/package/connectors/generated/jira.py +++ b/package/generated/jira.py @@ -6,9 +6,9 @@ # This is a generated code. Do not modify directly. # Run `make generate_connector_package` to update. -from connectors.source import DataSourceConfiguration -from connectors.sources.jira import JiraDataSource -from package.connectors.connector_base import ConnectorBase +from elastic_connectors.connectors.source import DataSourceConfiguration +from elastic_connectors.connectors.sources.jira import JiraDataSource +from elastic_connectors.connector_base import ConnectorBase class JiraConnector(ConnectorBase): diff --git a/package/connectors/generated/microsoft_teams.py b/package/generated/microsoft_teams.py similarity index 86% rename from package/connectors/generated/microsoft_teams.py rename to package/generated/microsoft_teams.py index f18af063f..86ec913af 100644 --- a/package/connectors/generated/microsoft_teams.py +++ b/package/generated/microsoft_teams.py @@ -6,9 +6,11 @@ # This is a generated code. Do not modify directly. # Run `make generate_connector_package` to update. -from connectors.source import DataSourceConfiguration -from connectors.sources.microsoft_teams import MicrosoftTeamsDataSource -from package.connectors.connector_base import ConnectorBase +from elastic_connectors.connectors.source import DataSourceConfiguration +from elastic_connectors.connectors.sources.microsoft_teams import ( + MicrosoftTeamsDataSource, +) +from elastic_connectors.connector_base import ConnectorBase class MicrosoftTeamsConnector(ConnectorBase): diff --git a/package/connectors/generated/mongodb.py b/package/generated/mongodb.py similarity index 92% rename from package/connectors/generated/mongodb.py rename to package/generated/mongodb.py index b22d63f1b..dca8f9f9a 100644 --- a/package/connectors/generated/mongodb.py +++ b/package/generated/mongodb.py @@ -6,9 +6,9 @@ # This is a generated code. Do not modify directly. # Run `make generate_connector_package` to update. 
-from connectors.source import DataSourceConfiguration -from connectors.sources.mongo import MongoDataSource -from package.connectors.connector_base import ConnectorBase +from elastic_connectors.connectors.source import DataSourceConfiguration +from elastic_connectors.connectors.sources.mongo import MongoDataSource +from elastic_connectors.connector_base import ConnectorBase class MongoConnector(ConnectorBase): diff --git a/package/connectors/generated/mssql.py b/package/generated/mssql.py similarity index 91% rename from package/connectors/generated/mssql.py rename to package/generated/mssql.py index 01ac5e2b4..29e34628c 100644 --- a/package/connectors/generated/mssql.py +++ b/package/generated/mssql.py @@ -6,9 +6,9 @@ # This is a generated code. Do not modify directly. # Run `make generate_connector_package` to update. -from connectors.source import DataSourceConfiguration -from connectors.sources.mssql import MSSQLDataSource -from package.connectors.connector_base import ConnectorBase +from elastic_connectors.connectors.source import DataSourceConfiguration +from elastic_connectors.connectors.sources.mssql import MSSQLDataSource +from elastic_connectors.connector_base import ConnectorBase class MSSQLConnector(ConnectorBase): diff --git a/package/connectors/generated/mysql.py b/package/generated/mysql.py similarity index 90% rename from package/connectors/generated/mysql.py rename to package/generated/mysql.py index c18a27123..4cb43c3ff 100644 --- a/package/connectors/generated/mysql.py +++ b/package/generated/mysql.py @@ -6,9 +6,9 @@ # This is a generated code. Do not modify directly. # Run `make generate_connector_package` to update. -from connectors.source import DataSourceConfiguration -from connectors.sources.mysql import MySqlDataSource -from package.connectors.connector_base import ConnectorBase +from elastic_connectors.connectors.source import DataSourceConfiguration +from elastic_connectors.connectors.sources.mysql import MySqlDataSource +from elastic_connectors.connector_base import ConnectorBase class MySqlConnector(ConnectorBase): diff --git a/package/connectors/generated/network_drive.py b/package/generated/network_drive.py similarity index 89% rename from package/connectors/generated/network_drive.py rename to package/generated/network_drive.py index 3f46ad1cc..fd48a9bd1 100644 --- a/package/connectors/generated/network_drive.py +++ b/package/generated/network_drive.py @@ -6,9 +6,9 @@ # This is a generated code. Do not modify directly. # Run `make generate_connector_package` to update. -from connectors.source import DataSourceConfiguration -from connectors.sources.network_drive import NASDataSource -from package.connectors.connector_base import ConnectorBase +from elastic_connectors.connectors.source import DataSourceConfiguration +from elastic_connectors.connectors.sources.network_drive import NASDataSource +from elastic_connectors.connector_base import ConnectorBase class NASConnector(ConnectorBase): diff --git a/package/connectors/generated/notion.py b/package/generated/notion.py similarity index 89% rename from package/connectors/generated/notion.py rename to package/generated/notion.py index a85418a18..1ac6b26cf 100644 --- a/package/connectors/generated/notion.py +++ b/package/generated/notion.py @@ -6,9 +6,9 @@ # This is a generated code. Do not modify directly. # Run `make generate_connector_package` to update. 
-from connectors.source import DataSourceConfiguration -from connectors.sources.notion import NotionDataSource -from package.connectors.connector_base import ConnectorBase +from elastic_connectors.connectors.source import DataSourceConfiguration +from elastic_connectors.connectors.sources.notion import NotionDataSource +from elastic_connectors.connector_base import ConnectorBase class NotionConnector(ConnectorBase): diff --git a/package/connectors/generated/onedrive.py b/package/generated/onedrive.py similarity index 88% rename from package/connectors/generated/onedrive.py rename to package/generated/onedrive.py index 4c1a18d2d..92a5bdfba 100644 --- a/package/connectors/generated/onedrive.py +++ b/package/generated/onedrive.py @@ -6,9 +6,9 @@ # This is a generated code. Do not modify directly. # Run `make generate_connector_package` to update. -from connectors.source import DataSourceConfiguration -from connectors.sources.onedrive import OneDriveDataSource -from package.connectors.connector_base import ConnectorBase +from elastic_connectors.connectors.source import DataSourceConfiguration +from elastic_connectors.connectors.sources.onedrive import OneDriveDataSource +from elastic_connectors.connector_base import ConnectorBase class OneDriveConnector(ConnectorBase): diff --git a/package/connectors/generated/oracle.py b/package/generated/oracle.py similarity index 92% rename from package/connectors/generated/oracle.py rename to package/generated/oracle.py index a7e884b5f..70a6bd799 100644 --- a/package/connectors/generated/oracle.py +++ b/package/generated/oracle.py @@ -6,9 +6,9 @@ # This is a generated code. Do not modify directly. # Run `make generate_connector_package` to update. -from connectors.source import DataSourceConfiguration -from connectors.sources.oracle import OracleDataSource -from package.connectors.connector_base import ConnectorBase +from elastic_connectors.connectors.source import DataSourceConfiguration +from elastic_connectors.connectors.sources.oracle import OracleDataSource +from elastic_connectors.connector_base import ConnectorBase class OracleConnector(ConnectorBase): diff --git a/package/connectors/generated/outlook.py b/package/generated/outlook.py similarity index 92% rename from package/connectors/generated/outlook.py rename to package/generated/outlook.py index b5a62bb11..4ba0cec9d 100644 --- a/package/connectors/generated/outlook.py +++ b/package/generated/outlook.py @@ -6,9 +6,9 @@ # This is a generated code. Do not modify directly. # Run `make generate_connector_package` to update. -from connectors.source import DataSourceConfiguration -from connectors.sources.outlook import OutlookDataSource -from package.connectors.connector_base import ConnectorBase +from elastic_connectors.connectors.source import DataSourceConfiguration +from elastic_connectors.connectors.sources.outlook import OutlookDataSource +from elastic_connectors.connector_base import ConnectorBase class OutlookConnector(ConnectorBase): diff --git a/package/connectors/generated/postgresql.py b/package/generated/postgresql.py similarity index 90% rename from package/connectors/generated/postgresql.py rename to package/generated/postgresql.py index fdfdeff99..f5c7f1183 100644 --- a/package/connectors/generated/postgresql.py +++ b/package/generated/postgresql.py @@ -6,9 +6,9 @@ # This is a generated code. Do not modify directly. # Run `make generate_connector_package` to update. 
-from connectors.source import DataSourceConfiguration -from connectors.sources.postgresql import PostgreSQLDataSource -from package.connectors.connector_base import ConnectorBase +from elastic_connectors.connectors.source import DataSourceConfiguration +from elastic_connectors.connectors.sources.postgresql import PostgreSQLDataSource +from elastic_connectors.connector_base import ConnectorBase class PostgreSQLConnector(ConnectorBase): diff --git a/package/connectors/generated/redis.py b/package/generated/redis.py similarity index 93% rename from package/connectors/generated/redis.py rename to package/generated/redis.py index 8fa43914a..58fb4ecb4 100644 --- a/package/connectors/generated/redis.py +++ b/package/generated/redis.py @@ -6,9 +6,9 @@ # This is a generated code. Do not modify directly. # Run `make generate_connector_package` to update. -from connectors.source import DataSourceConfiguration -from connectors.sources.redis import RedisDataSource -from package.connectors.connector_base import ConnectorBase +from elastic_connectors.connectors.source import DataSourceConfiguration +from elastic_connectors.connectors.sources.redis import RedisDataSource +from elastic_connectors.connector_base import ConnectorBase class RedisConnector(ConnectorBase): diff --git a/package/connectors/generated/s3.py b/package/generated/s3.py similarity index 90% rename from package/connectors/generated/s3.py rename to package/generated/s3.py index c7e9b94fa..4e0d650c8 100644 --- a/package/connectors/generated/s3.py +++ b/package/generated/s3.py @@ -6,9 +6,9 @@ # This is a generated code. Do not modify directly. # Run `make generate_connector_package` to update. -from connectors.source import DataSourceConfiguration -from connectors.sources.s3 import S3DataSource -from package.connectors.connector_base import ConnectorBase +from elastic_connectors.connectors.source import DataSourceConfiguration +from elastic_connectors.connectors.sources.s3 import S3DataSource +from elastic_connectors.connector_base import ConnectorBase class S3Connector(ConnectorBase): diff --git a/package/connectors/generated/salesforce.py b/package/generated/salesforce.py similarity index 88% rename from package/connectors/generated/salesforce.py rename to package/generated/salesforce.py index 6e8b01a3b..dfa6fbbd2 100644 --- a/package/connectors/generated/salesforce.py +++ b/package/generated/salesforce.py @@ -6,9 +6,9 @@ # This is a generated code. Do not modify directly. # Run `make generate_connector_package` to update. -from connectors.source import DataSourceConfiguration -from connectors.sources.salesforce import SalesforceDataSource -from package.connectors.connector_base import ConnectorBase +from elastic_connectors.connectors.source import DataSourceConfiguration +from elastic_connectors.connectors.sources.salesforce import SalesforceDataSource +from elastic_connectors.connector_base import ConnectorBase class SalesforceConnector(ConnectorBase): diff --git a/package/connectors/generated/servicenow.py b/package/generated/servicenow.py similarity index 89% rename from package/connectors/generated/servicenow.py rename to package/generated/servicenow.py index 36b1887d2..20ba269e5 100644 --- a/package/connectors/generated/servicenow.py +++ b/package/generated/servicenow.py @@ -6,9 +6,9 @@ # This is a generated code. Do not modify directly. # Run `make generate_connector_package` to update. 
-from connectors.source import DataSourceConfiguration -from connectors.sources.servicenow import ServiceNowDataSource -from package.connectors.connector_base import ConnectorBase +from elastic_connectors.connectors.source import DataSourceConfiguration +from elastic_connectors.connectors.sources.servicenow import ServiceNowDataSource +from elastic_connectors.connector_base import ConnectorBase class ServiceNowConnector(ConnectorBase): diff --git a/package/connectors/generated/sharepoint_online.py b/package/generated/sharepoint_online.py similarity index 94% rename from package/connectors/generated/sharepoint_online.py rename to package/generated/sharepoint_online.py index fa8a02025..de089c384 100644 --- a/package/connectors/generated/sharepoint_online.py +++ b/package/generated/sharepoint_online.py @@ -6,9 +6,11 @@ # This is a generated code. Do not modify directly. # Run `make generate_connector_package` to update. -from connectors.source import DataSourceConfiguration -from connectors.sources.sharepoint_online import SharepointOnlineDataSource -from package.connectors.connector_base import ConnectorBase +from elastic_connectors.connectors.source import DataSourceConfiguration +from elastic_connectors.connectors.sources.sharepoint_online import ( + SharepointOnlineDataSource, +) +from elastic_connectors.connector_base import ConnectorBase class SharepointOnlineConnector(ConnectorBase): diff --git a/package/connectors/generated/sharepoint_server.py b/package/generated/sharepoint_server.py similarity index 91% rename from package/connectors/generated/sharepoint_server.py rename to package/generated/sharepoint_server.py index 3c8754cd9..ae9b620ba 100644 --- a/package/connectors/generated/sharepoint_server.py +++ b/package/generated/sharepoint_server.py @@ -6,9 +6,11 @@ # This is a generated code. Do not modify directly. # Run `make generate_connector_package` to update. -from connectors.source import DataSourceConfiguration -from connectors.sources.sharepoint_server import SharepointServerDataSource -from package.connectors.connector_base import ConnectorBase +from elastic_connectors.connectors.source import DataSourceConfiguration +from elastic_connectors.connectors.sources.sharepoint_server import ( + SharepointServerDataSource, +) +from elastic_connectors.connector_base import ConnectorBase class SharepointServerConnector(ConnectorBase): diff --git a/package/connectors/generated/slack.py b/package/generated/slack.py similarity index 91% rename from package/connectors/generated/slack.py rename to package/generated/slack.py index 1876c9e3d..de22ec1e3 100644 --- a/package/connectors/generated/slack.py +++ b/package/generated/slack.py @@ -6,9 +6,9 @@ # This is a generated code. Do not modify directly. # Run `make generate_connector_package` to update. -from connectors.source import DataSourceConfiguration -from connectors.sources.slack import SlackDataSource -from package.connectors.connector_base import ConnectorBase +from elastic_connectors.connectors.source import DataSourceConfiguration +from elastic_connectors.connectors.sources.slack import SlackDataSource +from elastic_connectors.connector_base import ConnectorBase class SlackConnector(ConnectorBase): diff --git a/package/connectors/generated/zoom.py b/package/generated/zoom.py similarity index 90% rename from package/connectors/generated/zoom.py rename to package/generated/zoom.py index 0cffe84e0..c9fed55ac 100644 --- a/package/connectors/generated/zoom.py +++ b/package/generated/zoom.py @@ -6,9 +6,9 @@ # This is a generated code. 
Do not modify directly. # Run `make generate_connector_package` to update. -from connectors.source import DataSourceConfiguration -from connectors.sources.zoom import ZoomDataSource -from package.connectors.connector_base import ConnectorBase +from elastic_connectors.connectors.source import DataSourceConfiguration +from elastic_connectors.connectors.sources.zoom import ZoomDataSource +from elastic_connectors.connector_base import ConnectorBase class ZoomConnector(ConnectorBase): diff --git a/package/setup.py b/package/setup.py index 2b6a7903f..743217fc4 100644 --- a/package/setup.py +++ b/package/setup.py @@ -1,6 +1,12 @@ +# +# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +# or more contributor license agreements. Licensed under the Elastic License 2.0; +# you may not use this file except in compliance with the Elastic License 2.0. +# + import os -from setuptools import setup, find_packages +from setuptools import find_packages, setup from setuptools._vendor.packaging.markers import Marker try: @@ -12,18 +18,6 @@ ) -# We feed install_requires with `requirements.txt` but we unpin versions so we -# don't enforce them and trap folks into dependency hell. (only works with `==` here) -# -# A proper production installation will do the following sequence: -# -# $ pip install -r requirements/`uname -n`.txt -# $ pip install elasticsearch-connectors -# -# Because the *pinned* dependencies is what we tested -# - - def extract_req(req): req = req.strip().split(";") if len(req) > 1: @@ -60,20 +54,37 @@ def read_reqs(req_file): return deps -install_requires = read_reqs( - os.path.join(os.path.dirname(__file__), f"../requirements/{ARCH}.txt") +framework_reqs = read_reqs( + os.path.join("elastic_connectors", "requirements", f"{ARCH}.txt") ) +package_reqs = read_reqs( + os.path.join("elastic_connectors", "requirements", "package.txt") +) + +install_requires = framework_reqs + package_reqs + -print(find_packages(include=["elastic_connectors", "elastic_connectors.*"])) +with open("README.md") as f: + long_description = f.read() setup( - name="test-elastic-connectors", - version="0.1.3", - packages=find_packages(), + name="elastic-connectors", + version="0.1.0", + description="Elastic connectors", + long_description=open("README.md").read(), + long_description_content_type="text/markdown", + author="Jedr Blaszyk", + author_email="jedr.blaszyk@elastic.co", + url="https://github.com/elastic/connectors", + packages=find_packages(include=["elastic_connectors", "elastic_connectors.*"]), install_requires=install_requires, include_package_data=True, - package_data={"elastic_connectors": ["../connectors/*"]}, - package_dir={ - "elastic_connectors": ".", + package_data={ + "elastic_connectors": ["connectors/VERSION"], }, + classifiers=[ + "Programming Language :: Python :: 3", + "Operating System :: OS Independent", + ], + python_requires=">=3.10", ) diff --git a/requirements/package-dev.txt b/requirements/package-dev.txt index 86bd7078f..97231eaa3 100644 --- a/requirements/package-dev.txt +++ b/requirements/package-dev.txt @@ -1,3 +1,2 @@ Jinja2==3.1.4 black==24.4.2 -tika==2.6.0 diff --git a/requirements/package.txt b/requirements/package.txt new file mode 100644 index 000000000..c0e2d6499 --- /dev/null +++ b/requirements/package.txt @@ -0,0 +1 @@ +tika==2.6.0 diff --git a/scripts/codegen/generate_connectors.py b/scripts/package/codegen/generate_connectors.py similarity index 92% rename from scripts/codegen/generate_connectors.py rename to 
scripts/package/codegen/generate_connectors.py index f3bf7b977..df7e889b4 100644 --- a/scripts/codegen/generate_connectors.py +++ b/scripts/package/codegen/generate_connectors.py @@ -33,7 +33,7 @@ def generate_wrapper_class_code( template = template_env.get_template("connector_template.jinja2") class_code = template.render( - class_name=class_name.replace('DataSource', 'Connector'), + class_name=class_name.replace("DataSource", "Connector"), data_source_class=data_source_class.__name__, data_source_module=data_source_module, params=constructor_args, @@ -55,7 +55,7 @@ def generate_and_write_wrapper_classes(sources, output_dir): if not os.path.exists(output_dir): os.makedirs(output_dir) - env = Environment(loader=FileSystemLoader("scripts/codegen/templates")) + env = Environment(loader=FileSystemLoader("scripts/package/codegen/templates")) for key, value in sources.items(): module_name, class_name = value.split(":") @@ -73,5 +73,5 @@ def generate_and_write_wrapper_classes(sources, output_dir): if __name__ == "__main__": connectors_config = _default_config() data_source_classes = connectors_config["sources"] - output_dir = os.path.join("package", "connectors", "generated") + output_dir = os.path.join("package", "generated") generate_and_write_wrapper_classes(data_source_classes, output_dir) diff --git a/scripts/codegen/generate_connectors_init.py b/scripts/package/codegen/generate_connectors_init.py similarity index 92% rename from scripts/codegen/generate_connectors_init.py rename to scripts/package/codegen/generate_connectors_init.py index 2d26daa99..30fabc040 100644 --- a/scripts/codegen/generate_connectors_init.py +++ b/scripts/package/codegen/generate_connectors_init.py @@ -31,5 +31,5 @@ def generate_init_file(output_dir, template_env): if __name__ == "__main__": package_dir = os.path.join("package") - env = Environment(loader=FileSystemLoader("scripts/codegen/templates")) + env = Environment(loader=FileSystemLoader("scripts/package/codegen/templates")) generate_init_file(package_dir, env) diff --git a/scripts/codegen/templates/connector_template.jinja2 b/scripts/package/codegen/templates/connector_template.jinja2 similarity index 87% rename from scripts/codegen/templates/connector_template.jinja2 rename to scripts/package/codegen/templates/connector_template.jinja2 index 04bfbfaa6..af9201d57 100644 --- a/scripts/codegen/templates/connector_template.jinja2 +++ b/scripts/package/codegen/templates/connector_template.jinja2 @@ -6,9 +6,9 @@ # This is a generated code. Do not modify directly. # Run `make generate_connector_package` to update. -from connectors.source import DataSourceConfiguration -from {{ data_source_module }} import {{ data_source_class }} -from package.connectors.connector_base import ConnectorBase +from elastic_connectors.connectors.source import DataSourceConfiguration +from elastic_connectors.{{ data_source_module }} import {{ data_source_class }} +from elastic_connectors.connector_base import ConnectorBase class {{ class_name }}(ConnectorBase): """ diff --git a/scripts/codegen/templates/init_template.jinja2 b/scripts/package/codegen/templates/init_template.jinja2 similarity index 70% rename from scripts/codegen/templates/init_template.jinja2 rename to scripts/package/codegen/templates/init_template.jinja2 index d8f35f156..bf3d156b7 100644 --- a/scripts/codegen/templates/init_template.jinja2 +++ b/scripts/package/codegen/templates/init_template.jinja2 @@ -7,5 +7,7 @@ # Run `make generate_connector_package` to update. 
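# Just below, the init template also gains an explicit __all__, so the
# generated __init__.py advertises exactly the connector classes; rendered
# with the illustrative single-entry imports list from earlier, it emits:

__all__ = ['ZoomConnector']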
{% for module, class_name in imports %} -from .connectors.generated.{{ module }} import {{ class_name }} +from .generated.{{ module }} import {{ class_name }} {% endfor %} + +__all__ = [{% for module, class_name in imports %}'{{ class_name }}'{% if not loop.last %}, {% endif %}{% endfor %}] diff --git a/scripts/package/manage_package.sh b/scripts/package/manage_package.sh new file mode 100755 index 000000000..e13ba0bb6 --- /dev/null +++ b/scripts/package/manage_package.sh @@ -0,0 +1,53 @@ +#!/bin/bash + +set -e + +function generate_connector_package_code() { + bin/python scripts/package/codegen/generate_connectors.py + bin/python scripts/package/codegen/generate_connectors_init.py +} + +function generate_connector_package() { + generate_connector_package_code + mkdir -p package/elastic_connectors + cp -r package/* package/elastic_connectors + rm -rf package/elastic_connectors/elastic_connectors + cp -r connectors requirements package/elastic_connectors + bin/python scripts/package/update_imports.py +} + +function clean_connector_package() { + cd package && rm -rf elastic_connectors build dist *.egg-info +} + +function build_connector_package() { + clean_connector_package + generate_connector_package + cd package && ../bin/python setup.py sdist bdist_wheel +} + +function publish_connector_package() { + build_connector_package + cd package && twine upload --repository testpypi dist/* +} + +case "$1" in + generate-connector-package-code) + generate_connector_package_code + ;; + generate-connector-package) + generate_connector_package + ;; + clean-connector-package) + clean_connector_package + ;; + build-connector-package) + build_connector_package + ;; + publish-connector-package) + publish_connector_package + ;; + *) + echo "Usage: $0 {generate-connector-package-code|generate-connector-package|clean-connector-package|build-connector-package|publish-connector-package}" + exit 1 +esac diff --git a/scripts/package/update_imports.py b/scripts/package/update_imports.py new file mode 100644 index 000000000..35a960d7b --- /dev/null +++ b/scripts/package/update_imports.py @@ -0,0 +1,37 @@ +# +# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +# or more contributor license agreements. Licensed under the Elastic License 2.0; +# you may not use this file except in compliance with the Elastic License 2.0. 
+# + +import os +import re + + +def update_imports(directory, old_import, new_import): + for root, _, files in os.walk(directory): + for file in files: + if file.endswith(".py"): + file_path = os.path.join(root, file) + with open(file_path, "r", encoding="utf-8") as f: + content = f.read() + + # Replace old import with new import + updated_content = re.sub( + r"\b" + old_import + r"\b", new_import, content + ) + + if content != updated_content: + with open(file_path, "w", encoding="utf-8") as f: + f.write(updated_content) + print(f"Updated imports in {file_path}") + + +if __name__ == "__main__": + # Update these paths and import strings as necessary + old_import = "from connectors" + new_import = "from elastic_connectors.connectors" + + update_imports( + os.path.join("package", "elastic_connectors"), old_import, new_import + ) From bb9736c54506198a98d58494d3da3e0610a8c758 Mon Sep 17 00:00:00 2001 From: Jedr Blaszyk Date: Thu, 11 Jul 2024 11:34:16 +0200 Subject: [PATCH 6/8] update makefile --- Makefile | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 5a70d8a41..641f7f625 100644 --- a/Makefile +++ b/Makefile @@ -89,6 +89,25 @@ bin/package-dev: requirements/package-dev.txt bin/pip install -r requirements/$(ARCH).txt bin/pip install -r requirements/package-dev.txt -generate_connector_package: bin/package-dev - bin/python scripts/codegen/generate_connectors.py - bin/python scripts/codegen/generate_connectors_init.py +generate_connector_package_code: bin/package-dev + bin/python scripts/package/codegen/generate_connectors.py + bin/python scripts/package/codegen/generate_connectors_init.py + +# Move everything under `elastic_connectors` temporary folder +generate_connector_package: generate_connector_package_code + mkdir -p package/elastic_connectors + cp -r package/* package/elastic_connectors + rm -rf package/elastic_connectors/elastic_connectors + cp -r connectors requirements package/elastic_connectors + bin/python scripts/package/update_imports.py + +# Clean temporary folder and distribution files +clean_connector_package: + cd package && rm -rf elastic_connectors build dist *.egg-info + +# Build the connector package +build_connector_package: clean_connector_package generate_connector_package + cd package && ../bin/python setup.py sdist bdist_wheel + +publish_connector_package: build_connector_package + cd package && twine upload --repository testpypi dist/* From ed1b39022ca422258715308072773cedee5d235c Mon Sep 17 00:00:00 2001 From: Jedr Blaszyk Date: Fri, 12 Jul 2024 08:24:25 +0200 Subject: [PATCH 7/8] Add docs, cleanup repo --- .gitignore | 4 + Makefile | 13 ++- package/README.md | 64 +++++++++++- package/connector_base.py | 4 +- package/docs/.pages | 4 + package/docs/README.md | 71 +++++++++++++ package/docs/generated.azure_blob_storage.md | 65 ++++++++++++ package/docs/generated.box.md | 65 ++++++++++++ package/docs/generated.confluence.md | 99 +++++++++++++++++++ package/docs/generated.dir.md | 49 +++++++++ package/docs/generated.dropbox.md | 71 +++++++++++++ package/docs/generated.github.md | 91 +++++++++++++++++ package/docs/generated.gmail.md | 60 +++++++++++ .../docs/generated.google_cloud_storage.md | 45 +++++++++ package/docs/generated.google_drive.md | 71 +++++++++++++ package/docs/generated.graphql.md | 96 ++++++++++++++++++ package/docs/generated.jira.md | 95 ++++++++++++++++++ package/docs/generated.microsoft_teams.md | 61 ++++++++++++ package/docs/generated.mongodb.md | 80 +++++++++++++++ 
package/docs/generated.mssql.md | 90 +++++++++++++++++ package/docs/generated.mysql.md | 81 +++++++++++++++ package/docs/generated.network_drive.md | 69 +++++++++++++ package/docs/generated.notion.md | 62 ++++++++++++ package/docs/generated.onedrive.md | 61 ++++++++++++ package/docs/generated.oracle.md | 94 ++++++++++++++++++ package/docs/generated.outlook.md | 88 +++++++++++++++++ package/docs/generated.postgresql.md | 86 ++++++++++++++++ package/docs/generated.redis.md | 82 +++++++++++++++ package/docs/generated.s3.md | 70 +++++++++++++ package/docs/generated.salesforce.md | 51 ++++++++++ package/docs/generated.servicenow.md | 66 +++++++++++++ package/docs/generated.sharepoint_online.md | 92 +++++++++++++++++ package/docs/generated.sharepoint_server.md | 79 +++++++++++++++ package/docs/generated.slack.md | 61 ++++++++++++ package/docs/generated.zoom.md | 63 ++++++++++++ package/setup.py | 2 +- requirements/package-dev.txt | 1 + 37 files changed, 2297 insertions(+), 9 deletions(-) create mode 100644 package/docs/.pages create mode 100644 package/docs/README.md create mode 100644 package/docs/generated.azure_blob_storage.md create mode 100644 package/docs/generated.box.md create mode 100644 package/docs/generated.confluence.md create mode 100644 package/docs/generated.dir.md create mode 100644 package/docs/generated.dropbox.md create mode 100644 package/docs/generated.github.md create mode 100644 package/docs/generated.gmail.md create mode 100644 package/docs/generated.google_cloud_storage.md create mode 100644 package/docs/generated.google_drive.md create mode 100644 package/docs/generated.graphql.md create mode 100644 package/docs/generated.jira.md create mode 100644 package/docs/generated.microsoft_teams.md create mode 100644 package/docs/generated.mongodb.md create mode 100644 package/docs/generated.mssql.md create mode 100644 package/docs/generated.mysql.md create mode 100644 package/docs/generated.network_drive.md create mode 100644 package/docs/generated.notion.md create mode 100644 package/docs/generated.onedrive.md create mode 100644 package/docs/generated.oracle.md create mode 100644 package/docs/generated.outlook.md create mode 100644 package/docs/generated.postgresql.md create mode 100644 package/docs/generated.redis.md create mode 100644 package/docs/generated.s3.md create mode 100644 package/docs/generated.salesforce.md create mode 100644 package/docs/generated.servicenow.md create mode 100644 package/docs/generated.sharepoint_online.md create mode 100644 package/docs/generated.sharepoint_server.md create mode 100644 package/docs/generated.slack.md create mode 100644 package/docs/generated.zoom.md diff --git a/.gitignore b/.gitignore index 7ba55d91d..5785d0489 100644 --- a/.gitignore +++ b/.gitignore @@ -18,3 +18,7 @@ config.yml **/venv scripts/stack/connectors-config + +# package related +package/elastic_connectors +package/dist diff --git a/Makefile b/Makefile index 641f7f625..2c0000544 100644 --- a/Makefile +++ b/Makefile @@ -89,12 +89,12 @@ bin/package-dev: requirements/package-dev.txt bin/pip install -r requirements/$(ARCH).txt bin/pip install -r requirements/package-dev.txt -generate_connector_package_code: bin/package-dev +generate-connector-package-code: bin/package-dev bin/python scripts/package/codegen/generate_connectors.py bin/python scripts/package/codegen/generate_connectors_init.py # Move everything under `elastic_connectors` temporary folder -generate_connector_package: generate_connector_package_code +generate-connector-package: 
generate-connector-package-code
	mkdir -p package/elastic_connectors
	cp -r package/* package/elastic_connectors
	rm -rf package/elastic_connectors/elastic_connectors
@@ -102,12 +102,15 @@ generate_connector_package: generate_connector_package_code
 	bin/python scripts/package/update_imports.py
 
 # Clean temporary folder and distribution files
-clean_connector_package:
+clean-connector-package:
 	cd package && rm -rf elastic_connectors build dist *.egg-info
 
+generate-connector-package-docs:
+	PYTHONPATH=./package lazydocs generated --remove-package-prefix --no-watermark --output-path package/docs --overview-file README
+
 # Build the connector package
-build_connector_package: clean_connector_package generate_connector_package
+build-connector-package: clean-connector-package generate-connector-package generate-connector-package-docs
 	cd package && ../bin/python setup.py sdist bdist_wheel
 
-publish_connector_package: build_connector_package
+publish-connector-package: build-connector-package
 	cd package && twine upload --repository testpypi dist/*
diff --git a/package/README.md b/package/README.md
index 2089e6211..720781e17 100644
--- a/package/README.md
+++ b/package/README.md
@@ -1 +1,61 @@
-# elastic-conn-packages
+# elastic-connectors
+
+## Overview
+
+`elastic-connectors` is an async-first Python package that provides connectors to various third-party services. Each connector class in this package exposes an asynchronous method to fetch documents from the third-party service.
+
+## Installation
+
+To install the package, use pip:
+
+```bash
+pip install elastic-connectors
+```
+
+## Usage
+
+### Importing a Connector
+Each connector module can be imported as follows:
+
+```python
+from elastic_connectors import SharepointOnlineConnector
+```
+
+### Constructor
+The constructor for each connector module requires the arguments relevant to the third-party integration, along with these optional parameters:
+
+- `logger` (logging.Logger, optional): Logger instance. Defaults to None.
+- `download_content` (bool, optional): Flag to determine if content should be downloaded. Defaults to True.
+
+### Methods
+
+Each connector module exposes the following asynchronous method for fetching data from a third-party source:
+
+```python
+async def async_get_docs(self) -> AsyncIterator[Dict]:
+    """
+    Asynchronously retrieves documents from the third-party service.
+
+    Yields:
+        AsyncIterator[Dict]: An asynchronous iterator of dictionaries containing document data.
+    """
+```
+
+### Example
+Below is an example demonstrating how to use a connector module:
+
+```python
+async with SharepointOnlineConnector(
+    tenant_id=SPO_TENANT_ID,
+    tenant_name=SPO_TENANT_NAME,
+    client_id=SPO_CLIENT_ID,
+    secret_value=SPO_CLIENT_SECRET
+) as connector:
+    spo_docs = []
+    async for doc in connector.async_get_docs():
+        spo_docs.append(doc)
+```
+
+### API overview
+
+See the [API overview](./docs/README.md) for a list of all available connectors.
diff --git a/package/connector_base.py b/package/connector_base.py
index b0cc50255..e9ab44387 100644
--- a/package/connector_base.py
+++ b/package/connector_base.py
@@ -13,7 +13,7 @@
 from connectors.es.settings import TIMESTAMP_FIELD
 
 
-def extract_content_with_tika(b64_content: str) -> str:
+def _extract_content_with_tika(b64_content: str) -> str:
     """
     Extracts text content from a base64-encoded binary content using Tika.
@@ -111,7 +111,7 @@ async def async_get_docs(self) -> AsyncIterator[Dict]: # binary to string conversion binary_data = data.get("_attachment", None) - text = extract_content_with_tika(binary_data) + text = _extract_content_with_tika(binary_data) doc.update({"body": text}) except Exception as e: diff --git a/package/docs/.pages b/package/docs/.pages new file mode 100644 index 000000000..db48efa29 --- /dev/null +++ b/package/docs/.pages @@ -0,0 +1,4 @@ +title: API Reference +nav: + - Overview: README.md + - ... diff --git a/package/docs/README.md b/package/docs/README.md new file mode 100644 index 000000000..ec9963ef1 --- /dev/null +++ b/package/docs/README.md @@ -0,0 +1,71 @@ + + +# API Overview + +## Modules + +- [`generated.azure_blob_storage`](./generated.azure_blob_storage.md#module-generatedazure_blob_storage) +- [`generated.box`](./generated.box.md#module-generatedbox) +- [`generated.confluence`](./generated.confluence.md#module-generatedconfluence) +- [`generated.dir`](./generated.dir.md#module-generateddir) +- [`generated.dropbox`](./generated.dropbox.md#module-generateddropbox) +- [`generated.github`](./generated.github.md#module-generatedgithub) +- [`generated.gmail`](./generated.gmail.md#module-generatedgmail) +- [`generated.google_cloud_storage`](./generated.google_cloud_storage.md#module-generatedgoogle_cloud_storage) +- [`generated.google_drive`](./generated.google_drive.md#module-generatedgoogle_drive) +- [`generated.graphql`](./generated.graphql.md#module-generatedgraphql) +- [`generated.jira`](./generated.jira.md#module-generatedjira) +- [`generated.microsoft_teams`](./generated.microsoft_teams.md#module-generatedmicrosoft_teams) +- [`generated.mongodb`](./generated.mongodb.md#module-generatedmongodb) +- [`generated.mssql`](./generated.mssql.md#module-generatedmssql) +- [`generated.mysql`](./generated.mysql.md#module-generatedmysql) +- [`generated.network_drive`](./generated.network_drive.md#module-generatednetwork_drive) +- [`generated.notion`](./generated.notion.md#module-generatednotion) +- [`generated.onedrive`](./generated.onedrive.md#module-generatedonedrive) +- [`generated.oracle`](./generated.oracle.md#module-generatedoracle) +- [`generated.outlook`](./generated.outlook.md#module-generatedoutlook) +- [`generated.postgresql`](./generated.postgresql.md#module-generatedpostgresql) +- [`generated.redis`](./generated.redis.md#module-generatedredis) +- [`generated.s3`](./generated.s3.md#module-generateds3) +- [`generated.salesforce`](./generated.salesforce.md#module-generatedsalesforce) +- [`generated.servicenow`](./generated.servicenow.md#module-generatedservicenow) +- [`generated.sharepoint_online`](./generated.sharepoint_online.md#module-generatedsharepoint_online) +- [`generated.sharepoint_server`](./generated.sharepoint_server.md#module-generatedsharepoint_server) +- [`generated.slack`](./generated.slack.md#module-generatedslack) +- [`generated.zoom`](./generated.zoom.md#module-generatedzoom) + +## Classes + +- [`azure_blob_storage.AzureBlobStorageConnector`](./generated.azure_blob_storage.md#class-azureblobstorageconnector): AzureBlobStorageConnector class generated for connecting to the data source. +- [`box.BoxConnector`](./generated.box.md#class-boxconnector): BoxConnector class generated for connecting to the data source. +- [`confluence.ConfluenceConnector`](./generated.confluence.md#class-confluenceconnector): ConfluenceConnector class generated for connecting to the data source. 
+- [`dir.DirectoryConnector`](./generated.dir.md#class-directoryconnector): DirectoryConnector class generated for connecting to the data source. +- [`dropbox.DropboxConnector`](./generated.dropbox.md#class-dropboxconnector): DropboxConnector class generated for connecting to the data source. +- [`github.GitHubConnector`](./generated.github.md#class-githubconnector): GitHubConnector class generated for connecting to the data source. +- [`gmail.GMailConnector`](./generated.gmail.md#class-gmailconnector): GMailConnector class generated for connecting to the data source. +- [`google_cloud_storage.GoogleCloudStorageConnector`](./generated.google_cloud_storage.md#class-googlecloudstorageconnector): GoogleCloudStorageConnector class generated for connecting to the data source. +- [`google_drive.GoogleDriveConnector`](./generated.google_drive.md#class-googledriveconnector): GoogleDriveConnector class generated for connecting to the data source. +- [`graphql.GraphQLConnector`](./generated.graphql.md#class-graphqlconnector): GraphQLConnector class generated for connecting to the data source. +- [`jira.JiraConnector`](./generated.jira.md#class-jiraconnector): JiraConnector class generated for connecting to the data source. +- [`microsoft_teams.MicrosoftTeamsConnector`](./generated.microsoft_teams.md#class-microsoftteamsconnector): MicrosoftTeamsConnector class generated for connecting to the data source. +- [`mongodb.MongoConnector`](./generated.mongodb.md#class-mongoconnector): MongoConnector class generated for connecting to the data source. +- [`mssql.MSSQLConnector`](./generated.mssql.md#class-mssqlconnector): MSSQLConnector class generated for connecting to the data source. +- [`mysql.MySqlConnector`](./generated.mysql.md#class-mysqlconnector): MySqlConnector class generated for connecting to the data source. +- [`network_drive.NASConnector`](./generated.network_drive.md#class-nasconnector): NASConnector class generated for connecting to the data source. +- [`notion.NotionConnector`](./generated.notion.md#class-notionconnector): NotionConnector class generated for connecting to the data source. +- [`onedrive.OneDriveConnector`](./generated.onedrive.md#class-onedriveconnector): OneDriveConnector class generated for connecting to the data source. +- [`oracle.OracleConnector`](./generated.oracle.md#class-oracleconnector): OracleConnector class generated for connecting to the data source. +- [`outlook.OutlookConnector`](./generated.outlook.md#class-outlookconnector): OutlookConnector class generated for connecting to the data source. +- [`postgresql.PostgreSQLConnector`](./generated.postgresql.md#class-postgresqlconnector): PostgreSQLConnector class generated for connecting to the data source. +- [`redis.RedisConnector`](./generated.redis.md#class-redisconnector): RedisConnector class generated for connecting to the data source. +- [`s3.S3Connector`](./generated.s3.md#class-s3connector): S3Connector class generated for connecting to the data source. +- [`salesforce.SalesforceConnector`](./generated.salesforce.md#class-salesforceconnector): SalesforceConnector class generated for connecting to the data source. +- [`servicenow.ServiceNowConnector`](./generated.servicenow.md#class-servicenowconnector): ServiceNowConnector class generated for connecting to the data source. +- [`sharepoint_online.SharepointOnlineConnector`](./generated.sharepoint_online.md#class-sharepointonlineconnector): SharepointOnlineConnector class generated for connecting to the data source. 
+- [`sharepoint_server.SharepointServerConnector`](./generated.sharepoint_server.md#class-sharepointserverconnector): SharepointServerConnector class generated for connecting to the data source. +- [`slack.SlackConnector`](./generated.slack.md#class-slackconnector): SlackConnector class generated for connecting to the data source. +- [`zoom.ZoomConnector`](./generated.zoom.md#class-zoomconnector): ZoomConnector class generated for connecting to the data source. + +## Functions + +- No functions diff --git a/package/docs/generated.azure_blob_storage.md b/package/docs/generated.azure_blob_storage.md new file mode 100644 index 000000000..92d9e2cd5 --- /dev/null +++ b/package/docs/generated.azure_blob_storage.md @@ -0,0 +1,65 @@ + + + + +# module `generated.azure_blob_storage` + + + + + + +--- + + + +## class `AzureBlobStorageConnector` +AzureBlobStorageConnector class generated for connecting to the data source. + + + +**Args:** + + + + - `account_name` (str): Azure Blob Storage account name + + + - `account_key` (str): Azure Blob Storage account key + + + - `blob_endpoint` (str): Azure Blob Storage blob endpoint + + + - `containers` (list): Azure Blob Storage containers + + + - `retry_count` (int): Retries per request + + + - `concurrent_downloads` (int): Maximum concurrent downloads + + + +### method `__init__` + +```python +__init__( + account_name=None, + account_key=None, + blob_endpoint=None, + containers=None, + retry_count=3, + concurrent_downloads=100, + **kwargs +) +``` + + + + + + + + + diff --git a/package/docs/generated.box.md b/package/docs/generated.box.md new file mode 100644 index 000000000..af9f17984 --- /dev/null +++ b/package/docs/generated.box.md @@ -0,0 +1,65 @@ + + + + +# module `generated.box` + + + + + + +--- + + + +## class `BoxConnector` +BoxConnector class generated for connecting to the data source. + + + +**Args:** + + + + - `is_enterprise` (str): Box Account + + + - `client_id` (str): Client ID + + + - `client_secret` (str): Client Secret + + + - `refresh_token` (str): Refresh Token + + + - `enterprise_id` (int): Enterprise ID + + + - `concurrent_downloads` (int): Maximum concurrent downloads + + + +### method `__init__` + +```python +__init__( + is_enterprise='box_free', + client_id=None, + client_secret=None, + refresh_token=None, + enterprise_id=None, + concurrent_downloads=15, + **kwargs +) +``` + + + + + + + + + diff --git a/package/docs/generated.confluence.md b/package/docs/generated.confluence.md new file mode 100644 index 000000000..7a726c735 --- /dev/null +++ b/package/docs/generated.confluence.md @@ -0,0 +1,99 @@ + + + + +# module `generated.confluence` + + + + + + +--- + + + +## class `ConfluenceConnector` +ConfluenceConnector class generated for connecting to the data source. + + + +**Args:** + + + + - `data_source` (str): Confluence data source + + + - `username` (str): Confluence Server username + + + - `password` (str): Confluence Server password + + + - `data_center_username` (str): Confluence Data Center username + + + - `data_center_password` (str): Confluence Data Center password + + + - `account_email` (str): Confluence Cloud account email + + + - `api_token` (str): Confluence Cloud API token + + + - `confluence_url` (str): Confluence URL + + + - `spaces` (list): Confluence space keys + - This configurable field is ignored when Advanced Sync Rules are used. 
+ + + - `index_labels` (bool): Enable indexing labels + - Enabling this will increase the amount of network calls to the source, and may decrease performance + + + - `ssl_enabled` (bool): Enable SSL + + + - `ssl_ca` (str): SSL certificate + + + - `retry_count` (int): Retries per request + + + - `concurrent_downloads` (int): Maximum concurrent downloads + + + +### method `__init__` + +```python +__init__( + data_source='confluence_server', + username=None, + password=None, + data_center_username=None, + data_center_password=None, + account_email=None, + api_token=None, + confluence_url=None, + spaces=None, + index_labels=False, + ssl_enabled=False, + ssl_ca=None, + retry_count=3, + concurrent_downloads=50, + **kwargs +) +``` + + + + + + + + + diff --git a/package/docs/generated.dir.md b/package/docs/generated.dir.md new file mode 100644 index 000000000..1d876f1b6 --- /dev/null +++ b/package/docs/generated.dir.md @@ -0,0 +1,49 @@ + + + + +# module `generated.dir` + + + + + + +--- + + + +## class `DirectoryConnector` +DirectoryConnector class generated for connecting to the data source. + + + +**Args:** + + + + - `directory` (str): Directory path + + + - `pattern` (str): File glob-like pattern + + + +### method `__init__` + +```python +__init__( + directory='/Users/jedr/connectors/lib/python3.10/site-packages/connectors/sources', + pattern='**/*.*', + **kwargs +) +``` + + + + + + + + + diff --git a/package/docs/generated.dropbox.md b/package/docs/generated.dropbox.md new file mode 100644 index 000000000..86ae46238 --- /dev/null +++ b/package/docs/generated.dropbox.md @@ -0,0 +1,71 @@ + + + + +# module `generated.dropbox` + + + + + + +--- + + + +## class `DropboxConnector` +DropboxConnector class generated for connecting to the data source. + + + +**Args:** + + + + - `path` (str): Path to fetch files/folders + - Path is ignored when Advanced Sync Rules are used. + + + - `app_key` (str): App Key + + + - `app_secret` (str): App secret + + + - `refresh_token` (str): Refresh token + + + - `retry_count` (int): Retries per request + + + - `concurrent_downloads` (int): Maximum concurrent downloads + + + - `include_inherited_users_and_groups` (bool): Include groups and inherited users + - Include groups and inherited users when indexing permissions. Enabling this configurable field will cause a significant performance degradation. + + + +### method `__init__` + +```python +__init__( + path=None, + app_key=None, + app_secret=None, + refresh_token=None, + retry_count=3, + concurrent_downloads=100, + include_inherited_users_and_groups=False, + **kwargs +) +``` + + + + + + + + + diff --git a/package/docs/generated.github.md b/package/docs/generated.github.md new file mode 100644 index 000000000..7027a0caf --- /dev/null +++ b/package/docs/generated.github.md @@ -0,0 +1,91 @@ + + + + +# module `generated.github` + + + + + + +--- + + + +## class `GitHubConnector` +GitHubConnector class generated for connecting to the data source. + + + +**Args:** + + + + - `data_source` (str): Data source + + + - `host` (str): Server URL + + + - `auth_method` (str): Authentication method + + + - `token` (str): Token + + + - `repo_type` (str): Repository Type + - The Document Level Security feature is not available for the Other Repository Type + + + - `org_name` (str): Organization Name + + + - `app_id` (int): App ID + + + - `private_key` (str): App private key + + + - `repositories` (list): List of repositories + - This configurable field is ignored when Advanced Sync Rules are used. 
+
+
+ - `ssl_enabled` (bool): Enable SSL
+
+
+ - `ssl_ca` (str): SSL certificate
+
+
+ - `retry_count` (int): Maximum retries per request
+
+
+
+### method `__init__`
+
+```python
+__init__(
+    data_source='github_server',
+    host=None,
+    auth_method='personal_access_token',
+    token=None,
+    repo_type='other',
+    org_name=None,
+    app_id=None,
+    private_key=None,
+    repositories=None,
+    ssl_enabled=False,
+    ssl_ca=None,
+    retry_count=3,
+    **kwargs
+)
+```
+
+
+
+
+
+
+
+
diff --git a/package/docs/generated.gmail.md b/package/docs/generated.gmail.md
new file mode 100644
index 000000000..e51b343eb
--- /dev/null
+++ b/package/docs/generated.gmail.md
@@ -0,0 +1,60 @@
+
+
+
+
+# module `generated.gmail`
+
+
+
+
+
+---
+
+
+
+## class `GMailConnector`
+GMailConnector class generated for connecting to the data source.
+
+
+
+**Args:**
+
+
+
+ - `service_account_credentials` (str): GMail service account JSON
+
+
+ - `subject` (str): Google Workspace admin email
+ - Admin account email address
+
+
+ - `customer_id` (str): Google customer id
+ - Google admin console -> Account -> Settings -> Customer Id
+
+
+ - `include_spam_and_trash` (bool): Include spam and trash emails
+ - Will include spam and trash emails when set to true.
+
+
+
+### method `__init__`
+
+```python
+__init__(
+    service_account_credentials=None,
+    subject=None,
+    customer_id=None,
+    include_spam_and_trash=False,
+    **kwargs
+)
+```
+
+
+
+
+
+
+
+
diff --git a/package/docs/generated.google_cloud_storage.md b/package/docs/generated.google_cloud_storage.md
new file mode 100644
index 000000000..8eba0c410
--- /dev/null
+++ b/package/docs/generated.google_cloud_storage.md
@@ -0,0 +1,45 @@
+
+
+
+
+# module `generated.google_cloud_storage`
+
+
+
+
+
+---
+
+
+
+## class `GoogleCloudStorageConnector`
+GoogleCloudStorageConnector class generated for connecting to the data source.
+
+
+
+**Args:**
+
+
+
+ - `buckets` (list): Google Cloud Storage buckets
+
+
+ - `service_account_credentials` (str): Google Cloud service account JSON
+
+
+
+### method `__init__`
+
+```python
+__init__(buckets=None, service_account_credentials=None, **kwargs)
+```
+
+
+
+
+
+
+
+
diff --git a/package/docs/generated.google_drive.md b/package/docs/generated.google_drive.md
new file mode 100644
index 000000000..cb86ce08f
--- /dev/null
+++ b/package/docs/generated.google_drive.md
@@ -0,0 +1,71 @@
+
+
+
+
+# module `generated.google_drive`
+
+
+
+
+
+---
+
+
+
+## class `GoogleDriveConnector`
+GoogleDriveConnector class generated for connecting to the data source.
+
+
+
+**Args:**
+
+
+
+ - `service_account_credentials` (str): Google Drive service account JSON
+ - This connector authenticates as a service account to synchronize content from Google Drive.
+
+
+ - `use_domain_wide_delegation_for_sync` (bool): Use domain-wide delegation for data sync
+ - Enable domain-wide delegation to automatically sync content from all shared and personal drives in the Google Workspace. This eliminates the need to manually share Google Drive data with your service account, though it may increase sync time. If disabled, only items and folders manually shared with the service account will be synced. Please refer to the connector documentation to ensure domain-wide delegation is correctly configured and has the appropriate scopes.
+
+
+ - `google_workspace_admin_email_for_data_sync` (str): Google Workspace admin email
+ - Provide the admin email to be used with domain-wide delegation for data sync. This email enables the connector to utilize the Admin Directory API for listing organization users. Please refer to the connector documentation to ensure domain-wide delegation is correctly configured and has the appropriate scopes.
+
+
+ - `google_workspace_email_for_shared_drives_sync` (str): Google Workspace email for syncing shared drives
+ - Provide the Google Workspace user email for discovery and syncing of shared drives. Only the shared drives this user has access to will be synced.
+
+
+ - `google_workspace_admin_email` (str): Google Workspace admin email
+ - In order to use Document Level Security you need to enable Google Workspace domain-wide delegation of authority for your service account. A service account with delegated authority can impersonate an admin user with sufficient permissions to fetch all users and their corresponding permissions. Please refer to the connector documentation to ensure domain-wide delegation is correctly configured and has the appropriate scopes.
+
+
+ - `max_concurrency` (int): Maximum concurrent HTTP requests
+ - This setting determines the maximum number of concurrent HTTP requests sent to the Google API to fetch data. Increasing this value can improve data retrieval speed, but it may also place higher demands on system resources and network bandwidth.
+
+
+
+### method `__init__`
+
+```python
+__init__(
+    service_account_credentials=None,
+    use_domain_wide_delegation_for_sync=False,
+    google_workspace_admin_email_for_data_sync=None,
+    google_workspace_email_for_shared_drives_sync=None,
+    google_workspace_admin_email=None,
+    max_concurrency=25,
+    **kwargs
+)
+```
+
+
+
+
+
+
+
+
diff --git a/package/docs/generated.graphql.md b/package/docs/generated.graphql.md
new file mode 100644
index 000000000..ca86d3327
--- /dev/null
+++ b/package/docs/generated.graphql.md
@@ -0,0 +1,96 @@
+
+
+
+
+# module `generated.graphql`
+
+
+
+
+
+---
+
+
+
+## class `GraphQLConnector`
+GraphQLConnector class generated for connecting to the data source.
+
+
+
+**Args:**
+
+
+
+ - `http_endpoint` (str): GraphQL HTTP endpoint
+
+
+ - `http_method` (str): HTTP method for GraphQL requests
+
+
+ - `authentication_method` (str): Authentication Method
+
+
+ - `username` (str): Username
+
+
+ - `password` (str): Password
+
+
+ - `token` (str): Bearer Token
+
+
+ - `graphql_query` (str): GraphQL Body
+
+
+ - `graphql_variables` (str): GraphQL Variables
+
+
+ - `graphql_object_to_id_map` (str): GraphQL Objects to ID mapping
+ - Specifies which GraphQL objects should be indexed as individual documents. This allows finer control over indexing, ensuring only relevant data sections from the GraphQL response are stored as separate documents. Use a JSON object with the GraphQL object name as the key and a string field within the document as the value; each document must have a distinct value for this field. Use '.' to provide the full path of the object from the root of the response. For example {'organization.users.nodes': 'id'}
+
+
+ - `headers` (str): Headers
+
+
+ - `pagination_model` (str): Pagination model
+ - For cursor-based pagination, add 'pageInfo' and an 'after' argument variable in your query at the desired node (Pagination key). Use the 'after' query argument with a variable to iterate through pages. Detailed examples and setup instructions are available in the docs.
+
+
+ - `pagination_key` (str): Pagination key
+ - Specifies which GraphQL object is used for pagination. Use '.' to provide the full path of the object from the root of the response.
For example 'organization.users' + + + - `connection_timeout` (int): Connection Timeout + + + +### method `__init__` + +```python +__init__( + http_endpoint=None, + http_method='post', + authentication_method='none', + username=None, + password=None, + token=None, + graphql_query=None, + graphql_variables=None, + graphql_object_to_id_map=None, + headers=None, + pagination_model='no_pagination', + pagination_key=None, + connection_timeout=300, + **kwargs +) +``` + + + + + + + + + diff --git a/package/docs/generated.jira.md b/package/docs/generated.jira.md new file mode 100644 index 000000000..092543c06 --- /dev/null +++ b/package/docs/generated.jira.md @@ -0,0 +1,95 @@ + + + + +# module `generated.jira` + + + + + + +--- + + + +## class `JiraConnector` +JiraConnector class generated for connecting to the data source. + + + +**Args:** + + + + - `data_source` (str): Jira data source + + + - `username` (str): Jira Server username + + + - `password` (str): Jira Server password + + + - `data_center_username` (str): Jira Data Center username + + + - `data_center_password` (str): Jira Data Center password + + + - `account_email` (str): Jira Cloud email address + - Email address associated with Jira Cloud account. E.g. jane.doe@gmail.com + + + - `api_token` (str): Jira Cloud API token + + + - `jira_url` (str): Jira host url + + + - `projects` (list): Jira project keys + - This configurable field is ignored when Advanced Sync Rules are used. + + + - `ssl_enabled` (bool): Enable SSL + + + - `ssl_ca` (str): SSL certificate + + + - `retry_count` (int): Retries for failed requests + + + - `concurrent_downloads` (int): Maximum concurrent downloads + + + +### method `__init__` + +```python +__init__( + data_source='jira_cloud', + username=None, + password=None, + data_center_username=None, + data_center_password=None, + account_email=None, + api_token=None, + jira_url=None, + projects=None, + ssl_enabled=False, + ssl_ca=None, + retry_count=3, + concurrent_downloads=100, + **kwargs +) +``` + + + + + + + + + diff --git a/package/docs/generated.microsoft_teams.md b/package/docs/generated.microsoft_teams.md new file mode 100644 index 000000000..e1e7e89e2 --- /dev/null +++ b/package/docs/generated.microsoft_teams.md @@ -0,0 +1,61 @@ + + + + +# module `generated.microsoft_teams` + + + + + + +--- + + + +## class `MicrosoftTeamsConnector` +MicrosoftTeamsConnector class generated for connecting to the data source. + + + +**Args:** + + + + - `tenant_id` (str): Tenant ID + + + - `client_id` (str): Client ID + + + - `secret_value` (str): Secret value + + + - `username` (str): Username + + + - `password` (str): Password + + + +### method `__init__` + +```python +__init__( + tenant_id=None, + client_id=None, + secret_value=None, + username=None, + password=None, + **kwargs +) +``` + + + + + + + + + diff --git a/package/docs/generated.mongodb.md b/package/docs/generated.mongodb.md new file mode 100644 index 000000000..aa60c03db --- /dev/null +++ b/package/docs/generated.mongodb.md @@ -0,0 +1,80 @@ + + + + +# module `generated.mongodb` + + + + + + +--- + + + +## class `MongoConnector` +MongoConnector class generated for connecting to the data source. 
+ + + +**Args:** + + + + - `host` (str): Server hostname + + + - `user` (str): Username + + + - `password` (str): Password + + + - `database` (str): Database + + + - `collection` (str): Collection + + + - `direct_connection` (bool): Direct connection + + + - `ssl_enabled` (bool): SSL/TLS Connection + - This option establishes a secure connection to the MongoDB server using SSL/TLS encryption. Ensure that your MongoDB deployment supports SSL/TLS connections. Enable if MongoDB cluster uses DNS SRV records. + + + - `ssl_ca` (str): Certificate Authority (.pem) + - Specifies the root certificate from the Certificate Authority. The value of the certificate is used to validate the certificate presented by the MongoDB instance. + + + - `tls_insecure` (bool): Skip certificate verification + - This option skips certificate validation for TLS/SSL connections to your MongoDB server. We strongly recommend setting this option to 'disable'. + + + +### method `__init__` + +```python +__init__( + host=None, + user=None, + password=None, + database=None, + collection=None, + direct_connection=False, + ssl_enabled=False, + ssl_ca=None, + tls_insecure=False, + **kwargs +) +``` + + + + + + + + + diff --git a/package/docs/generated.mssql.md b/package/docs/generated.mssql.md new file mode 100644 index 000000000..e69f0c911 --- /dev/null +++ b/package/docs/generated.mssql.md @@ -0,0 +1,90 @@ + + + + +# module `generated.mssql` + + + + + + +--- + + + +## class `MSSQLConnector` +MSSQLConnector class generated for connecting to the data source. + + + +**Args:** + + + + - `host` (str): Host + + + - `port` (int): Port + + + - `username` (str): Username + + + - `password` (str): Password + + + - `database` (str): Database + + + - `tables` (list): Comma-separated list of tables + - This configurable field is ignored when Advanced Sync Rules are used. + + + - `fetch_size` (int): Rows fetched per request + + + - `retry_count` (int): Retries per request + + + - `schema` (str): Schema + + + - `ssl_enabled` (bool): Enable SSL verification + + + - `ssl_ca` (str): SSL certificate + + + - `validate_host` (bool): Validate host + + + +### method `__init__` + +```python +__init__( + host=None, + port=None, + username=None, + password=None, + database=None, + tables='*', + fetch_size=50, + retry_count=3, + schema=None, + ssl_enabled=False, + ssl_ca=None, + validate_host=False, + **kwargs +) +``` + + + + + + + + + diff --git a/package/docs/generated.mysql.md b/package/docs/generated.mysql.md new file mode 100644 index 000000000..8b280a474 --- /dev/null +++ b/package/docs/generated.mysql.md @@ -0,0 +1,81 @@ + + + + +# module `generated.mysql` + + + + + + +--- + + + +## class `MySqlConnector` +MySqlConnector class generated for connecting to the data source. 
+ + + +**Args:** + + + + - `host` (str): Host + + + - `port` (int): Port + + + - `user` (str): Username + + + - `password` (str): Password + + + - `database` (str): Database + + + - `tables` (list): Comma-separated list of tables + + + - `ssl_enabled` (bool): Enable SSL + + + - `ssl_ca` (str): SSL certificate + + + - `fetch_size` (int): Rows fetched per request + + + - `retry_count` (int): Retries per request + + + +### method `__init__` + +```python +__init__( + host=None, + port=None, + user=None, + password=None, + database=None, + tables='*', + ssl_enabled=False, + ssl_ca=None, + fetch_size=50, + retry_count=3, + **kwargs +) +``` + + + + + + + + + diff --git a/package/docs/generated.network_drive.md b/package/docs/generated.network_drive.md new file mode 100644 index 000000000..724f57438 --- /dev/null +++ b/package/docs/generated.network_drive.md @@ -0,0 +1,69 @@ + + + + +# module `generated.network_drive` + + + + + + +--- + + + +## class `NASConnector` +NASConnector class generated for connecting to the data source. + + + +**Args:** + + + + - `username` (str): Username + + + - `password` (str): Password + + + - `server_ip` (str): SMB IP + + + - `server_port` (int): SMB port + + + - `drive_path` (str): SMB path + + + - `drive_type` (str): Drive type + + + - `identity_mappings` (str): Path of CSV file containing users and groups SID (For Linux Network Drive) + + + +### method `__init__` + +```python +__init__( + username=None, + password=None, + server_ip=None, + server_port=None, + drive_path=None, + drive_type='windows', + identity_mappings=None, + **kwargs +) +``` + + + + + + + + + diff --git a/package/docs/generated.notion.md b/package/docs/generated.notion.md new file mode 100644 index 000000000..4dcc5ba45 --- /dev/null +++ b/package/docs/generated.notion.md @@ -0,0 +1,62 @@ + + + + +# module `generated.notion` + + + + + + +--- + + + +## class `NotionConnector` +NotionConnector class generated for connecting to the data source. + + + +**Args:** + + + + - `notion_secret_key` (str): Notion Secret Key + + + - `databases` (list): List of Databases + + + - `pages` (list): List of Pages + + + - `index_comments` (bool): Enable indexing comments + - Enabling this will increase the amount of network calls to the source, and may decrease performance + + + - `concurrent_downloads` (int): Maximum concurrent downloads + + + +### method `__init__` + +```python +__init__( + notion_secret_key=None, + databases=None, + pages=None, + index_comments=False, + concurrent_downloads=30, + **kwargs +) +``` + + + + + + + + + diff --git a/package/docs/generated.onedrive.md b/package/docs/generated.onedrive.md new file mode 100644 index 000000000..1a6ff27e6 --- /dev/null +++ b/package/docs/generated.onedrive.md @@ -0,0 +1,61 @@ + + + + +# module `generated.onedrive` + + + + + + +--- + + + +## class `OneDriveConnector` +OneDriveConnector class generated for connecting to the data source. 
+ + + +**Args:** + + + + - `client_id` (str): Azure application Client ID + + + - `client_secret` (str): Azure application Client Secret + + + - `tenant_id` (str): Azure application Tenant ID + + + - `retry_count` (int): Maximum retries per request + + + - `concurrent_downloads` (int): Maximum concurrent downloads + + + +### method `__init__` + +```python +__init__( + client_id=None, + client_secret=None, + tenant_id=None, + retry_count=3, + concurrent_downloads=15, + **kwargs +) +``` + + + + + + + + + diff --git a/package/docs/generated.oracle.md b/package/docs/generated.oracle.md new file mode 100644 index 000000000..d12254e48 --- /dev/null +++ b/package/docs/generated.oracle.md @@ -0,0 +1,94 @@ + + + + +# module `generated.oracle` + + + + + + +--- + + + +## class `OracleConnector` +OracleConnector class generated for connecting to the data source. + + + +**Args:** + + + + - `host` (str): Host + + + - `port` (int): Port + + + - `username` (str): Username + + + - `password` (str): Password + + + - `connection_source` (str): Connection Source + - Select 'Service Name' option if connecting to a pluggable database + + + - `sid` (str): SID + + + - `service_name` (str): Service Name + + + - `tables` (list): Comma-separated list of tables + + + - `fetch_size` (int): Rows fetched per request + + + - `retry_count` (int): Retries per request + + + - `oracle_protocol` (str): Oracle connection protocol + + + - `oracle_home` (str): Path to Oracle Home + + + - `wallet_configuration_path` (str): Path to SSL Wallet configuration files + + + +### method `__init__` + +```python +__init__( + host=None, + port=None, + username=None, + password=None, + connection_source='sid', + sid=None, + service_name=None, + tables='*', + fetch_size=50, + retry_count=3, + oracle_protocol='TCP', + oracle_home='', + wallet_configuration_path='', + **kwargs +) +``` + + + + + + + + + diff --git a/package/docs/generated.outlook.md b/package/docs/generated.outlook.md new file mode 100644 index 000000000..0752c2630 --- /dev/null +++ b/package/docs/generated.outlook.md @@ -0,0 +1,88 @@ + + + + +# module `generated.outlook` + + + + + + +--- + + + +## class `OutlookConnector` +OutlookConnector class generated for connecting to the data source. + + + +**Args:** + + + + - `data_source` (str): Outlook data source + + + - `tenant_id` (str): Tenant ID + + + - `client_id` (str): Client ID + + + - `client_secret` (str): Client Secret Value + + + - `exchange_server` (str): Exchange Server + - Exchange server's IP address. E.g. 127.0.0.1 + + + - `active_directory_server` (str): Active Directory Server + - Active Directory server's IP address. E.g. 
127.0.0.1 + + + - `username` (str): Exchange server username + + + - `password` (str): Exchange server password + + + - `domain` (str): Exchange server domain name + - Domain name such as gmail.com, outlook.com + + + - `ssl_enabled` (bool): Enable SSL + + + - `ssl_ca` (str): SSL certificate + + + +### method `__init__` + +```python +__init__( + data_source='outlook_cloud', + tenant_id=None, + client_id=None, + client_secret=None, + exchange_server=None, + active_directory_server=None, + username=None, + password=None, + domain=None, + ssl_enabled=False, + ssl_ca=None, + **kwargs +) +``` + + + + + + + + + diff --git a/package/docs/generated.postgresql.md b/package/docs/generated.postgresql.md new file mode 100644 index 000000000..bdd8ffc4a --- /dev/null +++ b/package/docs/generated.postgresql.md @@ -0,0 +1,86 @@ + + + + +# module `generated.postgresql` + + + + + + +--- + + + +## class `PostgreSQLConnector` +PostgreSQLConnector class generated for connecting to the data source. + + + +**Args:** + + + + - `host` (str): Host + + + - `port` (int): Port + + + - `username` (str): Username + + + - `password` (str): Password + + + - `database` (str): Database + + + - `schema` (str): Schema + + + - `tables` (list): Comma-separated list of tables + - This configurable field is ignored when Advanced Sync Rules are used. + + + - `fetch_size` (int): Rows fetched per request + + + - `retry_count` (int): Retries per request + + + - `ssl_enabled` (bool): Enable SSL verification + + + - `ssl_ca` (str): SSL certificate + + + +### method `__init__` + +```python +__init__( + host=None, + port=None, + username=None, + password=None, + database=None, + schema=None, + tables='*', + fetch_size=50, + retry_count=3, + ssl_enabled=False, + ssl_ca=None, + **kwargs +) +``` + + + + + + + + + diff --git a/package/docs/generated.redis.md b/package/docs/generated.redis.md new file mode 100644 index 000000000..d24816386 --- /dev/null +++ b/package/docs/generated.redis.md @@ -0,0 +1,82 @@ + + + + +# module `generated.redis` + + + + + + +--- + + + +## class `RedisConnector` +RedisConnector class generated for connecting to the data source. + + + +**Args:** + + + + - `host` (str): Host + + + - `port` (int): Port + + + - `username` (str): Username + + + - `password` (str): Password + + + - `database` (list): Comma-separated list of databases + - Databases are ignored when Advanced Sync Rules are used. + + + - `ssl_enabled` (bool): SSL/TLS Connection + - This option establishes a secure connection to Redis using SSL/TLS encryption. Ensure that your Redis deployment supports SSL/TLS connections. + + + - `mutual_tls_enabled` (bool): Mutual SSL/TLS Connection + - This option establishes a secure connection to Redis using mutual SSL/TLS encryption. Ensure that your Redis deployment supports mutual SSL/TLS connections. + + + - `tls_certfile` (str): client certificate file for SSL/TLS + - Specifies the client certificate from the Certificate Authority. The value of the certificate is used to validate the certificate presented by the Redis instance. + + + - `tls_keyfile` (str): client private key file for SSL/TLS + - Specifies the client private key from the Certificate Authority. The value of the key is used to validate the connection in the Redis instance. 
+ + + +### method `__init__` + +```python +__init__( + host=None, + port=None, + username=None, + password=None, + database='*', + ssl_enabled=False, + mutual_tls_enabled=False, + tls_certfile=None, + tls_keyfile=None, + **kwargs +) +``` + + + + + + + + + diff --git a/package/docs/generated.s3.md b/package/docs/generated.s3.md new file mode 100644 index 000000000..5c72180e4 --- /dev/null +++ b/package/docs/generated.s3.md @@ -0,0 +1,70 @@ + + + + +# module `generated.s3` + + + + + + +--- + + + +## class `S3Connector` +S3Connector class generated for connecting to the data source. + + + +**Args:** + + + + - `buckets` (list): AWS Buckets + - AWS Buckets are ignored when Advanced Sync Rules are used. + + + - `aws_access_key_id` (str): AWS Access Key Id + + + - `aws_secret_access_key` (str): AWS Secret Key + + + - `read_timeout` (int): Read timeout + + + - `connect_timeout` (int): Connection timeout + + + - `max_attempts` (int): Maximum retry attempts + + + - `page_size` (int): Maximum size of page + + + +### method `__init__` + +```python +__init__( + buckets=None, + aws_access_key_id=None, + aws_secret_access_key=None, + read_timeout=90, + connect_timeout=90, + max_attempts=5, + page_size=100, + **kwargs +) +``` + + + + + + + + + diff --git a/package/docs/generated.salesforce.md b/package/docs/generated.salesforce.md new file mode 100644 index 000000000..3fb078501 --- /dev/null +++ b/package/docs/generated.salesforce.md @@ -0,0 +1,51 @@ + + + + +# module `generated.salesforce` + + + + + + +--- + + + +## class `SalesforceConnector` +SalesforceConnector class generated for connecting to the data source. + + + +**Args:** + + + + - `domain` (str): Domain + - The domain for your Salesforce instance. If your Salesforce URL is 'foo.my.salesforce.com', the domain would be 'foo'. + + + - `client_id` (str): Client ID + - The client id for your OAuth2-enabled connected app. Also called 'consumer key' + + + - `client_secret` (str): Client Secret + - The client secret for your OAuth2-enabled connected app. Also called 'consumer secret' + + + +### method `__init__` + +```python +__init__(domain=None, client_id=None, client_secret=None, **kwargs) +``` + + + + + + + + + diff --git a/package/docs/generated.servicenow.md b/package/docs/generated.servicenow.md new file mode 100644 index 000000000..fa5bff6c4 --- /dev/null +++ b/package/docs/generated.servicenow.md @@ -0,0 +1,66 @@ + + + + +# module `generated.servicenow` + + + + + + +--- + + + +## class `ServiceNowConnector` +ServiceNowConnector class generated for connecting to the data source. + + + +**Args:** + + + + - `url` (str): Service URL + + + - `username` (str): Username + + + - `password` (str): Password + + + - `services` (list): Comma-separated list of services + - List of services is ignored when Advanced Sync Rules are used. + + + - `retry_count` (int): Retries per request + + + - `concurrent_downloads` (int): Maximum concurrent downloads + + + +### method `__init__` + +```python +__init__( + url=None, + username=None, + password=None, + services='*', + retry_count=3, + concurrent_downloads=10, + **kwargs +) +``` + + + + + + + + + diff --git a/package/docs/generated.sharepoint_online.md b/package/docs/generated.sharepoint_online.md new file mode 100644 index 000000000..0e6556809 --- /dev/null +++ b/package/docs/generated.sharepoint_online.md @@ -0,0 +1,92 @@ + + + + +# module `generated.sharepoint_online` + + + + + + +--- + + + +## class `SharepointOnlineConnector` +SharepointOnlineConnector class generated for connecting to the data source. 
+ + + +**Args:** + + + + - `tenant_id` (str): Tenant ID + + + - `tenant_name` (str): Tenant name + + + - `client_id` (str): Client ID + + + - `secret_value` (str): Secret value + + + - `site_collections` (list): Comma-separated list of sites + - A comma-separated list of sites to ingest data from. If enumerating all sites, use * to include all available sites, or specify a list of site names. Otherwise, specify a list of site paths. + + + - `enumerate_all_sites` (bool): Enumerate all sites? + - If enabled, sites will be fetched in bulk, then filtered down to the configured list of sites. This is efficient when syncing many sites. If disabled, each configured site will be fetched with an individual request. This is efficient when syncing fewer sites. + + + - `fetch_subsites` (bool): Fetch sub-sites of configured sites? + - Whether subsites of the configured site(s) should be automatically fetched. + + + - `fetch_drive_item_permissions` (bool): Fetch drive item permissions + - Enable this option to fetch drive item specific permissions. This setting can increase sync time. + + + - `fetch_unique_page_permissions` (bool): Fetch unique page permissions + - Enable this option to fetch unique page permissions. This setting can increase sync time. If this setting is disabled a page will inherit permissions from its parent site. + + + - `fetch_unique_list_permissions` (bool): Fetch unique list permissions + - Enable this option to fetch unique list permissions. This setting can increase sync time. If this setting is disabled a list will inherit permissions from its parent site. + + + - `fetch_unique_list_item_permissions` (bool): Fetch unique list item permissions + - Enable this option to fetch unique list item permissions. This setting can increase sync time. If this setting is disabled a list item will inherit permissions from its parent site. + + + +### method `__init__` + +```python +__init__( + tenant_id=None, + tenant_name=None, + client_id=None, + secret_value=None, + site_collections='*', + enumerate_all_sites=True, + fetch_subsites=True, + fetch_drive_item_permissions=True, + fetch_unique_page_permissions=True, + fetch_unique_list_permissions=True, + fetch_unique_list_item_permissions=True, + **kwargs +) +``` + + + + + + + + + diff --git a/package/docs/generated.sharepoint_server.md b/package/docs/generated.sharepoint_server.md new file mode 100644 index 000000000..91353fa83 --- /dev/null +++ b/package/docs/generated.sharepoint_server.md @@ -0,0 +1,79 @@ + + + + +# module `generated.sharepoint_server` + + + + + + +--- + + + +## class `SharepointServerConnector` +SharepointServerConnector class generated for connecting to the data source. + + + +**Args:** + + + + - `username` (str): SharePoint Server username + + + - `password` (str): SharePoint Server password + + + - `host_url` (str): SharePoint host + + + - `site_collections` (list): Comma-separated list of SharePoint site collections to index + + + - `ssl_enabled` (bool): Enable SSL + + + - `ssl_ca` (str): SSL certificate + + + - `retry_count` (int): Retries per request + + + - `fetch_unique_list_permissions` (bool): Fetch unique list permissions + - Enable this option to fetch unique list permissions. This setting can increase sync time. If this setting is disabled a list will inherit permissions from its parent site. + + + - `fetch_unique_list_item_permissions` (bool): Fetch unique list item permissions + - Enable this option to fetch unique list item permissions. This setting can increase sync time. 
If this setting is disabled a list item will inherit permissions from its parent site.
+
+
+
+### method `__init__`
+
+```python
+__init__(
+    username=None,
+    password=None,
+    host_url=None,
+    site_collections=None,
+    ssl_enabled=False,
+    ssl_ca=None,
+    retry_count=3,
+    fetch_unique_list_permissions=True,
+    fetch_unique_list_item_permissions=True,
+    **kwargs
+)
+```
+
+
+
+
+
+
+
+
diff --git a/package/docs/generated.slack.md b/package/docs/generated.slack.md
new file mode 100644
index 000000000..4780221ae
--- /dev/null
+++ b/package/docs/generated.slack.md
@@ -0,0 +1,61 @@
+
+
+
+
+# module `generated.slack`
+
+
+
+
+
+---
+
+
+
+## class `SlackConnector`
+SlackConnector class generated for connecting to the data source.
+
+
+
+**Args:**
+
+
+
+ - `token` (str): Authentication Token
+ - The Slack Authentication Token for the Slack application you created. See the docs for details.
+
+
+ - `fetch_last_n_days` (int): Days of message history to fetch
+ - How far back in time to request message history from Slack. Messages older than this will not be indexed.
+
+
+ - `auto_join_channels` (bool): Automatically join channels
+ - The Slack application bot will only be able to read conversation history from channels it has joined. The default requires it to be manually invited to channels. Enabling this allows it to automatically invite itself into all public channels.
+
+
+ - `sync_users` (bool): Sync users
+ - Whether or not Slack users should be indexed as documents in Elasticsearch.
+
+
+
+### method `__init__`
+
+```python
+__init__(
+    token=None,
+    fetch_last_n_days=None,
+    auto_join_channels=False,
+    sync_users=True,
+    **kwargs
+)
+```
+
+
+
+
+
+
+
+
diff --git a/package/docs/generated.zoom.md b/package/docs/generated.zoom.md
new file mode 100644
index 000000000..befbbb82d
--- /dev/null
+++ b/package/docs/generated.zoom.md
@@ -0,0 +1,63 @@
+
+
+
+
+# module `generated.zoom`
+
+
+
+
+
+---
+
+
+
+## class `ZoomConnector`
+ZoomConnector class generated for connecting to the data source.
+
+
+
+**Args:**
+
+
+
+ - `account_id` (str): Account ID
+
+
+ - `client_id` (str): Client ID
+
+
+ - `client_secret` (str): Client secret
+
+
+ - `fetch_past_meeting_details` (bool): Fetch past meeting details
+ - Enable this option to fetch past meeting details. This setting can increase sync time.
+
+
+ - `recording_age` (int): Recording Age Limit (Months)
+ - How far back in time to request recordings from Zoom. Recordings older than this will not be indexed.
+
+
+
+### method `__init__`
+
+```python
+__init__(
+    account_id=None,
+    client_id=None,
+    client_secret=None,
+    fetch_past_meeting_details=False,
+    recording_age=None,
+    **kwargs
+)
+```
+
+
+
+
+
+
+
+
diff --git a/package/setup.py b/package/setup.py
index 743217fc4..f48be3e04 100644
--- a/package/setup.py
+++ b/package/setup.py
@@ -69,7 +69,7 @@ def read_reqs(req_file):
 setup(
     name="elastic-connectors",
-    version="0.1.0",
+    version="0.2.0",
     description="Elastic connectors",
     long_description=open("README.md").read(),
     long_description_content_type="text/markdown",
diff --git a/requirements/package-dev.txt b/requirements/package-dev.txt
index 97231eaa3..9c4fb173d 100644
--- a/requirements/package-dev.txt
+++ b/requirements/package-dev.txt
@@ -1,2 +1,3 @@
 Jinja2==3.1.4
 black==24.4.2
+lazydocs==0.4.8

From f07e8a429e8e7aecae4f846ec66f18cdf235b231 Mon Sep 17 00:00:00 2001
From: Jedr Blaszyk
Date: Fri, 12 Jul 2024 08:44:45 +0200
Subject: [PATCH 8/8] Delete unused script

---
 scripts/package/manage_package.sh | 53 -------------------------------
 1 file changed, 53 deletions(-)
 delete mode 100755 scripts/package/manage_package.sh

diff --git a/scripts/package/manage_package.sh b/scripts/package/manage_package.sh
deleted file mode 100755
index e13ba0bb6..000000000
--- a/scripts/package/manage_package.sh
+++ /dev/null
@@ -1,53 +0,0 @@
-#!/bin/bash
-
-set -e
-
-function generate_connector_package_code() {
-    bin/python scripts/package/codegen/generate_connectors.py
-    bin/python scripts/package/codegen/generate_connectors_init.py
-}
-
-function generate_connector_package() {
-    generate_connector_package_code
-    mkdir -p package/elastic_connectors
-    cp -r package/* package/elastic_connectors
-    rm -rf package/elastic_connectors/elastic_connectors
-    cp -r connectors requirements package/elastic_connectors
-    bin/python scripts/package/update_imports.py
-}
-
-function clean_connector_package() {
-    cd package && rm -rf elastic_connectors build dist *.egg-info
-}
-
-function build_connector_package() {
-    clean_connector_package
-    generate_connector_package
-    cd package && ../bin/python setup.py sdist bdist_wheel
-}
-
-function publish_connector_package() {
-    build_connector_package
-    cd package && twine upload --repository testpypi dist/*
-}
-
-case "$1" in
-    generate-connector-package-code)
-        generate_connector_package_code
-        ;;
-    generate-connector-package)
-        generate_connector_package
-        ;;
-    clean-connector-package)
-        clean_connector_package
-        ;;
-    build-connector-package)
-        build_connector_package
-        ;;
-    publish-connector-package)
-        publish_connector_package
-        ;;
-    *)
-        echo "Usage: $0 {generate-connector-package-code|generate-connector-package|clean-connector-package|build-connector-package|publish-connector-package}"
-        exit 1
-esac
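A note on the packaging flow above: `scripts/package/update_imports.py` retargets the vendored sources with a plain word-boundary `re.sub`. The sketch below illustrates the intended effect on a single vendored line; the module contents are hypothetical, made up for illustration, but the pattern and replacement strings are the ones the script actually uses:

```python
import re

# Hypothetical contents of a vendored module before the rewrite.
content = "from connectors.es.settings import TIMESTAMP_FIELD\n"

old_import = "from connectors"
new_import = "from elastic_connectors.connectors"

# Word boundaries keep the match anchored to the exact phrase, so a
# hypothetical "from connectors_utils" would be left untouched.
updated = re.sub(r"\b" + old_import + r"\b", new_import, content)

print(updated)
# from elastic_connectors.connectors.es.settings import TIMESTAMP_FIELD
```

After this pass, the vendored `connectors` sources resolve against the `elastic_connectors` package namespace rather than the top-level `connectors` module, which allows the published wheel to ship a self-contained copy of the framework.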