diff --git a/protocol-models/python/airbyte_protocol_pdv2/airbyte_protocol/models/__init__.py b/protocol-models/python/airbyte_protocol_pdv2/airbyte_protocol/models/__init__.py new file mode 100644 index 00000000..bf30e6e1 --- /dev/null +++ b/protocol-models/python/airbyte_protocol_pdv2/airbyte_protocol/models/__init__.py @@ -0,0 +1,4 @@ +# generated by generate-python-pydantic-v2 +name = 'models' +from .well_known_types import * +from .airbyte_protocol import * diff --git a/protocol-models/python/airbyte_protocol_pdv2/airbyte_protocol/models/airbyte_protocol.py b/protocol-models/python/airbyte_protocol_pdv2/airbyte_protocol/models/airbyte_protocol.py new file mode 100644 index 00000000..7689c9b9 --- /dev/null +++ b/protocol-models/python/airbyte_protocol_pdv2/airbyte_protocol/models/airbyte_protocol.py @@ -0,0 +1,858 @@ +# generated by datamodel-codegen: +# filename: airbyte_protocol.yaml + +from __future__ import annotations + +from enum import Enum +from typing import Any, Dict, List, Optional, Union + +from pydantic import AnyUrl, BaseModel, ConfigDict, Field, RootModel + + +class Type(Enum): + RECORD = 'RECORD' + STATE = 'STATE' + LOG = 'LOG' + SPEC = 'SPEC' + CONNECTION_STATUS = 'CONNECTION_STATUS' + CATALOG = 'CATALOG' + TRACE = 'TRACE' + CONTROL = 'CONTROL' + DESTINATION_CATALOG = 'DESTINATION_CATALOG' + + +class Change(Enum): + NULLED = 'NULLED' + TRUNCATED = 'TRUNCATED' + + +class Reason(Enum): + SOURCE_RECORD_SIZE_LIMITATION = 'SOURCE_RECORD_SIZE_LIMITATION' + DESTINATION_RECORD_SIZE_LIMITATION = 'DESTINATION_RECORD_SIZE_LIMITATION' + PLATFORM_RECORD_SIZE_LIMITATION = 'PLATFORM_RECORD_SIZE_LIMITATION' + SOURCE_FIELD_SIZE_LIMITATION = 'SOURCE_FIELD_SIZE_LIMITATION' + DESTINATION_FIELD_SIZE_LIMITATION = 'DESTINATION_FIELD_SIZE_LIMITATION' + PLATFORM_FIELD_SIZE_LIMITATION = 'PLATFORM_FIELD_SIZE_LIMITATION' + SOURCE_SERIALIZATION_ERROR = 'SOURCE_SERIALIZATION_ERROR' + DESTINATION_SERIALIZATION_ERROR = 'DESTINATION_SERIALIZATION_ERROR' + PLATFORM_SERIALIZATION_ERROR = 'PLATFORM_SERIALIZATION_ERROR' + SOURCE_RETRIEVAL_ERROR = 'SOURCE_RETRIEVAL_ERROR' + DESTINATION_TYPECAST_ERROR = 'DESTINATION_TYPECAST_ERROR' + + +class AirbyteRecordMessageMetaChange(BaseModel): + model_config = ConfigDict( + extra='allow', + ) + field: str = Field( + ..., description='The field that had the change occur (required)' + ) + change: Change = Field(..., description='The type of change that occurred') + reason: Reason = Field(..., description='The reason that the change occurred') + + +class AirbyteRecordMessageFileReference(BaseModel): + model_config = ConfigDict( + extra='allow', + ) + staging_file_url: Optional[str] = Field( + None, + description='The absolute path to the referenced file in the staging area.', + ) + source_file_relative_path: Optional[str] = Field( + None, description='The relative path to the referenced file in source.' + ) + file_size_bytes: Optional[int] = Field( + None, description='The size of the referenced file in bytes.' + ) + + +class AirbyteStateType(Enum): + GLOBAL = 'GLOBAL' + STREAM = 'STREAM' + LEGACY = 'LEGACY' + + +class StreamDescriptor(BaseModel): + model_config = ConfigDict( + extra='allow', + ) + name: str + namespace: Optional[str] = None + + +class AirbyteStateBlob(BaseModel): + pass + model_config = ConfigDict( + extra='allow', + ) + + +class AirbyteStateStats(BaseModel): + model_config = ConfigDict( + extra='allow', + ) + recordCount: Optional[float] = Field( + None, + description='the number of records which were emitted for this state message, for this stream or global. While the value should always be a round number, it is defined as a double to account for integer overflows, and the value should always have a decimal point for proper serialization.', + ) + rejectedRecordCount: Optional[float] = Field( + None, + description='the number of records which were rejected for this state message, for this stream or global. While the value should always be a round number, it is defined as a double to account for integer overflows, and the value should always have a decimal point for proper serialization.\n', + ) + + +class Level(Enum): + FATAL = 'FATAL' + ERROR = 'ERROR' + WARN = 'WARN' + INFO = 'INFO' + DEBUG = 'DEBUG' + TRACE = 'TRACE' + + +class AirbyteLogMessage(BaseModel): + model_config = ConfigDict( + extra='allow', + ) + level: Level = Field(..., description='log level') + message: str = Field(..., description='log message') + stack_trace: Optional[str] = Field( + None, + description='an optional stack trace if the log message corresponds to an exception', + ) + + +class TraceType(Enum): + ERROR = 'ERROR' + ESTIMATE = 'ESTIMATE' + STREAM_STATUS = 'STREAM_STATUS' + ANALYTICS = 'ANALYTICS' + + +class FailureType(Enum): + system_error = 'system_error' + config_error = 'config_error' + transient_error = 'transient_error' + + +class AirbyteErrorTraceMessage(BaseModel): + model_config = ConfigDict( + extra='allow', + ) + message: str = Field( + ..., description='A user-friendly message that indicates the cause of the error' + ) + internal_message: Optional[str] = Field( + None, description='The internal error that caused the failure' + ) + stack_trace: Optional[str] = Field( + None, description='The full stack trace of the error' + ) + failure_type: Optional[FailureType] = Field(None, description='The type of error') + stream_descriptor: Optional[StreamDescriptor] = Field( + None, description='The stream associated with the error, if known (optional)' + ) + + +class EstimateType(Enum): + STREAM = 'STREAM' + SYNC = 'SYNC' + + +class AirbyteEstimateTraceMessage(BaseModel): + model_config = ConfigDict( + extra='allow', + ) + name: str = Field(..., description='The name of the stream') + type: EstimateType = Field( + ..., + description='Estimates are either per-stream (STREAM) or for the entire sync (SYNC). STREAM is preferred, and requires the source to count how many records are about to be emitted per-stream (e.g. there will be 100 rows from this table emitted). For the rare source which cannot tell which stream a record belongs to before reading (e.g. CDC databases), SYNC estimates can be emitted. Sources should not emit both STREAM and SYNC estimates within a sync.\n', + title='estimate type', + ) + namespace: Optional[str] = Field(None, description='The namespace of the stream') + row_estimate: Optional[int] = Field( + None, + description='The estimated number of rows to be emitted by this sync for this stream', + ) + byte_estimate: Optional[int] = Field( + None, + description='The estimated number of bytes to be emitted by this sync for this stream', + ) + + +class AirbyteStreamStatus(Enum): + STARTED = 'STARTED' + RUNNING = 'RUNNING' + COMPLETE = 'COMPLETE' + INCOMPLETE = 'INCOMPLETE' + + +class AirbyteStreamStatusReasonType(Enum): + RATE_LIMITED = 'RATE_LIMITED' + + +class AirbyteStreamStatusRateLimitedReason(BaseModel): + quota_reset: Optional[int] = Field( + None, + description='Optional time in ms representing when the API quota is going to be reset', + ) + + +class AirbyteStreamStatusReason(BaseModel): + type: AirbyteStreamStatusReasonType + rate_limited: Optional[AirbyteStreamStatusRateLimitedReason] = None + + +class AirbyteStreamStatusTraceMessage(BaseModel): + model_config = ConfigDict( + extra='allow', + ) + stream_descriptor: StreamDescriptor = Field( + ..., description='The stream associated with the status' + ) + status: AirbyteStreamStatus = Field( + ..., description='The current status of the stream' + ) + reasons: Optional[List[AirbyteStreamStatusReason]] = Field( + None, description='The reasons associated with the status of the stream' + ) + + +class AirbyteAnalyticsTraceMessage(BaseModel): + model_config = ConfigDict( + extra='allow', + ) + type: str = Field( + ..., + description='The event type - should be a static string. Keep in mind that all strings are shared across all connectors.', + ) + value: Optional[str] = Field( + None, + description='The value of the event - can be an arbitrary string. In case the value is numeric, it should be converted to a string. Casting for analytics purposes can happen in the warehouse.', + ) + + +class OrchestratorType(Enum): + CONNECTOR_CONFIG = 'CONNECTOR_CONFIG' + + +class AirbyteControlConnectorConfigMessage(BaseModel): + model_config = ConfigDict( + extra='allow', + ) + config: Dict[str, Any] = Field( + ..., description="the config items from this connector's spec to update" + ) + + +class Status(Enum): + SUCCEEDED = 'SUCCEEDED' + FAILED = 'FAILED' + + +class AirbyteConnectionStatus(BaseModel): + model_config = ConfigDict( + extra='allow', + ) + status: Status + message: Optional[str] = None + + +class SyncMode(Enum): + full_refresh = 'full_refresh' + incremental = 'incremental' + + +class DestinationSyncMode(Enum): + append = 'append' + overwrite = 'overwrite' + append_dedup = 'append_dedup' + update = 'update' + soft_delete = 'soft_delete' + + +class DestinationOperation(BaseModel): + model_config = ConfigDict( + extra='allow', + ) + object_name: str = Field(..., description='The name of the destination object.') + sync_mode: DestinationSyncMode = Field( + ..., description='The sync mode to be performed on the destination object.' + ) + json_schema: Dict[str, Any] = Field( + ..., description='Stream schema using Json Schema specs.' + ) + matching_keys: Optional[List[List[str]]] = Field( + None, + description='A list of keys that can be used to match a record in the destination object. The inner array of strings represents a nested object path.', + ) + + +class OAuth2Specification(BaseModel): + model_config = ConfigDict( + extra='allow', + ) + rootObject: Optional[List[Union[str, int]]] = Field( + None, + description="A list of strings representing a pointer to the root object which contains any oauth parameters in the ConnectorSpecification. Examples: if oauth parameters were contained inside the top level, rootObject=[] If they were nested inside another object {'credentials': {'app_id' etc...}, rootObject=['credentials'] If they were inside a oneOf {'switch': {oneOf: [{client_id...}, {non_oauth_param]}}, rootObject=['switch', 0] ", + ) + oauthFlowInitParameters: Optional[List[List[str]]] = Field( + None, + description="Pointers to the fields in the rootObject needed to obtain the initial refresh/access tokens for the OAuth flow. Each inner array represents the path in the rootObject of the referenced field. For example. Assume the rootObject contains params 'app_secret', 'app_id' which are needed to get the initial refresh token. If they are not nested in the rootObject, then the array would look like this [['app_secret'], ['app_id']] If they are nested inside an object called 'auth_params' then this array would be [['auth_params', 'app_secret'], ['auth_params', 'app_id']]", + ) + oauthFlowOutputParameters: Optional[List[List[str]]] = Field( + None, + description='Pointers to the fields in the rootObject which can be populated from successfully completing the oauth flow using the init parameters. This is typically a refresh/access token. Each inner array represents the path in the rootObject of the referenced field.', + ) + + +class AuthType(Enum): + oauth2_0 = 'oauth2.0' + + +class AuthSpecification(BaseModel): + auth_type: Optional[AuthType] = None + oauth2Specification: Optional[OAuth2Specification] = Field( + None, + description='If the connector supports OAuth, this field should be non-null.', + ) + + +class AuthFlowType(Enum): + oauth2_0 = 'oauth2.0' + oauth1_0 = 'oauth1.0' + + +class State(BaseModel): + model_config = ConfigDict( + extra='allow', + ) + min: Optional[int] = None + max: Optional[int] = None + + +class OauthConnectorInputSpecification(BaseModel): + model_config = ConfigDict( + extra='allow', + ) + consent_url: str = Field( + ..., + description='The OAuth Specific string URL string template to initiate the authentication.\nThe placeholders are replaced during the processing to provide neccessary values.\n\nExamples:\n {\n "consent_url": "https://domain.host.com/oauth2/authorize?{{ client_id_key }}={{ client_id_value }}&{{ redirect_uri_key }}={{ {{redirect_uri_value}} | urlEncoder }}&{{ scope_key }}={{ {{scope_key}} | urlEncoder }}&{{ state_key }}={{ state_value }}&subdomain={{ subdomain }}"\n }', + ) + scope: Optional[str] = Field( + None, + description='The OAuth Specific string of the scopes needed to be grant for authenticated user.\n\nExamples:\n {\n "scope": "user:read user:read_orders workspaces:read"\n }', + ) + access_token_url: str = Field( + ..., + description='The OAuth Specific URL templated string to obtain the `access_token`, `refresh_token` etc.\nThe placeholders are replaced during the processing to provide neccessary values.\n\nExamples:\n {\n "access_token_url": "https://auth.host.com/oauth2/token?{{ client_id_key }}={{ client_id_value }}&{{ client_secret_key }}={{ client_secret_value }}&{{ auth_code_key }}={{ auth_code_value }}&{{ redirect_uri_key }}={{ {{redirect_uri_key}} | urlEncoder }}"\n }', + ) + access_token_headers: Optional[Dict[str, Any]] = Field( + None, + description='The OAuth Specific optional headers to inject while exchanging the `auth_code` to `access_token` during `completeOAuthFlow` step.\n\nExamples:\n In this example the header value will be encoded with the base64 hash from `client_id` and `client_secret`,\n to provide neccessary header to complete auth flow and obtain the `access_token`.\n\n {\n "access_token_headers": {\n "Authorization": "Basic {{ {{ client_id_value }}:{{ client_secret_value }} | base64Encoder }}"\n }\n }', + ) + access_token_params: Optional[Dict[str, Any]] = Field( + None, + description='The OAuth Specific optional query parameters to inject while exchanging the `auth_code` to `access_token` during `completeOAuthFlow` step.\nWhen this property is provided, the query params are encoded as `Json string` and passed to the outgoing API request.\n\nExamples:\n {\n "access_token_params": {\n "my_query_param": "param_value",\n "{{ client_id_key }}": "{{ client_id_value }}"\n }\n }', + ) + extract_output: Optional[List[str]] = Field( + None, + description='The OAuth Specific list of strings to indicate which keys should be extracted and returned back to the input config.\n\nExamples:\n In this example, once the OAuthFlow has been completed successfully, \n the `access_token`, `refresh_token` and other fields like `other_field` will be checked and extracted from the OAuthOutput.\n\n {\n "extract_output": ["access_token", "refresh_token", "other_field"]\n }', + ) + state: Optional[State] = Field( + None, + description='The OAuth Specific object to provide the criteria of how the `state` query param should be constructed,\nincluding length and complexity.\n\nExamples:\n {\n "state": {\n "min": 7,\n "max": 128,\n }\n }', + ) + client_id_key: Optional[str] = Field( + None, + description='The OAuth Specific optional override to provide the custom `client_id` key name, if required by data-provider.\n\nExamples:\n {\n "client_id_key": "my_custom_client_id_key_name"\n }', + ) + client_secret_key: Optional[str] = Field( + None, + description='The OAuth Specific optional override to provide the custom `client_secret` key name, if required by data-provider.\n\nExamples:\n {\n "client_secret_key": "my_custom_client_secret_key_name"\n }', + ) + scope_key: Optional[str] = Field( + None, + description='The OAuth Specific optional override to provide the custom `scope` key name, if required by data-provider.\n\nExamples:\n {\n "scope_key": "my_custom_scope_key_key_name"\n }', + ) + state_key: Optional[str] = Field( + None, + description='The OAuth Specific optional override to provide the custom `state` key name, if required by data-provider.\n\nExamples:\n {\n "state_key": "my_custom_state_key_key_name"\n }', + ) + auth_code_key: Optional[str] = Field( + None, + description='The OAuth Specific optional override to provide the custom `code` key name to something like `auth_code` or `custom_auth_code`, if required by data-provider.\n\nExamples:\n {\n "auth_code_key": "my_custom_auth_code_key_name"\n }', + ) + redirect_uri_key: Optional[str] = Field( + None, + description='The OAuth Specific optional override to provide the custom `redirect_uri` key name to something like `callback_uri`, if required by data-provider.\n\nExamples:\n {\n "redirect_uri_key": "my_custom_redirect_uri_key_name"\n }', + ) + token_expiry_key: Optional[str] = Field( + None, + description='The OAuth Specific optional override to provide the custom key name to something like `expires_at`, if required by data-provider.\n\nExamples:\n {\n "token_expiry_key": "expires_at"\n }', + ) + + +class OAuthConfigSpecification(BaseModel): + model_config = ConfigDict( + extra='allow', + ) + oauth_user_input_from_connector_config_specification: Optional[Dict[str, Any]] = ( + Field( + None, + description="OAuth specific blob. This is a Json Schema used to validate Json configurations used as input to OAuth.\nMust be a valid non-nested JSON that refers to properties from ConnectorSpecification.connectionSpecification\nusing special annotation 'path_in_connector_config'.\nThese are input values the user is entering through the UI to authenticate to the connector, that might also shared\nas inputs for syncing data via the connector.\n\nExamples:\n\nif no connector values is shared during oauth flow, oauth_user_input_from_connector_config_specification=[]\nif connector values such as 'app_id' inside the top level are used to generate the API url for the oauth flow,\n oauth_user_input_from_connector_config_specification={\n app_id: {\n type: string\n path_in_connector_config: ['app_id']\n }\n }\nif connector values such as 'info.app_id' nested inside another object are used to generate the API url for the oauth flow,\n oauth_user_input_from_connector_config_specification={\n app_id: {\n type: string\n path_in_connector_config: ['info', 'app_id']\n }\n }", + ) + ) + oauth_connector_input_specification: Optional[OauthConnectorInputSpecification] = ( + Field( + None, + description='OAuth specific blob. Pertains to the fields defined by the connector relating to the OAuth flow.', + ) + ) + complete_oauth_output_specification: Optional[Dict[str, Any]] = Field( + None, + description="OAuth specific blob. This is a Json Schema used to validate Json configurations produced by the OAuth flows as they are\nreturned by the distant OAuth APIs.\nMust be a valid JSON describing the fields to merge back to `ConnectorSpecification.connectionSpecification`.\nFor each field, a special annotation `path_in_connector_config` can be specified to determine where to merge it,\n\nExamples:\n With a `refresh_token` in the `oauth response`:\n\n complete_oauth_output_specification={\n refresh_token: {\n type: string,\n path_in_connector_config: ['credentials', 'refresh_token'],\n path_in_oauth_response: ['refresh_token']\n }\n }\n \n With a nested `refresh_token` under the `data` object, in the `oauth response`:\n \n complete_oauth_output_specification={\n refresh_token: {\n type: string,\n path_in_connector_config: ['credentials', 'refresh_token'],\n path_in_oauth_response: ['data', 'refresh_token']\n }\n }", + ) + complete_oauth_server_input_specification: Optional[Dict[str, Any]] = Field( + None, + description='OAuth specific blob. This is a Json Schema used to validate Json configurations persisted as Airbyte Server configurations.\nMust be a valid non-nested JSON describing additional fields configured by the Airbyte Instance or Workspace Admins to be used by the\nserver when completing an OAuth flow (typically exchanging an auth code for refresh token).\n\nExamples:\n\n complete_oauth_server_input_specification={\n client_id: {\n type: string\n },\n client_secret: {\n type: string\n }\n }', + ) + complete_oauth_server_output_specification: Optional[Dict[str, Any]] = Field( + None, + description="OAuth specific blob. This is a Json Schema used to validate Json configurations persisted as Airbyte Server configurations that\nalso need to be merged back into the connector configuration at runtime.\nThis is a subset configuration of `complete_oauth_server_input_specification` that filters fields out to retain only the ones that\nare necessary for the connector to function with OAuth. (some fields could be used during oauth flows but not needed afterwards, therefore\nthey would be listed in the `complete_oauth_server_input_specification` but not `complete_oauth_server_output_specification`)\nMust be a valid non-nested JSON describing additional fields configured by the Airbyte Instance or Workspace Admins to be used by the\nconnector when using OAuth flow APIs.\nThese fields are to be merged back to `ConnectorSpecification.connectionSpecification`.\nFor each field, a special annotation `path_in_connector_config` can be specified to determine where to merge it,\n\nExamples:\n\n complete_oauth_server_output_specification={\n client_id: {\n type: string,\n path_in_connector_config: ['credentials', 'client_id']\n },\n client_secret: {\n type: string,\n path_in_connector_config: ['credentials', 'client_secret']\n }\n }", + ) + + +class StreamMapperType(Enum): + hashing = 'hashing' + field_renaming = 'field-renaming' + row_filtering = 'row-filtering' + encryption = 'encryption' + field_filtering = 'field-filtering' + + +class Method(Enum): + MD2 = 'MD2' + MD5 = 'MD5' + SHA_1 = 'SHA-1' + SHA_256 = 'SHA-256' + SHA_384 = 'SHA-384' + SHA_512 = 'SHA-512' + + +class HashingMapperConfiguration(BaseModel): + model_config = ConfigDict( + extra='allow', + ) + target_field: str = Field(..., description='The field to hash') + field_name_suffix: Optional[str] = Field( + None, description='Suffix to append to the hashed field name' + ) + method: Method = Field(..., description='Hashing algorithm to use') + + +class FieldRenamingMapperConfiguration(BaseModel): + model_config = ConfigDict( + extra='allow', + ) + original_field_name: str = Field( + ..., description='The original field name to rename' + ) + new_field_name: str = Field(..., description='The new field name') + + +class Type1(Enum): + EQUAL = 'EQUAL' + NOT = 'NOT' + + +class FilterCondition(BaseModel): + model_config = ConfigDict( + extra='allow', + ) + type: Type1 + field_name: Optional[str] = Field( + None, description='Field name for EQUAL conditions' + ) + comparison_value: Optional[str] = Field( + None, description='Value to compare against for EQUAL conditions' + ) + condition: Optional[FilterCondition] = Field( + None, description='Nested condition for NOT conditions' + ) + + +class Algorithm(Enum): + RSA = 'RSA' + AES = 'AES' + + +class Mode(Enum): + GCM = 'GCM' + CBC = 'CBC' + ECB = 'ECB' + + +class Padding(Enum): + NoPadding = 'NoPadding' + PKCS5Padding = 'PKCS5Padding' + + +class EncryptionMapperConfiguration(BaseModel): + model_config = ConfigDict( + extra='allow', + ) + algorithm: Algorithm + target_field: str = Field(..., description='The field to encrypt') + field_name_suffix: Optional[str] = Field( + None, description='Suffix to append to the encrypted field name' + ) + public_key: Optional[str] = Field( + None, description='RSA public key in hex-encoded DER format (required for RSA)' + ) + key: Optional[str] = Field( + None, description='AES encryption key (required for AES)' + ) + mode: Optional[Mode] = Field( + None, description='AES encryption mode (required for AES)' + ) + padding: Optional[Padding] = Field( + None, description='AES padding scheme (required for AES)' + ) + + +class FieldFilteringMapperConfiguration(BaseModel): + model_config = ConfigDict( + extra='allow', + ) + target_field: str = Field(..., description='The field to filter out') + + +class AirbyteRecordMessageMeta(BaseModel): + model_config = ConfigDict( + extra='allow', + ) + changes: Optional[List[AirbyteRecordMessageMetaChange]] = Field( + None, + description='Lists of changes to the content of this record which occurred during syncing', + ) + + +class AirbyteStreamState(BaseModel): + model_config = ConfigDict( + extra='allow', + ) + stream_descriptor: StreamDescriptor + stream_state: Optional[AirbyteStateBlob] = None + + +class AirbyteGlobalState(BaseModel): + model_config = ConfigDict( + extra='allow', + ) + shared_state: Optional[AirbyteStateBlob] = None + stream_states: List[AirbyteStreamState] + + +class AirbyteTraceMessage(BaseModel): + model_config = ConfigDict( + extra='allow', + ) + type: TraceType = Field( + ..., description='the type of trace message', title='trace type' + ) + emitted_at: float = Field( + ..., description='the time in ms that the message was emitted' + ) + error: Optional[AirbyteErrorTraceMessage] = Field( + None, description='error trace message: the error object' + ) + estimate: Optional[AirbyteEstimateTraceMessage] = Field( + None, + description='Estimate trace message: a guess at how much data will be produced in this sync', + ) + stream_status: Optional[AirbyteStreamStatusTraceMessage] = Field( + None, + description='Stream status trace message: the current status of a stream within a source', + ) + analytics: Optional[AirbyteAnalyticsTraceMessage] = None + + +class AirbyteControlMessage(BaseModel): + model_config = ConfigDict( + extra='allow', + ) + type: OrchestratorType = Field( + ..., description='the type of orchestrator message', title='orchestrator type' + ) + emitted_at: float = Field( + ..., description='the time in ms that the message was emitted' + ) + connectorConfig: Optional[AirbyteControlConnectorConfigMessage] = Field( + None, + description='connector config orchestrator message: the updated config for the platform to store for this connector', + ) + + +class AirbyteStream(BaseModel): + model_config = ConfigDict( + extra='allow', + ) + name: str = Field(..., description="Stream's name.") + json_schema: Dict[str, Any] = Field( + ..., description='Stream schema using Json Schema specs.' + ) + supported_sync_modes: List[SyncMode] = Field( + ..., description='List of sync modes supported by this stream.', min_length=1 + ) + source_defined_cursor: Optional[bool] = Field( + None, + description='If the source defines the cursor field, then any other cursor field inputs will be ignored. If it does not,\neither the user_provided one is used, or the default one is used as a backup. This field must be set if\nis_resumable is set to true, including resumable full refresh synthetic cursors.', + ) + default_cursor_field: Optional[List[str]] = Field( + None, + description='Path to the field that will be used to determine if a record is new or modified since the last sync. If not provided by the source, the end user will have to specify the comparable themselves.', + ) + source_defined_primary_key: Optional[List[List[str]]] = Field( + None, + description='If the source defines the primary key, paths to the fields that will be used as a primary key. If not provided by the source, the end user will have to specify the primary key themselves.', + ) + namespace: Optional[str] = Field( + None, + description='Optional Source-defined namespace. Currently only used by JDBC destinations to determine what schema to write to. Airbyte streams from the same sources should have the same namespace.', + ) + is_resumable: Optional[bool] = Field( + None, + description='If the stream is resumable or not. Should be set to true if the stream supports incremental. Defaults to false.\nPrimarily used by the Platform in Full Refresh to determine if a Full Refresh stream should actually be treated as incremental within a job.', + ) + is_file_based: Optional[bool] = Field( + None, description='This stream describes a stream of files and their metadata.' + ) + + +class DestinationCatalog(BaseModel): + model_config = ConfigDict( + extra='allow', + ) + operations: List[DestinationOperation] = Field( + ..., + description='An array of operations that can be performed on destination objects.', + ) + + +class AdvancedAuth(BaseModel): + auth_flow_type: Optional[AuthFlowType] = None + predicate_key: Optional[List[str]] = Field( + None, + description='Json Path to a field in the connectorSpecification that should exist for the advanced auth to be applicable.', + ) + predicate_value: Optional[str] = Field( + None, + description='Value of the predicate_key fields for the advanced auth to be applicable.', + ) + oauth_config_specification: Optional[OAuthConfigSpecification] = None + + +class ConnectorSpecification(BaseModel): + model_config = ConfigDict( + extra='allow', + ) + documentationUrl: Optional[AnyUrl] = None + changelogUrl: Optional[AnyUrl] = None + connectionSpecification: Dict[str, Any] = Field( + ..., + description='ConnectorDefinition specific blob. Must be a valid JSON string.', + ) + supportsIncremental: Optional[bool] = Field( + None, + description='(deprecated) If the connector supports incremental mode or not.', + ) + supportsNormalization: Optional[bool] = Field( + False, description='If the connector supports normalization or not.' + ) + supportsDBT: Optional[bool] = Field( + False, description='If the connector supports DBT or not.' + ) + supported_destination_sync_modes: Optional[List[DestinationSyncMode]] = Field( + None, description='List of destination sync modes supported by the connector' + ) + authSpecification: Optional[AuthSpecification] = Field( + None, description='deprecated, switching to advanced_auth instead' + ) + advanced_auth: Optional[AdvancedAuth] = Field( + None, + description="Additional and optional specification object to describe what an 'advanced' Auth flow would need to function.\n - A connector should be able to fully function with the configuration as described by the ConnectorSpecification in a 'basic' mode.\n - The 'advanced' mode provides easier UX for the user with UI improvements and automations. However, this requires further setup on the\n server side by instance or workspace admins beforehand. The trade-off is that the user does not have to provide as many technical\n inputs anymore and the auth process is faster and easier to complete.", + ) + protocol_version: Optional[str] = Field( + None, + description='the Airbyte Protocol version supported by the connector. Protocol versioning uses SemVer. ', + ) + + +class RowFilteringMapperConfiguration(BaseModel): + model_config = ConfigDict( + extra='allow', + ) + conditions: FilterCondition + + +class AirbyteRecordMessage(BaseModel): + model_config = ConfigDict( + extra='allow', + ) + namespace: Optional[str] = Field( + None, description='namespace the data is associated with' + ) + stream: str = Field(..., description='stream the data is associated with') + data: Dict[str, Any] = Field(..., description='record data') + emitted_at: int = Field( + ..., + description='when the data was emitted from the source. epoch in millisecond.', + ) + meta: Optional[AirbyteRecordMessageMeta] = Field( + None, description='Information about this record added mid-sync' + ) + file_reference: Optional[AirbyteRecordMessageFileReference] = Field( + None, + description='An internal facing reference to the file described by the record if applicable.', + ) + + +class AirbyteStateMessage(BaseModel): + model_config = ConfigDict( + extra='allow', + ) + type: Optional[AirbyteStateType] = None + stream: Optional[AirbyteStreamState] = None + global_: Optional[AirbyteGlobalState] = Field(None, alias='global') + data: Optional[Dict[str, Any]] = Field( + None, description='(Deprecated) the state data' + ) + sourceStats: Optional[AirbyteStateStats] = None + destinationStats: Optional[AirbyteStateStats] = None + + +class AirbyteCatalog(BaseModel): + model_config = ConfigDict( + extra='allow', + ) + streams: List[AirbyteStream] + + +class MapperConfiguration( + RootModel[ + Union[ + HashingMapperConfiguration, + FieldRenamingMapperConfiguration, + RowFilteringMapperConfiguration, + EncryptionMapperConfiguration, + FieldFilteringMapperConfiguration, + ] + ] +): + root: Union[ + HashingMapperConfiguration, + FieldRenamingMapperConfiguration, + RowFilteringMapperConfiguration, + EncryptionMapperConfiguration, + FieldFilteringMapperConfiguration, + ] = Field(..., description='Configuration for a specific mapper type') + + +class AirbyteMessage(BaseModel): + model_config = ConfigDict( + extra='allow', + ) + type: Type = Field(..., description='Message type') + log: Optional[AirbyteLogMessage] = Field( + None, + description='log message: any kind of logging you want the platform to know about.', + ) + spec: Optional[ConnectorSpecification] = None + connectionStatus: Optional[AirbyteConnectionStatus] = None + catalog: Optional[AirbyteCatalog] = Field( + None, description='catalog message: the catalog' + ) + record: Optional[AirbyteRecordMessage] = Field( + None, description='record message: the record' + ) + state: Optional[AirbyteStateMessage] = Field( + None, + description='schema message: the state. Must be the last message produced. The platform uses this information', + ) + trace: Optional[AirbyteTraceMessage] = Field( + None, + description='trace message: a message to communicate information about the status and performance of a connector', + ) + control: Optional[AirbyteControlMessage] = Field( + None, + description='connector config message: a message to communicate an updated configuration from a connector that should be persisted', + ) + destination_catalog: Optional[DestinationCatalog] = Field( + None, description='destination catalog message: the catalog' + ) + + +class ConfiguredStreamMapper(BaseModel): + model_config = ConfigDict( + extra='allow', + ) + id: Optional[str] = Field( + None, description='Optional unique identifier for the mapper' + ) + type: StreamMapperType + mapper_configuration: MapperConfiguration + + +class ConfiguredAirbyteStream(BaseModel): + model_config = ConfigDict( + extra='allow', + ) + stream: AirbyteStream + sync_mode: SyncMode + cursor_field: Optional[List[str]] = Field( + None, + description='Path to the field that will be used to determine if a record is new or modified since the last sync. This field is REQUIRED if `sync_mode` is `incremental`. Otherwise it is ignored.', + ) + destination_sync_mode: DestinationSyncMode + destination_object_name: Optional[str] = None + primary_key: Optional[List[List[str]]] = Field( + None, + description='Paths to the fields that will be used as primary key. This field is REQUIRED if `destination_sync_mode` is `*_dedup`. Otherwise it is ignored.', + ) + generation_id: Optional[int] = Field( + None, + description='Monotically increasing numeric id representing the current generation of a stream. This id can be shared across syncs.\nIf this is null, it means that the platform is not supporting the refresh and it is expected that no extra id will be added to the records and no data from previous generation will be cleanup. ', + ) + minimum_generation_id: Optional[int] = Field( + None, + description='The minimum generation id which is needed in a stream. If it is present, the destination will try to delete the data that are part of a generation lower than this property. If the minimum generation is equals to 0, no data deletion is expected from the destiantion\nIf this is null, it means that the platform is not supporting the refresh and it is expected that no extra id will be added to the records and no data from previous generation will be cleanup. ', + ) + sync_id: Optional[int] = Field( + None, + description='Monotically increasing numeric id representing the current sync id. This is aimed to be unique per sync.\nIf this is null, it means that the platform is not supporting the refresh and it is expected that no extra id will be added to the records and no data from previous generation will be cleanup. ', + ) + include_files: Optional[bool] = Field( + None, + description='If the stream is_file_based, determines whether to include the associated files in the sync. Otherwise, this property will be ignored.', + ) + mappers: Optional[List[ConfiguredStreamMapper]] = Field( + None, + description='List of mappers to apply to this stream for data transformation', + ) + + +class ConfiguredAirbyteCatalog(BaseModel): + model_config = ConfigDict( + extra='allow', + ) + streams: List[ConfiguredAirbyteStream] + + +class AirbyteProtocol(BaseModel): + airbyte_message: Optional[AirbyteMessage] = None + configured_airbyte_catalog: Optional[ConfiguredAirbyteCatalog] = None + + +FilterCondition.model_rebuild() diff --git a/protocol-models/python/airbyte_protocol_pdv2/airbyte_protocol/models/well_known_types.py b/protocol-models/python/airbyte_protocol_pdv2/airbyte_protocol/models/well_known_types.py new file mode 100644 index 00000000..b23f5a38 --- /dev/null +++ b/protocol-models/python/airbyte_protocol_pdv2/airbyte_protocol/models/well_known_types.py @@ -0,0 +1,108 @@ +# generated by datamodel-codegen: +# filename: well_known_types.yaml + +from __future__ import annotations + +from enum import Enum +from typing import Any, Union + +from pydantic import Field, RootModel, constr + + +class Model(RootModel[Any]): + root: Any + + +class String(RootModel[str]): + root: str = Field(..., description='Arbitrary text') + + +class BinaryData( + RootModel[ + constr( + pattern=r'^(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?$' + ) + ] +): + root: constr( + pattern=r'^(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?$' + ) = Field( + ..., + description='Arbitrary binary data. Represented as base64-encoded strings in the JSON transport. In the future, if we support other transports, may be encoded differently.\n', + ) + + +class Date(RootModel[constr(pattern=r'^\d{4}-\d{2}-\d{2}( BC)?$')]): + root: constr(pattern=r'^\d{4}-\d{2}-\d{2}( BC)?$') = Field( + ..., description="RFC 3339§5.6's full-date format, extended with BC era support" + ) + + +class TimestampWithTimezone( + RootModel[ + constr( + pattern=r'^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?(Z|[+\-]\d{1,2}:\d{2})( BC)?$' + ) + ] +): + root: constr( + pattern=r'^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?(Z|[+\-]\d{1,2}:\d{2})( BC)?$' + ) = Field( + ..., + description='An instant in time. Frequently simply referred to as just a timestamp, or timestamptz. Uses RFC 3339§5.6\'s date-time format, requiring a "T" separator, and extended with BC era support. Note that we do _not_ accept Unix epochs here.\n', + ) + + +class TimestampWithoutTimezone( + RootModel[constr(pattern=r'^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?( BC)?$')] +): + root: constr(pattern=r'^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?( BC)?$') = ( + Field( + ..., + description='Also known as a localdatetime, or just datetime. Under RFC 3339§5.6, this would be represented as `full-date "T" partial-time`, extended with BC era support.\n', + ) + ) + + +class TimeWithTimezone( + RootModel[constr(pattern=r'^\d{2}:\d{2}:\d{2}(\.\d+)?(Z|[+\-]\d{1,2}:\d{2})$')] +): + root: constr(pattern=r'^\d{2}:\d{2}:\d{2}(\.\d+)?(Z|[+\-]\d{1,2}:\d{2})$') = Field( + ..., description='An RFC 3339§5.6 full-time' + ) + + +class TimeWithoutTimezone(RootModel[constr(pattern=r'^\d{2}:\d{2}:\d{2}(\.\d+)?$')]): + root: constr(pattern=r'^\d{2}:\d{2}:\d{2}(\.\d+)?$') = Field( + ..., description='An RFC 3339§5.6 partial-time' + ) + + +class Number1(Enum): + Infinity = 'Infinity' + field_Infinity = '-Infinity' + NaN = 'NaN' + + +class Number(RootModel[Union[constr(pattern=r'-?(0|[0-9]\d*)(\.\d+)?'), Number1]]): + root: Union[constr(pattern=r'-?(0|[0-9]\d*)(\.\d+)?'), Number1] = Field( + ..., + description='Note the mix of regex validation for normal numbers, and enum validation for special values.', + ) + + +class Integer1(Enum): + Infinity = 'Infinity' + field_Infinity = '-Infinity' + NaN = 'NaN' + + +class Integer(RootModel[Union[constr(pattern=r'-?(0|[0-9]\d*)'), Integer1]]): + root: Union[constr(pattern=r'-?(0|[0-9]\d*)'), Integer1] + + +class Boolean(RootModel[bool]): + root: bool = Field( + ..., + description="Note the direct usage of a primitive boolean rather than string. Unlike Numbers and Integers, we don't expect unusual values here.", + ) diff --git a/protocol-models/src/main/resources/airbyte_protocol/v0/airbyte_protocol.yaml b/protocol-models/src/main/resources/airbyte_protocol/v0/airbyte_protocol.yaml index 3aed9378..2b807162 100644 --- a/protocol-models/src/main/resources/airbyte_protocol/v0/airbyte_protocol.yaml +++ b/protocol-models/src/main/resources/airbyte_protocol/v0/airbyte_protocol.yaml @@ -586,6 +586,11 @@ definitions: type: boolean description: |- If the stream is_file_based, determines whether to include the associated files in the sync. Otherwise, this property will be ignored. + mappers: + description: List of mappers to apply to this stream for data transformation + type: array + items: + "$ref": "#/definitions/ConfiguredStreamMapper" SyncMode: type: string enum: @@ -1026,3 +1031,144 @@ definitions: } type: object existingJavaType: com.fasterxml.jackson.databind.JsonNode + ConfiguredStreamMapper: + type: object + additionalProperties: true + required: + - type + - mapper_configuration + properties: + id: + description: Optional unique identifier for the mapper + type: string + type: + "$ref": "#/definitions/StreamMapperType" + mapper_configuration: + "$ref": "#/definitions/MapperConfiguration" + StreamMapperType: + type: string + description: The type of mapper to apply + enum: + - hashing + - field-renaming + - row-filtering + - encryption + - field-filtering + MapperConfiguration: + description: Configuration for a specific mapper type + oneOf: + - "$ref": "#/definitions/HashingMapperConfiguration" + - "$ref": "#/definitions/FieldRenamingMapperConfiguration" + - "$ref": "#/definitions/RowFilteringMapperConfiguration" + - "$ref": "#/definitions/EncryptionMapperConfiguration" + - "$ref": "#/definitions/FieldFilteringMapperConfiguration" + HashingMapperConfiguration: + type: object + additionalProperties: true + required: + - target_field + - method + properties: + target_field: + description: The field to hash + type: string + field_name_suffix: + description: Suffix to append to the hashed field name + type: string + method: + description: Hashing algorithm to use + type: string + enum: + - MD2 + - MD5 + - SHA-1 + - SHA-256 + - SHA-384 + - SHA-512 + FieldRenamingMapperConfiguration: + type: object + additionalProperties: true + required: + - original_field_name + - new_field_name + properties: + original_field_name: + description: The original field name to rename + type: string + new_field_name: + description: The new field name + type: string + RowFilteringMapperConfiguration: + type: object + additionalProperties: true + required: + - conditions + properties: + conditions: + "$ref": "#/definitions/FilterCondition" + FilterCondition: + type: object + additionalProperties: true + required: + - type + properties: + type: + type: string + enum: + - EQUAL + - NOT + field_name: + description: Field name for EQUAL conditions + type: string + comparison_value: + description: Value to compare against for EQUAL conditions + type: string + condition: + description: Nested condition for NOT conditions + "$ref": "#/definitions/FilterCondition" + EncryptionMapperConfiguration: + type: object + additionalProperties: true + required: + - algorithm + - target_field + properties: + algorithm: + type: string + enum: + - RSA + - AES + target_field: + description: The field to encrypt + type: string + field_name_suffix: + description: Suffix to append to the encrypted field name + type: string + public_key: + description: RSA public key in hex-encoded DER format (required for RSA) + type: string + key: + description: AES encryption key (required for AES) + type: string + mode: + description: AES encryption mode (required for AES) + type: string + enum: + - GCM + - CBC + - ECB + padding: + description: AES padding scheme (required for AES) + type: string + enum: + - NoPadding + - PKCS5Padding + FieldFilteringMapperConfiguration: + type: object + additionalProperties: true + required: + - target_field + properties: + target_field: + description: The field to filter out + type: string