diff --git a/airbyte_cdk/manifest_migrations/README.md b/airbyte_cdk/manifest_migrations/README.md index ef85fc8be..6216279b6 100644 --- a/airbyte_cdk/manifest_migrations/README.md +++ b/airbyte_cdk/manifest_migrations/README.md @@ -21,7 +21,7 @@ This directory contains the logic and registry for manifest migrations in the Ai 3. **Register the Migration:** - Open `migrations/registry.yaml`. - Add an entry under the appropriate version, or create a new version section if needed. - - Version can be: "*", "==6.48.3", "~=1.2", ">=1.0.0,<2.0.0", "6.48.3" + - Version can be: "\*", "==6.48.3", "~=1.2", ">=1.0.0,<2.0.0", "6.48.3" - Each migration entry should include: - `name`: The filename (without `.py`) - `order`: The order in which this migration should be applied for the version diff --git a/airbyte_cdk/sources/declarative/generated/declarative_component_schema.json b/airbyte_cdk/sources/declarative/generated/declarative_component_schema.json new file mode 100644 index 000000000..f38a077e7 --- /dev/null +++ b/airbyte_cdk/sources/declarative/generated/declarative_component_schema.json @@ -0,0 +1,5809 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "https://github.com/airbytehq/airbyte/blob/master/airbyte-cdk/python/airbyte_cdk/sources/declarative/declarative_component_schema.yaml", + "title": "DeclarativeSource", + "type": "object", + "description": "An API source that extracts data according to its declarative components.", + "version": "1.0.0", + "required": ["type", "check", "version"], + "anyOf": [ + { + "required": ["streams"] + }, + { + "required": ["dynamic_streams"] + } + ], + "properties": { + "type": { + "type": "string", + "enum": ["DeclarativeSource"] + }, + "check": { + "anyOf": [ + { + "$ref": "#/definitions/CheckStream" + }, + { + "$ref": "#/definitions/CheckDynamicStream" + } + ] + }, + "streams": { + "type": "array", + "items": { + "anyOf": [ + { + "$ref": "#/definitions/ConditionalStreams" + }, + { + "$ref": "#/definitions/DeclarativeStream" + }, + { + "$ref": "#/definitions/StateDelegatingStream" + } + ] + } + }, + "dynamic_streams": { + "type": "array", + "items": { + "$ref": "#/definitions/DynamicDeclarativeStream" + } + }, + "version": { + "type": "string", + "description": "The version of the Airbyte CDK used to build and test the source." + }, + "schemas": { + "$ref": "#/definitions/Schemas" + }, + "definitions": { + "type": "object" + }, + "spec": { + "$ref": "#/definitions/Spec" + }, + "concurrency_level": { + "$ref": "#/definitions/ConcurrencyLevel" + }, + "api_budget": { + "$ref": "#/definitions/HTTPAPIBudget" + }, + "max_concurrent_async_job_count": { + "title": "Maximum Concurrent Asynchronous Jobs", + "description": "Maximum number of concurrent asynchronous jobs to run. This property is only relevant for sources/streams that support asynchronous job execution through the AsyncRetriever (e.g. a report-based stream that initiates a job, polls the job status, and then fetches the job results). This is often set by the API's maximum number of concurrent jobs on the account level. Refer to the API's documentation for this information.", + "type": ["integer", "string"], + "examples": [3, "{{ config['max_concurrent_async_job_count'] }}"] + }, + "metadata": { + "type": "object", + "description": "For internal Airbyte use only - DO NOT modify manually. Used by consumers of declarative manifests for storing related metadata.", + "additionalProperties": true + }, + "description": { + "type": "string", + "description": "A description of the connector. 
It will be presented on the Source documentation page." + } + }, + "additionalProperties": false, + "definitions": { + "AddedFieldDefinition": { + "title": "Definition Of Field To Add", + "description": "Defines the field to add on a record.", + "type": "object", + "required": ["type", "path", "value"], + "properties": { + "type": { + "type": "string", + "enum": ["AddedFieldDefinition"] + }, + "path": { + "title": "Path", + "description": "List of strings defining the path where to add the value on the record.", + "type": "array", + "items": { + "type": "string" + }, + "examples": [["segment_id"], ["metadata", "segment_id"]] + }, + "value": { + "title": "Value", + "description": "Value of the new field. Use {{ record['existing_field'] }} syntax to refer to other fields in the record.", + "type": "string", + "interpolation_context": [ + "config", + "record", + "stream_interval", + "stream_partition", + "stream_slice" + ], + "examples": [ + "{{ record['updates'] }}", + "{{ record['MetaData']['LastUpdatedTime'] }}", + "{{ stream_partition['segment_id'] }}" + ] + }, + "value_type": { + "title": "Value Type", + "description": "Type of the value. If not specified, the type will be inferred from the value.", + "$ref": "#/definitions/ValueType" + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "AddFields": { + "title": "Add Fields", + "description": "Transformation which adds field to an output record. The path of the added field can be nested.", + "type": "object", + "required": ["type", "fields"], + "properties": { + "type": { + "type": "string", + "enum": ["AddFields"] + }, + "fields": { + "title": "Fields", + "description": "List of transformations (path and corresponding value) that will be added to the record.", + "type": "array", + "items": { + "$ref": "#/definitions/AddedFieldDefinition" + } + }, + "condition": { + "description": "Fields will be added if expression is evaluated to True.", + "type": "string", + "default": "", + "interpolation_context": ["config", "property", "parameters"], + "examples": [ + "{{ property|string == '' }}", + "{{ property is integer }}", + "{{ property|length > 5 }}", + "{{ property == 'some_string_to_match' }}" + ] + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "ApiKeyAuthenticator": { + "title": "API Key Authenticator", + "description": "Authenticator for requests authenticated with an API token injected as an HTTP request header.", + "type": "object", + "required": ["type"], + "properties": { + "type": { + "type": "string", + "enum": ["ApiKeyAuthenticator"] + }, + "api_token": { + "title": "API Key", + "description": "The API key to inject in the request. Fill it in the user inputs.", + "type": "string", + "interpolation_context": ["config"], + "examples": [ + "{{ config['api_key'] }}", + "Token token={{ config['api_key'] }}" + ] + }, + "header": { + "title": "Header Name", + "description": "The name of the HTTP header that will be set to the API key. This setting is deprecated, use inject_into instead. Header and inject_into can not be defined at the same time.", + "type": "string", + "interpolation_context": ["config"], + "examples": ["Authorization", "Api-Token", "X-Auth-Token"] + }, + "inject_into": { + "title": "Inject API Key Into Outgoing HTTP Request", + "description": "Configure how the API Key will be sent in requests to the source API. 
Either inject_into or header has to be defined.", + "$ref": "#/definitions/RequestOption", + "examples": [ + { + "inject_into": "header", + "field_name": "Authorization" + }, + { + "inject_into": "request_parameter", + "field_name": "authKey" + } + ] + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "AuthFlow": { + "title": "Auth flow", + "description": "Additional and optional specification object to describe what an 'advanced' Auth flow would need to function.\n - A connector should be able to fully function with the configuration as described by the ConnectorSpecification in a 'basic' mode.\n - The 'advanced' mode provides easier UX for the user with UI improvements and automations. However, this requires further setup on the\n server side by instance or workspace admins beforehand. The trade-off is that the user does not have to provide as many technical\n inputs anymore and the auth process is faster and easier to complete.", + "type": "object", + "properties": { + "auth_flow_type": { + "title": "Auth flow type", + "description": "The type of auth to use", + "type": "string", + "enum": ["oauth2.0", "oauth1.0"] + }, + "predicate_key": { + "title": "Predicate key", + "description": "JSON path to a field in the connectorSpecification that should exist for the advanced auth to be applicable.", + "type": "array", + "items": { + "type": "string" + }, + "examples": [["credentials", "auth_type"]] + }, + "predicate_value": { + "title": "Predicate value", + "description": "Value of the predicate_key fields for the advanced auth to be applicable.", + "type": "string", + "examples": ["Oauth"] + }, + "oauth_config_specification": { + "$ref": "#/definitions/OAuthConfigSpecification" + } + } + }, + "BasicHttpAuthenticator": { + "title": "Basic HTTP Authenticator", + "description": "Authenticator for requests authenticated with the Basic HTTP authentication scheme, which encodes a username and an optional password in the Authorization request header.", + "type": "object", + "required": ["type", "username"], + "properties": { + "type": { + "type": "string", + "enum": ["BasicHttpAuthenticator"] + }, + "username": { + "title": "Username", + "description": "The username that will be combined with the password, base64 encoded and used to make requests. Fill it in the user inputs.", + "type": "string", + "interpolation_context": ["config"], + "examples": ["{{ config['username'] }}", "{{ config['api_key'] }}"] + }, + "password": { + "title": "Password", + "description": "The password that will be combined with the username, base64 encoded and used to make requests. 
Fill it in the user inputs.", + "type": "string", + "default": "", + "interpolation_context": ["config"], + "examples": ["{{ config['password'] }}", ""] + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "BearerAuthenticator": { + "title": "Bearer Token Authenticator", + "description": "Authenticator for requests authenticated with a bearer token injected as a request header of the form `Authorization: Bearer `.", + "type": "object", + "required": ["type", "api_token"], + "properties": { + "type": { + "type": "string", + "enum": ["BearerAuthenticator"] + }, + "api_token": { + "title": "Bearer Token", + "description": "Token to inject as request header for authenticating with the API.", + "type": "string", + "interpolation_context": ["config"], + "examples": ["{{ config['api_key'] }}", "{{ config['token'] }}"] + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "SelectiveAuthenticator": { + "title": "Selective Authenticator", + "description": "Authenticator that selects concrete authenticator based on config property.", + "type": "object", + "additionalProperties": true, + "required": ["type", "authenticators", "authenticator_selection_path"], + "properties": { + "type": { + "type": "string", + "enum": ["SelectiveAuthenticator"] + }, + "authenticator_selection_path": { + "title": "Authenticator Selection Path", + "description": "Path of the field in config with selected authenticator name", + "type": "array", + "items": { + "type": "string" + }, + "examples": [["auth"], ["auth", "type"]] + }, + "authenticators": { + "title": "Authenticators", + "description": "Authenticators to select from.", + "type": "object", + "additionalProperties": { + "anyOf": [ + { + "$ref": "#/definitions/ApiKeyAuthenticator" + }, + { + "$ref": "#/definitions/BasicHttpAuthenticator" + }, + { + "$ref": "#/definitions/BearerAuthenticator" + }, + { + "$ref": "#/definitions/OAuthAuthenticator" + }, + { + "$ref": "#/definitions/JwtAuthenticator" + }, + { + "$ref": "#/definitions/SessionTokenAuthenticator" + }, + { + "$ref": "#/definitions/LegacySessionTokenAuthenticator" + }, + { + "$ref": "#/definitions/CustomAuthenticator" + }, + { + "$ref": "#/definitions/NoAuth" + } + ] + }, + "examples": [ + { + "authenticators": { + "token": "#/definitions/ApiKeyAuthenticator", + "oauth": "#/definitions/OAuthAuthenticator", + "jwt": "#/definitions/JwtAuthenticator" + } + } + ] + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "CheckStream": { + "title": "Streams to Check", + "description": "Defines the streams to try reading when running a check operation.", + "type": "object", + "required": ["type"], + "properties": { + "type": { + "type": "string", + "enum": ["CheckStream"] + }, + "stream_names": { + "title": "Stream Names", + "description": "Names of the streams to try reading from when running a check operation.", + "type": "array", + "items": { + "type": "string" + }, + "examples": [["users"], ["users", "contacts"]] + }, + "dynamic_streams_check_configs": { + "type": "array", + "items": { + "$ref": "#/definitions/DynamicStreamCheckConfig" + } + } + } + }, + "DynamicStreamCheckConfig": { + "type": "object", + "required": ["type", "dynamic_stream_name"], + "properties": { + "type": { + "type": "string", + "enum": ["DynamicStreamCheckConfig"] + }, + "dynamic_stream_name": { + "title": "Dynamic Stream Name", + "description": "The dynamic stream name.", + "type": "string" + }, + "stream_count": { + "title": "Stream 
Count", + "description": "The number of streams to attempt reading from during a check operation. If `stream_count` exceeds the total number of available streams, the minimum of the two values will be used.", + "type": "integer", + "default": 0 + } + } + }, + "CheckDynamicStream": { + "title": "Dynamic Streams to Check", + "description": "(This component is experimental. Use at your own risk.) Defines the dynamic streams to try reading when running a check operation.", + "type": "object", + "required": ["type", "stream_count"], + "properties": { + "type": { + "type": "string", + "enum": ["CheckDynamicStream"] + }, + "stream_count": { + "title": "Stream Count", + "description": "Numbers of the streams to try reading from when running a check operation.", + "type": "integer" + }, + "use_check_availability": { + "title": "Use Check Availability", + "description": "Enables stream check availability. This field is automatically set by the CDK.", + "type": "boolean", + "default": true + } + } + }, + "CompositeErrorHandler": { + "title": "Composite Error Handler", + "description": "Error handler that sequentially iterates over a list of error handlers.", + "type": "object", + "required": ["type", "error_handlers"], + "properties": { + "type": { + "type": "string", + "enum": ["CompositeErrorHandler"] + }, + "error_handlers": { + "title": "Error Handlers", + "description": "List of error handlers to iterate on to determine how to handle a failed response.", + "type": "array", + "items": { + "anyOf": [ + { + "$ref": "#/definitions/CompositeErrorHandler" + }, + { + "$ref": "#/definitions/DefaultErrorHandler" + } + ] + } + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "ConcurrencyLevel": { + "title": "Concurrency Level", + "description": "Defines the amount of parallelization for the streams that are being synced. The factor of parallelization is how many partitions or streams are synced at the same time. For example, with a concurrency_level of 10, ten streams or partitions of data will processed at the same time. Note that a value of 1 could create deadlock if a stream has a very high number of partitions.", + "type": "object", + "required": ["default_concurrency"], + "properties": { + "type": { + "type": "string", + "enum": ["ConcurrencyLevel"] + }, + "default_concurrency": { + "title": "Default Concurrency", + "description": "The amount of concurrency that will applied during a sync. This value can be hardcoded or user-defined in the config if different users have varying volume thresholds in the target API.", + "anyOf": [ + { + "type": "integer" + }, + { + "type": "string" + } + ], + "interpolation_context": ["config"], + "examples": [10, "{{ config['num_workers'] or 10 }}"] + }, + "max_concurrency": { + "title": "Max Concurrency", + "description": "The maximum level of concurrency that will be used during a sync. 
This becomes a required field when the default_concurrency derives from the config, because it serves as a safeguard against a user-defined threshold that is too high.", + "type": "integer", + "examples": [20, 100] + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "ConditionalStreams": { + "title": "Conditional Streams", + "description": "Streams that are only available while performing a connector operation when the condition is met.", + "type": "object", + "required": ["type", "streams", "condition"], + "properties": { + "type": { + "type": "string", + "enum": ["ConditionalStreams"] + }, + "condition": { + "title": "Condition", + "description": "Condition that will be evaluated to determine if a set of streams should be available.", + "type": "string", + "interpolation_context": ["config", "parameters"], + "examples": ["{{ config['is_sandbox'] }}"] + }, + "streams": { + "title": "Streams", + "description": "Streams that will be used during an operation based on the condition.", + "type": "array", + "items": { + "$ref": "#/definitions/DeclarativeStream" + } + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "ConstantBackoffStrategy": { + "title": "Constant Backoff", + "description": "Backoff strategy with a constant backoff interval.", + "type": "object", + "required": ["type", "backoff_time_in_seconds"], + "properties": { + "type": { + "type": "string", + "enum": ["ConstantBackoffStrategy"] + }, + "backoff_time_in_seconds": { + "title": "Backoff Time", + "description": "Backoff time in seconds.", + "anyOf": [ + { + "type": "number", + "title": "Number of seconds" + }, + { + "type": "string", + "title": "Interpolated Value" + } + ], + "interpolation_context": ["config"], + "examples": [30, 30.5, "{{ config['backoff_time'] }}"] + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "CursorPagination": { + "title": "Cursor Pagination", + "description": "Pagination strategy that evaluates an interpolated string to define the next page to fetch.", + "type": "object", + "required": ["type", "cursor_value"], + "properties": { + "type": { + "type": "string", + "enum": ["CursorPagination"] + }, + "cursor_value": { + "title": "Cursor Value", + "description": "Value of the cursor defining the next page to fetch.", + "type": "string", + "interpolation_context": [ + "config", + "headers", + "last_page_size", + "last_record", + "response" + ], + "examples": [ + "{{ headers.link.next.cursor }}", + "{{ last_record['key'] }}", + "{{ response['nextPage'] }}" + ] + }, + "page_size": { + "title": "Page Size", + "description": "The number of records to include in each pages.", + "type": "integer", + "examples": [100] + }, + "stop_condition": { + "title": "Stop Condition", + "description": "Template string evaluating when to stop paginating.", + "type": "string", + "interpolation_context": [ + "config", + "headers", + "last_record", + "response" + ], + "examples": [ + "{{ response.data.has_more is false }}", + "{{ 'next' not in headers['link'] }}" + ] + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "CustomAuthenticator": { + "title": "Custom Authenticator", + "description": "Authenticator component whose behavior is derived from a custom code implementation of the connector.", + "type": "object", + "additionalProperties": true, + "required": ["type", "class_name"], + "properties": { + "type": { + "type": "string", + "enum": ["CustomAuthenticator"] + }, + 
"class_name": { + "title": "Class Name", + "description": "Fully-qualified name of the class that will be implementing the custom authentication strategy. Has to be a sub class of DeclarativeAuthenticator. The format is `source_..`.", + "type": "string", + "additionalProperties": true, + "examples": ["source_railz.components.ShortLivedTokenAuthenticator"] + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "CustomBackoffStrategy": { + "title": "Custom Backoff Strategy", + "description": "Backoff strategy component whose behavior is derived from a custom code implementation of the connector.", + "type": "object", + "additionalProperties": true, + "required": ["type", "class_name"], + "properties": { + "type": { + "type": "string", + "enum": ["CustomBackoffStrategy"] + }, + "class_name": { + "title": "Class Name", + "description": "Fully-qualified name of the class that will be implementing the custom backoff strategy. The format is `source_..`.", + "type": "string", + "examples": ["source_railz.components.MyCustomBackoffStrategy"] + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "CustomErrorHandler": { + "title": "Custom Error Handler", + "description": "Error handler component whose behavior is derived from a custom code implementation of the connector.", + "type": "object", + "additionalProperties": true, + "required": ["type", "class_name"], + "properties": { + "type": { + "type": "string", + "enum": ["CustomErrorHandler"] + }, + "class_name": { + "title": "Class Name", + "description": "Fully-qualified name of the class that will be implementing the custom error handler. The format is `source_..`.", + "type": "string", + "examples": ["source_railz.components.MyCustomErrorHandler"] + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "CustomIncrementalSync": { + "title": "Custom Incremental Sync", + "description": "Incremental component whose behavior is derived from a custom code implementation of the connector.", + "type": "object", + "additionalProperties": true, + "required": ["type", "class_name", "cursor_field"], + "properties": { + "type": { + "type": "string", + "enum": ["CustomIncrementalSync"] + }, + "class_name": { + "title": "Class Name", + "description": "Fully-qualified name of the class that will be implementing the custom incremental sync. The format is `source_..`.", + "type": "string", + "additionalProperties": true, + "examples": ["source_railz.components.MyCustomIncrementalSync"] + }, + "cursor_field": { + "description": "The location of the value on a record that will be used as a bookmark during sync.", + "type": "string" + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "CustomPaginationStrategy": { + "title": "Custom Pagination Strategy", + "description": "Pagination strategy component whose behavior is derived from a custom code implementation of the connector.", + "type": "object", + "additionalProperties": true, + "required": ["type", "class_name"], + "properties": { + "type": { + "type": "string", + "enum": ["CustomPaginationStrategy"] + }, + "class_name": { + "title": "Class Name", + "description": "Fully-qualified name of the class that will be implementing the custom pagination strategy. 
The format is `source_..`.", + "type": "string", + "examples": ["source_railz.components.MyCustomPaginationStrategy"] + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "CustomRecordExtractor": { + "title": "Custom Record Extractor", + "description": "Record extractor component whose behavior is derived from a custom code implementation of the connector.", + "type": "object", + "additionalProperties": true, + "required": ["type", "class_name"], + "properties": { + "type": { + "type": "string", + "enum": ["CustomRecordExtractor"] + }, + "class_name": { + "title": "Class Name", + "description": "Fully-qualified name of the class that will be implementing the custom record extraction strategy. The format is `source_..`.", + "type": "string", + "examples": ["source_railz.components.MyCustomRecordExtractor"] + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "CustomRecordFilter": { + "title": "Custom Record Filter", + "description": "Record filter component whose behavior is derived from a custom code implementation of the connector.", + "type": "object", + "additionalProperties": true, + "required": ["type", "class_name"], + "properties": { + "type": { + "type": "string", + "enum": ["CustomRecordFilter"] + }, + "class_name": { + "title": "Class Name", + "description": "Fully-qualified name of the class that will be implementing the custom record filter strategy. The format is `source_..`.", + "type": "string", + "examples": ["source_railz.components.MyCustomCustomRecordFilter"] + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "CustomRequester": { + "title": "Custom Requester", + "description": "Requester component whose behavior is derived from a custom code implementation of the connector.", + "type": "object", + "additionalProperties": true, + "required": ["type", "class_name"], + "properties": { + "type": { + "type": "string", + "enum": ["CustomRequester"] + }, + "class_name": { + "title": "Class Name", + "description": "Fully-qualified name of the class that will be implementing the custom requester strategy. The format is `source_..`.", + "type": "string", + "additionalProperties": true, + "examples": ["source_railz.components.MyCustomRecordExtractor"] + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "CustomRetriever": { + "title": "Custom Retriever", + "description": "Retriever component whose behavior is derived from a custom code implementation of the connector.", + "type": "object", + "additionalProperties": true, + "required": ["type", "class_name"], + "properties": { + "type": { + "type": "string", + "enum": ["CustomRetriever"] + }, + "class_name": { + "title": "Class Name", + "description": "Fully-qualified name of the class that will be implementing the custom retriever strategy. 
The format is `source_..`.", + "type": "string", + "additionalProperties": true, + "examples": ["source_railz.components.MyCustomRetriever"] + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "CustomPartitionRouter": { + "title": "Custom Partition Router", + "description": "Partition router component whose behavior is derived from a custom code implementation of the connector.", + "type": "object", + "additionalProperties": true, + "required": ["type", "class_name"], + "properties": { + "type": { + "type": "string", + "enum": ["CustomPartitionRouter"] + }, + "class_name": { + "title": "Class Name", + "description": "Fully-qualified name of the class that will be implementing the custom partition router. The format is `source_..`.", + "type": "string", + "examples": ["source_railz.components.MyCustomPartitionRouter"] + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "CustomSchemaLoader": { + "title": "Custom Schema Loader", + "description": "Schema Loader component whose behavior is derived from a custom code implementation of the connector.", + "type": "object", + "additionalProperties": true, + "required": ["type", "class_name"], + "properties": { + "type": { + "type": "string", + "enum": ["CustomSchemaLoader"] + }, + "class_name": { + "title": "Class Name", + "description": "Fully-qualified name of the class that will be implementing the custom schema loader. The format is `source_..`.", + "type": "string", + "examples": ["source_railz.components.MyCustomSchemaLoader"] + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "CustomSchemaNormalization": { + "title": "Custom Schema Normalization", + "description": "Schema normalization component whose behavior is derived from a custom code implementation of the connector.", + "type": "object", + "additionalProperties": true, + "required": ["type", "class_name"], + "properties": { + "type": { + "type": "string", + "enum": ["CustomSchemaNormalization"] + }, + "class_name": { + "title": "Class Name", + "description": "Fully-qualified name of the class that will be implementing the custom normalization. The format is `source_..`.", + "type": "string", + "additionalProperties": true, + "examples": [ + "source_amazon_seller_partner.components.LedgerDetailedViewReportsTypeTransformer" + ] + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "CustomStateMigration": { + "title": "Custom State Migration", + "description": "Apply a custom transformation on the input state.", + "type": "object", + "additionalProperties": true, + "required": ["type", "class_name"], + "properties": { + "type": { + "type": "string", + "enum": ["CustomStateMigration"] + }, + "class_name": { + "title": "Class Name", + "description": "Fully-qualified name of the class that will be implementing the custom state migration. 
The format is `source_..`.", + "type": "string", + "examples": ["source_railz.components.MyCustomStateMigration"] + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "CustomTransformation": { + "title": "Custom Transformation", + "description": "Transformation component whose behavior is derived from a custom code implementation of the connector.", + "type": "object", + "additionalProperties": true, + "required": ["type", "class_name"], + "properties": { + "type": { + "type": "string", + "enum": ["CustomTransformation"] + }, + "class_name": { + "title": "Class Name", + "description": "Fully-qualified name of the class that will be implementing the custom transformation. The format is `source_..`.", + "type": "string", + "examples": ["source_railz.components.MyCustomTransformation"] + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "LegacyToPerPartitionStateMigration": { + "title": "Legacy To Per-partition-state Migration", + "description": "Transforms the input state for per-partitioned streams from the legacy format to the low-code format. The cursor field and partition ID fields are automatically extracted from the stream's DatetimebasedCursor and SubstreamPartitionRouter.\nExample input state: { \"13506132\": { \"last_changed\": \"2022-12-27T08:34:39+00:00\" } Example output state: { \"partition\": {\"id\": \"13506132\"}, \"cursor\": {\"last_changed\": \"2022-12-27T08:34:39+00:00\"} } ", + "type": "object", + "additionalProperties": true, + "properties": { + "type": { + "type": "string", + "enum": ["LegacyToPerPartitionStateMigration"] + } + } + }, + "IncrementingCountCursor": { + "title": "Incrementing Count Cursor", + "description": "Cursor that allows for incremental sync according to a continuously increasing integer.", + "type": "object", + "required": ["type", "cursor_field"], + "properties": { + "type": { + "type": "string", + "enum": ["IncrementingCountCursor"] + }, + "cursor_field": { + "title": "Cursor Field", + "description": "The location of the value on a record that will be used as a bookmark during sync. To ensure no data loss, the API must return records in ascending order based on the cursor field. Nested fields are not supported, so the field must be at the top level of the record. 
You can use a combination of Add Field and Remove Field transformations to move the nested field to the top.", + "type": "string", + "interpolation_context": ["config"], + "examples": ["created_at", "{{ config['record_cursor'] }}"] + }, + "start_value": { + "title": "Start Value", + "description": "The value that determines the earliest record that should be synced.", + "anyOf": [ + { + "type": "string" + }, + { + "type": "integer" + } + ], + "interpolation_context": ["config"], + "examples": [0, "{{ config['start_value'] }}"] + }, + "start_value_option": { + "title": "Inject Start Value Into Outgoing HTTP Request", + "description": "Optionally configures how the start value will be sent in requests to the source API.", + "$ref": "#/definitions/RequestOption" + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "DatetimeBasedCursor": { + "title": "Datetime Based Cursor", + "description": "Cursor to provide incremental capabilities over datetime.", + "type": "object", + "required": ["type", "cursor_field", "datetime_format", "start_datetime"], + "properties": { + "type": { + "type": "string", + "enum": ["DatetimeBasedCursor"] + }, + "clamping": { + "title": "Date Range Clamping", + "description": "This option is used to adjust the upper and lower boundaries of each datetime window to beginning and end of the provided target period (day, week, month)", + "type": "object", + "required": ["target"], + "properties": { + "target": { + "title": "Target", + "description": "The period of time that datetime windows will be clamped by", + "type": "string", + "interpolation_context": ["config"], + "examples": ["DAY", "WEEK", "MONTH", "{{ config['target'] }}"] + }, + "target_details": { + "type": "object", + "additionalProperties": true + } + } + }, + "cursor_field": { + "title": "Cursor Field", + "description": "The location of the value on a record that will be used as a bookmark during sync. To ensure no data loss, the API must return records in ascending order based on the cursor field. Nested fields are not supported, so the field must be at the top level of the record. You can use a combination of Add Field and Remove Field transformations to move the nested field to the top.", + "type": "string", + "interpolation_context": ["config"], + "examples": ["created_at", "{{ config['record_cursor'] }}"] + }, + "cursor_datetime_formats": { + "title": "Cursor Datetime Formats", + "type": "array", + "items": { + "type": "string" + }, + "description": "The possible formats for the cursor field, in order of preference. The first format that matches the cursor field value will be used to parse it. If not provided, the Outgoing Datetime Format will be used.\nUse placeholders starting with \"%\" to describe the format the API is using. 
The following placeholders are available:\n * **%s**: Epoch unix timestamp - `1686218963`\n * **%s_as_float**: Epoch unix timestamp in seconds as float with microsecond precision - `1686218963.123456`\n * **%ms**: Epoch unix timestamp - `1686218963123`\n * **%a**: Weekday (abbreviated) - `Sun`\n * **%A**: Weekday (full) - `Sunday`\n * **%w**: Weekday (decimal) - `0` (Sunday), `6` (Saturday)\n * **%d**: Day of the month (zero-padded) - `01`, `02`, ..., `31`\n * **%b**: Month (abbreviated) - `Jan`\n * **%B**: Month (full) - `January`\n * **%m**: Month (zero-padded) - `01`, `02`, ..., `12`\n * **%y**: Year (without century, zero-padded) - `00`, `01`, ..., `99`\n * **%Y**: Year (with century) - `0001`, `0002`, ..., `9999`\n * **%H**: Hour (24-hour, zero-padded) - `00`, `01`, ..., `23`\n * **%I**: Hour (12-hour, zero-padded) - `01`, `02`, ..., `12`\n * **%p**: AM/PM indicator\n * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`\n * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`\n * **%f**: Microsecond (zero-padded to 6 digits) - `000000`, `000001`, ..., `999999`\n * **%_ms**: Millisecond (zero-padded to 3 digits) - `000`, `001`, ..., `999`\n * **%z**: UTC offset - `(empty)`, `+0000`, `-04:00`\n * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`\n * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`\n * **%U**: Week number of the year (Sunday as first day) - `00`, `01`, ..., `53`\n * **%W**: Week number of the year (Monday as first day) - `00`, `01`, ..., `53`\n * **%c**: Date and time representation - `Tue Aug 16 21:30:00 1988`\n * **%x**: Date representation - `08/16/1988`\n * **%X**: Time representation - `21:30:00`\n * **%%**: Literal '%' character\n\n Some placeholders depend on the locale of the underlying system - in most cases this locale is configured as en/US. For more information see the [Python documentation](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).\n", + "examples": [ + "%Y-%m-%d", + "%Y-%m-%d %H:%M:%S", + "%Y-%m-%dT%H:%M:%S", + "%Y-%m-%dT%H:%M:%SZ", + "%Y-%m-%dT%H:%M:%S%z", + "%Y-%m-%dT%H:%M:%S.%fZ", + "%Y-%m-%dT%H:%M:%S.%f%z", + "%Y-%m-%d %H:%M:%S.%f+00:00", + "%s", + "%ms" + ] + }, + "start_datetime": { + "title": "Start Datetime", + "description": "The datetime that determines the earliest record that should be synced.", + "anyOf": [ + { + "$ref": "#/definitions/MinMaxDatetime" + }, + { + "type": "string", + "title": "Interpolated Value" + } + ], + "interpolation_context": ["config"], + "examples": ["2020-01-1T00:00:00Z", "{{ config['start_time'] }}"] + }, + "start_time_option": { + "title": "Inject Start Time Into Outgoing HTTP Request", + "description": "Optionally configures how the start datetime will be sent in requests to the source API.", + "$ref": "#/definitions/RequestOption" + }, + "end_datetime": { + "title": "End Datetime", + "description": "The datetime that determines the last record that should be synced. 
If not provided, `{{ now_utc() }}` will be used.", + "anyOf": [ + { + "$ref": "#/definitions/MinMaxDatetime" + }, + { + "type": "string", + "title": "Interpolated Value" + } + ], + "interpolation_context": ["config"], + "examples": [ + "2021-01-1T00:00:00Z", + "{{ now_utc() }}", + "{{ day_delta(-1) }}" + ] + }, + "end_time_option": { + "title": "Inject End Time Into Outgoing HTTP Request", + "description": "Optionally configures how the end datetime will be sent in requests to the source API.", + "$ref": "#/definitions/RequestOption" + }, + "datetime_format": { + "title": "Outgoing Datetime Format", + "description": "The datetime format used to format the datetime values that are sent in outgoing requests to the API. Use placeholders starting with \"%\" to describe the format the API is using. The following placeholders are available:\n * **%s**: Epoch unix timestamp - `1686218963`\n * **%s_as_float**: Epoch unix timestamp in seconds as float with microsecond precision - `1686218963.123456`\n * **%ms**: Epoch unix timestamp (milliseconds) - `1686218963123`\n * **%a**: Weekday (abbreviated) - `Sun`\n * **%A**: Weekday (full) - `Sunday`\n * **%w**: Weekday (decimal) - `0` (Sunday), `6` (Saturday)\n * **%d**: Day of the month (zero-padded) - `01`, `02`, ..., `31`\n * **%b**: Month (abbreviated) - `Jan`\n * **%B**: Month (full) - `January`\n * **%m**: Month (zero-padded) - `01`, `02`, ..., `12`\n * **%y**: Year (without century, zero-padded) - `00`, `01`, ..., `99`\n * **%Y**: Year (with century) - `0001`, `0002`, ..., `9999`\n * **%H**: Hour (24-hour, zero-padded) - `00`, `01`, ..., `23`\n * **%I**: Hour (12-hour, zero-padded) - `01`, `02`, ..., `12`\n * **%p**: AM/PM indicator\n * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`\n * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`\n * **%f**: Microsecond (zero-padded to 6 digits) - `000000`\n * **%_ms**: Millisecond (zero-padded to 3 digits) - `000`\n * **%z**: UTC offset - `(empty)`, `+0000`, `-04:00`\n * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`\n * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`\n * **%U**: Week number of the year (starting Sunday) - `00`, ..., `53`\n * **%W**: Week number of the year (starting Monday) - `00`, ..., `53`\n * **%c**: Date and time - `Tue Aug 16 21:30:00 1988`\n * **%x**: Date standard format - `08/16/1988`\n * **%X**: Time standard format - `21:30:00`\n * **%%**: Literal '%' character\n\n Some placeholders depend on the locale of the underlying system - in most cases this locale is configured as en/US. For more information see the [Python documentation](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).\n", + "type": "string", + "examples": [ + "%Y-%m-%dT%H:%M:%S.%f%z", + "%Y-%m-%d", + "%s", + "%ms", + "%s_as_float" + ] + }, + "cursor_granularity": { + "title": "Cursor Granularity", + "description": "Smallest increment the datetime_format has (ISO 8601 duration) that is used to ensure the start of a slice does not overlap with the end of the previous one, e.g. for %Y-%m-%d the granularity should\nbe P1D, for %Y-%m-%dT%H:%M:%SZ the granularity should be PT1S. 
Given this field is provided, `step` needs to be provided as well.\n * **PT0.000001S**: 1 microsecond\n * **PT0.001S**: 1 millisecond\n * **PT1S**: 1 second\n * **PT1M**: 1 minute\n * **PT1H**: 1 hour\n * **P1D**: 1 day\n", + "type": "string", + "examples": ["PT1S"] + }, + "is_data_feed": { + "title": "Data Feed API", + "description": "A data feed API is an API that does not allow filtering and paginates the content from the most recent to the least recent. Given this, the CDK needs to know when to stop paginating and this field will generate a stop condition for pagination.", + "type": "boolean" + }, + "is_client_side_incremental": { + "title": "Client-side Incremental Filtering", + "description": "Set to True if the target API endpoint does not take cursor values to filter records and returns all records anyway. This will cause the connector to filter out records locally, and only emit new records from the last sync, hence incremental. This means that all records would be read from the API, but only new records will be emitted to the destination.", + "type": "boolean" + }, + "is_compare_strictly": { + "title": "Strict Start-End Time Comparison", + "description": "Set to True if the target API does not accept queries where the start time equal the end time. This will cause those requests to be skipped.", + "type": "boolean", + "default": false + }, + "global_substream_cursor": { + "title": "Global Substream Cursor", + "description": "Setting to True causes the connector to store the cursor as one value, instead of per-partition. This setting optimizes performance when the parent stream has thousands of partitions. Notably, the substream state is updated only at the end of the sync, which helps prevent data loss in case of a sync failure. See more info in the [docs](https://docs.airbyte.com/connector-development/config-based/understanding-the-yaml-file/incremental-syncs).", + "type": "boolean", + "default": false + }, + "lookback_window": { + "title": "Lookback Window", + "description": "Time interval (ISO8601 duration) before the start_datetime to read data for, e.g. P1M for looking back one month.\n * **PT1H**: 1 hour\n * **P1D**: 1 day\n * **P1W**: 1 week\n * **P1M**: 1 month\n * **P1Y**: 1 year\n", + "type": "string", + "interpolation_context": ["config"], + "examples": ["P1D", "P{{ config['lookback_days'] }}D"] + }, + "partition_field_end": { + "title": "Partition Field End", + "description": "Name of the partition start time field.", + "type": "string", + "examples": ["ending_time"] + }, + "partition_field_start": { + "title": "Partition Field Start", + "description": "Name of the partition end time field.", + "type": "string", + "examples": ["starting_time"] + }, + "step": { + "title": "Step", + "description": "The size of the time window (ISO8601 duration). 
Given this field is provided, `cursor_granularity` needs to be provided as well.\n * **PT1H**: 1 hour\n * **P1D**: 1 day\n * **P1W**: 1 week\n * **P1M**: 1 month\n * **P1Y**: 1 year\n", + "type": "string", + "examples": ["P1W", "{{ config['step_increment'] }}"] + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "JwtAuthenticator": { + "title": "JWT Authenticator", + "description": "Authenticator for requests using JWT authentication flow.", + "type": "object", + "required": ["type", "secret_key", "algorithm"], + "properties": { + "type": { + "type": "string", + "enum": ["JwtAuthenticator"] + }, + "secret_key": { + "title": "Secret Key", + "type": "string", + "description": "Secret used to sign the JSON web token.", + "interpolation_context": ["config"], + "examples": ["{{ config['secret_key'] }}"] + }, + "base64_encode_secret_key": { + "title": "Base64-encode Secret Key", + "type": "boolean", + "description": "When set to true, the secret key will be base64 encoded prior to being encoded as part of the JWT. Only set to \"true\" when required by the API.", + "default": false + }, + "algorithm": { + "title": "Algorithm", + "type": "string", + "description": "Algorithm used to sign the JSON web token.", + "enum": [ + "HS256", + "HS384", + "HS512", + "ES256", + "ES256K", + "ES384", + "ES512", + "RS256", + "RS384", + "RS512", + "PS256", + "PS384", + "PS512", + "EdDSA" + ], + "examples": ["ES256", "HS256", "RS256", "{{ config['algorithm'] }}"] + }, + "token_duration": { + "type": "integer", + "title": "Token Duration", + "description": "The amount of time in seconds a JWT token can be valid after being issued.", + "default": 1200, + "examples": [1200, 3600] + }, + "header_prefix": { + "type": "string", + "title": "Header Prefix", + "description": "The prefix to be used within the Authentication header.", + "examples": ["Bearer", "Basic"] + }, + "jwt_headers": { + "type": "object", + "title": "JWT Headers", + "description": "JWT headers used when signing JSON web token.", + "additionalProperties": false, + "properties": { + "kid": { + "type": "string", + "title": "Key Identifier", + "description": "Private key ID for user account.", + "examples": ["{{ config['kid'] }}"] + }, + "typ": { + "type": "string", + "title": "Type", + "description": "The media type of the complete JWT.", + "default": "JWT", + "examples": ["JWT"] + }, + "cty": { + "type": "string", + "title": "Content Type", + "description": "Content type of JWT header.", + "examples": ["JWT"] + } + } + }, + "additional_jwt_headers": { + "type": "object", + "title": "Additional JWT Headers", + "description": "Additional headers to be included with the JWT headers object.", + "additionalProperties": true + }, + "jwt_payload": { + "type": "object", + "title": "JWT Payload", + "description": "JWT Payload used when signing JSON web token.", + "additionalProperties": false, + "properties": { + "iss": { + "type": "string", + "title": "Issuer", + "description": "The user/principal that issued the JWT. Commonly a value unique to the user.", + "examples": ["{{ config['iss'] }}"] + }, + "sub": { + "type": "string", + "title": "Subject", + "description": "The subject of the JWT. Commonly defined by the API." + }, + "aud": { + "type": "string", + "title": "Audience", + "description": "The recipient that the JWT is intended for. 
Commonly defined by the API.", + "examples": ["appstoreconnect-v1"] + } + } + }, + "additional_jwt_payload": { + "type": "object", + "title": "Additional JWT Payload Properties", + "description": "Additional properties to be added to the JWT payload.", + "additionalProperties": true + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "OAuthAuthenticator": { + "title": "OAuth2", + "description": "Authenticator for requests using OAuth 2.0 authorization flow.", + "type": "object", + "required": ["type"], + "properties": { + "type": { + "type": "string", + "enum": ["OAuthAuthenticator"] + }, + "client_id_name": { + "title": "Client ID Property Name", + "description": "The name of the property to use to refresh the `access_token`.", + "type": "string", + "default": "client_id", + "examples": ["custom_app_id"] + }, + "client_id": { + "title": "Client ID", + "description": "The OAuth client ID. Fill it in the user inputs.", + "type": "string", + "interpolation_context": ["config"], + "examples": [ + "{{ config['client_id'] }}", + "{{ config['credentials']['client_id }}" + ] + }, + "client_secret_name": { + "title": "Client Secret Property Name", + "description": "The name of the property to use to refresh the `access_token`.", + "type": "string", + "default": "client_secret", + "examples": ["custom_app_secret"] + }, + "client_secret": { + "title": "Client Secret", + "description": "The OAuth client secret. Fill it in the user inputs.", + "type": "string", + "interpolation_context": ["config"], + "examples": [ + "{{ config['client_secret'] }}", + "{{ config['credentials']['client_secret }}" + ] + }, + "refresh_token_name": { + "title": "Refresh Token Property Name", + "description": "The name of the property to use to refresh the `access_token`.", + "type": "string", + "default": "refresh_token", + "examples": ["custom_app_refresh_value"] + }, + "refresh_token": { + "title": "Refresh Token", + "description": "Credential artifact used to get a new access token.", + "type": "string", + "interpolation_context": ["config"], + "examples": [ + "{{ config['refresh_token'] }}", + "{{ config['credentials]['refresh_token'] }}" + ] + }, + "token_refresh_endpoint": { + "title": "Token Refresh Endpoint", + "description": "The full URL to call to obtain a new access token.", + "type": "string", + "examples": ["https://connect.squareup.com/oauth2/token"] + }, + "access_token_name": { + "title": "Access Token Property Name", + "description": "The name of the property which contains the access token in the response from the token refresh endpoint.", + "type": "string", + "default": "access_token", + "examples": ["access_token"] + }, + "access_token_value": { + "title": "Access Token Value", + "description": "The value of the access_token to bypass the token refreshing using `refresh_token`.", + "type": "string", + "interpolation_context": ["config"], + "examples": ["secret_access_token_value"] + }, + "expires_in_name": { + "title": "Token Expiry Property Name", + "description": "The name of the property which contains the expiry date in the response from the token refresh endpoint.", + "type": "string", + "default": "expires_in", + "examples": ["expires_in"] + }, + "grant_type_name": { + "title": "Grant Type Property Name", + "description": "The name of the property to use to refresh the `access_token`.", + "type": "string", + "default": "grant_type", + "examples": ["custom_grant_type"] + }, + "grant_type": { + "title": "Grant Type", + "description": "Specifies the OAuth2 
grant type. If set to refresh_token, the refresh_token needs to be provided as well. For client_credentials, only client id and secret are required. Other grant types are not officially supported.", + "type": "string", + "default": "refresh_token", + "examples": ["refresh_token", "client_credentials"] + }, + "refresh_request_body": { + "title": "Refresh Request Body", + "description": "Body of the request sent to get a new access token.", + "type": "object", + "additionalProperties": true, + "examples": [ + { + "applicationId": "{{ config['application_id'] }}", + "applicationSecret": "{{ config['application_secret'] }}", + "token": "{{ config['token'] }}" + } + ] + }, + "refresh_request_headers": { + "title": "Refresh Request Headers", + "description": "Headers of the request sent to get a new access token.", + "type": "object", + "additionalProperties": true, + "examples": [ + { + "Authorization": "", + "Content-Type": "application/x-www-form-urlencoded" + } + ] + }, + "scopes": { + "title": "Scopes", + "description": "List of scopes that should be granted to the access token.", + "type": "array", + "items": { + "type": "string" + }, + "examples": [ + [ + "crm.list.read", + "crm.objects.contacts.read", + "crm.schema.contacts.read" + ] + ] + }, + "token_expiry_date": { + "title": "Token Expiry Date", + "description": "The access token expiry date.", + "type": "string", + "examples": ["2023-04-06T07:12:10.421833+00:00", 1680842386] + }, + "token_expiry_date_format": { + "title": "Token Expiry Date Format", + "description": "The format of the time to expiration datetime. Provide it if the time is returned as a date-time string instead of seconds.", + "type": "string", + "examples": ["%Y-%m-%d %H:%M:%S.%f+00:00"] + }, + "refresh_token_updater": { + "title": "Refresh Token Updater", + "description": "When the refresh token updater is defined, new refresh tokens, access tokens and the access token expiry date are written back from the authentication response to the config object. This is important if the refresh token can only used once.", + "properties": { + "refresh_token_name": { + "title": "Refresh Token Property Name", + "description": "The name of the property which contains the updated refresh token in the response from the token refresh endpoint.", + "type": "string", + "default": "refresh_token", + "examples": ["refresh_token"] + }, + "access_token_config_path": { + "title": "Config Path To Access Token", + "description": "Config path to the access token. Make sure the field actually exists in the config.", + "type": "array", + "items": { + "type": "string" + }, + "default": ["credentials", "access_token"], + "examples": [["credentials", "access_token"], ["access_token"]] + }, + "refresh_token_config_path": { + "title": "Config Path To Refresh Token", + "description": "Config path to the access token. Make sure the field actually exists in the config.", + "type": "array", + "items": { + "type": "string" + }, + "default": ["credentials", "refresh_token"], + "examples": [["credentials", "refresh_token"], ["refresh_token"]] + }, + "token_expiry_date_config_path": { + "title": "Config Path To Expiry Date", + "description": "Config path to the expiry date. 
Make sure actually exists in the config.", + "type": "array", + "items": { + "type": "string" + }, + "default": ["credentials", "token_expiry_date"], + "examples": [["credentials", "token_expiry_date"]] + }, + "refresh_token_error_status_codes": { + "title": "Refresh Token Error Status Codes", + "description": "Status Codes to Identify refresh token error in response (Refresh Token Error Key and Refresh Token Error Values should be also specified). Responses with one of the error status code and containing an error value will be flagged as a config error", + "type": "array", + "items": { + "type": "integer" + }, + "default": [], + "examples": [[400, 500]] + }, + "refresh_token_error_key": { + "title": "Refresh Token Error Key", + "description": "Key to Identify refresh token error in response (Refresh Token Error Status Codes and Refresh Token Error Values should be also specified).", + "type": "string", + "default": "", + "examples": ["error"] + }, + "refresh_token_error_values": { + "title": "Refresh Token Error Values", + "description": "List of values to check for exception during token refresh process. Used to check if the error found in the response matches the key from the Refresh Token Error Key field (e.g. response={\"error\": \"invalid_grant\"}). Only responses with one of the error status code and containing an error value will be flagged as a config error", + "type": "array", + "items": { + "type": "string" + }, + "default": [], + "examples": [["invalid_grant", "invalid_permissions"]] + } + } + }, + "profile_assertion": { + "title": "Profile Assertion", + "description": "The authenticator being used to authenticate the client authenticator.", + "$ref": "#/definitions/JwtAuthenticator" + }, + "use_profile_assertion": { + "title": "Use Profile Assertion", + "description": "Enable using profile assertion as a flow for OAuth authorization.", + "type": "boolean", + "default": false + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "DeclarativeStream": { + "title": "Declarative Stream", + "description": "A stream whose behavior is described by a set of declarative low code components.", + "type": "object", + "additionalProperties": true, + "required": ["type", "retriever"], + "properties": { + "type": { + "type": "string", + "enum": ["DeclarativeStream"] + }, + "name": { + "title": "Name", + "description": "The stream name.", + "type": "string", + "default": "", + "example": ["Users"] + }, + "retriever": { + "title": "Retriever", + "description": "Component used to coordinate how records are extracted across stream slices and request pages.", + "anyOf": [ + { + "$ref": "#/definitions/SimpleRetriever" + }, + { + "$ref": "#/definitions/AsyncRetriever" + }, + { + "$ref": "#/definitions/CustomRetriever" + } + ] + }, + "incremental_sync": { + "title": "Incremental Sync", + "description": "Component used to fetch data incrementally based on a time field in the data.", + "anyOf": [ + { + "$ref": "#/definitions/DatetimeBasedCursor" + }, + { + "$ref": "#/definitions/IncrementingCountCursor" + }, + { + "$ref": "#/definitions/CustomIncrementalSync" + } + ] + }, + "primary_key": { + "title": "Primary Key", + "$ref": "#/definitions/PrimaryKey", + "default": "" + }, + "schema_loader": { + "title": "Schema Loader", + "description": "One or many schema loaders can be used to retrieve the schema for the current stream. When multiple schema loaders are defined, schema properties will be merged together. 
Schema loaders defined first taking precedence in the event of a conflict.", + "anyOf": [ + { + "$ref": "#/definitions/InlineSchemaLoader" + }, + { + "$ref": "#/definitions/DynamicSchemaLoader" + }, + { + "$ref": "#/definitions/JsonFileSchemaLoader" + }, + { + "title": "Multiple Schema Loaders", + "type": "array", + "items": { + "anyOf": [ + { + "$ref": "#/definitions/InlineSchemaLoader" + }, + { + "$ref": "#/definitions/DynamicSchemaLoader" + }, + { + "$ref": "#/definitions/JsonFileSchemaLoader" + }, + { + "$ref": "#/definitions/CustomSchemaLoader" + } + ] + } + }, + { + "$ref": "#/definitions/CustomSchemaLoader" + } + ] + }, + "transformations": { + "title": "Transformations", + "description": "A list of transformations to be applied to each output record.", + "linkable": true, + "type": "array", + "items": { + "anyOf": [ + { + "$ref": "#/definitions/AddFields" + }, + { + "$ref": "#/definitions/RemoveFields" + }, + { + "$ref": "#/definitions/KeysToLower" + }, + { + "$ref": "#/definitions/KeysToSnakeCase" + }, + { + "$ref": "#/definitions/FlattenFields" + }, + { + "$ref": "#/definitions/DpathFlattenFields" + }, + { + "$ref": "#/definitions/KeysReplace" + }, + { + "$ref": "#/definitions/CustomTransformation" + } + ] + } + }, + "state_migrations": { + "title": "State Migrations", + "description": "Array of state migrations to be applied on the input state", + "type": "array", + "items": { + "anyOf": [ + { + "$ref": "#/definitions/LegacyToPerPartitionStateMigration" + }, + { + "$ref": "#/definitions/CustomStateMigration" + } + ] + }, + "default": [] + }, + "file_uploader": { + "title": "File Uploader", + "description": "(experimental) Describes how to fetch a file", + "linkable": true, + "type": "object", + "required": ["type", "requester", "download_target_extractor"], + "properties": { + "type": { + "type": "string", + "enum": ["FileUploader"] + }, + "requester": { + "description": "Requester component that describes how to prepare HTTP requests to send to the source API.", + "anyOf": [ + { + "$ref": "#/definitions/HttpRequester" + }, + { + "$ref": "#/definitions/CustomRequester" + } + ] + }, + "download_target_extractor": { + "description": "Responsible for fetching the url where the file is located. This is applied on each records and not on the HTTP response", + "anyOf": [ + { + "$ref": "#/definitions/DpathExtractor" + }, + { + "$ref": "#/definitions/CustomRecordExtractor" + } + ] + }, + "file_extractor": { + "description": "Responsible for fetching the content of the file. If not defined, the assumption is that the whole response body is the file content", + "anyOf": [ + { + "$ref": "#/definitions/DpathExtractor" + }, + { + "$ref": "#/definitions/CustomRecordExtractor" + } + ] + }, + "filename_extractor": { + "description": "Defines the name to store the file. Stream name is automatically added to the file path. File unique ID can be used to avoid overwriting files. Random UUID will be used if the extractor is not provided.", + "type": "string", + "interpolation_context": ["config", "record"], + "examples": [ + "{{ record.id }}/{{ record.file_name }}/", + "{{ record.id }}_{{ record.file_name }}/" + ] + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "$parameters": { + "type": "object", + "additional_properties": true + } + } + }, + "HTTPAPIBudget": { + "title": "HTTP API Budget", + "description": "Defines how many requests can be made to the API in a given time frame. 
`HTTPAPIBudget` extracts the remaining call count and the reset time from HTTP response headers using the header names provided by `ratelimit_remaining_header` and `ratelimit_reset_header`. Only requests using `HttpRequester` are rate-limited; custom components that bypass `HttpRequester` are not covered by this budget.\n", + "type": "object", + "required": ["type", "policies"], + "properties": { + "type": { + "type": "string", + "enum": ["HTTPAPIBudget"] + }, + "policies": { + "title": "Policies", + "description": "List of call rate policies that define how many calls are allowed.", + "type": "array", + "items": { + "anyOf": [ + { + "$ref": "#/definitions/FixedWindowCallRatePolicy" + }, + { + "$ref": "#/definitions/MovingWindowCallRatePolicy" + }, + { + "$ref": "#/definitions/UnlimitedCallRatePolicy" + } + ] + } + }, + "ratelimit_reset_header": { + "title": "Rate Limit Reset Header", + "description": "The HTTP response header name that indicates when the rate limit resets.", + "type": "string", + "default": "ratelimit-reset" + }, + "ratelimit_remaining_header": { + "title": "Rate Limit Remaining Header", + "description": "The HTTP response header name that indicates the number of remaining allowed calls.", + "type": "string", + "default": "ratelimit-remaining" + }, + "status_codes_for_ratelimit_hit": { + "title": "Status Codes for Rate Limit Hit", + "description": "List of HTTP status codes that indicate a rate limit has been hit.", + "type": "array", + "items": { + "type": "integer" + }, + "default": [429] + } + }, + "additionalProperties": true + }, + "FixedWindowCallRatePolicy": { + "title": "Fixed Window Call Rate Policy", + "description": "A policy that allows a fixed number of calls within a specific time window.", + "type": "object", + "required": ["type", "period", "call_limit", "matchers"], + "properties": { + "type": { + "type": "string", + "enum": ["FixedWindowCallRatePolicy"] + }, + "period": { + "title": "Period", + "description": "The time interval for the rate limit window.", + "type": "string" + }, + "call_limit": { + "title": "Call Limit", + "description": "The maximum number of calls allowed within the period.", + "type": "integer" + }, + "matchers": { + "title": "Matchers", + "description": "List of matchers that define which requests this policy applies to.", + "type": "array", + "items": { + "$ref": "#/definitions/HttpRequestRegexMatcher" + } + } + }, + "additionalProperties": true + }, + "MovingWindowCallRatePolicy": { + "title": "Moving Window Call Rate Policy", + "description": "A policy that allows a fixed number of calls within a moving time window.", + "type": "object", + "required": ["type", "rates", "matchers"], + "properties": { + "type": { + "type": "string", + "enum": ["MovingWindowCallRatePolicy"] + }, + "rates": { + "title": "Rates", + "description": "List of rates that define the call limits for different time intervals.", + "type": "array", + "items": { + "$ref": "#/definitions/Rate" + } + }, + "matchers": { + "title": "Matchers", + "description": "List of matchers that define which requests this policy applies to.", + "type": "array", + "items": { + "$ref": "#/definitions/HttpRequestRegexMatcher" + } + } + }, + "additionalProperties": true + }, + "UnlimitedCallRatePolicy": { + "title": "Unlimited Call Rate Policy", + "description": "A policy that allows unlimited calls for specific requests.", + "type": "object", + "required": ["type", "matchers"], + "properties": { + "type": { + "type": "string", + "enum": ["UnlimitedCallRatePolicy"] + }, + 
"matchers": { + "title": "Matchers", + "description": "List of matchers that define which requests this policy applies to.", + "type": "array", + "items": { + "$ref": "#/definitions/HttpRequestRegexMatcher" + } + } + }, + "additionalProperties": true + }, + "Rate": { + "title": "Rate", + "description": "Defines a rate limit with a specific number of calls allowed within a time interval.", + "type": "object", + "required": ["limit", "interval"], + "properties": { + "limit": { + "title": "Limit", + "description": "The maximum number of calls allowed within the interval.", + "anyOf": [ + { + "type": "integer" + }, + { + "type": "string" + } + ], + "interpolation_context": ["config"] + }, + "interval": { + "title": "Interval", + "description": "The time interval for the rate limit.", + "type": "string", + "examples": ["PT1H", "P1D"] + } + }, + "additionalProperties": true + }, + "HttpRequestRegexMatcher": { + "title": "HTTP Request Matcher", + "description": "Matches HTTP requests based on method, base URL, URL path pattern, query parameters, and headers. Use `url_base` to specify the scheme and host (without trailing slash) and `url_path_pattern` to apply a regex to the request path.\n", + "type": "object", + "properties": { + "method": { + "title": "Method", + "description": "The HTTP method to match (e.g., GET, POST).", + "type": "string" + }, + "url_base": { + "title": "URL Base", + "description": "The base URL (scheme and host, e.g. \"https://api.example.com\") to match.", + "type": "string" + }, + "url_path_pattern": { + "title": "URL Path Pattern", + "description": "A regular expression pattern to match the URL path.", + "type": "string" + }, + "params": { + "title": "Parameters", + "description": "The query parameters to match.", + "type": "object", + "additionalProperties": true + }, + "headers": { + "title": "Headers", + "description": "The headers to match.", + "type": "object", + "additionalProperties": true + } + }, + "additionalProperties": true + }, + "DefaultErrorHandler": { + "title": "Default Error Handler", + "description": "Component defining how to handle errors. Default behavior includes only retrying server errors (HTTP 5XX) and too many requests (HTTP 429) with an exponential backoff.", + "type": "object", + "required": ["type"], + "properties": { + "type": { + "type": "string", + "enum": ["DefaultErrorHandler"] + }, + "backoff_strategies": { + "title": "Backoff Strategies", + "description": "List of backoff strategies to use to determine how long to wait before retrying a retryable request.", + "type": "array", + "items": { + "anyOf": [ + { + "$ref": "#/definitions/ConstantBackoffStrategy" + }, + { + "$ref": "#/definitions/ExponentialBackoffStrategy" + }, + { + "$ref": "#/definitions/WaitTimeFromHeader" + }, + { + "$ref": "#/definitions/WaitUntilTimeFromHeader" + }, + { + "$ref": "#/definitions/CustomBackoffStrategy" + } + ] + } + }, + "max_retries": { + "title": "Max Retry Count", + "description": "The maximum number of time to retry a retryable request before giving up and failing.", + "type": "integer", + "default": 5, + "examples": [5, 0, 10] + }, + "response_filters": { + "title": "Response Filters", + "description": "List of response filters to iterate on when deciding how to handle an error. 
When using an array of multiple filters, the filters will be applied sequentially and the response will be selected if it matches any of the filter's predicate.", + "type": "array", + "items": { + "$ref": "#/definitions/HttpResponseFilter" + } + }, + "$parameters": { + "type": "object", + "additional_properties": true + } + } + }, + "DefaultPaginator": { + "title": "Default Paginator", + "description": "Default pagination implementation to request pages of results with a fixed size until the pagination strategy no longer returns a next_page_token.", + "type": "object", + "required": ["type", "pagination_strategy"], + "properties": { + "type": { + "type": "string", + "enum": ["DefaultPaginator"] + }, + "pagination_strategy": { + "title": "Pagination Strategy", + "description": "Strategy defining how records are paginated.", + "anyOf": [ + { + "$ref": "#/definitions/PageIncrement" + }, + { + "$ref": "#/definitions/OffsetIncrement" + }, + { + "$ref": "#/definitions/CursorPagination" + }, + { + "$ref": "#/definitions/CustomPaginationStrategy" + } + ] + }, + "page_size_option": { + "title": "Inject Page Size Into Outgoing HTTP Request", + "$ref": "#/definitions/RequestOption" + }, + "page_token_option": { + "title": "Inject Page Token Into Outgoing HTTP Request", + "anyOf": [ + { + "$ref": "#/definitions/RequestOption" + }, + { + "$ref": "#/definitions/RequestPath" + } + ] + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "DpathExtractor": { + "title": "Dpath Extractor", + "description": "Record extractor that searches a decoded response over a path defined as an array of fields.", + "type": "object", + "required": ["type", "field_path"], + "properties": { + "type": { + "type": "string", + "enum": ["DpathExtractor"] + }, + "field_path": { + "title": "Field Path", + "description": "List of potentially nested fields describing the full path of the field to extract. Use \"*\" to extract all values from an array. See more info in the [docs](https://docs.airbyte.com/connector-development/config-based/understanding-the-yaml-file/record-selector).", + "type": "array", + "items": { + "type": "string" + }, + "interpolation_context": ["config"], + "examples": [ + ["data"], + ["data", "records"], + ["data", "{{ parameters.name }}"], + ["data", "*", "record"] + ] + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "ResponseToFileExtractor": { + "title": "CSV To File Extractor", + "description": "A record extractor designed for handling large responses that may exceed memory limits (to prevent OOM issues). It downloads a CSV file to disk, reads the data from disk, and deletes the file once it has been fully processed.", + "type": "object", + "required": ["type"], + "properties": { + "type": { + "type": "string", + "enum": ["ResponseToFileExtractor"] + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "ExponentialBackoffStrategy": { + "title": "Exponential Backoff", + "description": "Backoff strategy with an exponential backoff interval. 
The interval is defined as factor * 2^attempt_count.", + "type": "object", + "required": ["type"], + "properties": { + "type": { + "type": "string", + "enum": ["ExponentialBackoffStrategy"] + }, + "factor": { + "title": "Factor", + "description": "Multiplicative constant applied on each retry.", + "anyOf": [ + { + "type": "number" + }, + { + "type": "string" + } + ], + "default": 5, + "interpolation_context": ["config"], + "examples": [5, 5.5, "10"] + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "GroupByKeyMergeStrategy": { + "title": "Group by Key", + "description": "Record merge strategy that combines records according to fields on the record.", + "required": ["type", "key"], + "properties": { + "type": { + "type": "string", + "enum": ["GroupByKeyMergeStrategy"] + }, + "key": { + "title": "Key", + "description": "The name of the field on the record whose value will be used to group properties that were retrieved through multiple API requests.", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ], + "examples": ["id", ["parent_id", "end_date"]] + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "SessionTokenAuthenticator": { + "title": "Session Token Authenticator", + "description": "Authenticator for requests using the session token as an API key that's injected into the request.", + "type": "object", + "required": [ + "type", + "login_requester", + "session_token_path", + "request_authentication" + ], + "properties": { + "type": { + "type": "string", + "enum": ["SessionTokenAuthenticator"] + }, + "login_requester": { + "title": "Login Requester", + "description": "Description of the request to perform to obtain a session token to perform data requests. The response body is expected to be a JSON object with a session token property.", + "$ref": "#/definitions/HttpRequester", + "examples": [ + { + "type": "HttpRequester", + "url_base": "https://my_api.com", + "path": "/login", + "authenticator": { + "type": "BasicHttpAuthenticator", + "username": "{{ config.username }}", + "password": "{{ config.password }}" + } + } + ] + }, + "session_token_path": { + "title": "Session Token Path", + "description": "The path in the response body returned from the login requester to the session token.", + "examples": [["access_token"], ["result", "token"]], + "type": "array", + "items": { + "type": "string" + } + }, + "expiration_duration": { + "title": "Expiration Duration", + "description": "The duration in ISO 8601 duration notation after which the session token expires, starting from the time it was obtained. 
Omitting it will result in the session token being refreshed for every request.\n * **PT1H**: 1 hour\n * **P1D**: 1 day\n * **P1W**: 1 week\n * **P1M**: 1 month\n * **P1Y**: 1 year\n", + "type": "string", + "examples": ["PT1H", "P1D"] + }, + "request_authentication": { + "title": "Data Request Authentication", + "description": "Authentication method to use for requests sent to the API, specifying how to inject the session token.", + "anyOf": [ + { + "$ref": "#/definitions/SessionTokenRequestApiKeyAuthenticator" + }, + { + "$ref": "#/definitions/SessionTokenRequestBearerAuthenticator" + } + ] + }, + "decoder": { + "title": "Decoder", + "description": "Component used to decode the response.", + "anyOf": [ + { + "$ref": "#/definitions/JsonDecoder" + }, + { + "$ref": "#/definitions/XmlDecoder" + } + ] + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "SessionTokenRequestApiKeyAuthenticator": { + "type": "object", + "title": "API Key Authenticator", + "description": "Authenticator for requests using the session token as an API key that's injected into the request.", + "required": ["type", "inject_into"], + "properties": { + "type": { + "enum": ["ApiKey"] + }, + "inject_into": { + "title": "Inject API Key Into Outgoing HTTP Request", + "description": "Configure how the API Key will be sent in requests to the source API.", + "$ref": "#/definitions/RequestOption", + "examples": [ + { + "inject_into": "header", + "field_name": "Authorization" + }, + { + "inject_into": "request_parameter", + "field_name": "authKey" + } + ] + } + } + }, + "SessionTokenRequestBearerAuthenticator": { + "title": "Bearer Authenticator", + "description": "Authenticator for requests using the session token as a standard bearer token.", + "required": ["type"], + "properties": { + "type": { + "enum": ["Bearer"] + } + } + }, + "HttpRequester": { + "title": "HTTP Requester", + "description": "Requester submitting HTTP requests and extracting records from the response.", + "type": "object", + "required": ["type"], + "properties": { + "type": { + "type": "string", + "enum": ["HttpRequester"] + }, + "url_base": { + "deprecated": true, + "deprecation_message": "Use `url` field instead.", + "title": "API Base URL", + "description": "Deprecated, use the `url` instead. Base URL of the API source. Do not put sensitive information (e.g. API tokens) into this field - Use the Authenticator component for this.", + "linkable": true, + "type": "string", + "interpolation_context": [ + "config", + "next_page_token", + "stream_interval", + "stream_partition", + "stream_slice", + "creation_response", + "polling_response", + "download_target" + ], + "examples": [ + "https://connect.squareup.com/v2", + "{{ config['base_url'] or 'https://app.posthog.com'}}/api", + "https://connect.squareup.com/v2/quotes/{{ stream_partition['id'] }}/quote_line_groups", + "https://example.com/api/v1/resource/{{ next_page_token['id'] }}" + ] + }, + "url": { + "title": "API Endpoint URL", + "description": "The URL of the source API endpoint. Do not put sensitive information (e.g. 
API tokens) into this field - Use the Authenticator component for this.", + "type": "string", + "interpolation_context": [ + "config", + "next_page_token", + "stream_interval", + "stream_partition", + "stream_slice", + "creation_response", + "polling_response", + "download_target" + ], + "examples": [ + "https://connect.squareup.com/v2", + "{{ config['url'] or 'https://app.posthog.com'}}/api", + "https://connect.squareup.com/v2/quotes/{{ stream_partition['id'] }}/quote_line_groups", + "https://example.com/api/v1/resource/{{ next_page_token['id'] }}" + ] + }, + "path": { + "deprecated": true, + "deprecation_message": "Use `url` field instead.", + "title": "URL Path", + "description": "Deprecated, use the `url` instead. Path the specific API endpoint that this stream represents. Do not put sensitive information (e.g. API tokens) into this field - Use the Authenticator component for this.", + "type": "string", + "interpolation_context": [ + "config", + "next_page_token", + "stream_interval", + "stream_partition", + "stream_slice", + "creation_response", + "polling_response", + "download_target" + ], + "examples": [ + "/products", + "/quotes/{{ stream_partition['id'] }}/quote_line_groups", + "/trades/{{ config['symbol_id'] }}/history" + ] + }, + "http_method": { + "title": "HTTP Method", + "description": "The HTTP method used to fetch data from the source (can be GET or POST).", + "type": "string", + "enum": ["GET", "POST"], + "default": "GET", + "examples": ["GET", "POST"] + }, + "authenticator": { + "title": "Authenticator", + "linkable": true, + "description": "Authentication method to use for requests sent to the API.", + "anyOf": [ + { + "$ref": "#/definitions/ApiKeyAuthenticator" + }, + { + "$ref": "#/definitions/BasicHttpAuthenticator" + }, + { + "$ref": "#/definitions/BearerAuthenticator" + }, + { + "$ref": "#/definitions/OAuthAuthenticator" + }, + { + "$ref": "#/definitions/JwtAuthenticator" + }, + { + "$ref": "#/definitions/SessionTokenAuthenticator" + }, + { + "$ref": "#/definitions/SelectiveAuthenticator" + }, + { + "$ref": "#/definitions/CustomAuthenticator" + }, + { + "$ref": "#/definitions/NoAuth" + }, + { + "$ref": "#/definitions/LegacySessionTokenAuthenticator" + } + ] + }, + "fetch_properties_from_endpoint": { + "deprecated": true, + "deprecation_message": "Use `query_properties` field instead.", + "title": "Fetch Properties from Endpoint", + "description": "Allows for retrieving a dynamic set of properties from an API endpoint which can be injected into outbound request using the stream_partition.extra_fields.", + "$ref": "#/definitions/PropertiesFromEndpoint" + }, + "query_properties": { + "title": "Query Properties", + "description": "For APIs that require explicit specification of the properties to query for, this component will take a static or dynamic set of properties (which can be optionally split into chunks) and allow them to be injected into an outbound request by accessing stream_partition.extra_fields.", + "$ref": "#/definitions/QueryProperties" + }, + "request_parameters": { + "title": "Query Parameters", + "description": "Specifies the query parameters that should be set on an outgoing HTTP request given the inputs.", + "linkable": true, + "anyOf": [ + { + "type": "object", + "title": "Key/Value Pairs", + "additionalProperties": { + "anyOf": [ + { + "type": "string" + }, + { + "$ref": "#/definitions/QueryProperties" + } + ] + } + }, + { + "type": "string", + "title": "Interpolated Value" + } + ], + "interpolation_context": [ + "next_page_token", + 
"stream_interval", + "stream_partition", + "stream_slice" + ], + "examples": [ + { + "unit": "day" + }, + { + "query": "last_event_time BETWEEN TIMESTAMP \"{{ stream_interval.start_time }}\" AND TIMESTAMP \"{{ stream_interval.end_time }}\"" + }, + { + "searchIn": "{{ ','.join(config.get('search_in', [])) }}" + }, + { + "sort_by[asc]": "updated_at" + } + ] + }, + "request_headers": { + "title": "Request Headers", + "description": "Return any non-auth headers. Authentication headers will overwrite any overlapping headers returned from this method.", + "linkable": true, + "anyOf": [ + { + "type": "object", + "title": "Key/Value Pairs", + "additionalProperties": { + "type": "string" + } + }, + { + "type": "string", + "title": "Interpolated Value" + } + ], + "interpolation_context": [ + "next_page_token", + "stream_interval", + "stream_partition", + "stream_slice" + ], + "examples": [ + { + "Output-Format": "JSON" + }, + { + "Version": "{{ config['version'] }}" + } + ] + }, + "request_body_data": { + "deprecated": true, + "deprecation_message": "Use `request_body` field instead.", + "title": "Request Body Payload (Non-JSON)", + "description": "Specifies how to populate the body of the request with a non-JSON payload. Plain text will be sent as is, whereas objects will be converted to a urlencoded form.", + "anyOf": [ + { + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + { + "type": "string" + } + ], + "interpolation_context": [ + "next_page_token", + "stream_interval", + "stream_partition", + "stream_slice" + ], + "examples": [ + "[{\"clause\": {\"type\": \"timestamp\", \"operator\": 10, \"parameters\":\n [{\"value\": {{ stream_interval['start_time'] | int * 1000 }} }]\n }, \"orderBy\": 1, \"columnName\": \"Timestamp\"}]/\n" + ] + }, + "request_body_json": { + "deprecated": true, + "deprecation_message": "Use `request_body` field instead.", + "title": "Request Body JSON Payload", + "description": "Specifies how to populate the body of the request with a JSON payload. Can contain nested objects.", + "anyOf": [ + { + "type": "object", + "additionalProperties": true + }, + { + "type": "string" + } + ], + "interpolation_context": [ + "next_page_token", + "stream_interval", + "stream_partition", + "stream_slice" + ], + "examples": [ + { + "sort_order": "ASC", + "sort_field": "CREATED_AT" + }, + { + "key": "{{ config['value'] }}" + }, + { + "sort": { + "field": "updated_at", + "order": "ascending" + } + } + ] + }, + "request_body": { + "title": "Request Body", + "description": "Specifies how to populate the body of the request with a payload. Can contain nested objects.", + "linkable": true, + "anyOf": [ + { + "$ref": "#/definitions/RequestBodyPlainText" + }, + { + "$ref": "#/definitions/RequestBodyUrlEncodedForm" + }, + { + "$ref": "#/definitions/RequestBodyJsonObject" + }, + { + "$ref": "#/definitions/RequestBodyGraphQL" + } + ], + "interpolation_context": [ + "next_page_token", + "stream_interval", + "stream_partition", + "stream_slice" + ] + }, + "error_handler": { + "title": "Error Handler", + "description": "Error handler component that defines how to handle errors.", + "linkable": true, + "anyOf": [ + { + "$ref": "#/definitions/DefaultErrorHandler" + }, + { + "$ref": "#/definitions/CompositeErrorHandler" + }, + { + "$ref": "#/definitions/CustomErrorHandler" + } + ] + }, + "use_cache": { + "title": "Use Cache", + "description": "Enables stream requests caching. 
This field is automatically set by the CDK.", + "type": "boolean", + "default": false + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "HttpResponseFilter": { + "description": "A filter that is used to select on properties of the HTTP response received. When used with additional filters, a response will be selected if it matches any of the filter's criteria.", + "type": "object", + "required": ["type"], + "properties": { + "type": { + "type": "string", + "enum": ["HttpResponseFilter"] + }, + "action": { + "title": "Action", + "description": "Action to execute if a response matches the filter.", + "type": "string", + "enum": ["SUCCESS", "FAIL", "RETRY", "IGNORE", "RATE_LIMITED"], + "examples": ["SUCCESS", "FAIL", "RETRY", "IGNORE", "RATE_LIMITED"] + }, + "failure_type": { + "title": "Failure Type", + "description": "Failure type of traced exception if a response matches the filter.", + "type": "string", + "enum": ["system_error", "config_error", "transient_error"], + "examples": ["system_error", "config_error", "transient_error"] + }, + "error_message": { + "title": "Error Message", + "description": "Error Message to display if the response matches the filter.", + "type": "string", + "interpolation_context": ["config", "response", "headers"] + }, + "error_message_contains": { + "title": "Error Message Substring", + "description": "Match the response if its error message contains the substring.", + "type": "string", + "example": ["This API operation is not enabled for this site"] + }, + "http_codes": { + "title": "HTTP Codes", + "description": "Match the response if its HTTP code is included in this list.", + "type": "array", + "items": { + "type": "integer" + }, + "uniqueItems": true, + "examples": [[420, 429], [500]] + }, + "predicate": { + "title": "Predicate", + "description": "Match the response if the predicate evaluates to true.", + "type": "string", + "interpolation_context": ["response", "headers"], + "examples": [ + "{{ 'Too much requests' in response }}", + "{{ 'error_code' in response and response['error_code'] == 'ComplexityException' }}" + ] + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "ComplexFieldType": { + "title": "Schema Field Type", + "description": "(This component is experimental. Use at your own risk.) Represents a complex field type.", + "type": "object", + "required": ["field_type"], + "properties": { + "field_type": { + "type": "string" + }, + "items": { + "anyOf": [ + { + "type": "string" + }, + { + "$ref": "#/definitions/ComplexFieldType" + } + ] + } + } + }, + "TypesMap": { + "title": "Types Map", + "description": "(This component is experimental. Use at your own risk.) Represents a mapping between a current type and its corresponding target type.", + "type": "object", + "required": ["target_type", "current_type"], + "properties": { + "target_type": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "$ref": "#/definitions/ComplexFieldType" + } + ] + }, + "current_type": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "condition": { + "type": "string", + "interpolation_context": ["raw_schema"] + } + } + }, + "SchemaTypeIdentifier": { + "title": "Schema Type Identifier", + "description": "(This component is experimental. Use at your own risk.) 
Identifies schema details for dynamic schema extraction and processing.", + "type": "object", + "required": ["key_pointer"], + "properties": { + "type": { + "type": "string", + "enum": ["SchemaTypeIdentifier"] + }, + "schema_pointer": { + "title": "Schema Path", + "description": "List of nested fields defining the schema field path to extract. Defaults to [].", + "type": "array", + "default": [], + "items": { + "type": "string" + }, + "interpolation_context": ["config"] + }, + "key_pointer": { + "title": "Key Path", + "description": "List of potentially nested fields describing the full path of the field key to extract.", + "type": "array", + "items": { + "type": "string" + }, + "interpolation_context": ["config"] + }, + "type_pointer": { + "title": "Type Path", + "description": "List of potentially nested fields describing the full path of the field type to extract.", + "type": "array", + "items": { + "type": "string" + }, + "interpolation_context": ["config"] + }, + "types_mapping": { + "type": "array", + "items": { + "$ref": "#/definitions/TypesMap" + } + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "DynamicSchemaLoader": { + "title": "Dynamic Schema Loader", + "description": "(This component is experimental. Use at your own risk.) Loads a schema by extracting data from retrieved records.", + "type": "object", + "required": ["type", "retriever", "schema_type_identifier"], + "properties": { + "type": { + "type": "string", + "enum": ["DynamicSchemaLoader"] + }, + "retriever": { + "title": "Retriever", + "description": "Component used to coordinate how records are extracted across stream slices and request pages.", + "anyOf": [ + { + "$ref": "#/definitions/SimpleRetriever" + }, + { + "$ref": "#/definitions/AsyncRetriever" + }, + { + "$ref": "#/definitions/CustomRetriever" + } + ] + }, + "schema_filter": { + "title": "Schema Filter", + "description": "Responsible for filtering fields to be added to json schema.", + "anyOf": [ + { + "$ref": "#/definitions/RecordFilter" + }, + { + "$ref": "#/definitions/CustomRecordFilter" + } + ] + }, + "schema_transformations": { + "title": "Schema Transformations", + "description": "A list of transformations to be applied to the schema.", + "type": "array", + "items": { + "anyOf": [ + { + "$ref": "#/definitions/AddFields" + }, + { + "$ref": "#/definitions/RemoveFields" + }, + { + "$ref": "#/definitions/KeysToLower" + }, + { + "$ref": "#/definitions/KeysToSnakeCase" + }, + { + "$ref": "#/definitions/FlattenFields" + }, + { + "$ref": "#/definitions/DpathFlattenFields" + }, + { + "$ref": "#/definitions/KeysReplace" + }, + { + "$ref": "#/definitions/CustomTransformation" + } + ] + } + }, + "schema_type_identifier": { + "$ref": "#/definitions/SchemaTypeIdentifier" + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "InlineSchemaLoader": { + "title": "Inline Schema Loader", + "description": "Loads a schema that is defined directly in the manifest file.", + "type": "object", + "required": ["type"], + "properties": { + "type": { + "type": "string", + "enum": ["InlineSchemaLoader"] + }, + "schema": { + "title": "Schema", + "description": "Describes a streams' schema. 
Refer to the Data Types documentation for more details on which types are valid.", + "type": "object", + "additionalProperties": true + } + } + }, + "JsonFileSchemaLoader": { + "deprecated": true, + "deprecation_message": "Use `InlineSchemaLoader` instead.", + "title": "Json File Schema Loader", + "description": "Loads the schema from a json file.", + "type": "object", + "required": ["type"], + "properties": { + "type": { + "type": "string", + "enum": ["JsonFileSchemaLoader"] + }, + "file_path": { + "title": "File Path", + "description": "Path to the JSON file defining the schema. The path is relative to the connector module's root.", + "type": "string", + "interpolation_context": ["config"], + "example": ["./schemas/users.json"] + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "JsonDecoder": { + "title": "JSON", + "description": "Select 'JSON' if the response is formatted as a JSON object.", + "type": "object", + "required": ["type"], + "properties": { + "type": { + "type": "string", + "enum": ["JsonDecoder"] + } + } + }, + "JsonlDecoder": { + "title": "JSON Lines", + "description": "Select 'JSON Lines' if the response consists of JSON objects separated by new lines ('\\n') in JSONL format.", + "type": "object", + "required": ["type"], + "properties": { + "type": { + "type": "string", + "enum": ["JsonlDecoder"] + } + } + }, + "KeysToLower": { + "title": "Keys to Lower Case", + "description": "A transformation that renames all keys to lower case.", + "type": "object", + "required": ["type"], + "properties": { + "type": { + "type": "string", + "enum": ["KeysToLower"] + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "KeysToSnakeCase": { + "title": "Keys to Snake Case", + "description": "A transformation that renames all keys to snake case.", + "type": "object", + "required": ["type"], + "properties": { + "type": { + "type": "string", + "enum": ["KeysToSnakeCase"] + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "FlattenFields": { + "title": "Flatten Fields", + "description": "A transformation that flattens records to a single-level format.", + "type": "object", + "required": ["type"], + "properties": { + "type": { + "type": "string", + "enum": ["FlattenFields"] + }, + "flatten_lists": { + "title": "Flatten Lists", + "description": "Whether to flatten lists or leave them as is. Default is True.", + "type": "boolean", + "default": true + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "KeyTransformation": { + "title": "Transformation to apply to object keys extracted by Dpath Flatten Fields", + "type": "object", + "required": ["type"], + "properties": { + "type": { + "type": "string", + "enum": ["KeyTransformation"] + }, + "prefix": { + "title": "Key Prefix", + "description": "Prefix to add to object keys. If not provided, the original keys remain unchanged.", + "type": "string", + "examples": ["flattened_"] + }, + "suffix": { + "title": "Key Suffix", + "description": "Suffix to add to object keys. If not provided, the original keys remain unchanged.", + "type": "string", + "examples": ["_flattened"] + } + } + }, + "DpathFlattenFields": { + "title": "Dpath Flatten Fields", + "description": "A transformation that flattens field values to the top of the record.", + "type": "object", + "required": ["type", "field_path"], + "properties": { + "type": { + "type": "string", + "enum": ["DpathFlattenFields"] + }, + "field_path": { + "title": "Field Path", + "description": "A path to the field that needs to be flattened.", + "type": "array", + "items": { + "type": "string" + }, + "examples": [["data"], ["data", "*", "field"]] + }, + "delete_origin_value": { + "title": "Delete Origin Value", + "description": "Whether to delete the origin value or keep it. Default is False.", + "type": "boolean" + }, + "replace_record": { + "title": "Replace Origin Record", + "description": "Whether to replace the origin record or not. Default is False.", + "type": "boolean" + }, + "key_transformation": { + "title": "Key transformation", + "description": "Transformation for object keys. If not provided, the original keys will be used.", + "type": "object", + "$ref": "#/definitions/KeyTransformation" + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "KeysReplace": { + "title": "Keys Replace", + "description": "A transformation that replaces symbols in keys.", + "type": "object", + "required": ["type", "old", "new"], + "properties": { + "type": { + "type": "string", + "enum": ["KeysReplace"] + }, + "old": { + "type": "string", + "title": "Old value", + "description": "Old value to replace.", + "examples": [ + " ", + "{{ record.id }}", + "{{ config['id'] }}", + "{{ stream_slice['id'] }}" + ], + "interpolation_context": ["config", "record", "stream_slice"] + }, + "new": { + "type": "string", + "title": "New value", + "description": "New value to set.", + "examples": [ + "_", + "{{ record.id }}", + "{{ config['id'] }}", + "{{ stream_slice['id'] }}" + ], + "interpolation_context": ["config", "record", "stream_slice"] + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "IterableDecoder": { + "title": "Iterable", + "description": "Select 'Iterable' if the response consists of strings separated by new lines (`\\n`). The string will then be wrapped into a JSON object with the `record` key.", + "type": "object", + "required": ["type"], + "properties": { + "type": { + "type": "string", + "enum": ["IterableDecoder"] + } + } + }, + "XmlDecoder": { + "title": "XML", + "description": "Select 'XML' if the response consists of XML-formatted data.", + "type": "object", + "required": ["type"], + "properties": { + "type": { + "type": "string", + "enum": ["XmlDecoder"] + } + } + }, + "CustomDecoder": { + "title": "Custom Decoder", + "description": "Use this to implement custom decoder logic.", + "type": "object", + "additionalProperties": true, + "required": ["type", "class_name"], + "properties": { + "type": { + "type": "string", + "enum": ["CustomDecoder"] + }, + "class_name": { + "title": "Class Name", + "description": "Fully-qualified name of the class that will be implementing the custom decoding. Has to be a subclass of Decoder. 
The format is `source_..`.", + "type": "string", + "additionalProperties": true, + "examples": ["source_amazon_ads.components.GzipJsonlDecoder"] + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "ZipfileDecoder": { + "title": "ZIP File", + "description": "Select 'ZIP file' for response data that is returned as a zipfile. Requires specifying an inner data type/decoder to parse the unzipped data.", + "type": "object", + "additionalProperties": true, + "required": ["type", "decoder"], + "properties": { + "type": { + "type": "string", + "enum": ["ZipfileDecoder"] + }, + "decoder": { + "title": "Parser", + "description": "Parser to parse the decompressed data from the zipfile(s).", + "anyOf": [ + { + "$ref": "#/definitions/CsvDecoder" + }, + { + "$ref": "#/definitions/GzipDecoder" + }, + { + "$ref": "#/definitions/JsonDecoder" + }, + { + "$ref": "#/definitions/JsonlDecoder" + } + ] + } + } + }, + "ListPartitionRouter": { + "title": "List Partition Router", + "description": "A Partition router that specifies a list of attributes where each attribute describes a portion of the complete data set for a stream. During a sync, each value is iterated over and can be used as input to outbound API requests.", + "type": "object", + "required": ["type", "cursor_field", "values"], + "properties": { + "type": { + "type": "string", + "enum": ["ListPartitionRouter"] + }, + "cursor_field": { + "title": "Current Partition Value Identifier", + "description": "While iterating over list values, the name of field used to reference a list value. The partition value can be accessed with string interpolation. e.g. \"{{ stream_partition['my_key'] }}\" where \"my_key\" is the value of the cursor_field.", + "type": "string", + "interpolation_context": ["config"], + "examples": ["section", "{{ config['section_key'] }}"] + }, + "values": { + "title": "Partition Values", + "description": "The list of attributes being iterated over and used as input for the requests made to the source API.", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ], + "interpolation_context": ["config"], + "examples": [ + ["section_a", "section_b", "section_c"], + "{{ config['sections'] }}" + ] + }, + "request_option": { + "title": "Inject Partition Value Into Outgoing HTTP Request", + "description": "A request option describing where the list value should be injected into and under what field name if applicable.", + "$ref": "#/definitions/RequestOption" + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "MinMaxDatetime": { + "title": "Min-Max Datetime", + "description": "Compares the provided date against optional minimum or maximum times. The max_datetime serves as the ceiling and will be returned when datetime exceeds it. The min_datetime serves as the floor.", + "type": "object", + "required": ["type", "datetime"], + "properties": { + "type": { + "type": "string", + "enum": ["MinMaxDatetime"] + }, + "datetime": { + "title": "Datetime", + "description": "Datetime value.", + "type": "string", + "interpolation_context": ["config"], + "examples": [ + "2021-01-01", + "2021-01-01T00:00:00+00:00", + "{{ config['start_time'] }}", + "{{ now_utc().strftime('%Y-%m-%dT%H:%M:%SZ') }}" + ] + }, + "datetime_format": { + "title": "Datetime Format", + "description": "Format of the datetime value. Defaults to \"%Y-%m-%dT%H:%M:%S.%f%z\" if left empty. Use placeholders starting with \"%\" to describe the format the API is using. 
The following placeholders are available:\n * **%s**: Epoch unix timestamp - `1686218963`\n * **%s_as_float**: Epoch unix timestamp in seconds as float with microsecond precision - `1686218963.123456`\n * **%ms**: Epoch unix timestamp - `1686218963123`\n * **%a**: Weekday (abbreviated) - `Sun`\n * **%A**: Weekday (full) - `Sunday`\n * **%w**: Weekday (decimal) - `0` (Sunday), `6` (Saturday)\n * **%d**: Day of the month (zero-padded) - `01`, `02`, ..., `31`\n * **%b**: Month (abbreviated) - `Jan`\n * **%B**: Month (full) - `January`\n * **%m**: Month (zero-padded) - `01`, `02`, ..., `12`\n * **%y**: Year (without century, zero-padded) - `00`, `01`, ..., `99`\n * **%Y**: Year (with century) - `0001`, `0002`, ..., `9999`\n * **%H**: Hour (24-hour, zero-padded) - `00`, `01`, ..., `23`\n * **%I**: Hour (12-hour, zero-padded) - `01`, `02`, ..., `12`\n * **%p**: AM/PM indicator\n * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`\n * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`\n * **%f**: Microsecond (zero-padded to 6 digits) - `000000`, `000001`, ..., `999999`\n * **%_ms**: Millisecond (zero-padded to 3 digits) - `000`, `001`, ..., `999`\n * **%z**: UTC offset - `(empty)`, `+0000`, `-04:00`\n * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`\n * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`\n * **%U**: Week number of the year (Sunday as first day) - `00`, `01`, ..., `53`\n * **%W**: Week number of the year (Monday as first day) - `00`, `01`, ..., `53`\n * **%c**: Date and time representation - `Tue Aug 16 21:30:00 1988`\n * **%x**: Date representation - `08/16/1988`\n * **%X**: Time representation - `21:30:00`\n * **%%**: Literal '%' character\n\n Some placeholders depend on the locale of the underlying system - in most cases this locale is configured as en/US. For more information see the [Python documentation](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).\n", + "type": "string", + "default": "", + "examples": ["%Y-%m-%dT%H:%M:%S.%f%z", "%Y-%m-%d", "%s"] + }, + "max_datetime": { + "title": "Max Datetime", + "description": "Ceiling applied on the datetime value. Must be formatted with the datetime_format field.", + "type": "string", + "interpolation_context": ["config"], + "examples": ["2021-01-01T00:00:00Z", "2021-01-01"] + }, + "min_datetime": { + "title": "Min Datetime", + "description": "Floor applied on the datetime value. 
Must be formatted with the datetime_format field.", + "type": "string", + "interpolation_context": ["config"], + "examples": ["2010-01-01T00:00:00Z", "2010-01-01"] + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "NoAuth": { + "title": "No Authentication", + "description": "Authenticator for requests requiring no authentication.", + "type": "object", + "required": ["type"], + "properties": { + "type": { + "type": "string", + "enum": ["NoAuth"] + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "NoPagination": { + "title": "No Pagination", + "description": "Pagination implementation that never returns a next page.", + "type": "object", + "required": ["type"], + "properties": { + "type": { + "type": "string", + "enum": ["NoPagination"] + } + } + }, + "OAuthConfigSpecification": { + "title": "OAuth Config Specification", + "description": "Specification describing how an 'advanced' Auth flow would need to function.", + "type": "object", + "additionalProperties": true, + "properties": { + "oauth_user_input_from_connector_config_specification": { + "title": "OAuth user input", + "description": "OAuth specific blob. This is a Json Schema used to validate Json configurations used as input to OAuth.\nMust be a valid non-nested JSON that refers to properties from ConnectorSpecification.connectionSpecification\nusing special annotation 'path_in_connector_config'.\nThese are input values the user is entering through the UI to authenticate to the connector, that might also shared\nas inputs for syncing data via the connector.\nExamples:\nif no connector values is shared during oauth flow, oauth_user_input_from_connector_config_specification=[]\nif connector values such as 'app_id' inside the top level are used to generate the API url for the oauth flow,\n oauth_user_input_from_connector_config_specification={\n app_id: {\n type: string\n path_in_connector_config: ['app_id']\n }\n }\nif connector values such as 'info.app_id' nested inside another object are used to generate the API url for the oauth flow,\n oauth_user_input_from_connector_config_specification={\n app_id: {\n type: string\n path_in_connector_config: ['info', 'app_id']\n }\n }", + "type": "object", + "examples": [ + { + "app_id": { + "type": "string", + "path_in_connector_config": ["app_id"] + } + }, + { + "app_id": { + "type": "string", + "path_in_connector_config": ["info", "app_id"] + } + } + ] + }, + "oauth_connector_input_specification": { + "title": "DeclarativeOAuth Connector Specification", + "description": "The DeclarativeOAuth specific blob.\nPertains to the fields defined by the connector relating to the OAuth flow.\n\nInterpolation capabilities:\n- The variables placeholders are declared as `{{my_var}}`.\n- The nested resolution variables like `{{ {{my_nested_var}} }}` is allowed as well.\n\n- The allowed interpolation context is:\n + base64Encoder - encode to `base64`, {{ {{my_var_a}}:{{my_var_b}} | base64Encoder }}\n + base64Decorer - decode from `base64` encoded string, {{ {{my_string_variable_or_string_value}} | base64Decoder }}\n + urlEncoder - encode the input string to URL-like format, {{ https://test.host.com/endpoint | urlEncoder}}\n + urlDecorer - decode the input url-encoded string into text format, {{ urlDecoder:https%3A%2F%2Fairbyte.io | urlDecoder}}\n + codeChallengeS256 - get the `codeChallenge` encoded value to provide additional data-provider specific authorisation values, {{ {{state_value}} | codeChallengeS256 }}\n\nExamples:\n - 
The TikTok Marketing DeclarativeOAuth spec:\n {\n \"oauth_connector_input_specification\": {\n \"type\": \"object\",\n \"additionalProperties\": false,\n \"properties\": {\n \"consent_url\": \"https://ads.tiktok.com/marketing_api/auth?{{client_id_key}}={{client_id_value}}&{{redirect_uri_key}}={{ {{redirect_uri_value}} | urlEncoder}}&{{state_key}}={{state_value}}\",\n \"access_token_url\": \"https://business-api.tiktok.com/open_api/v1.3/oauth2/access_token/\",\n \"access_token_params\": {\n \"{{ auth_code_key }}\": \"{{ auth_code_value }}\",\n \"{{ client_id_key }}\": \"{{ client_id_value }}\",\n \"{{ client_secret_key }}\": \"{{ client_secret_value }}\"\n },\n \"access_token_headers\": {\n \"Content-Type\": \"application/json\",\n \"Accept\": \"application/json\"\n },\n \"extract_output\": [\"data.access_token\"],\n \"client_id_key\": \"app_id\",\n \"client_secret_key\": \"secret\",\n \"auth_code_key\": \"auth_code\"\n }\n }\n }", + "type": "object", + "additionalProperties": true, + "required": ["consent_url", "access_token_url"], + "properties": { + "consent_url": { + "title": "Consent URL", + "type": "string", + "description": "The DeclarativeOAuth Specific string URL string template to initiate the authentication.\nThe placeholders are replaced during the processing to provide neccessary values.", + "examples": [ + "https://domain.host.com/marketing_api/auth?{{client_id_key}}={{client_id_value}}&{{redirect_uri_key}}={{{{redirect_uri_value}} | urlEncoder}}&{{state_key}}={{state_value}}", + "https://endpoint.host.com/oauth2/authorize?{{client_id_key}}={{client_id_value}}&{{redirect_uri_key}}={{{{redirect_uri_value}} | urlEncoder}}&{{scope_key}}={{{{scope_value}} | urlEncoder}}&{{state_key}}={{state_value}}&subdomain={{subdomain}}" + ] + }, + "scope": { + "title": "Scopes", + "type": "string", + "description": "The DeclarativeOAuth Specific string of the scopes needed to be grant for authenticated user.", + "examples": ["user:read user:read_orders workspaces:read"] + }, + "access_token_url": { + "title": "Access Token URL", + "type": "string", + "description": "The DeclarativeOAuth Specific URL templated string to obtain the `access_token`, `refresh_token` etc.\nThe placeholders are replaced during the processing to provide neccessary values.", + "examples": [ + "https://auth.host.com/oauth2/token?{{client_id_key}}={{client_id_value}}&{{client_secret_key}}={{client_secret_value}}&{{auth_code_key}}={{auth_code_value}}&{{redirect_uri_key}}={{{{redirect_uri_value}} | urlEncoder}}" + ] + }, + "access_token_headers": { + "title": "Access Token Headers", + "type": "object", + "additionalProperties": true, + "description": "The DeclarativeOAuth Specific optional headers to inject while exchanging the `auth_code` to `access_token` during `completeOAuthFlow` step.", + "examples": [ + { + "Authorization": "Basic {{ {{ client_id_value }}:{{ client_secret_value }} | base64Encoder }}" + } + ] + }, + "access_token_params": { + "title": "Access Token Query Params (Json Encoded)", + "type": "object", + "additionalProperties": true, + "description": "The DeclarativeOAuth Specific optional query parameters to inject while exchanging the `auth_code` to `access_token` during `completeOAuthFlow` step.\nWhen this property is provided, the query params will be encoded as `Json` and included in the outgoing API request.", + "examples": [ + { + "{{ auth_code_key }}": "{{ auth_code_value }}", + "{{ client_id_key }}": "{{ client_id_value }}", + "{{ client_secret_key }}": "{{ client_secret_value }}" + } + ] + }, + 
"extract_output": { + "title": "Extract Output", + "type": "array", + "items": { + "type": "string" + }, + "description": "The DeclarativeOAuth Specific list of strings to indicate which keys should be extracted and returned back to the input config.", + "examples": [["access_token", "refresh_token", "other_field"]] + }, + "state": { + "title": "Configurable State Query Param", + "type": "object", + "additionalProperties": true, + "required": ["min", "max"], + "description": "The DeclarativeOAuth Specific object to provide the criteria of how the `state` query param should be constructed,\nincluding length and complexity.", + "properties": { + "min": { + "type": "integer" + }, + "max": { + "type": "integer" + } + }, + "examples": [ + { + "min": 7, + "max": 128 + } + ] + }, + "client_id_key": { + "title": "Client ID Key Override", + "type": "string", + "description": "The DeclarativeOAuth Specific optional override to provide the custom `client_id` key name, if required by data-provider.", + "examples": ["my_custom_client_id_key_name"] + }, + "client_secret_key": { + "title": "Client Secret Key Override", + "type": "string", + "description": "The DeclarativeOAuth Specific optional override to provide the custom `client_secret` key name, if required by data-provider.", + "examples": ["my_custom_client_secret_key_name"] + }, + "scope_key": { + "title": "Scopes Key Override", + "type": "string", + "description": "The DeclarativeOAuth Specific optional override to provide the custom `scope` key name, if required by data-provider.", + "examples": ["my_custom_scope_key_key_name"] + }, + "state_key": { + "title": "State Key Override", + "type": "string", + "description": "The DeclarativeOAuth Specific optional override to provide the custom `state` key name, if required by data-provider.", + "examples": ["my_custom_state_key_key_name"] + }, + "auth_code_key": { + "title": "Auth Code Key Override", + "type": "string", + "description": "The DeclarativeOAuth Specific optional override to provide the custom `code` key name to something like `auth_code` or `custom_auth_code`, if required by data-provider.", + "examples": ["my_custom_auth_code_key_name"] + }, + "redirect_uri_key": { + "title": "Redirect URI Key Override", + "type": "string", + "description": "The DeclarativeOAuth Specific optional override to provide the custom `redirect_uri` key name to something like `callback_uri`, if required by data-provider.", + "examples": ["my_custom_redirect_uri_key_name"] + } + } + }, + "complete_oauth_output_specification": { + "title": "OAuth output specification", + "description": "OAuth specific blob. This is a Json Schema used to validate Json configurations produced by the OAuth flows as they are\nreturned by the distant OAuth APIs.\nMust be a valid JSON describing the fields to merge back to `ConnectorSpecification.connectionSpecification`.\nFor each field, a special annotation `path_in_connector_config` can be specified to determine where to merge it,\nExamples:\n complete_oauth_output_specification={\n refresh_token: {\n type: string,\n path_in_connector_config: ['credentials', 'refresh_token']\n }\n }", + "type": "object", + "additionalProperties": true, + "examples": [ + { + "refresh_token": { + "type": "string,", + "path_in_connector_config": ["credentials", "refresh_token"] + } + } + ] + }, + "complete_oauth_server_input_specification": { + "title": "OAuth input specification", + "description": "OAuth specific blob. 
This is a Json Schema used to validate Json configurations persisted as Airbyte Server configurations.\nMust be a valid non-nested JSON describing additional fields configured by the Airbyte Instance or Workspace Admins to be used by the\nserver when completing an OAuth flow (typically exchanging an auth code for refresh token).\nExamples:\n complete_oauth_server_input_specification={\n client_id: {\n type: string\n },\n client_secret: {\n type: string\n }\n }", + "type": "object", + "additionalProperties": true, + "examples": [ + { + "client_id": { + "type": "string" + }, + "client_secret": { + "type": "string" + } + } + ] + }, + "complete_oauth_server_output_specification": { + "title": "OAuth server output specification", + "description": "OAuth specific blob. This is a Json Schema used to validate Json configurations persisted as Airbyte Server configurations that\nalso need to be merged back into the connector configuration at runtime.\nThis is a subset configuration of `complete_oauth_server_input_specification` that filters fields out to retain only the ones that\nare necessary for the connector to function with OAuth. (some fields could be used during oauth flows but not needed afterwards, therefore\nthey would be listed in the `complete_oauth_server_input_specification` but not `complete_oauth_server_output_specification`)\nMust be a valid non-nested JSON describing additional fields configured by the Airbyte Instance or Workspace Admins to be used by the\nconnector when using OAuth flow APIs.\nThese fields are to be merged back to `ConnectorSpecification.connectionSpecification`.\nFor each field, a special annotation `path_in_connector_config` can be specified to determine where to merge it,\nExamples:\n complete_oauth_server_output_specification={\n client_id: {\n type: string,\n path_in_connector_config: ['credentials', 'client_id']\n },\n client_secret: {\n type: string,\n path_in_connector_config: ['credentials', 'client_secret']\n }\n }", + "type": "object", + "additionalProperties": true, + "examples": [ + { + "client_id": { + "type": "string,", + "path_in_connector_config": ["credentials", "client_id"] + }, + "client_secret": { + "type": "string,", + "path_in_connector_config": ["credentials", "client_secret"] + } + } + ] + } + } + }, + "OffsetIncrement": { + "title": "Offset Increment", + "description": "Pagination strategy that returns the number of records reads so far and returns it as the next page token.", + "type": "object", + "required": ["type"], + "properties": { + "type": { + "type": "string", + "enum": ["OffsetIncrement"] + }, + "page_size": { + "title": "Limit", + "description": "The number of records to include in each pages.", + "anyOf": [ + { + "type": "integer", + "title": "Number of Records" + }, + { + "type": "string", + "title": "Interpolated Value" + } + ], + "interpolation_context": ["config", "response"], + "examples": [100, "{{ config['page_size'] }}"] + }, + "inject_on_first_request": { + "title": "Inject Offset on First Request", + "description": "Using the `offset` with value `0` during the first request", + "type": "boolean", + "default": false + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "PageIncrement": { + "title": "Page Increment", + "description": "Pagination strategy that returns the number of pages reads so far and returns it as the next page token.", + "type": "object", + "required": ["type"], + "properties": { + "type": { + "type": "string", + "enum": ["PageIncrement"] + }, + "page_size": { + 
"title": "Page Size", + "description": "The number of records to include in each pages.", + "interpolation_context": ["config"], + "anyOf": [ + { + "type": "integer", + "title": "Number of Records" + }, + { + "type": "string", + "title": "Interpolated Value" + } + ], + "examples": [100, "100", "{{ config['page_size'] }}"] + }, + "start_from_page": { + "title": "Start From Page", + "description": "Index of the first page to request.", + "type": "integer", + "default": 0, + "examples": [0, 1] + }, + "inject_on_first_request": { + "title": "Inject Page Number on First Request", + "description": "Using the `page number` with value defined by `start_from_page` during the first request", + "type": "boolean", + "default": false + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "ParentStreamConfig": { + "title": "Parent Stream Config", + "description": "Describes how to construct partitions from the records retrieved from the parent stream..", + "type": "object", + "required": ["type", "parent_key", "partition_field", "stream"], + "properties": { + "type": { + "type": "string", + "enum": ["ParentStreamConfig"] + }, + "stream": { + "title": "Parent Stream", + "description": "Reference to the parent stream.", + "anyOf": [ + { + "$ref": "#/definitions/DeclarativeStream" + }, + { + "$ref": "#/definitions/StateDelegatingStream" + } + ] + }, + "parent_key": { + "title": "Parent Key", + "description": "The primary key of records from the parent stream that will be used during the retrieval of records for the current substream. This parent identifier field is typically a characteristic of the child records being extracted from the source API.", + "type": "string", + "examples": ["id", "{{ config['parent_record_id'] }}"] + }, + "partition_field": { + "title": "Current Parent Key Value Identifier", + "description": "While iterating over parent records during a sync, the parent_key value can be referenced by using this field.", + "type": "string", + "examples": ["parent_id", "{{ config['parent_partition_field'] }}"] + }, + "request_option": { + "title": "Request Option", + "description": "A request option describing where the parent key value should be injected into and under what field name if applicable.", + "$ref": "#/definitions/RequestOption" + }, + "incremental_dependency": { + "title": "Incremental Dependency", + "description": "Indicates whether the parent stream should be read incrementally based on updates in the child stream.", + "type": "boolean", + "default": false + }, + "lazy_read_pointer": { + "title": "Lazy Read Pointer", + "description": "If set, this will enable lazy reading, using the initial read of parent records to extract child records.", + "type": "array", + "default": [], + "items": { + "type": "string" + }, + "interpolation_context": ["config"] + }, + "extra_fields": { + "title": "Extra Fields", + "description": "Array of field paths to include as additional fields in the stream slice. Each path is an array of strings representing keys to access fields in the respective parent record. Accessible via `stream_slice.extra_fields`. 
Missing fields are set to `None`.", + "interpolation_context": ["config"], + "type": "array", + "items": { + "type": "array", + "items": { + "type": "string" + }, + "description": "Defines a field path as an array of strings.", + "examples": [["field1"], ["nested", "field2"]] + } + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "PrimaryKey": { + "title": "Primary Key", + "description": "The stream field to be used to distinguish unique records. Can either be a single field, an array of fields representing a composite key, or an array of arrays representing a composite key where the fields are nested fields.", + "anyOf": [ + { + "type": "string", + "title": "Single Key", + "description": "The single top-level field to use as the primary key." + }, + { + "type": "array", + "title": "Composite Key", + "description": "An array of top-level fields representing a composite primary key.", + "items": { + "type": "string" + } + }, + { + "type": "array", + "title": "Composite Key of Nested Fields", + "description": "An array of arrays representing a composite primary key where the fields are nested fields.", + "items": { + "type": "array", + "title": "Nested Field Path", + "description": "Path to the nested field in the record.", + "items": { + "type": "string" + } + } + } + ], + "default": "", + "examples": ["id", ["code", "type"]] + }, + "PropertiesFromEndpoint": { + "title": "Properties from Endpoint", + "description": "Defines the behavior for fetching the list of properties from an API that will be loaded into the requests to extract records.", + "type": "object", + "required": ["type", "property_field_path", "retriever"], + "properties": { + "type": { + "type": "string", + "enum": ["PropertiesFromEndpoint"] + }, + "property_field_path": { + "description": "Describes the path to the field that should be extracted", + "type": "array", + "items": { + "type": "string" + }, + "examples": [["name"]], + "interpolation_context": ["config", "parameters"] + }, + "retriever": { + "description": "Requester component that describes how to fetch the properties to query from a remote API endpoint.", + "anyOf": [ + { + "$ref": "#/definitions/SimpleRetriever" + }, + { + "$ref": "#/definitions/CustomRetriever" + } + ] + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "PropertyChunking": { + "title": "Property Chunking", + "description": "For APIs with restrictions on the amount of properties that can be requester per request, property chunking can be applied to make multiple requests with a subset of the properties.", + "type": "object", + "required": ["type", "property_limit_type"], + "properties": { + "type": { + "type": "string", + "enum": ["PropertyChunking"] + }, + "property_limit_type": { + "title": "Property Limit Type", + "description": "The type used to determine the maximum number of properties per chunk", + "enum": ["characters", "property_count"] + }, + "property_limit": { + "title": "Property Limit", + "description": "The maximum amount of properties that can be retrieved per request according to the limit type.", + "type": "integer" + }, + "record_merge_strategy": { + "title": "Record Merge Strategy", + "description": "Dictates how to records that require multiple requests to get all properties should be emitted to the destination", + "$ref": "#/definitions/GroupByKeyMergeStrategy" + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "QueryProperties": { + "title": "Query 
Properties", + "description": "For APIs that require explicit specification of the properties to query for, this component specifies which property fields and how they are supplied to outbound requests.", + "type": "object", + "required": ["type", "property_list"], + "properties": { + "type": { + "type": "string", + "enum": ["QueryProperties"] + }, + "property_list": { + "title": "Property List", + "description": "The set of properties that will be queried for in the outbound request. This can either be statically defined or dynamic based on an API endpoint", + "anyOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "$ref": "#/definitions/PropertiesFromEndpoint" + } + ] + }, + "always_include_properties": { + "title": "Always Include Properties", + "description": "The list of properties that should be included in every set of properties when multiple chunks of properties are being requested.", + "type": "array", + "items": { + "type": "string" + } + }, + "property_chunking": { + "title": "Property Chunking", + "description": "Defines how query properties will be grouped into smaller sets for APIs with limitations on the number of properties fetched per API request.", + "$ref": "#/definitions/PropertyChunking" + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "RecordFilter": { + "title": "Record Filter", + "description": "Filter applied on a list of records.", + "type": "object", + "required": ["type"], + "properties": { + "type": { + "type": "string", + "enum": ["RecordFilter"] + }, + "condition": { + "title": "Condition", + "description": "The predicate to filter a record. Records will be removed if evaluated to False.", + "type": "string", + "default": "", + "interpolation_context": [ + "config", + "next_page_token", + "record", + "stream_interval", + "stream_partition", + "stream_slice" + ], + "examples": [ + "{{ record['created_at'] >= stream_interval['start_time'] }}", + "{{ record.status in ['active', 'expired'] }}" + ] + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "RecordSelector": { + "title": "Record Selector", + "description": "Responsible for translating an HTTP response into a list of records by extracting records from the response and optionally filtering records based on a heuristic.", + "type": "object", + "required": ["type", "extractor"], + "properties": { + "type": { + "type": "string", + "enum": ["RecordSelector"] + }, + "extractor": { + "anyOf": [ + { + "$ref": "#/definitions/DpathExtractor" + }, + { + "$ref": "#/definitions/CustomRecordExtractor" + } + ] + }, + "record_filter": { + "title": "Record Filter", + "description": "Responsible for filtering records to be emitted by the Source.", + "anyOf": [ + { + "$ref": "#/definitions/RecordFilter" + }, + { + "$ref": "#/definitions/CustomRecordFilter" + } + ] + }, + "schema_normalization": { + "title": "Schema Normalization", + "description": "Responsible for normalization according to the schema.", + "anyOf": [ + { + "$ref": "#/definitions/SchemaNormalization" + }, + { + "$ref": "#/definitions/CustomSchemaNormalization" + } + ] + }, + "transform_before_filtering": { + "title": "Transform Before Filtering", + "description": "If true, transformation will be applied before record filtering.", + "type": "boolean" + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "SchemaNormalization": { + "title": "Schema Normalization", + "description": "Responsible for normalization according to 
the schema.", + "type": "string", + "enum": ["Default", "None"], + "examples": ["Default", "None"] + }, + "RemoveFields": { + "title": "Remove Fields", + "description": "A transformation which removes fields from a record. The fields removed are designated using FieldPointers. During transformation, if a field or any of its parents does not exist in the record, no error is thrown.", + "type": "object", + "required": ["type", "field_pointers"], + "properties": { + "type": { + "type": "string", + "enum": ["RemoveFields"] + }, + "condition": { + "description": "The predicate to filter a property by a property value. Property will be removed if it is empty OR expression is evaluated to True.,", + "type": "string", + "default": "", + "interpolation_context": ["config", "property", "parameters"], + "examples": [ + "{{ property|string == '' }}", + "{{ property is integer }}", + "{{ property|length > 5 }}", + "{{ property == 'some_string_to_match' }}" + ] + }, + "field_pointers": { + "title": "Field Paths", + "description": "Array of paths defining the field to remove. Each item is an array whose field describe the path of a field to remove.", + "type": "array", + "items": { + "type": "array", + "items": { + "type": "string" + } + }, + "examples": [ + ["tags"], + [ + ["content", "html"], + ["content", "plain_text"] + ] + ] + } + } + }, + "RequestPath": { + "title": "Request Path", + "description": "Specifies where in the request path a component's value should be inserted.", + "type": "object", + "required": ["type"], + "properties": { + "type": { + "type": "string", + "enum": ["RequestPath"] + } + } + }, + "RequestOption": { + "title": "Request Option", + "description": "Specifies the key field or path and where in the request a component's value should be injected.", + "type": "object", + "required": ["type", "inject_into"], + "properties": { + "type": { + "type": "string", + "enum": ["RequestOption"] + }, + "inject_into": { + "title": "Inject Into", + "description": "Configures where the descriptor should be set on the HTTP requests. Note that request parameters that are already encoded in the URL path will not be duplicated.", + "enum": ["request_parameter", "header", "body_data", "body_json"], + "examples": ["request_parameter", "header", "body_data", "body_json"] + }, + "field_name": { + "title": "Field Name", + "description": "Configures which key should be used in the location that the descriptor is being injected into. We hope to eventually deprecate this field in favor of `field_path` for all request_options, but must currently maintain it for backwards compatibility in the Builder.", + "type": "string", + "examples": ["segment_id"], + "interpolation_context": ["config", "parameters"] + }, + "field_path": { + "title": "Field Path", + "description": "Configures a path to be used for nested structures in JSON body requests (e.g. GraphQL queries)", + "type": "array", + "items": { + "type": "string" + }, + "examples": [["data", "viewer", "id"]], + "interpolation_context": ["config", "parameters"] + } + } + }, + "Schemas": { + "title": "Schemas", + "description": "The stream schemas representing the shape of the data emitted by the stream.", + "type": "object", + "additionalProperties": true + }, + "LegacySessionTokenAuthenticator": { + "title": "Session Token Authenticator", + "deprecated": true, + "description": "Deprecated - use SessionTokenAuthenticator instead. Authenticator for requests authenticated using session tokens. 
A session token is a random value generated by a server to identify a specific user for the duration of one interaction session.", + "type": "object", + "required": [ + "type", + "header", + "login_url", + "session_token_response_key", + "validate_session_url" + ], + "properties": { + "type": { + "type": "string", + "enum": ["LegacySessionTokenAuthenticator"] + }, + "header": { + "title": "Session Request Header", + "description": "The name of the session token header that will be injected in the request", + "type": "string", + "examples": ["X-Session"] + }, + "login_url": { + "title": "Login Path", + "description": "Path of the login URL (do not include the base URL)", + "type": "string", + "examples": ["session"] + }, + "session_token": { + "title": "Session Token", + "description": "Session token to use if using a pre-defined token. Not needed if authenticating with username + password pair", + "type": "string", + "example": ["{{ config['session_token'] }}"] + }, + "session_token_response_key": { + "title": "Response Token Response Key", + "description": "Name of the key of the session token to be extracted from the response", + "type": "string", + "examples": ["id"] + }, + "username": { + "title": "Username", + "description": "Username used to authenticate and obtain a session token", + "type": "string", + "examples": [" {{ config['username'] }}"] + }, + "password": { + "title": "Password", + "description": "Password used to authenticate and obtain a session token", + "type": "string", + "default": "", + "examples": ["{{ config['password'] }}", ""] + }, + "validate_session_url": { + "title": "Validate Session Path", + "description": "Path of the URL to use to validate that the session token is valid (do not include the base URL)", + "type": "string", + "examples": ["user/current"] + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "StateDelegatingStream": { + "title": "State Delegating Stream", + "description": "(This component is experimental. Use at your own risk.) Orchestrate the retriever's usage based on the state value.", + "type": "object", + "required": ["type", "name", "full_refresh_stream", "incremental_stream"], + "properties": { + "type": { + "type": "string", + "enum": ["StateDelegatingStream"] + }, + "name": { + "title": "Name", + "description": "The stream name.", + "type": "string", + "default": "", + "example": ["Users"] + }, + "full_refresh_stream": { + "title": "Full Refresh Stream", + "description": "Component used to coordinate how records are extracted across stream slices and request pages when the state is empty or not provided.", + "$ref": "#/definitions/DeclarativeStream" + }, + "incremental_stream": { + "title": "Incremental Stream", + "description": "Component used to coordinate how records are extracted across stream slices and request pages when the state provided.", + "$ref": "#/definitions/DeclarativeStream" + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "SimpleRetriever": { + "title": "Synchronous Retriever", + "description": "Retrieves records by synchronously sending requests to fetch records. 
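The `StateDelegatingStream` defined above picks one of two stream definitions depending on whether state is present. A sketch, assuming `users_full_refresh` and `users_incremental` are `DeclarativeStream` definitions declared under `definitions` in the same manifest:

```yaml
# Experimental component; the referenced definitions are hypothetical.
- type: StateDelegatingStream
  name: users
  full_refresh_stream:
    $ref: "#/definitions/users_full_refresh"   # used when no state is provided
  incremental_stream:
    $ref: "#/definitions/users_incremental"    # used once state exists
```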
The retriever acts as an orchestrator between the requester, the record selector, the paginator, and the partition router.", + "type": "object", + "required": ["type", "record_selector", "requester"], + "properties": { + "type": { + "type": "string", + "enum": ["SimpleRetriever"] + }, + "requester": { + "description": "Requester component that describes how to prepare HTTP requests to send to the source API.", + "anyOf": [ + { + "$ref": "#/definitions/HttpRequester" + }, + { + "$ref": "#/definitions/CustomRequester" + } + ] + }, + "decoder": { + "title": "HTTP Response Format", + "description": "Component decoding the response so records can be extracted.", + "anyOf": [ + { + "$ref": "#/definitions/JsonDecoder" + }, + { + "$ref": "#/definitions/XmlDecoder" + }, + { + "$ref": "#/definitions/CsvDecoder" + }, + { + "$ref": "#/definitions/JsonlDecoder" + }, + { + "$ref": "#/definitions/GzipDecoder" + }, + { + "$ref": "#/definitions/IterableDecoder" + }, + { + "$ref": "#/definitions/ZipfileDecoder" + }, + { + "$ref": "#/definitions/CustomDecoder" + } + ] + }, + "record_selector": { + "description": "Component that describes how to extract records from a HTTP response.", + "$ref": "#/definitions/RecordSelector" + }, + "paginator": { + "description": "Paginator component that describes how to navigate through the API's pages.", + "linkable": true, + "anyOf": [ + { + "$ref": "#/definitions/DefaultPaginator" + }, + { + "$ref": "#/definitions/NoPagination" + } + ] + }, + "ignore_stream_slicer_parameters_on_paginated_requests": { + "description": "If true, the partition router and incremental request options will be ignored when paginating requests. Request options set directly on the requester will not be ignored.", + "type": "boolean", + "default": false + }, + "partition_router": { + "title": "Partition Router", + "description": "Used to iteratively execute requests over a set of values, such as a parent stream's records or a list of constant values.", + "anyOf": [ + { + "$ref": "#/definitions/SubstreamPartitionRouter" + }, + { + "$ref": "#/definitions/ListPartitionRouter" + }, + { + "$ref": "#/definitions/GroupingPartitionRouter" + }, + { + "$ref": "#/definitions/CustomPartitionRouter" + }, + { + "type": "array", + "title": "Multiple Partition Routers", + "items": { + "anyOf": [ + { + "$ref": "#/definitions/SubstreamPartitionRouter" + }, + { + "$ref": "#/definitions/ListPartitionRouter" + }, + { + "$ref": "#/definitions/GroupingPartitionRouter" + }, + { + "$ref": "#/definitions/CustomPartitionRouter" + } + ] + } + } + ] + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "GzipDecoder": { + "title": "gzip", + "description": "Select 'gzip' for response data that is compressed with gzip. Requires specifying an inner data type/decoder to parse the decompressed data.", + "type": "object", + "required": ["type", "decoder"], + "properties": { + "type": { + "type": "string", + "enum": ["GzipDecoder"] + }, + "decoder": { + "anyOf": [ + { + "$ref": "#/definitions/CsvDecoder" + }, + { + "$ref": "#/definitions/GzipDecoder" + }, + { + "$ref": "#/definitions/JsonDecoder" + }, + { + "$ref": "#/definitions/JsonlDecoder" + } + ] + } + } + }, + "CsvDecoder": { + "title": "CSV", + "description": "Select 'CSV' for response data that is formatted as CSV (comma-separated values). 
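Putting the `SimpleRetriever` and `GzipDecoder` definitions above together, a trimmed retriever for an endpoint that returns gzipped JSON Lines might look like this. The `HttpRequester` fields and the `base_record_selector` reference are assumptions; only the structure mirrors this schema.

```yaml
retriever:
  type: SimpleRetriever
  requester:
    type: HttpRequester                        # defined elsewhere in this schema
    url_base: "https://api.example.com/v1"
    path: "/reports"
  decoder:
    type: GzipDecoder                          # requires an inner decoder for the payload
    decoder:
      type: JsonlDecoder
  record_selector:
    $ref: "#/definitions/base_record_selector" # e.g. the RecordSelector sketched earlier
  paginator:
    type: NoPagination
```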
Can specify an encoding (default: 'utf-8') and a delimiter (default: ',').", + "type": "object", + "required": ["type"], + "properties": { + "type": { + "type": "string", + "enum": ["CsvDecoder"] + }, + "encoding": { + "type": "string", + "default": "utf-8" + }, + "delimiter": { + "type": "string", + "default": "," + }, + "set_values_to_none": { + "type": "array", + "items": { + "type": "string" + } + } + } + }, + "AsyncJobStatusMap": { + "description": "Matches the api job status to Async Job Status.", + "type": "object", + "required": ["running", "completed", "failed", "timeout"], + "properties": { + "type": { + "type": "string", + "enum": ["AsyncJobStatusMap"] + }, + "running": { + "type": "array", + "items": { + "type": "string" + } + }, + "completed": { + "type": "array", + "items": { + "type": "string" + } + }, + "failed": { + "type": "array", + "items": { + "type": "string" + } + }, + "timeout": { + "type": "array", + "items": { + "type": "string" + } + } + } + }, + "AsyncRetriever": { + "title": "Asynchronous Retriever", + "description": "Retrieves records by Asynchronously sending requests to fetch records. The retriever acts as an orchestrator between the requester, the record selector, the paginator, and the partition router.", + "type": "object", + "required": [ + "type", + "record_selector", + "status_mapping", + "creation_requester", + "polling_requester", + "download_requester", + "status_extractor", + "download_target_extractor" + ], + "properties": { + "type": { + "type": "string", + "enum": ["AsyncRetriever"] + }, + "record_selector": { + "description": "Component that describes how to extract records from a HTTP response.", + "$ref": "#/definitions/RecordSelector" + }, + "status_mapping": { + "description": "Async Job Status to Airbyte CDK Async Job Status mapping.", + "anyOf": [ + { + "$ref": "#/definitions/AsyncJobStatusMap" + } + ] + }, + "status_extractor": { + "description": "Responsible for fetching the actual status of the async job.", + "anyOf": [ + { + "$ref": "#/definitions/DpathExtractor" + }, + { + "$ref": "#/definitions/CustomRecordExtractor" + } + ] + }, + "download_target_extractor": { + "description": "Responsible for fetching the final result `urls` provided by the completed / finished / ready async job.", + "anyOf": [ + { + "$ref": "#/definitions/DpathExtractor" + }, + { + "$ref": "#/definitions/CustomRecordExtractor" + } + ] + }, + "download_extractor": { + "description": "Responsible for fetching the records from provided urls.", + "anyOf": [ + { + "$ref": "#/definitions/DpathExtractor" + }, + { + "$ref": "#/definitions/CustomRecordExtractor" + }, + { + "$ref": "#/definitions/ResponseToFileExtractor" + } + ] + }, + "creation_requester": { + "description": "Requester component that describes how to prepare HTTP requests to send to the source API to create the async server-side job.", + "anyOf": [ + { + "$ref": "#/definitions/HttpRequester" + }, + { + "$ref": "#/definitions/CustomRequester" + } + ] + }, + "polling_requester": { + "description": "Requester component that describes how to prepare HTTP requests to send to the source API to fetch the status of the running async job.", + "anyOf": [ + { + "$ref": "#/definitions/HttpRequester" + }, + { + "$ref": "#/definitions/CustomRequester" + } + ] + }, + "polling_job_timeout": { + "description": "The time in minutes after which the single Async Job should be considered as Timed Out.", + "anyOf": [ + { + "type": "integer" + }, + { + "type": "string" + } + ], + "interpolation_context": ["config"] + }, + 
"download_target_requester": { + "description": "Requester component that describes how to prepare HTTP requests to send to the source API to extract the url from polling response by the completed async job.", + "anyOf": [ + { + "$ref": "#/definitions/HttpRequester" + }, + { + "$ref": "#/definitions/CustomRequester" + } + ] + }, + "download_requester": { + "description": "Requester component that describes how to prepare HTTP requests to send to the source API to download the data provided by the completed async job.", + "anyOf": [ + { + "$ref": "#/definitions/HttpRequester" + }, + { + "$ref": "#/definitions/CustomRequester" + } + ] + }, + "download_paginator": { + "description": "Paginator component that describes how to navigate through the API's pages during download.", + "anyOf": [ + { + "$ref": "#/definitions/DefaultPaginator" + }, + { + "$ref": "#/definitions/NoPagination" + } + ] + }, + "abort_requester": { + "description": "Requester component that describes how to prepare HTTP requests to send to the source API to abort a job once it is timed out from the source's perspective.", + "anyOf": [ + { + "$ref": "#/definitions/HttpRequester" + }, + { + "$ref": "#/definitions/CustomRequester" + } + ] + }, + "delete_requester": { + "description": "Requester component that describes how to prepare HTTP requests to send to the source API to delete a job once the records are extracted.", + "anyOf": [ + { + "$ref": "#/definitions/HttpRequester" + }, + { + "$ref": "#/definitions/CustomRequester" + } + ] + }, + "partition_router": { + "title": "Partition Router", + "description": "PartitionRouter component that describes how to partition the stream, enabling incremental syncs and checkpointing.", + "default": [], + "anyOf": [ + { + "$ref": "#/definitions/ListPartitionRouter" + }, + { + "$ref": "#/definitions/SubstreamPartitionRouter" + }, + { + "$ref": "#/definitions/GroupingPartitionRouter" + }, + { + "$ref": "#/definitions/CustomPartitionRouter" + }, + { + "type": "array", + "items": { + "anyOf": [ + { + "$ref": "#/definitions/ListPartitionRouter" + }, + { + "$ref": "#/definitions/SubstreamPartitionRouter" + }, + { + "$ref": "#/definitions/GroupingPartitionRouter" + }, + { + "$ref": "#/definitions/CustomPartitionRouter" + } + ] + } + } + ] + }, + "decoder": { + "title": "HTTP Response Format", + "description": "Component decoding the response so records can be extracted.", + "anyOf": [ + { + "$ref": "#/definitions/CsvDecoder" + }, + { + "$ref": "#/definitions/GzipDecoder" + }, + { + "$ref": "#/definitions/JsonDecoder" + }, + { + "$ref": "#/definitions/JsonlDecoder" + }, + { + "$ref": "#/definitions/IterableDecoder" + }, + { + "$ref": "#/definitions/XmlDecoder" + }, + { + "$ref": "#/definitions/ZipfileDecoder" + }, + { + "$ref": "#/definitions/CustomDecoder" + } + ] + }, + "download_decoder": { + "title": "Download HTTP Response Format", + "description": "Component decoding the download response so records can be extracted.", + "anyOf": [ + { + "$ref": "#/definitions/CsvDecoder" + }, + { + "$ref": "#/definitions/GzipDecoder" + }, + { + "$ref": "#/definitions/JsonDecoder" + }, + { + "$ref": "#/definitions/JsonlDecoder" + }, + { + "$ref": "#/definitions/IterableDecoder" + }, + { + "$ref": "#/definitions/XmlDecoder" + }, + { + "$ref": "#/definitions/ZipfileDecoder" + }, + { + "$ref": "#/definitions/CustomDecoder" + } + ] + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "Spec": { + "title": "Spec", + "description": "A source specification made up of 
connector metadata and how it can be configured.", + "type": "object", + "required": ["type", "connection_specification"], + "properties": { + "type": { + "type": "string", + "enum": ["Spec"] + }, + "connection_specification": { + "title": "Connection Specification", + "description": "A connection specification describing how a the connector can be configured.", + "type": "object", + "additionalProperties": true + }, + "documentation_url": { + "title": "Documentation URL", + "description": "URL of the connector's documentation page.", + "type": "string", + "examples": ["https://docs.airbyte.com/integrations/sources/dremio"] + }, + "advanced_auth": { + "title": "Advanced Auth", + "description": "Advanced specification for configuring the authentication flow.", + "$ref": "#/definitions/AuthFlow" + }, + "config_normalization_rules": { + "title": "Config Normalization Rules", + "type": "object", + "additionalProperties": false, + "required": ["type"], + "properties": { + "type": { + "type": "string", + "enum": ["ConfigNormalizationRules"] + }, + "config_migrations": { + "title": "Config Migrations", + "description": "The discrete migrations that will be applied on the incoming config. Each migration will be applied in the order they are defined.", + "type": "array", + "items": { + "$ref": "#/definitions/ConfigMigration" + }, + "default": [] + }, + "transformations": { + "title": "Transformations", + "description": "The list of transformations that will be applied on the incoming config at the start of each sync. The transformations will be applied in the order they are defined.", + "type": "array", + "items": { + "anyOf": [ + { + "$ref": "#/definitions/ConfigRemapField" + }, + { + "$ref": "#/definitions/ConfigAddFields" + }, + { + "$ref": "#/definitions/ConfigRemoveFields" + }, + { + "$ref": "#/definitions/CustomConfigTransformation" + } + ] + }, + "default": [] + }, + "validations": { + "title": "Validations", + "description": "The list of validations that will be performed on the incoming config at the start of each sync.", + "type": "array", + "items": { + "anyOf": [ + { + "$ref": "#/definitions/DpathValidator" + }, + { + "$ref": "#/definitions/PredicateValidator" + } + ] + }, + "default": [] + } + } + } + } + }, + "ConfigMigration": { + "title": "Config Migration", + "description": "A config migration that will be applied on the incoming config at the start of a sync.", + "type": "object", + "required": ["type", "transformations"], + "properties": { + "type": { + "type": "string", + "enum": ["ConfigMigration"] + }, + "description": { + "type": "string", + "description": "The description/purpose of the config migration." + }, + "transformations": { + "title": "Transformations", + "description": "The list of transformations that will attempt to be applied on an incoming unmigrated config. The transformations will be applied in the order they are defined.", + "type": "array", + "items": { + "anyOf": [ + { + "$ref": "#/definitions/ConfigRemapField" + }, + { + "$ref": "#/definitions/ConfigAddFields" + }, + { + "$ref": "#/definitions/ConfigRemoveFields" + }, + { + "$ref": "#/definitions/CustomConfigTransformation" + } + ] + }, + "default": [] + } + } + }, + "SubstreamPartitionRouter": { + "title": "Substream Partition Router", + "description": "Partition router that is used to retrieve records that have been partitioned according to records from the specified parent streams. 
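For the `AsyncRetriever` described above, the required pieces are the creation, polling, and download requesters, the status mapping, and the two extractors. A heavily trimmed sketch; the requester definitions, status strings, and response fields (`status`, `result_url`) are placeholders.

```yaml
retriever:
  type: AsyncRetriever
  creation_requester:
    $ref: "#/definitions/create_report_requester"    # starts the server-side job
  polling_requester:
    $ref: "#/definitions/poll_report_requester"      # checks the job status
  download_requester:
    $ref: "#/definitions/download_report_requester"  # fetches the finished results
  status_extractor:
    type: DpathExtractor
    field_path: ["status"]
  status_mapping:
    type: AsyncJobStatusMap
    running: ["PENDING", "IN_PROGRESS"]
    completed: ["DONE"]
    failed: ["ERROR"]
    timeout: ["TIMED_OUT"]
  download_target_extractor:
    type: DpathExtractor
    field_path: ["result_url"]
  polling_job_timeout: 30                            # minutes
  record_selector:
    type: RecordSelector
    extractor:
      type: DpathExtractor
      field_path: []
```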
An example of a parent stream is automobile brands and the substream would be the various car models associated with each branch.", + "type": "object", + "required": ["type", "parent_stream_configs"], + "properties": { + "type": { + "type": "string", + "enum": ["SubstreamPartitionRouter"] + }, + "parent_stream_configs": { + "title": "Parent Stream Configs", + "description": "Specifies which parent streams are being iterated over and how parent records should be used to partition the child stream data set.", + "type": "array", + "items": { + "$ref": "#/definitions/ParentStreamConfig" + } + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "ValueType": { + "title": "Value Type", + "description": "A schema type.", + "type": "string", + "enum": ["string", "number", "integer", "boolean"] + }, + "WaitTimeFromHeader": { + "title": "Wait Time Extracted From Response Header", + "description": "Extract wait time from a HTTP header in the response.", + "type": "object", + "required": ["type", "header"], + "properties": { + "type": { + "type": "string", + "enum": ["WaitTimeFromHeader"] + }, + "header": { + "title": "Response Header Name", + "description": "The name of the response header defining how long to wait before retrying.", + "type": "string", + "interpolation_context": ["config"], + "examples": ["Retry-After"] + }, + "regex": { + "title": "Extraction Regex", + "description": "Optional regex to apply on the header to extract its value. The regex should define a capture group defining the wait time.", + "type": "string", + "examples": ["([-+]?\\d+)"] + }, + "max_waiting_time_in_seconds": { + "title": "Max Waiting Time in Seconds", + "description": "Given the value extracted from the header is greater than this value, stop the stream.", + "type": "number", + "examples": [3600] + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "GroupingPartitionRouter": { + "title": "Grouping Partition Router", + "description": "A decorator on top of a partition router that groups partitions into batches of a specified size. This is useful for APIs that support filtering by multiple partition keys in a single request. Note that per-partition incremental syncs may not work as expected because the grouping of partitions might change between syncs, potentially leading to inconsistent state tracking.\n", + "type": "object", + "required": ["type", "group_size", "underlying_partition_router"], + "properties": { + "type": { + "type": "string", + "enum": ["GroupingPartitionRouter"] + }, + "group_size": { + "title": "Group Size", + "description": "The number of partitions to include in each group. This determines how many partition values are batched together in a single slice.", + "type": "integer", + "examples": [10, 50] + }, + "underlying_partition_router": { + "title": "Underlying Partition Router", + "description": "The partition router whose output will be grouped. 
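The `SubstreamPartitionRouter` description above uses car brands and their models as its example; in a manifest that reads roughly as follows, optionally wrapped in the `GroupingPartitionRouter` decorator to batch several brands per request. The `brands_stream` reference and the `brand_id` parameter name are hypothetical.

```yaml
partition_router:
  type: GroupingPartitionRouter           # optional decorator; batches partitions
  group_size: 10
  underlying_partition_router:
    type: SubstreamPartitionRouter
    parent_stream_configs:
      - type: ParentStreamConfig
        stream:
          $ref: "#/definitions/brands_stream"
        parent_key: id                    # field on each parent (brand) record
        partition_field: brand_id         # exposed as stream_partition['brand_id']
        request_option:
          type: RequestOption
          inject_into: request_parameter
          field_name: brand_id
        incremental_dependency: false
```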
This can be any valid partition router component.", + "anyOf": [ + { + "$ref": "#/definitions/ListPartitionRouter" + }, + { + "$ref": "#/definitions/SubstreamPartitionRouter" + }, + { + "$ref": "#/definitions/CustomPartitionRouter" + } + ] + }, + "deduplicate": { + "title": "Deduplicate Partitions", + "description": "If true, ensures that partitions are unique within each group by removing duplicates based on the partition key.", + "type": "boolean", + "default": true + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "WaitUntilTimeFromHeader": { + "title": "Wait Until Time Defined In Response Header", + "description": "Extract time at which we can retry the request from response header and wait for the difference between now and that time.", + "type": "object", + "required": ["type", "header"], + "properties": { + "type": { + "type": "string", + "enum": ["WaitUntilTimeFromHeader"] + }, + "header": { + "title": "Response Header", + "description": "The name of the response header defining how long to wait before retrying.", + "type": "string", + "interpolation_context": ["config"], + "examples": ["wait_time"] + }, + "min_wait": { + "title": "Minimum Wait Time", + "description": "Minimum time to wait before retrying.", + "anyOf": [ + { + "type": "number" + }, + { + "type": "string" + } + ], + "interpolation_context": ["config"], + "examples": [10, "60"] + }, + "regex": { + "title": "Extraction Regex", + "description": "Optional regex to apply on the header to extract its value. The regex should define a capture group defining the wait time.", + "type": "string", + "interpolation_context": ["config"], + "examples": ["([-+]?\\d+)"] + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "ComponentMappingDefinition": { + "title": "Component Mapping Definition", + "description": "(This component is experimental. Use at your own risk.) Specifies a mapping definition to update or add fields in a record or configuration. This allows dynamic mapping of data by interpolating values into the template based on provided contexts.", + "type": "object", + "required": ["type", "field_path", "value"], + "properties": { + "type": { + "type": "string", + "enum": ["ComponentMappingDefinition"] + }, + "field_path": { + "title": "Field Path", + "description": "A list of potentially nested fields indicating the full path where value will be added or updated.", + "type": "array", + "items": { + "type": "string" + }, + "interpolation_context": [ + "config", + "components_values", + "stream_slice", + "stream_template_config" + ], + "examples": [ + ["data"], + ["data", "records"], + ["data", 1, "name"], + ["data", "{{ components_values.name }}"], + ["data", "*", "record"], + ["*", "**", "name"] + ] + }, + "value": { + "title": "Value", + "description": "The dynamic or static value to assign to the key. Interpolated values can be used to dynamically determine the value during runtime.", + "type": "string", + "interpolation_context": [ + "config", + "stream_template_config", + "components_values", + "stream_slice" + ], + "examples": [ + "{{ components_values['updates'] }}", + "{{ components_values['MetaData']['LastUpdatedTime'] }}", + "{{ config['segment_id'] }}", + "{{ stream_slice['parent_id'] }}", + "{{ stream_slice['extra_fields']['name'] }}" + ] + }, + "value_type": { + "title": "Value Type", + "description": "The expected data type of the value. 
If omitted, the type will be inferred from the value provided.", + "$ref": "#/definitions/ValueType" + }, + "create_or_update": { + "title": "Create or Update", + "description": "Determines whether to create a new path if it doesn't exist (true) or only update existing paths (false). When set to true, the resolver will create new paths in the stream template if they don't exist. When false (default), it will only update existing paths.", + "type": "boolean", + "default": false + }, + "condition": { + "title": "Condition", + "description": "A condition that must be met for the mapping to be applied. This property is only supported for `ConfigComponentsResolver`.", + "type": "string", + "interpolation_context": [ + "config", + "stream_template_config", + "components_values", + "stream_slice" + ], + "examples": [ + "{{ components_values.get('cursor_field', None) }}", + "{{ '_incremental' in components_values.get('stream_name', '') }}" + ] + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "HttpComponentsResolver": { + "type": "object", + "title": "Http Components Resolver", + "description": "(This component is experimental. Use at your own risk.) Component resolve and populates stream templates with components fetched via an HTTP retriever.", + "properties": { + "type": { + "type": "string", + "enum": ["HttpComponentsResolver"] + }, + "retriever": { + "title": "Retriever", + "description": "Component used to coordinate how records are extracted across stream slices and request pages.", + "anyOf": [ + { + "$ref": "#/definitions/SimpleRetriever" + }, + { + "$ref": "#/definitions/AsyncRetriever" + }, + { + "$ref": "#/definitions/CustomRetriever" + } + ] + }, + "components_mapping": { + "type": "array", + "items": { + "$ref": "#/definitions/ComponentMappingDefinition" + } + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + }, + "required": ["type", "retriever", "components_mapping"] + }, + "StreamConfig": { + "title": "Stream Config", + "description": "(This component is experimental. Use at your own risk.) Describes how to get streams config from the source config.", + "type": "object", + "required": ["type", "configs_pointer"], + "properties": { + "type": { + "type": "string", + "enum": ["StreamConfig"] + }, + "configs_pointer": { + "title": "Configs Pointer", + "description": "A list of potentially nested fields indicating the full path in source config file where streams configs located.", + "type": "array", + "items": { + "type": "string" + }, + "interpolation_context": ["parameters"], + "examples": [ + ["data"], + ["data", "streams"], + ["data", "{{ parameters.name }}"] + ] + }, + "default_values": { + "title": "Default Values", + "description": "A list of default values, each matching the structure expected from the parsed component value.", + "type": "array", + "items": { + "type": "object" + } + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + } + }, + "ConfigComponentsResolver": { + "type": "object", + "title": "Config Components Resolver", + "description": "(This component is experimental. Use at your own risk.) 
Resolves and populates stream templates with components fetched from the source config.", + "properties": { + "type": { + "type": "string", + "enum": ["ConfigComponentsResolver"] + }, + "stream_config": { + "anyOf": [ + { + "type": "array", + "items": { + "$ref": "#/definitions/StreamConfig" + } + }, + { + "$ref": "#/definitions/StreamConfig" + } + ] + }, + "components_mapping": { + "type": "array", + "items": { + "$ref": "#/definitions/ComponentMappingDefinition" + } + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + }, + "required": ["type", "stream_config", "components_mapping"] + }, + "StreamParametersDefinition": { + "title": "Stream Parameters Definition", + "description": "(This component is experimental. Use at your own risk.) Represents a stream parameters definition to set up dynamic streams from defined values in manifest.", + "type": "object", + "required": ["type", "list_of_parameters_for_stream"], + "properties": { + "type": { + "type": "string", + "enum": ["StreamParametersDefinition"] + }, + "list_of_parameters_for_stream": { + "title": "Stream Parameters", + "description": "A list of object of parameters for stream, each object in the list represents params for one stream.", + "type": "array", + "items": { + "type": "object" + }, + "examples": [ + [ + { + "name": "test stream", + "$parameters": { + "entity": "test entity" + }, + "primary_key": "test key" + } + ] + ] + } + } + }, + "ParametrizedComponentsResolver": { + "type": "object", + "title": "Parametrized Components Resolver", + "description": "(This component is experimental. Use at your own risk.) Resolves and populates dynamic streams from defined parametrized values in manifest.", + "properties": { + "type": { + "type": "string", + "enum": ["ParametrizedComponentsResolver"] + }, + "stream_parameters": { + "$ref": "#/definitions/StreamParametersDefinition" + }, + "components_mapping": { + "type": "array", + "items": { + "$ref": "#/definitions/ComponentMappingDefinition" + } + }, + "$parameters": { + "type": "object", + "additionalProperties": true + } + }, + "required": ["type", "stream_parameters", "components_mapping"] + }, + "DynamicDeclarativeStream": { + "type": "object", + "description": "(This component is experimental. Use at your own risk.) A component that described how will be created declarative streams based on stream template.", + "properties": { + "type": { + "type": "string", + "enum": ["DynamicDeclarativeStream"] + }, + "name": { + "title": "Name", + "description": "The dynamic stream name.", + "type": "string", + "default": "", + "example": ["Tables"] + }, + "stream_template": { + "title": "Stream Template", + "description": "Reference to the stream template.", + "anyOf": [ + { + "$ref": "#/definitions/DeclarativeStream" + }, + { + "$ref": "#/definitions/StateDelegatingStream" + } + ] + }, + "components_resolver": { + "title": "Components Resolver", + "description": "Component resolve and populates stream templates with components values.", + "anyOf": [ + { + "$ref": "#/definitions/HttpComponentsResolver" + }, + { + "$ref": "#/definitions/ConfigComponentsResolver" + }, + { + "$ref": "#/definitions/ParametrizedComponentsResolver" + } + ] + }, + "use_parent_parameters": { + "title": "Use Parent Parameters", + "description": "Whether or not to prioritize parent parameters over component parameters when constructing dynamic streams. 
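The resolvers above feed a `DynamicDeclarativeStream`. A sketch using the `ParametrizedComponentsResolver`, where the stream template, the entity names, and the requester `path` being rewritten are all assumptions:

```yaml
dynamic_streams:
  - type: DynamicDeclarativeStream
    name: tables
    stream_template:
      $ref: "#/definitions/table_stream_template"
    components_resolver:
      type: ParametrizedComponentsResolver
      stream_parameters:
        type: StreamParametersDefinition
        list_of_parameters_for_stream:
          - name: customers
            entity: customers
          - name: orders
            entity: orders
      components_mapping:
        - type: ComponentMappingDefinition
          field_path: ["name"]
          value: "{{ components_values['name'] }}"
        - type: ComponentMappingDefinition
          field_path: ["retriever", "requester", "path"]
          value: "/{{ components_values['entity'] }}"
```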
Defaults to true for backward compatibility.", + "type": "boolean", + "default": true + } + }, + "required": ["type", "stream_template", "components_resolver"] + }, + "RequestBodyPlainText": { + "title": "Plain-text Body", + "description": "Request body value is sent as plain text", + "type": "object", + "required": ["type", "value"], + "properties": { + "type": { + "type": "string", + "enum": ["RequestBodyPlainText"] + }, + "value": { + "type": "string" + } + } + }, + "RequestBodyUrlEncodedForm": { + "title": "URL-encoded Body", + "description": "Request body value is converted into a url-encoded form", + "type": "object", + "required": ["type", "value"], + "properties": { + "type": { + "type": "string", + "enum": ["RequestBodyUrlEncodedForm"] + }, + "value": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + }, + "RequestBodyJsonObject": { + "title": "Json Object Body", + "description": "Request body value converted into a JSON object", + "type": "object", + "required": ["type", "value"], + "properties": { + "type": { + "type": "string", + "enum": ["RequestBodyJsonObject"] + }, + "value": { + "type": "object", + "additionalProperties": true + } + } + }, + "RequestBodyGraphQL": { + "title": "GraphQL Body", + "description": "Request body value converted into a GraphQL query object", + "type": "object", + "required": ["type", "value"], + "properties": { + "type": { + "type": "string", + "enum": ["RequestBodyGraphQL"] + }, + "value": { + "$ref": "#/definitions/RequestBodyGraphQlQuery" + } + } + }, + "RequestBodyGraphQlQuery": { + "title": "GraphQL Query Body", + "description": "Request body GraphQL query object", + "type": "object", + "required": ["query"], + "properties": { + "query": { + "type": "string", + "description": "The GraphQL query to be executed", + "default": "query {\n \n}" + } + }, + "additionalProperties": true + }, + "DpathValidator": { + "title": "Dpath Validator", + "description": "Validator that extracts the value located at a given field path.", + "type": "object", + "required": ["type", "field_path", "validation_strategy"], + "properties": { + "type": { + "type": "string", + "enum": ["DpathValidator"] + }, + "field_path": { + "title": "Field Path", + "description": "List of potentially nested fields describing the full path of the field to validate. Use \"*\" to validate all values from an array.", + "type": "array", + "items": { + "type": "string" + }, + "interpolation_context": ["config"], + "examples": [ + ["data"], + ["data", "records"], + ["data", "{{ parameters.name }}"], + ["data", "*", "record"] + ] + }, + "validation_strategy": { + "title": "Validation Strategy", + "description": "The condition that the specified config value will be evaluated against", + "anyOf": [ + { + "$ref": "#/definitions/ValidateAdheresToSchema" + }, + { + "$ref": "#/definitions/CustomValidationStrategy" + } + ] + } + } + }, + "PredicateValidator": { + "title": "Predicate Validator", + "description": "Validator that applies a validation strategy to a specified value.", + "type": "object", + "required": ["type", "value", "validation_strategy"], + "properties": { + "type": { + "type": "string", + "enum": ["PredicateValidator"] + }, + "value": { + "title": "Value", + "description": "The value to be validated. 
Can be a literal value or interpolated from configuration.", + "type": ["string", "number", "object", "array", "boolean", "null"], + "interpolation_context": ["config"], + "examples": [ + "test-value", + "{{ config['api_version'] }}", + "{{ config['tenant_id'] }}", + 123 + ] + }, + "validation_strategy": { + "title": "Validation Strategy", + "description": "The validation strategy to apply to the value.", + "anyOf": [ + { + "$ref": "#/definitions/ValidateAdheresToSchema" + }, + { + "$ref": "#/definitions/CustomValidationStrategy" + } + ] + } + } + }, + "ValidateAdheresToSchema": { + "title": "Validate Adheres To Schema", + "description": "Validates that a user-provided schema adheres to a specified JSON schema.", + "type": "object", + "required": ["type", "base_schema"], + "properties": { + "type": { + "type": "string", + "enum": ["ValidateAdheresToSchema"] + }, + "base_schema": { + "title": "Base JSON Schema", + "description": "The base JSON schema against which the user-provided schema will be validated.", + "type": ["string", "object"], + "interpolation_context": ["config"], + "examples": [ + "{{ config['report_validation_schema'] }}", + "'{\n \"$schema\": \"http://json-schema.org/draft-07/schema#\",\n \"title\": \"Person\",\n \"type\": \"object\",\n \"properties\": {\n \"name\": {\n \"type\": \"string\",\n \"description\": \"The person's name\"\n },\n \"age\": {\n \"type\": \"integer\",\n \"minimum\": 0,\n \"description\": \"The person's age\"\n }\n },\n \"required\": [\"name\", \"age\"]\n}'\n", + { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Person", + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "The person's name" + }, + "age": { + "type": "integer", + "minimum": 0, + "description": "The person's age" + } + }, + "required": ["name", "age"] + } + ] + } + } + }, + "CustomValidationStrategy": { + "title": "Custom Validation Strategy", + "description": "Custom validation strategy that allows for custom validation logic.", + "type": "object", + "additionalProperties": true, + "required": ["type", "class_name"], + "properties": { + "type": { + "type": "string", + "enum": ["CustomValidationStrategy"] + }, + "class_name": { + "title": "Class Name", + "description": "Fully-qualified name of the class that will be implementing the custom validation strategy. Has to be a sub class of ValidationStrategy. The format is `source_..`.", + "type": "string", + "additionalProperties": true, + "examples": [ + "source_declarative_manifest.components.MyCustomValidationStrategy" + ] + } + } + }, + "ConfigRemapField": { + "title": "Remap Field", + "description": "Transformation that remaps a field's value to another value based on a static map.", + "type": "object", + "required": ["type", "map", "field_path"], + "properties": { + "type": { + "type": "string", + "enum": ["ConfigRemapField"] + }, + "map": { + "title": "Value Mapping", + "description": "A mapping of original values to new values. When a field value matches a key in this map, it will be replaced with the corresponding value.", + "interpolation_context": ["config"], + "type": ["object", "string"], + "additionalProperties": true, + "examples": [ + { + "pending": "in_progress", + "done": "completed", + "cancelled": "terminated" + }, + "{{ config['status_mapping'] }}" + ] + }, + "field_path": { + "title": "Field Path", + "description": "The path to the field whose value should be remapped. 
Specified as a list of path components to navigate through nested objects.", + "interpolation_context": ["config"], + "type": "array", + "items": { + "type": "string" + }, + "examples": [ + ["status"], + ["data", "status"], + ["data", "{{ config.name }}", "status"], + ["data", "*", "status"] + ] + } + } + }, + "ConfigAddFields": { + "title": "Config Add Fields", + "description": "Transformation that adds fields to a config. The path of the added field can be nested.", + "type": "object", + "required": ["type", "fields"], + "properties": { + "type": { + "type": "string", + "enum": ["ConfigAddFields"] + }, + "fields": { + "title": "Fields", + "description": "A list of transformations (path and corresponding value) that will be added to the config.", + "type": "array", + "items": { + "$ref": "#/definitions/AddedFieldDefinition" + } + }, + "condition": { + "description": "Fields will be added if expression is evaluated to True.", + "type": "string", + "default": "", + "interpolation_context": ["config", "property"], + "examples": [ + "{{ config['environemnt'] == 'sandbox' }}", + "{{ property is integer }}", + "{{ property|length > 5 }}", + "{{ property == 'some_string_to_match' }}" + ] + } + } + }, + "ConfigRemoveFields": { + "title": "Config Remove Fields", + "description": "Transformation that removes a field from the config.", + "type": "object", + "required": ["type", "field_pointers"], + "properties": { + "type": { + "type": "string", + "enum": ["ConfigRemoveFields"] + }, + "field_pointers": { + "title": "Field Pointers", + "description": "A list of field pointers to be removed from the config.", + "type": "array", + "items": { + "type": "array", + "items": { + "type": "string" + } + }, + "examples": [ + ["tags"], + [ + ["content", "html"], + ["content", "plain_text"] + ] + ] + }, + "condition": { + "description": "Fields will be removed if expression is evaluated to True.", + "type": "string", + "default": "", + "interpolation_context": ["config", "property"], + "examples": [ + "{{ config['environemnt'] == 'sandbox' }}", + "{{ property is integer }}", + "{{ property|length > 5 }}", + "{{ property == 'some_string_to_match' }}" + ] + } + } + }, + "CustomConfigTransformation": { + "title": "Custom Config Transformation", + "description": "A custom config transformation that can be used to transform the connector configuration.", + "type": "object", + "required": ["type", "class_name"], + "properties": { + "type": { + "type": "string", + "enum": ["CustomConfigTransformation"] + }, + "class_name": { + "type": "string", + "description": "Fully-qualified name of the class that will be implementing the custom config transformation. The format is `source_..`.", + "examples": [ + "source_declarative_manifest.components.MyCustomConfigTransformation" + ] + }, + "$parameters": { + "type": "object", + "description": "Additional parameters to be passed to the custom config transformation.", + "additionalProperties": true + } + } + } + }, + "interpolation": { + "variables": [ + { + "title": "config", + "description": "The connector configuration. The object's keys are the same as the the keys defined in the connection specification.", + "type": "object", + "examples": [ + { + "start_date": "2010-01-01", + "api_key": "*****" + } + ] + }, + { + "title": "parameters", + "description": "Additional runtime parameters, to be used for string interpolation. Parameters can be passed down from a parent component to its subcomponents using the $parameters key. 
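The config-level transformations and validators above are attached to the `Spec` through its `config_normalization_rules` block (shown earlier in this schema). A sketch with hypothetical config fields and mapped values:

```yaml
spec:
  type: Spec
  connection_specification: {}              # user-facing JSON schema, omitted here
  config_normalization_rules:
    type: ConfigNormalizationRules
    transformations:
      - type: ConfigRemapField
        field_path: ["environment"]
        map:
          sandbox: "https://sandbox.example.com"
          production: "https://api.example.com"
      - type: ConfigRemoveFields
        field_pointers: [["legacy_token"]]
    validations:
      - type: DpathValidator
        field_path: ["start_date"]
        validation_strategy:
          type: ValidateAdheresToSchema
          base_schema:
            type: string
            format: date
```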
This can be used to avoid repetitions.", + "type": "object", + "examples": [ + { + "path": "automations", + "data_export_path": "automations", + "cursor_field": "updated_at" + } + ] + }, + { + "title": "headers", + "description": "The HTTP headers from the last response received from the API. The object's keys are the header names from the response.", + "type": "object", + "examples": [ + { + "Server": "nginx", + "Date": "Mon, 24 Apr 2023 20:17:21 GMT", + "Content-Type": "application/json", + "Content-Length": "420", + "Connection": "keep-alive", + "referrer-policy": "strict-origin-when-cross-origin", + "x-content-type-options": "nosniff", + "x-ratelimit-limit": "600", + "x-ratelimit-remaining": "598", + "x-ratelimit-reset": "39" + } + ] + }, + { + "title": "last_record", + "description": "Last record extracted from the response received from the API.", + "type": "object", + "examples": [ + { + "name": "Test List: 19", + "id": "0236d6d2", + "contact_count": 20, + "_metadata": { + "self": "https://api.sendgrid.com/v3/marketing/lists/0236d6d2" + } + } + ] + }, + { + "title": "last_page_size", + "description": "Number of records extracted from the last response received from the API.", + "type": "object", + "examples": [2] + }, + { + "title": "next_page_token", + "description": "Object describing the token to fetch the next page of records. The object has a single key \"next_page_token\".", + "type": "object", + "examples": [ + { + "next_page_token": 3 + }, + { + "next_page_token": "https://api.sendgrid.com/v3/marketing/lists/0236d6d2-75d2-42c5-962d-603e0deaf8d1" + } + ] + }, + { + "title": "record", + "description": "The record being processed. The object's keys are the same keys as the records produced by the RecordSelector.", + "type": "object" + }, + { + "title": "response", + "description": "The body of the last response received from the API. The object's keys are the same keys as the response body's.", + "type": "object", + "examples": [ + { + "result": [ + { + "name": "Test List: 19", + "id": "0236d6d2-75d2-42c5-962d-603e0deaf8d1", + "contact_count": 20, + "_metadata": { + "self": "https://api.sendgrid.com/v3/marketing/lists/0236d6d2" + } + } + ], + "_metadata": { + "self": "https://api.sendgrid.com/v3/marketing/lists?page_size=1&page_token=", + "next": "https://api.sendgrid.com/v3/marketing/lists?page_size=1&page_token=0236d6d2", + "count": 82 + } + } + ] + }, + { + "title": "creation_response", + "description": "The response received from the creation_requester in the AsyncRetriever component.", + "type": "object", + "examples": [ + { + "id": "1234" + } + ] + }, + { + "title": "polling_response", + "description": "The response received from the polling_requester in the AsyncRetriever component.", + "type": "object", + "examples": [ + { + "id": "1234" + } + ] + }, + { + "title": "download_target", + "description": "The `URL` received from the polling_requester in the AsyncRetriever with jobStatus as `COMPLETED`.", + "type": "string", + "examples": [ + "https://api.sendgrid.com/v3/marketing/lists?page_size=1&page_token=0236d6d2&filename=xxx_yyy_zzz.csv" + ] + }, + { + "title": "stream_interval", + "description": "The current stream interval being processed. The keys are defined by the incremental sync component. 
Default keys are `start_time` and `end_time`.", + "type": "object", + "examples": [ + { + "start_time": "2020-01-01 00:00:00.000+00:00", + "end_time": "2020-01-02 00:00:00.000+00:00" + } + ] + }, + { + "title": "stream_partition", + "description": "The current stream partition being processed. The keys are defined by the partition router component.", + "type": "object", + "examples": [ + { + "survey_id": 1234 + }, + { + "strategy": "DESKTOP" + }, + { + "survey_id": 1234, + "strategy": "MOBILE" + } + ] + }, + { + "title": "stream_slice", + "description": "This variable is deprecated. Use stream_interval or stream_partition instead.", + "type": "object" + } + ], + "macros": [ + { + "title": "now_utc", + "description": "Returns the current date and time in the UTC timezone.", + "arguments": {}, + "return_type": "Datetime", + "examples": [ + "'{{ now_utc() }}' -> '2021-09-01 00:00:00+00:00'", + "'{{ now_utc().strftime('%Y-%m-%d') }}' -> '2021-09-01'" + ] + }, + { + "title": "today_utc", + "description": "Returns the current date in UTC timezone. The output is a date object.", + "arguments": {}, + "return_type": "Date", + "examples": [ + "'{{ today_utc() }}' -> '2021-09-01'", + "'{{ today_utc().strftime('%Y/%m/%d')}}' -> '2021/09/01'" + ] + }, + { + "title": "timestamp", + "description": "Converts a number or a string representing a datetime (formatted as ISO8601) to a timestamp. If the input is a number, it is converted to an int. If no timezone is specified, the string is interpreted as UTC.", + "arguments": { + "datetime": "A string formatted as ISO8601 or an integer representing a unix timestamp" + }, + "return_type": "int", + "examples": [ + "'{{ timestamp(1646006400) }}' -> 1646006400", + "'{{ timestamp('2022-02-28') }}' -> 1646006400", + "'{{ timestamp('2022-02-28T00:00:00Z') }}' -> 1646006400", + "'{{ timestamp('2022-02-28 00:00:00Z') }}' -> 1646006400", + "'{{ timestamp('2022-02-28T00:00:00-08:00') }}' -> 1646035200" + ] + }, + { + "title": "max", + "description": "Returns the largest object of a iterable, or or two or more arguments.", + "arguments": { + "args": "iterable or a sequence of two or more arguments" + }, + "return_type": "Any", + "examples": ["'{{ max(2, 3) }}' -> 3", "'{{ max([2, 3]) }}' -> 3"] + }, + { + "title": "day_delta", + "description": "Returns the datetime of now() + num_days.", + "arguments": { + "num_days": "The number of days to add to now", + "format": "How to format the output string" + }, + "return_type": "str", + "examples": [ + "'{{ day_delta(1) }}' -> '2021-09-02T00:00:00.000000+0000'", + "'{{ day_delta(-1) }}' -> '2021-08-31:00:00.000000+0000'", + "'{{ day_delta(25, format='%Y-%m-%d') }}' -> '2021-09-02'" + ] + }, + { + "title": "duration", + "description": "Converts an ISO8601 duration to datetime timedelta.", + "arguments": { + "duration_string": "A string representing an ISO8601 duration. See https://www.digi.com/resources/documentation/digidocs//90001488-13/reference/r_iso_8601_duration_format.htm for more details." + }, + "return_type": "datetime.timedelta", + "examples": [ + "'{{ duration('P1D') }}' -> '1 day, 0:00:00'", + "'{{ duration('P6DT23H') }}' -> '6 days, 23:00:00'", + "'{{ (now_utc() - duration('P1D')).strftime('%Y-%m-%dT%H:%M:%SZ') }}' -> '2021-08-31T00:00:00Z'" + ] + }, + { + "title": "format_datetime", + "description": "Converts a datetime or a datetime-string to the specified format.", + "arguments": { + "datetime": "The datetime object or a string to convert. 
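The interpolation variables and macros listed above usually meet inside request options. A hedged sketch: `request_parameters` belongs to the `HttpRequester` definition outside this excerpt, and the parameter names are invented.

```yaml
request_parameters:
  api_key: "{{ config['api_key'] }}"
  updated_since: "{{ format_datetime(stream_interval['start_time'], '%Y-%m-%d') }}"
  brand: "{{ stream_partition['brand_id'] }}"
  requested_at: "{{ now_utc().strftime('%Y-%m-%dT%H:%M:%SZ') }}"
```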
If datetime is a string, it must be formatted as ISO8601.", +          "format": "The datetime format.", +          "input_format": "(optional) The datetime format, in case the input is a string." +        }, +        "return_type": "str", +        "examples": [ +          "{{ format_datetime(config['start_time'], '%Y-%m-%d') }}", +          "{{ format_datetime(config['start_date'], '%Y-%m-%dT%H:%M:%S.%fZ') }}", +          "{{ format_datetime(config['start_date'], '%Y-%m-%dT%H:%M:%S.%fZ', '%a, %d %b %Y %H:%M:%S %z') }}" +        ] +      }, +      { +        "title": "str_to_datetime", +        "description": "Converts a string to a datetime object with UTC timezone.", +        "arguments": { +          "s": "The string to convert." +        }, +        "return_type": "datetime.datetime", +        "examples": [ +          "{{ str_to_datetime('2022-01-14') }}", +          "{{ str_to_datetime('2022-01-01 13:45:30') }}", +          "{{ str_to_datetime('2022-01-01T13:45:30+00:00') }}", +          "{{ str_to_datetime('2022-01-01T13:45:30.123456Z') }}" +        ] +      } +    ], +    "filters": [ +      { +        "title": "hash", +        "description": "Convert the specified value to a hashed string.", +        "arguments": { +          "hash_type": "The hash type to use for the conversion ('md5' by default).", +          "salt": "An additional value to further protect sensitive data." +        }, +        "return_type": "str", +        "examples": [ +          "{{ 'Test client_secret' | hash() }} -> '3032d57a12f76b61a820e47b9a5a0cbb'", +          "{{ 'Test client_secret' | hash('md5') }} -> '3032d57a12f76b61a820e47b9a5a0cbb'", +          "{{ 'Test client_secret' | hash('md5', salt='salt') }} -> '5011a0168579c2d94cbbe1c6ad14327c'" +        ] +      }, +      { +        "title": "base64encode", +        "description": "Convert the specified value to a string in the base64 format.", +        "arguments": {}, +        "return_type": "str", +        "examples": [ +          "{{ 'Test client_secret' | base64encode }} -> 'VGVzdCBjbGllbnRfc2VjcmV0'" +        ] +      }, +      { +        "title": "base64decode", +        "description": "Decodes the specified base64-encoded value into a plain string.", +        "arguments": {}, +        "return_type": "str", +        "examples": [ +          "{{ 'ZmFrZSByZWZyZXNoX3Rva2VuIHZhbHVl' | base64decode }} -> 'fake refresh_token value'" +        ] +      }, +      { +        "title": "string", +        "description": "Converts the specified value to a string.", +        "arguments": {}, +        "return_type": "str", +        "examples": [ +          "{{ 1 | string }} -> \"1\"", +          "{{ [\"hello\", \"world\"] | string }} -> \"[\"hello\", \"world\"]\"" +        ] +      }, +      { +        "title": "regex_search", +        "description": "Match the input string against a regular expression and return the first match.", +        "arguments": { +          "regex": "The regular expression to search for. It must include a capture group." +        }, +        "return_type": "str", +        "examples": [ +          "{{ \"goodbye, cruel world\" | regex_search(\"goodbye,\\s(.*)$\") }} -> \"cruel world\"" +        ] +      } +    ] +  } +} diff --git a/airbyte_cdk/sources/declarative/models/declarative_component_schema.py b/airbyte_cdk/sources/declarative/models/declarative_component_schema.py index fb004a65e..0db30a710 100644 --- a/airbyte_cdk/sources/declarative/models/declarative_component_schema.py +++ b/airbyte_cdk/sources/declarative/models/declarative_component_schema.py @@ -1,5 +1,3 @@ -# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
- # generated by datamodel-codegen: # filename: declarative_component_schema.yaml diff --git a/bin/generate-component-manifest-dagger.sh b/bin/generate-component-manifest-dagger.sh deleted file mode 100755 index f920ff727..000000000 --- a/bin/generate-component-manifest-dagger.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env bash - -# We need to run this script in a docker container because we need to use a -# specific version of datamodel-codegen that generates pydantic v1 models (correctly). -# The newer datamodel-codegen's "pydantic v1" models are different than those v1 models -# generated by the older version of datamodel-codegen. - -set -e - -pip install dagger-io==0.13.3 -python bin/generate_component_manifest_files.py diff --git a/bin/generate_component_manifest_files.py b/bin/generate_component_manifest_files.py index 51b3d8efb..82bbaf441 100755 --- a/bin/generate_component_manifest_files.py +++ b/bin/generate_component_manifest_files.py @@ -1,23 +1,33 @@ +#!/usr/bin/env python3 +# +# Usage: +# > uv run bin/generate_component_manifest_files.py +# +# /// script +# dependencies = [ +# "datamodel-code-generator==0.26.3", +# "PyYAML>=6.0.1", +# ] +# /// + # Copyright (c) 2024 Airbyte, Inc., all rights reserved. +import json +import os import re +import shutil +import subprocess import sys +import tempfile from glob import glob from pathlib import Path -import anyio -import dagger +import yaml -PYTHON_IMAGE = "python:3.10" LOCAL_YAML_DIR_PATH = "airbyte_cdk/sources/declarative" LOCAL_OUTPUT_DIR_PATH = "airbyte_cdk/sources/declarative/models" -PIP_DEPENDENCIES = [ - "datamodel_code_generator==0.26.3", -] - - def get_all_yaml_files_without_ext() -> list[str]: return [Path(f).stem for f in glob(f"{LOCAL_YAML_DIR_PATH}/*.yaml")] @@ -29,6 +39,37 @@ def generate_init_module_content() -> str: return header +def generate_json_schema(): + """Generate JSON schema from the YAML file for schemastore.org registration. + + When registered with schemastore.org, a number of IDEs and libraries + automatically apply the JSON Schema validation features such as: + - auto-complete for keys and enums + - hover-tooltips for descriptions and examples + - linting squiggles for validation errors + """ + yaml_file_path = f"{LOCAL_YAML_DIR_PATH}/declarative_component_schema.yaml" + json_file_path = f"{LOCAL_YAML_DIR_PATH}/generated/declarative_component_schema.json" + + with open(yaml_file_path, "r") as yaml_file: + schema_data = yaml.safe_load(yaml_file) + + class DateTimeEncoder(json.JSONEncoder): + def default(self, obj): + if hasattr(obj, "isoformat"): + return obj.isoformat() + return super().default(obj) + + import os + + os.makedirs(os.path.dirname(json_file_path), exist_ok=True) + + with open(json_file_path, "w") as json_file: + json.dump(schema_data, json_file, indent=2, cls=DateTimeEncoder) + + print(f"Generated JSON schema: {json_file_path}") + + def replace_base_model_for_classes_with_deprecated_fields(post_processed_content: str) -> str: """ Replace the base model for classes with deprecated fields. 
@@ -86,15 +127,18 @@ def replace_base_model_for_classes_with_deprecated_fields(post_processed_content return post_processed_content -async def post_process_codegen(codegen_container: dagger.Container): - codegen_container = codegen_container.with_exec( - ["mkdir", "/generated_post_processed"], use_entrypoint=True - ) - for generated_file in await codegen_container.directory("/generated").entries(): +def post_process_codegen(generated_dir: str, post_processed_dir: str): + """Post-process generated files to fix pydantic imports and deprecated fields.""" + os.makedirs(post_processed_dir, exist_ok=True) + + for generated_file in os.listdir(generated_dir): if generated_file.endswith(".py"): - original_content = await codegen_container.file( - f"/generated/{generated_file}" - ).contents() + input_path = os.path.join(generated_dir, generated_file) + output_path = os.path.join(post_processed_dir, generated_file) + + with open(input_path, "r") as f: + original_content = f.read() + # the space before _parameters is intentional to avoid replacing things like `request_parameters:` with `requestparameters:` post_processed_content = original_content.replace( " _parameters:", " parameters:" @@ -104,55 +148,65 @@ async def post_process_codegen(codegen_container: dagger.Container): post_processed_content ) - codegen_container = codegen_container.with_new_file( - f"/generated_post_processed/{generated_file}", contents=post_processed_content - ) - return codegen_container + with open(output_path, "w") as f: + f.write(post_processed_content) -async def main(): +def main(): + generate_json_schema() init_module_content = generate_init_module_content() - async with dagger.Connection(dagger.Config(log_output=sys.stderr)) as dagger_client: - codegen_container = ( - dagger_client.container() - .from_(PYTHON_IMAGE) - .with_exec(["mkdir", "/generated"], use_entrypoint=True) - .with_exec(["pip", "install", " ".join(PIP_DEPENDENCIES)], use_entrypoint=True) - .with_mounted_directory( - "/yaml", dagger_client.host().directory(LOCAL_YAML_DIR_PATH, include=["*.yaml"]) - ) - .with_new_file("/generated/__init__.py", contents=init_module_content) - ) - for yaml_file in get_all_yaml_files_without_ext(): - codegen_container = codegen_container.with_exec( - [ - "datamodel-codegen", - "--input", - f"/yaml/{yaml_file}.yaml", - "--output", - f"/generated/{yaml_file}.py", - "--disable-timestamp", - "--enum-field-as-literal", - "one", - "--set-default-enum-member", - "--use-double-quotes", - "--remove-special-field-name-prefix", - # allow usage of the extra key such as `deprecated`, etc. - "--field-extra-keys", - # account the `deprecated` flag provided for the field. - "deprecated", - # account the `deprecation_message` provided for the field. 
- "deprecation_message", - ], - use_entrypoint=True, - ) + with tempfile.TemporaryDirectory() as temp_dir: + generated_dir = os.path.join(temp_dir, "generated") + post_processed_dir = os.path.join(temp_dir, "generated_post_processed") - await ( - (await post_process_codegen(codegen_container)) - .directory("/generated_post_processed") - .export(LOCAL_OUTPUT_DIR_PATH) - ) + os.makedirs(generated_dir, exist_ok=True) + init_file_path = os.path.join(generated_dir, "__init__.py") + with open(init_file_path, "w") as f: + f.write(init_module_content) -anyio.run(main) + for yaml_file in get_all_yaml_files_without_ext(): + input_yaml = os.path.join(LOCAL_YAML_DIR_PATH, f"{yaml_file}.yaml") + output_py = os.path.join(generated_dir, f"{yaml_file}.py") + + cmd = [ + "datamodel-codegen", + "--input", + input_yaml, + "--output", + output_py, + "--disable-timestamp", + "--enum-field-as-literal", + "one", + "--set-default-enum-member", + "--use-double-quotes", + "--remove-special-field-name-prefix", + # allow usage of the extra key such as `deprecated`, etc. + "--field-extra-keys", + # account the `deprecated` flag provided for the field. + "deprecated", + # account the `deprecation_message` provided for the field. + "deprecation_message", + ] + + try: + result = subprocess.run(cmd, check=True, capture_output=True, text=True) + print(f"Generated {output_py}") + except subprocess.CalledProcessError as e: + print(f"Error generating {output_py}: {e}") + print(f"stdout: {e.stdout}") + print(f"stderr: {e.stderr}") + sys.exit(1) + + post_process_codegen(generated_dir, post_processed_dir) + + if os.path.exists(LOCAL_OUTPUT_DIR_PATH): + shutil.rmtree(LOCAL_OUTPUT_DIR_PATH) + shutil.copytree(post_processed_dir, LOCAL_OUTPUT_DIR_PATH) + + print(f"Generated models exported to {LOCAL_OUTPUT_DIR_PATH}") + + +if __name__ == "__main__": + main() diff --git a/pyproject.toml b/pyproject.toml index 1e578f7cc..d7a4f7bba 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -143,7 +143,7 @@ lock = { shell = "poetry lock", help = "Lock all dependencies." } pre-commit = {cmd = "poetry run pre-commit run --all-files", help = "Run all pre-commit hooks on all files."} # Build tasks -assemble = {cmd = "bin/generate-component-manifest-dagger.sh", help = "Generate component manifest files."} +assemble = {cmd = "uv run bin/generate_component_manifest_files.py", help = "Generate component manifest files."} build-package = {cmd = "poetry build", help = "Build the python package: source and wheels archives."} build = {sequence = ["assemble", "openapi-generate", "build-package"], help = "Run all tasks to build the package."}