-
Notifications
You must be signed in to change notification settings - Fork 20
feat(cdk): add KeyValueExtractor #552
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 2 commits
f081466
50f729a
ae16e6a
3c87c74
4378007
c2c1db3
4cd1e0c
ef6bd7a
58b98ee
15c0ddc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,8 +2,10 @@ | |
# Copyright (c) 2023 Airbyte, Inc., all rights reserved. | ||
# | ||
|
||
from airbyte_cdk.sources.declarative.extractors.combined_extractor import CombinedExtractor | ||
from airbyte_cdk.sources.declarative.extractors.dpath_extractor import DpathExtractor | ||
from airbyte_cdk.sources.declarative.extractors.http_selector import HttpSelector | ||
from airbyte_cdk.sources.declarative.extractors.key_value_extractor import KeyValueExtractor | ||
from airbyte_cdk.sources.declarative.extractors.record_filter import RecordFilter | ||
from airbyte_cdk.sources.declarative.extractors.record_selector import RecordSelector | ||
from airbyte_cdk.sources.declarative.extractors.response_to_file_extractor import ( | ||
|
@@ -18,4 +20,6 @@ | |
"RecordFilter", | ||
"RecordSelector", | ||
"ResponseToFileExtractor", | ||
"KeyValueExtractor", | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Nit: can we alphabetize this? |
||
"CombinedExtractor", | ||
] |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
# | ||
# Copyright (c) 2023 Airbyte, Inc., all rights reserved. | ||
# | ||
|
||
from dataclasses import InitVar, dataclass, field | ||
from itertools import islice | ||
from typing import Any, Iterable, List, Mapping, MutableMapping, Union | ||
|
||
import dpath | ||
import requests | ||
|
||
from airbyte_cdk.sources.declarative.decoders import Decoder, JsonDecoder | ||
from airbyte_cdk.sources.declarative.extractors.record_extractor import RecordExtractor | ||
from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString | ||
from airbyte_cdk.sources.types import Config | ||
|
||
|
||
@dataclass
class CombinedExtractor(RecordExtractor):
    """
    Extractor that merges the records produced by several extractors.

    Every extractor in `extractors` is run against the same response. The
    record streams are then walked in lockstep: the first record of each
    extractor is merged into one mapping, then the second record of each,
    and so on.

    Merging rules:
      * Records are merged in the order the extractors are listed, so when two
        extractors produce the same field, the value from the later extractor
        wins.
      * Iteration stops as soon as the shortest record stream is exhausted;
        surplus records from longer streams are not emitted.
      * With an empty `extractors` list nothing is yielded.

    Example:
      extractors[0] -> yields: {"id": 1}, {"id": 2}
      extractors[1] -> yields: {"name": "Alice"}, {"name": "Bob"}
      result: {"id": 1, "name": "Alice"}, {"id": 2, "name": "Bob"}
    """

    extractors: List[RecordExtractor]

    def extract_records(self, response: requests.Response) -> Iterable[MutableMapping[Any, Any]]:
        """
        Yield one merged record per position across all configured extractors.

        :param response: the HTTP response handed to every extractor
        :return: an iterable of mappings, each combining the fields of the
            records found at the same position in every extractor's output
        """
        record_streams = [extractor.extract_records(response) for extractor in self.extractors]

        # zip() advances all streams together and stops at the shortest one.
        for records in zip(*record_streams):
            merged: MutableMapping[Any, Any] = {}
            for record in records:
                # Later extractors override fields set by earlier ones.
                merged.update(record)
            yield merged
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
# | ||
# Copyright (c) 2025 Airbyte, Inc., all rights reserved. | ||
# | ||
|
||
from dataclasses import dataclass | ||
from itertools import islice | ||
from typing import Any, Iterable, MutableMapping | ||
|
||
import requests | ||
|
||
from airbyte_cdk.sources.declarative.extractors.record_extractor import RecordExtractor | ||
|
||
|
||
@dataclass
class KeyValueExtractor(RecordExtractor):
    """
    Extractor that combines keys and values from two separate extractors.

    The `keys_extractor` and `values_extractor` extract records independently
    from the response. The keys are read once; the values are then consumed in
    consecutive chunks of `len(keys)` items, and every chunk is zipped with the
    keys to form one key-value mapping.

    Each item yielded by `keys_extractor` is used as-is as a dictionary key, and
    each item yielded by `values_extractor` becomes the value at the matching
    position.

    Behavior beyond the simplest case:
      * More values than keys: one record is emitted per full chunk of values.
      * Values not a multiple of the number of keys: the last record only
        contains the leading keys for which a value was available.
      * No keys or no values: nothing is yielded.

    Example:
      keys_extractor -> yields: ["name", "age"]
      values_extractor -> yields: ["Alice", 30, "Bob", 25]
      result: { "name": "Alice", "age": 30 }, { "name": "Bob", "age": 25 }
    """

    keys_extractor: RecordExtractor
    values_extractor: RecordExtractor

    def extract_records(self, response: requests.Response) -> Iterable[MutableMapping[Any, Any]]:
        """
        Yield one mapping per chunk of values, keyed by the extracted keys.

        :param response: the HTTP response handed to both extractors
        :return: an iterable of mappings built by zipping the keys with each
            consecutive chunk of `len(keys)` values
        """
        keys = list(self.keys_extractor.extract_records(response))

        # `extract_records` only promises an Iterable. Without an explicit
        # iterator, `islice` would restart from the first element on every call
        # when an extractor returns a re-iterable container (e.g. a list), and
        # the loop below would never terminate.
        values = iter(self.values_extractor.extract_records(response))

        # Two-argument iter(): keep pulling chunks until an empty one comes
        # back, i.e. the values are exhausted (or there are no keys at all).
        for chunk in iter(lambda: list(islice(values, len(keys))), []):
            yield dict(zip(keys, chunk))
Uh oh!
There was an error while loading. Please reload this page.