Skip to content

Commit fc779f8

Browse files
Create meltano-map-transform (#1)
* first commit * Update readme * Fix docstring in activate_version mapping method Co-authored-by: Aaron ("AJ") Steers <[email protected]> * Update meltano_map_transform/mapper.py Co-authored-by: Aaron ("AJ") Steers <[email protected]> * fix: map activate_version message to all stream aliases and duplicates * use SDK 0.3.18 * add dependabot Co-authored-by: Aaron ("AJ") Steers <[email protected]>
1 parent 51825bf commit fc779f8

File tree

9 files changed

+1566
-2
lines changed

9 files changed

+1566
-2
lines changed

.github/dependabot.yml

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# To get started with Dependabot version updates, you'll need to specify which
2+
# package ecosystems to update and where the package manifests are located.
3+
# Please see the documentation for all configuration options:
4+
# https://help.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
5+
6+
version: 2
7+
updates:
8+
- package-ecosystem: "pip"
9+
directory: "/"
10+
schedule:
11+
interval: "weekly"
12+
time: "13:00"
13+
day: "monday"
14+
timezone: "US/Central"
15+
reviewers:
16+
- "edgarrmondragon"
17+
- "aaronsteers"
18+
labels:
19+
- "dependencies"

.github/workflows/test.yml

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
### A CI workflow template that runs linting via tox (no Python test job is defined here yet)
2+
3+
name: Test
4+
5+
on: [push]
6+
7+
jobs:
8+
linting:
9+
10+
runs-on: ubuntu-latest
11+
env:
12+
SETUPTOOLS_USE_DISTUTILS: stdlib
13+
strategy:
14+
matrix:
15+
# Only lint using the primary version used for dev
16+
python-version: ["3.9"]
17+
18+
steps:
19+
- name: Checkout code
20+
uses: actions/checkout@v2
21+
22+
- name: Set up Python ${{ matrix.python-version }}
23+
uses: actions/setup-python@v2
24+
with:
25+
python-version: ${{ matrix.python-version }}
26+
27+
- name: Install Poetry
28+
uses: snok/install-poetry@v1
29+
with:
30+
version: 1.1.12
31+
32+
- name: Install dependencies
33+
run: |
34+
pip install tox==3.24.4
35+
36+
- name: Run lint command from tox.ini
37+
run: |
38+
tox -e lint

.gitignore

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
# Ignore meltano internal cache and sqlite systemdb
2+
3+
.meltano/
4+
5+
# Byte-compiled / optimized / DLL files
6+
__pycache__/
7+
*.py[cod]
8+
*$py.class
9+
10+
# C extensions
11+
*.so
12+
13+
# Distribution / packaging
14+
.Python
15+
build/
16+
develop-eggs/
17+
dist/
18+
downloads/
19+
eggs/
20+
.eggs/
21+
lib/
22+
lib64/
23+
parts/
24+
sdist/
25+
var/
26+
wheels/
27+
pip-wheel-metadata/
28+
share/python-wheels/
29+
*.egg-info/
30+
.installed.cfg
31+
*.egg
32+
MANIFEST
33+
34+
# PyInstaller
35+
# Usually these files are written by a python script from a template
36+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
37+
*.manifest
38+
*.spec
39+
40+
# Installer logs
41+
pip-log.txt
42+
pip-delete-this-directory.txt
43+
44+
# Unit test / coverage reports
45+
htmlcov/
46+
.tox/
47+
.nox/
48+
.coverage
49+
.coverage.*
50+
.cache
51+
nosetests.xml
52+
coverage.xml
53+
*.cover
54+
*.py,cover
55+
.hypothesis/
56+
.pytest_cache/
57+
58+
# Translations
59+
*.mo
60+
*.pot
61+
62+
# Django stuff:
63+
*.log
64+
local_settings.py
65+
db.sqlite3
66+
db.sqlite3-journal
67+
68+
# Flask stuff:
69+
instance/
70+
.webassets-cache
71+
72+
# Scrapy stuff:
73+
.scrapy
74+
75+
# Sphinx documentation
76+
docs/_build/
77+
78+
# PyBuilder
79+
target/
80+
81+
# Jupyter Notebook
82+
.ipynb_checkpoints
83+
84+
# IPython
85+
profile_default/
86+
ipython_config.py
87+
88+
# pyenv
89+
.python-version
90+
91+
# pipenv
92+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
93+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
94+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
95+
# install all needed dependencies.
96+
#Pipfile.lock
97+
98+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
99+
__pypackages__/
100+
101+
# Celery stuff
102+
celerybeat-schedule
103+
celerybeat.pid
104+
105+
# SageMath parsed files
106+
*.sage.py
107+
108+
# Environments
109+
.env
110+
.venv
111+
env/
112+
venv/
113+
ENV/
114+
env.bak/
115+
venv.bak/
116+
117+
# Spyder project settings
118+
.spyderproject
119+
.spyproject
120+
121+
# Rope project settings
122+
.ropeproject
123+
124+
# mkdocs documentation
125+
/site
126+
127+
# mypy
128+
.mypy_cache/
129+
.dmypy.json
130+
dmypy.json
131+
132+
# Pyre type checker
133+
.pyre/

README.md

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,17 @@
1-
# meltano-map-transform
1+
# `meltano-map-transformer`
2+
23
A map transformer which implements the [`Stream Maps` capability](https://sdk.meltano.com/en/latest/stream_maps.html) from Meltano's tap and target SDK: https://sdk.meltano.com/
34

45
This mapper plugin is fully compliant with the Singer Spec and can be placed in between any Singer tap and target.
56

6-
Status: Under Development
7+
## Capabilities
8+
9+
* `stream-maps`
10+
11+
## Settings
12+
13+
| Setting | Required | Default | Description |
14+
|:------------|:--------:|:-------:|:------------|
15+
| stream_maps | True | None | Stream maps |
16+
17+
A full list of supported settings and capabilities is available by running: `meltano-map-transformer --about`

meltano_map_transform/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
"""A map transformer which implements the Stream Maps capability.
2+
3+
Based on Meltano's tap and target SDK.
4+
"""

meltano_map_transform/mapper.py

Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
"""A sample inline mapper app."""
2+
3+
from pathlib import PurePath
4+
from typing import Generator, List, Optional, Union
5+
6+
import singer
7+
import singer_sdk.typing as th
8+
from singer_sdk.helpers._util import utc_now
9+
from singer_sdk.mapper import PluginMapper
10+
from singer_sdk.mapper_base import InlineMapper
11+
12+
13+
class StreamTransform(InlineMapper):
    """A map transformer which implements the Stream Maps capability.

    Each incoming Singer message is routed through the stream maps registered
    on ``self.mapper`` for the message's stream, and one outgoing message is
    produced per stream map (i.e. per alias or duplicate of the stream).
    """

    name = "meltano-map-transformer"

    # JSON schema for the plugin settings. ``stream_maps`` is an object keyed
    # by arbitrary names; each value may be an object, a string or null. For
    # object values, the reserved ``__filter__``/``__source__``/``__else__``/
    # ``__key_properties__`` keys are typed explicitly and every other key
    # must hold a string or null.
    config_jsonschema = th.PropertiesList(
        th.Property(
            "stream_maps",
            th.ObjectType(
                additional_properties=th.CustomType(
                    {
                        "type": ["object", "string", "null"],
                        "properties": {
                            "__filter__": {"type": ["string", "null"]},
                            "__source__": {"type": ["string", "null"]},
                            "__else__": {"type": ["null"]},
                            "__key_properties__": {
                                "type": ["array", "null"],
                                "items": {"type": "string"},
                            },
                        },
                        "additionalProperties": {"type": ["string", "null"]},
                    }
                )
            ),
            required=True,
            description="Stream maps",
        )
    ).to_dict()

    def __init__(
        self,
        config: Optional[Union[dict, PurePath, str, List[Union[PurePath, str]]]] = None,
        parse_env_config: bool = False,
        validate_config: bool = True,
    ) -> None:
        """Create a new inline mapper.

        Args:
            config: Mapper configuration. Can be a dictionary, a single path to a
                configuration file, or a list of paths to multiple configuration
                files.
            parse_env_config: Whether to look for configuration values in environment
                variables.
            validate_config: True to require validation of config settings.
        """
        super().__init__(
            config=config,
            parse_env_config=parse_env_config,
            validate_config=validate_config,
        )

        # Hand the mapper a plain-dict copy of the resolved plugin config.
        self.mapper = PluginMapper(plugin_config=dict(self.config), logger=self.logger)

    def map_schema_message(
        self,
        message_dict: dict,
    ) -> Generator[singer.Message, None, None]:
        """Map a schema message according to config.

        The raw schema is first registered with the mapper; one SCHEMA message
        is then emitted for every stream map registered under the stream name.

        Args:
            message_dict: A SCHEMA message JSON dictionary.

        Yields:
            Transformed schema messages.
        """
        self._assert_line_requires(message_dict, requires={"stream", "schema"})

        stream_id: str = message_dict["stream"]
        self.mapper.register_raw_stream_schema(
            stream_id,
            message_dict["schema"],
            message_dict.get("key_properties", []),
        )

        # SCHEMA messages carry this field as "bookmark_properties" (the same
        # name singer.SchemaMessage uses). The previous "bookmark_keys" lookup
        # is kept only as a fallback so nothing that relied on it breaks.
        bookmark_properties = (
            message_dict.get("bookmark_properties")
            or message_dict.get("bookmark_keys")
            or []
        )

        for stream_map in self.mapper.stream_maps[stream_id]:
            yield singer.SchemaMessage(
                stream_map.stream_alias,
                stream_map.transformed_schema,
                stream_map.transformed_key_properties,
                bookmark_properties,
            )

    def map_record_message(
        self,
        message_dict: dict,
    ) -> Generator[singer.Message, None, None]:
        """Map a record message according to config.

        The record is passed through every stream map registered under the
        stream name. A stream map whose transform returns ``None`` emits
        nothing for that record.

        Args:
            message_dict: A RECORD message JSON dictionary.

        Yields:
            Transformed record messages.
        """
        self._assert_line_requires(message_dict, requires={"stream", "record"})

        stream_id: str = message_dict["stream"]
        for stream_map in self.mapper.stream_maps[stream_id]:
            mapped_record = stream_map.transform(message_dict["record"])
            if mapped_record is None:
                continue

            # Debug level with lazy formatting: this runs once per record, so
            # logging it at INFO would flood the log output.
            self.logger.debug(
                "Mapped record for stream '%s'", stream_map.stream_alias
            )
            # NOTE: the outgoing message is stamped with utc_now() rather than
            # any "time_extracted" value present on the incoming message.
            yield singer.RecordMessage(
                stream=stream_map.stream_alias,
                record=mapped_record,
                version=message_dict.get("version"),
                time_extracted=utc_now(),
            )

    def map_state_message(self, message_dict: dict) -> List[singer.Message]:
        """Do nothing to the message.

        The state value is forwarded unchanged; ``message_dict`` must contain a
        ``"value"`` key.

        Args:
            message_dict: A STATE message JSON dictionary.

        Returns:
            A single-item list holding a STATE message with the same value.
        """
        return [singer.StateMessage(value=message_dict["value"])]

    def map_activate_version_message(
        self,
        message_dict: dict,
    ) -> Generator[singer.Message, None, None]:
        """Duplicate the message or alias the stream name as defined in configuration.

        Args:
            message_dict: An ACTIVATE_VERSION message JSON dictionary.

        Yields:
            An ACTIVATE_VERSION for each duplicated or aliased stream.
        """
        self._assert_line_requires(message_dict, requires={"stream", "version"})

        stream_id: str = message_dict["stream"]
        version = message_dict["version"]
        for stream_map in self.mapper.stream_maps[stream_id]:
            yield singer.ActivateVersionMessage(
                stream=stream_map.stream_alias,
                version=version,
            )

0 commit comments

Comments
 (0)