Skip to content

Commit 6d5a0e6

Browse files
fix: Exclude some stream fields to avoid leaking sensitive data (#17)
* fix: Exclude `pipelines` field `properties` to avoid leaking sensitive data in the stream

  Signed-off-by: Edgar Ramírez Mondragón <edgarrm358@gmail.com>

* fix: Exclude additional fields

  Signed-off-by: Edgar Ramírez Mondragón <edgarrm358@gmail.com>

---------

Signed-off-by: Edgar Ramírez Mondragón <edgarrm358@gmail.com>
1 parent ef6f75a commit 6d5a0e6

8 files changed

Lines changed: 144 additions & 116 deletions

File tree

pyproject.toml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
build-backend = "hatchling.build"
33
requires = [
44
"hatch-vcs>=0.5",
5-
"hatchling>=1.29.0,<2",
5+
"hatchling>=1.29,<2",
66
]
77

88
[project]
@@ -52,7 +52,7 @@ test = [
5252
"pytest>=9",
5353
"pytest-github-actions-annotate-failures>=0.3",
5454
"singer-sdk[testing]",
55-
"syrupy>=5.1.0",
55+
"syrupy>=5.1",
5656
]
5757
typing = [
5858
"mypy>=1.19.1",
@@ -78,7 +78,7 @@ select = [ "ALL" ]
7878
allow-star-arg-any = true
7979
[tool.ruff.lint.per-file-ignores]
8080
"tests/**/*.py" = [
81-
"S101", # assert is used by pytest
81+
"S101", # assert is used by pytest
8282
]
8383
[tool.ruff.lint.pydocstyle]
8484
convention = "google"

tap_meltano_cloud/streams/base.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,3 +76,65 @@ def get_stream_schema(self, *args: Any, **kwargs: Any) -> dict:
7676
"type": ["string", "null"],
7777
}
7878
return schema
79+
80+
81+
class _WorkspaceSchema(StreamSchema[str]):
    """Workspace stream schema with credential-bearing fields removed.

    The ``deploymentSecret`` and ``sshPrivateKey`` properties may hold
    sensitive credentials. They are dropped from the schema until their values
    can be reliably obfuscated.
    See https://github.com/MeltanoLabs/tap-meltano-cloud/issues/15
    """

    @override
    def get_stream_schema(self, *args: Any, **kwargs: Any) -> dict:
        stream_schema = super().get_stream_schema(*args, **kwargs)
        fields = stream_schema["properties"]
        # A default of None keeps removal a no-op when a field is already absent.
        for sensitive in ("deploymentSecret", "sshPrivateKey"):
            fields.pop(sensitive, None)
        return stream_schema
95+
96+
97+
class _PipelineSchema(_WorkspaceChildSchema):
    """Pipeline stream schema without the ``properties`` field.

    Pipeline ``properties`` may carry sensitive data and cannot currently be
    obfuscated reliably, so the field is left out of the schema for now.
    See https://github.com/MeltanoLabs/tap-meltano-cloud/issues/15
    """

    @override
    def get_stream_schema(self, *args: Any, **kwargs: Any) -> dict:
        stream_schema = super().get_stream_schema(*args, **kwargs)
        fields = stream_schema["properties"]
        # The inner "properties" key is the pipeline field being excluded,
        # not the JSON Schema "properties" container looked up above.
        fields.pop("properties", None)
        return stream_schema
110+
111+
112+
class _DataComponentSchema(_WorkspaceChildSchema):
    """Data component stream schema without the ``properties`` field.

    Data component ``properties`` may carry sensitive data and cannot currently
    be obfuscated reliably, so the field is left out of the schema for now.
    See https://github.com/MeltanoLabs/tap-meltano-cloud/issues/15
    """

    @override
    def get_stream_schema(self, *args: Any, **kwargs: Any) -> dict:
        stream_schema = super().get_stream_schema(*args, **kwargs)
        fields = stream_schema["properties"]
        # The inner "properties" key is the data component field being
        # excluded, not the JSON Schema "properties" container looked up above.
        fields.pop("properties", None)
        return stream_schema
125+
126+
127+
class _DataStoreSchema(_WorkspaceChildSchema):
    """Data store stream schema with sensitive fields removed.

    Both ``jdbcUrl`` and ``properties`` may expose credentials or connection
    strings. They are dropped from the schema until their values can be
    reliably obfuscated.
    See https://github.com/MeltanoLabs/tap-meltano-cloud/issues/15
    """

    @override
    def get_stream_schema(self, *args: Any, **kwargs: Any) -> dict:
        stream_schema = super().get_stream_schema(*args, **kwargs)
        fields = stream_schema["properties"]
        # A default of None keeps removal a no-op when a field is already absent.
        for sensitive in ("jdbcUrl", "properties"):
            fields.pop(sensitive, None)
        return stream_schema

tap_meltano_cloud/streams/by_workspace.py

Lines changed: 44 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,26 @@
22

33
from __future__ import annotations
44

5-
from typing import Any
5+
import sys
6+
from typing import TYPE_CHECKING, Any
67

7-
from singer_sdk import StreamSchema
8+
from .base import (
9+
OPENAPI_SCHEMA,
10+
MeltanoCloudStream,
11+
_DataComponentSchema,
12+
_DataStoreSchema,
13+
_PipelineSchema,
14+
_WorkspaceChildSchema,
15+
_WorkspaceSchema,
16+
)
817

9-
from .base import OPENAPI_SCHEMA, MeltanoCloudStream, _WorkspaceChildSchema
18+
if sys.version_info >= (3, 12):
19+
from typing import override
20+
else:
21+
from typing_extensions import override
22+
23+
if TYPE_CHECKING:
24+
from singer_sdk.helpers.types import Context
1025

1126

1227
class _ByWorkspaceStream(MeltanoCloudStream):
@@ -28,7 +43,13 @@ class WorkspacesStream(_ByWorkspaceStream):
2843
name = "workspaces"
2944
path = "/workspaces/{workspaceId}"
3045
records_jsonpath = "$"
31-
schema = StreamSchema(OPENAPI_SCHEMA, key="WorkspaceResource")
46+
schema = _WorkspaceSchema(OPENAPI_SCHEMA, key="WorkspaceResource")
47+
48+
@override
def post_process(self, row: dict, context: Context | None = None) -> dict | None:
    # Strip credential-bearing keys from the record before delegating to the
    # parent implementation; keys that are absent are skipped silently.
    for sensitive in ("deploymentSecret", "sshPrivateKey"):
        row.pop(sensitive, None)
    processed = super().post_process(row, context)
    return processed
3253

3354

3455
class PipelinesStream(_ByWorkspaceStream):
@@ -37,7 +58,12 @@ class PipelinesStream(_ByWorkspaceStream):
3758
name = "pipelines"
3859
path = "/workspaces/{workspaceId}/pipelines"
3960
records_jsonpath = "$._embedded.pipelines[*]"
40-
schema = _WorkspaceChildSchema(OPENAPI_SCHEMA, key="PipelineResource")
61+
schema = _PipelineSchema(OPENAPI_SCHEMA, key="PipelineResource")
62+
63+
@override
def post_process(self, row: dict, context: Context | None = None) -> dict | None:
    # Remove the pipeline's "properties" key (if present) from the record
    # before delegating to the parent implementation.
    row.pop("properties", None)
    processed = super().post_process(row, context)
    return processed
4167

4268

4369
class DatasetsStream(_ByWorkspaceStream):
@@ -73,7 +99,13 @@ class DataStoresStream(_ByWorkspaceStream):
7399
name = "datastores"
74100
path = "/workspaces/{workspaceId}/datastores"
75101
records_jsonpath = "$._embedded.datastores[*]"
76-
schema = _WorkspaceChildSchema(OPENAPI_SCHEMA, key="DataStoreResource")
102+
schema = _DataStoreSchema(OPENAPI_SCHEMA, key="DataStoreResource")
103+
104+
@override
def post_process(self, row: dict, context: Context | None = None) -> dict | None:
    # Strip the connection string and free-form properties from the record
    # before delegating to the parent implementation; absent keys are skipped.
    for sensitive in ("jdbcUrl", "properties"):
        row.pop(sensitive, None)
    processed = super().post_process(row, context)
    return processed
77109

78110

79111
class DataComponentsStream(_ByWorkspaceStream):
@@ -82,4 +114,9 @@ class DataComponentsStream(_ByWorkspaceStream):
82114
name = "datacomponents"
83115
path = "/workspaces/{workspaceId}/datacomponents"
84116
records_jsonpath = "$._embedded.datacomponents[*]"
85-
schema = _WorkspaceChildSchema(OPENAPI_SCHEMA, key="DataComponentResource")
117+
schema = _DataComponentSchema(OPENAPI_SCHEMA, key="DataComponentResource")
118+
119+
@override
def post_process(self, row: dict, context: Context | None = None) -> dict | None:
    # Remove the data component's "properties" key (if present) from the
    # record before delegating to the parent implementation.
    row.pop("properties", None)
    processed = super().post_process(row, context)
    return processed

tap_meltano_cloud/streams/me.py

Lines changed: 35 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,15 @@
55
import sys
66
from typing import TYPE_CHECKING, Any
77

8-
from singer_sdk import StreamSchema
9-
10-
from .base import OPENAPI_SCHEMA, MeltanoCloudStream, _WorkspaceChildSchema
8+
from .base import (
9+
OPENAPI_SCHEMA,
10+
MeltanoCloudStream,
11+
_DataComponentSchema,
12+
_DataStoreSchema,
13+
_PipelineSchema,
14+
_WorkspaceChildSchema,
15+
_WorkspaceSchema,
16+
)
1117

1218
if sys.version_info >= (3, 12):
1319
from typing import override
@@ -26,7 +32,7 @@ class WorkspacesStream(MeltanoCloudStream):
2632
name = "workspaces"
2733
path = "/workspaces"
2834
records_jsonpath = "$._embedded.workspaces[*]"
29-
schema = StreamSchema(OPENAPI_SCHEMA, key="WorkspaceResource")
35+
schema = _WorkspaceSchema(OPENAPI_SCHEMA, key="WorkspaceResource")
3036

3137
@override
3238
def generate_child_contexts(
@@ -37,6 +43,12 @@ def generate_child_contexts(
3743
"""Generate child contexts for workspace-scoped streams."""
3844
yield {"workspaceId": record["id"]}
3945

46+
@override
def post_process(self, row: dict, context: Context | None = None) -> dict | None:
    # Strip credential-bearing keys from the record before delegating to the
    # parent implementation; keys that are absent are skipped silently.
    for sensitive in ("deploymentSecret", "sshPrivateKey"):
        row.pop(sensitive, None)
    processed = super().post_process(row, context)
    return processed
51+
4052

4153
class _WorkspaceChildStream(MeltanoCloudStream):
4254
"""Base class for workspace-child streams driven by the me WorkspacesStream."""
@@ -50,7 +62,12 @@ class PipelinesStream(_WorkspaceChildStream):
5062
name = "pipelines"
5163
path = "/workspaces/{workspaceId}/pipelines"
5264
records_jsonpath = "$._embedded.pipelines[*]"
53-
schema = _WorkspaceChildSchema(OPENAPI_SCHEMA, key="PipelineResource")
65+
schema = _PipelineSchema(OPENAPI_SCHEMA, key="PipelineResource")
66+
67+
@override
def post_process(self, row: dict, context: Context | None = None) -> dict | None:
    # Remove the pipeline's "properties" key (if present) from the record
    # before delegating to the parent implementation.
    row.pop("properties", None)
    processed = super().post_process(row, context)
    return processed
5471

5572

5673
class DatasetsStream(_WorkspaceChildStream):
@@ -86,7 +103,13 @@ class DataStoresStream(_WorkspaceChildStream):
86103
name = "datastores"
87104
path = "/workspaces/{workspaceId}/datastores"
88105
records_jsonpath = "$._embedded.datastores[*]"
89-
schema = _WorkspaceChildSchema(OPENAPI_SCHEMA, key="DataStoreResource")
106+
schema = _DataStoreSchema(OPENAPI_SCHEMA, key="DataStoreResource")
107+
108+
@override
def post_process(self, row: dict, context: Context | None = None) -> dict | None:
    # Strip the connection string and free-form properties from the record
    # before delegating to the parent implementation; absent keys are skipped.
    for sensitive in ("jdbcUrl", "properties"):
        row.pop(sensitive, None)
    processed = super().post_process(row, context)
    return processed
90113

91114

92115
class DataComponentsStream(_WorkspaceChildStream):
@@ -95,4 +118,9 @@ class DataComponentsStream(_WorkspaceChildStream):
95118
name = "datacomponents"
96119
path = "/workspaces/{workspaceId}/datacomponents"
97120
records_jsonpath = "$._embedded.datacomponents[*]"
98-
schema = _WorkspaceChildSchema(OPENAPI_SCHEMA, key="DataComponentResource")
121+
schema = _DataComponentSchema(OPENAPI_SCHEMA, key="DataComponentResource")
122+
123+
@override
def post_process(self, row: dict, context: Context | None = None) -> dict | None:
    # Remove the data component's "properties" key (if present) from the
    # record before delegating to the parent implementation.
    row.pop("properties", None)
    processed = super().post_process(row, context)
    return processed

tests/__snapshots__/test_schema_evolution/test_catalog_changes[datacomponents].json

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -39,15 +39,6 @@
3939
"inclusion": "available"
4040
}
4141
},
42-
{
43-
"breadcrumb": [
44-
"properties",
45-
"properties"
46-
],
47-
"metadata": {
48-
"inclusion": "available"
49-
}
50-
},
5142
{
5243
"breadcrumb": [
5344
"properties",
@@ -30963,15 +30954,6 @@
3096330954
"null"
3096430955
]
3096530956
},
30966-
"properties": {
30967-
"additionalProperties": {
30968-
"type": "string"
30969-
},
30970-
"type": [
30971-
"object",
30972-
"null"
30973-
]
30974-
},
3097530957
"repositoryPath": {
3097630958
"type": [
3097730959
"string",

tests/__snapshots__/test_schema_evolution/test_catalog_changes[datastores].json

Lines changed: 0 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -39,15 +39,6 @@
3939
"inclusion": "available"
4040
}
4141
},
42-
{
43-
"breadcrumb": [
44-
"properties",
45-
"properties"
46-
],
47-
"metadata": {
48-
"inclusion": "available"
49-
}
50-
},
5142
{
5243
"breadcrumb": [
5344
"properties",
@@ -149,15 +140,6 @@
149140
"inclusion": "available"
150141
}
151142
},
152-
{
153-
"breadcrumb": [
154-
"properties",
155-
"jdbcUrl"
156-
],
157-
"metadata": {
158-
"inclusion": "available"
159-
}
160-
},
161143
{
162144
"breadcrumb": [
163145
"properties",
@@ -30971,12 +30953,6 @@
3097130953
"format": "uuid",
3097230954
"type": "string"
3097330955
},
30974-
"jdbcUrl": {
30975-
"type": [
30976-
"string",
30977-
"null"
30978-
]
30979-
},
3098030956
"lastModified": {
3098130957
"format": "date-time",
3098230958
"type": [
@@ -31002,15 +30978,6 @@
3100230978
"null"
3100330979
]
3100430980
},
31005-
"properties": {
31006-
"additionalProperties": {
31007-
"type": "string"
31008-
},
31009-
"type": [
31010-
"object",
31011-
"null"
31012-
]
31013-
},
3101430981
"repositoryPath": {
3101530982
"type": [
3101630983
"string",

tests/__snapshots__/test_schema_evolution/test_catalog_changes[pipelines].json

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -93,15 +93,6 @@
9393
"inclusion": "available"
9494
}
9595
},
96-
{
97-
"breadcrumb": [
98-
"properties",
99-
"properties"
100-
],
101-
"metadata": {
102-
"inclusion": "available"
103-
}
104-
},
10596
{
10697
"breadcrumb": [
10798
"properties",
@@ -330,15 +321,6 @@
330321
"null"
331322
]
332323
},
333-
"properties": {
334-
"additionalProperties": {
335-
"type": "string"
336-
},
337-
"type": [
338-
"object",
339-
"null"
340-
]
341-
},
342324
"repositoryPath": {
343325
"type": [
344326
"string",

0 commit comments

Comments
 (0)