Skip to content

Commit 97889f1

Browse files
authored
fix(data-warehouse): redact openweather appid from error messages
OpenWeather sends the API key as the `appid` query param, so `response.url` contains it. `raise_for_status()` embeds that URL in the HTTPError message, which would leak the key into the sync's stored error and logs. Re-raise with the `appid` value redacted (keeping the host prefix intact for non-retryable-error matching). Adds tests for the redaction helper and the end-to-end fetch path. Generated-By: PostHog Code Task-Id: b4a325cd-69a4-4d18-887a-793c05cf7755
1 parent 56b4693 commit 97889f1

2 files changed

Lines changed: 54 additions & 1 deletion

File tree

posthog/temporal/data_imports/sources/openweather/openweather.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import re
12
import dataclasses
23
from collections.abc import Iterator
34
from datetime import UTC, datetime
@@ -75,6 +76,15 @@ def _build_url(path: str, params: dict[str, Any]) -> str:
7576
return f"{OPENWEATHER_BASE_URL}{path}?{urlencode(params)}"
7677

7778

79+
# The API key is passed as the `appid` query param, so it ends up in `response.url`. `raise_for_status()`
80+
# embeds that URL in its message, which would otherwise leak the key into the sync's stored error and logs.
81+
_APPID_RE = re.compile(r"(appid=)[^&\s]+", re.IGNORECASE)
82+
83+
84+
def _redact_appid(text: str) -> str:
85+
return _APPID_RE.sub(r"\1REDACTED", text)
86+
87+
7888
@retry(
7989
retry=retry_if_exception_type((OpenWeatherRetryableError, requests.ReadTimeout, requests.ConnectionError)),
8090
stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
@@ -90,7 +100,12 @@ def _fetch(session: requests.Session, url: str, logger: FilteringBoundLogger) ->
90100

91101
if not response.ok:
92102
logger.error(f"OpenWeather API error: status={response.status_code}, body={response.text}")
93-
response.raise_for_status()
103+
try:
104+
response.raise_for_status()
105+
except requests.HTTPError as exc:
106+
# Re-raise with the `appid` redacted so the key never reaches stored errors / logs, keeping the
107+
# `... for url: https://api.openweathermap.org` prefix intact for `get_non_retryable_errors()`.
108+
raise requests.HTTPError(_redact_appid(str(exc)), response=exc.response) from None
94109

95110
return response.json()
96111

posthog/temporal/data_imports/sources/openweather/tests/test_openweather.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
_dt_to_iso,
1616
_fetch,
1717
_normalize_rows,
18+
_redact_appid,
1819
get_rows,
1920
openweather_source,
2021
parse_locations,
@@ -85,6 +86,20 @@ def test_dt_to_iso(self, dt, expected):
8586
assert _dt_to_iso(dt) == expected
8687

8788

89+
class TestRedactAppid:
90+
@pytest.mark.parametrize(
91+
"text, expected",
92+
[
93+
("https://x/?lat=1&appid=secret", "https://x/?lat=1&appid=REDACTED"),
94+
("https://x/?appid=secret&lat=1", "https://x/?appid=REDACTED&lat=1"),
95+
("APPID=secret", "APPID=REDACTED"), # case-insensitive
96+
("no key here", "no key here"),
97+
],
98+
)
99+
def test_redact(self, text, expected):
100+
assert _redact_appid(text) == expected
101+
102+
88103
class TestBuildUrl:
89104
def test_includes_coords_and_appid(self):
90105
url = _build_url("/data/2.5/weather", {"lat": 51.5, "lon": -0.12, "appid": "secret-key"})
@@ -162,6 +177,29 @@ def test_client_error_raises_for_status(self):
162177
with pytest.raises(requests.HTTPError):
163178
_fetch_once(session, "https://example.com", structlog.get_logger())
164179

180+
def test_error_message_redacts_appid(self):
181+
# The API key is passed as `appid`, so the raise_for_status URL must not leak it into the error.
182+
resp = mock.MagicMock()
183+
resp.status_code = 401
184+
resp.ok = False
185+
resp.text = '{"cod":401,"message":"Invalid API key."}'
186+
resp.raise_for_status.side_effect = requests.HTTPError(
187+
"401 Client Error: Unauthorized for url: "
188+
"https://api.openweathermap.org/data/2.5/weather?lat=51.5&lon=-0.12&appid=SUPERSECRETKEY",
189+
response=requests.Response(),
190+
)
191+
session = mock.MagicMock()
192+
session.get.return_value = resp
193+
194+
with pytest.raises(requests.HTTPError) as exc_info:
195+
_fetch_once(session, "https://example.com", structlog.get_logger())
196+
197+
message = str(exc_info.value)
198+
assert "SUPERSECRETKEY" not in message
199+
assert "appid=REDACTED" in message
200+
# The host prefix is preserved so non-retryable-error matching still works.
201+
assert "for url: https://api.openweathermap.org" in message
202+
165203

166204
class TestValidateCredentials:
167205
@pytest.mark.parametrize(

0 commit comments

Comments
 (0)