Skip to content

Commit db9382d

Browse files
authored
Merge pull request #1237 from Aiven-Open/mbasani-add-test-user-not-found
Improvements to normalization
2 parents 0931e73 + 052ff4c commit db9382d

4 files changed

Lines changed: 181 additions & 27 deletions

File tree

src/karapace/core/instrumentation/path_normalization.py

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -5,24 +5,28 @@
55
See LICENSE for details
66
"""
77

8-
from __future__ import annotations
9-
108
import re
9+
from enum import Enum
10+
11+
12+
class PathPattern(Enum):
13+
"""Regex patterns and their replacements for normalizing dynamic path segments.
14+
15+
Each member is a (pattern, replacement) pair. Patterns are applied in
16+
declaration order -- put broader matches (e.g. UUIDs) before narrower ones.
17+
"""
18+
19+
UUID = (r"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}", "{uuid}")
20+
TOPIC = (r"(/topics/)([^/]+)", r"\1{topic}")
21+
SCHEMA_ID = (r"(/schemas/ids/)(\d+)", r"\1{id}")
22+
SUBJECT = (r"(/subjects/)([^/]+)", r"\1{subject}")
23+
VERSION = (r"(/versions/)(\d+)", r"\1{version}")
24+
CONFIG_SUBJECT = (r"(/config/)([^/]+)", r"\1{subject}")
25+
MODE_SUBJECT = (r"(/mode/)([^/]+)", r"\1{subject}")
1126

12-
# UUIDs: 8-4-4-4-12 hex format
13-
UUID_PATTERN = re.compile(r"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}")
14-
# Topic names after /topics/ - match until next slash or end
15-
TOPIC_PATTERN = re.compile(r"(/topics/)([^/]+)")
16-
# Schema IDs (numeric) after /schemas/ids/
17-
SCHEMA_ID_PATTERN = re.compile(r"(/schemas/ids/)(\d+)")
18-
# Subject names after /subjects/
19-
SUBJECT_PATTERN = re.compile(r"(/subjects/)([^/]+)")
20-
# Version numbers after /versions/
21-
VERSION_PATTERN = re.compile(r"(/versions/)(\d+)")
22-
# Subject names after /config/
23-
CONFIG_SUBJECT_PATTERN = re.compile(r"(/config/)([^/]+)")
24-
# Subject names after /mode/
25-
MODE_SUBJECT_PATTERN = re.compile(r"(/mode/)([^/]+)")
27+
def __init__(self, pattern: str, replacement: str) -> None:
28+
self.regex = re.compile(pattern)
29+
self.replacement = replacement
2630

2731

2832
def normalize_path(path: str) -> str:
@@ -33,17 +37,13 @@ def normalize_path(path: str) -> str:
3337
3438
Examples:
3539
/topics/my-topic -> /topics/{topic}
36-
/consumers/abc-123-def/instances/xyz-456 -> /consumers/{uuid}/instances/{uuid}
40+
/consumers/3f410583-cfa3-48e8-bd9f-22eff1881c00/instances/0333f5a1-d242-42bc-ba35-d7c5d67fc121 -> /consumers/{uuid}/instances/{uuid}
3741
/schemas/ids/42 -> /schemas/ids/{id}
3842
/subjects/my-subject/versions/3 -> /subjects/{subject}/versions/{version}
3943
/config/my-subject -> /config/{subject}
4044
/mode/my-subject -> /mode/{subject}
4145
"""
42-
normalized = UUID_PATTERN.sub("{uuid}", path)
43-
normalized = TOPIC_PATTERN.sub(r"\1{topic}", normalized)
44-
normalized = SCHEMA_ID_PATTERN.sub(r"\1{id}", normalized)
45-
normalized = SUBJECT_PATTERN.sub(r"\1{subject}", normalized)
46-
normalized = VERSION_PATTERN.sub(r"\1{version}", normalized)
47-
normalized = CONFIG_SUBJECT_PATTERN.sub(r"\1{subject}", normalized)
48-
normalized = MODE_SUBJECT_PATTERN.sub(r"\1{subject}", normalized)
46+
normalized = path
47+
for p in PathPattern:
48+
normalized = p.regex.sub(p.replacement, normalized)
4949
return normalized

tests/integration/test_schema_coordinator.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,7 @@ async def test_coordinator_workflow(
167167
session_timeout_ms=10000,
168168
heartbeat_interval_ms=500,
169169
retry_backoff_ms=100,
170+
waiting_time_before_acting_as_master_ms=waiting_time_before_acting_as_master_sec * 1000,
170171
)
171172
coordinator2.start()
172173
assert coordinator2.coordinator_id is None
@@ -205,8 +206,8 @@ async def test_coordinator_workflow(
205206
await primary.close()
206207
await primary_client.close()
207208

208-
now = time.time()
209-
while time.time() - now < waiting_time_before_acting_as_master_sec:
209+
now = time.monotonic()
210+
while time.monotonic() - now < waiting_time_before_acting_as_master_sec:
210211
assert not secondary.are_we_master(), (
211212
f"Cannot become master before {waiting_time_before_acting_as_master_sec} seconds "
212213
f"for the late records that can arrive from the previous master"

tests/unit/instrumentation/test_prometheus.py

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,3 +233,95 @@ async def handler(req):
233233

234234
# Total should NOT be incremented when handler raises a non-HTTP exception
235235
total_metric.labels.assert_not_called()
236+
237+
@patch("karapace.core.instrumentation.prometheus.time")
238+
async def test_http_request_metrics_middleware_normalizes_path(
239+
self,
240+
mock_time: MagicMock,
241+
prometheus: PrometheusInstrumentation,
242+
) -> None:
243+
"""Verify that the middleware normalizes dynamic path segments to prevent unbounded metric cardinality."""
244+
mock_time.time.return_value = 10
245+
246+
(
247+
app_metrics,
248+
in_progress_metric,
249+
in_progress_instance,
250+
duration_metric,
251+
duration_instance,
252+
total_metric,
253+
total_instance,
254+
) = _make_metric_mocks(prometheus)
255+
256+
request = DummyRequest(path="/schemas/ids/878916964", method="GET", app=app_metrics)
257+
258+
response = MagicMock(status=200)
259+
260+
async def handler(req):
261+
return response
262+
263+
await prometheus.http_request_metrics_middleware(request=request, handler=handler)
264+
265+
in_progress_metric.labels.assert_called_with("GET", "/schemas/ids/{id}")
266+
total_metric.labels.assert_called_with("GET", "/schemas/ids/{id}", response.status)
267+
duration_metric.labels.assert_called_with("GET", "/schemas/ids/{id}")
268+
269+
@patch("karapace.core.instrumentation.prometheus.time")
270+
async def test_http_request_metrics_middleware_normalizes_subject_version_path(
271+
self,
272+
mock_time: MagicMock,
273+
prometheus: PrometheusInstrumentation,
274+
) -> None:
275+
mock_time.time.return_value = 10
276+
277+
(
278+
app_metrics,
279+
in_progress_metric,
280+
in_progress_instance,
281+
duration_metric,
282+
duration_instance,
283+
total_metric,
284+
total_instance,
285+
) = _make_metric_mocks(prometheus)
286+
287+
request = DummyRequest(path="/subjects/my-subject/versions/3", method="GET", app=app_metrics)
288+
289+
response = MagicMock(status=200)
290+
291+
async def handler(req):
292+
return response
293+
294+
await prometheus.http_request_metrics_middleware(request=request, handler=handler)
295+
296+
in_progress_metric.labels.assert_called_with("GET", "/subjects/{subject}/versions/{version}")
297+
total_metric.labels.assert_called_with("GET", "/subjects/{subject}/versions/{version}", response.status)
298+
299+
@patch("karapace.core.instrumentation.prometheus.time")
300+
async def test_http_request_metrics_middleware_normalizes_config_subject_path(
301+
self,
302+
mock_time: MagicMock,
303+
prometheus: PrometheusInstrumentation,
304+
) -> None:
305+
mock_time.time.return_value = 10
306+
307+
(
308+
app_metrics,
309+
in_progress_metric,
310+
in_progress_instance,
311+
duration_metric,
312+
duration_instance,
313+
total_metric,
314+
total_instance,
315+
) = _make_metric_mocks(prometheus)
316+
317+
request = DummyRequest(path="/config/journeys_v1-dbx", method="GET", app=app_metrics)
318+
319+
response = MagicMock(status=200)
320+
321+
async def handler(req):
322+
return response
323+
324+
await prometheus.http_request_metrics_middleware(request=request, handler=handler)
325+
326+
in_progress_metric.labels.assert_called_with("GET", "/config/{subject}")
327+
total_metric.labels.assert_called_with("GET", "/config/{subject}", response.status)

tests/unit/test_auth.py

Lines changed: 62 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,18 @@
55

66
import re
77

8-
from karapace.core.auth import ACLAuthorizer, ACLEntry, HashAlgorithm, Operation, User, hash_password
8+
import pytest
9+
10+
from karapace.core.auth import (
11+
ACLAuthorizer,
12+
ACLEntry,
13+
AuthenticationError,
14+
HashAlgorithm,
15+
HTTPAuthorizer,
16+
Operation,
17+
User,
18+
hash_password,
19+
)
920

1021

1122
def test_empty_acl_authorizer() -> None:
@@ -223,3 +234,53 @@ def test_acl_authorizer() -> None:
223234
"Subject:readwrite_subject",
224235
],
225236
)
237+
238+
239+
def test_get_user_returns_none_for_nonexistent_user() -> None:
240+
"""get_user must return None (not raise) for unknown usernames.
241+
242+
Regression test: a previous implementation raised ValueError here,
243+
which bypassed the AuthenticationError handling in authenticate()
244+
and surfaced as an unhandled 500 to clients.
245+
"""
246+
admin_password_hash = hash_password(algorithm=HashAlgorithm.SHA256, salt="salt", plaintext_password="password")
247+
authorizer = ACLAuthorizer(
248+
user_db={
249+
"admin": User(username="admin", algorithm=HashAlgorithm.SHA256, salt="salt", password_hash=admin_password_hash),
250+
},
251+
)
252+
253+
assert authorizer.get_user("admin") is not None
254+
assert authorizer.get_user("nonexistent") is None
255+
256+
257+
def test_authenticate_raises_authentication_error_for_nonexistent_user() -> None:
258+
"""authenticate() must raise AuthenticationError -- not ValueError --
259+
when the user does not exist, so the caller can return a proper 401."""
260+
admin_password_hash = hash_password(algorithm=HashAlgorithm.SHA256, salt="salt", plaintext_password="password")
261+
authorizer = ACLAuthorizer(
262+
user_db={
263+
"admin": User(username="admin", algorithm=HashAlgorithm.SHA256, salt="salt", password_hash=admin_password_hash),
264+
},
265+
)
266+
http_authorizer = HTTPAuthorizer.__new__(HTTPAuthorizer)
267+
http_authorizer.user_db = authorizer.user_db
268+
http_authorizer.permissions = authorizer.permissions
269+
270+
with pytest.raises(AuthenticationError):
271+
http_authorizer.authenticate(username="nonexistent", password="any")
272+
273+
274+
def test_authenticate_raises_authentication_error_for_wrong_password() -> None:
275+
admin_password_hash = hash_password(algorithm=HashAlgorithm.SHA256, salt="salt", plaintext_password="password")
276+
authorizer = ACLAuthorizer(
277+
user_db={
278+
"admin": User(username="admin", algorithm=HashAlgorithm.SHA256, salt="salt", password_hash=admin_password_hash),
279+
},
280+
)
281+
http_authorizer = HTTPAuthorizer.__new__(HTTPAuthorizer)
282+
http_authorizer.user_db = authorizer.user_db
283+
http_authorizer.permissions = authorizer.permissions
284+
285+
with pytest.raises(AuthenticationError):
286+
http_authorizer.authenticate(username="admin", password="wrong")

0 commit comments

Comments
 (0)