Skip to content

Commit 9112131

Browse files
authored
Merge pull request #150 from vcon-dev/pavankumar/con-365-investigate-broken-vcon-sets-that-were-incorrectly-accepted
fix(CON-365): reject malformed vCons at POST /api/vcon
2 parents f663f91 + a7a6c30 commit 9112131

7 files changed

Lines changed: 241 additions & 20 deletions

File tree

api/api.py

Lines changed: 91 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
"""
2222

2323
import os
24+
import re
2425
import traceback
2526
from typing import Dict, List, Optional
2627
from uuid import UUID
@@ -38,7 +39,7 @@
3839
PostgresqlExtDatabase,
3940
UUIDField,
4041
)
41-
from pydantic import BaseModel, ConfigDict
42+
from pydantic import BaseModel, ConfigDict, field_validator, model_validator
4243
from starlette.status import HTTP_403_FORBIDDEN
4344

4445
from config import Configuration
@@ -278,6 +279,79 @@ async def get_queue_depth(
278279
raise HTTPException(status_code=500, detail="Failed to get queue depth")
279280

280281

282+
# --- vCon field validation constants & helpers ---
283+
284+
# Algorithm identifiers accepted in a dialog entry's ``alg`` field: the
# SHA-256/384/512 digest names plus the HS/RS/ES/PS 256/384/512 identifiers.
_VALID_ALG = frozenset({
    "SHA-256", "SHA-384", "SHA-512",
    "HS256", "HS384", "HS512",
    "RS256", "RS384", "RS512",
    "ES256", "ES384", "ES512",
    "PS256", "PS384", "PS512",
})

# ``type/subtype`` media type with no parameters. The pattern is anchored with
# ``\Z`` rather than ``$``: ``$`` also matches just before a trailing newline,
# which would let a value such as "audio/x-wav" followed by "\n" through.
_MIME_RE = re.compile(
    r'^[a-zA-Z0-9][a-zA-Z0-9!\#$&\-^_]*/[a-zA-Z0-9][a-zA-Z0-9!\#$&\-^_.+]*\Z'
)

# Loose URL shape check: a scheme starting with a letter, then "://", then at
# least one more character. Nothing after that first character is inspected.
_URL_RE = re.compile(r'^[a-zA-Z][a-zA-Z0-9+\-.]*://.+')

# Loose telephone check: a leading "+", digit or "(" followed by at least four
# characters drawn from digits, whitespace and common dialling punctuation.
_TEL_RE = re.compile(r'^[+\d(][\d\s\-().+xX#*]{4,}$')
296+
297+
298+
class DialogEntry(BaseModel):
    """A single dialog entry within a vCon.

    Only the fields declared here are validated; any other keys on the entry
    are kept untouched (``extra='allow'``). Every declared field is optional
    and ``None`` always passes its validator.

    Validation failures are raised as ``ValueError`` from the field
    validators below:

    * ``duration`` must be a finite number >= 0.
    * ``url`` must match ``_URL_RE``.
    * ``mimetype`` must match ``_MIME_RE``.
    * ``alg`` must be a member of ``_VALID_ALG``.
    """
    model_config = ConfigDict(extra='allow')

    duration: Optional[float] = None
    start: Optional[str] = None
    parties: Optional[List[int]] = None
    url: Optional[str] = None
    mimetype: Optional[str] = None
    alg: Optional[str] = None

    @field_validator("duration")
    @classmethod
    def duration_non_negative(cls, v):
        """Reject negative, NaN and infinite durations; pass ``None`` through."""
        if v is None:
            return v
        # NaN compares false against everything, so a bare ``v < 0`` check
        # would accept it. Python's json parser accepts the NaN / Infinity
        # literals by default, so such values can reach this validator.
        if not (float("-inf") < v < float("inf")):
            raise ValueError("duration must be a finite number")
        if v < 0:
            raise ValueError("duration must be >= 0")
        return v

    @field_validator("url")
    @classmethod
    def url_valid(cls, v):
        """Require ``url`` to look like ``scheme://...`` when it is present."""
        if v is not None and not _URL_RE.match(v):
            raise ValueError(f"url does not look like a valid URL: {v!r}")
        return v

    @field_validator("mimetype")
    @classmethod
    def mimetype_valid(cls, v):
        """Require ``mimetype`` to be a ``type/subtype`` token when present."""
        if v is not None and not _MIME_RE.match(v):
            raise ValueError(f"mimetype has invalid format: {v!r}")
        return v

    @field_validator("alg")
    @classmethod
    def alg_known(cls, v):
        """Require ``alg`` to be one of the identifiers in ``_VALID_ALG``."""
        if v is not None and v not in _VALID_ALG:
            raise ValueError(f"alg {v!r} is not a recognised algorithm")
        return v
336+
337+
338+
class PartyEntry(BaseModel):
    """One element of a vCon's ``parties`` list.

    ``tel`` is the only field checked here; all other keys supplied for the
    party are retained unchanged because the model allows extra fields.
    """
    model_config = ConfigDict(extra='allow')

    tel: Optional[str] = None

    @field_validator("tel")
    @classmethod
    def tel_valid(cls, v):
        """Return ``v`` unchanged, raising ``ValueError`` if it fails ``_TEL_RE``.

        A missing (``None``) number is accepted without inspection.
        """
        if v is None:
            return v
        if _TEL_RE.match(v):
            return v
        raise ValueError(f"tel has invalid format: {v!r}")
350+
351+
352+
# --- end vCon field validation ---
353+
354+
281355
class Vcon(BaseModel):
282356
"""Pydantic model representing a vCon (Voice Conversation) record.
283357
@@ -302,11 +376,25 @@ class Vcon(BaseModel):
302376
redacted: dict = {}
303377
appended: Optional[dict] = None
304378
group: List[Dict] = []
305-
parties: List[Dict] = []
306-
dialog: List[Dict] = []
379+
parties: List[PartyEntry] = []
380+
dialog: List[DialogEntry] = []
307381
analysis: List[Dict] = []
308382
attachments: List[Dict] = []
309383

384+
@model_validator(mode='after')
def check_party_refs(self) -> 'Vcon':
    """Verify that dialog entries only point at parties that exist.

    Each integer in a dialog entry's ``parties`` list is treated as an index
    into ``self.parties``. Entries whose ``parties`` is ``None`` or empty are
    not checked.

    Returns:
        The model instance itself, unchanged.

    Raises:
        ValueError: on the first index that is negative or not smaller than
            the number of parties.
    """
    party_count = len(self.parties)
    for dialog_index, entry in enumerate(self.dialog):
        # ``or ()`` makes a missing or empty reference list a no-op.
        for party_index in entry.parties or ():
            if 0 <= party_index < party_count:
                continue
            raise ValueError(
                f"dialog[{dialog_index}].parties contains index {party_index} "
                f"which is out of range (parties has {party_count} entries)"
            )
    return self
397+
310398

311399
if VCON_STORAGE:
312400
class VConPeeWee(Model):
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
{
2+
"uuid": "22222222-2222-2222-2222-222222222222",
3+
"vcon": "0.0.1",
4+
"created_at": "2024-01-01T00:00:00",
5+
"subject": null,
6+
"redacted": {},
7+
"appended": null,
8+
"group": [],
9+
"parties": [
10+
{"name": "Agent", "tel": "+15550001111"},
11+
{"name": "Customer", "tel": "+15550002222"}
12+
],
13+
"dialog": [
14+
{
15+
"type": "recording",
16+
"start": "2024-01-01T00:00:00",
17+
"duration": -30,
18+
"parties": [0, 1],
19+
"mimetype": "audio/x-wav",
20+
"url": "s3://bucket/recording.wav",
21+
"alg": "SHA-512",
22+
"signature": "abc123"
23+
}
24+
],
25+
"analysis": [],
26+
"attachments": []
27+
}
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
{
2+
"uuid": "44444444-4444-4444-4444-444444444444",
3+
"vcon": "0.0.1",
4+
"created_at": "2024-01-01T00:00:00",
5+
"subject": null,
6+
"redacted": {},
7+
"appended": null,
8+
"group": [],
9+
"parties": [
10+
{"name": "Agent", "tel": "+15550001111"},
11+
{"name": "Customer", "tel": "+15550002222"}
12+
],
13+
"dialog": [
14+
{
15+
"type": "recording",
16+
"start": "2024-01-01T00:00:00",
17+
"duration": 120,
18+
"parties": [0, 1],
19+
"mimetype": "not a valid mimetype!",
20+
"url": "s3://bucket/recording.wav",
21+
"alg": "SHA-512",
22+
"signature": "abc123"
23+
}
24+
],
25+
"analysis": [],
26+
"attachments": []
27+
}
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
{
2+
"uuid": "11111111-1111-1111-1111-111111111111",
3+
"vcon": "0.0.1",
4+
"created_at": "2024-01-01T00:00:00",
5+
"subject": null,
6+
"redacted": {},
7+
"appended": null,
8+
"group": [],
9+
"parties": [
10+
{"name": "Agent", "tel": "+15550001111"},
11+
{"name": "Customer", "tel": "+15550002222"}
12+
],
13+
"dialog": [
14+
{
15+
"type": "recording",
16+
"start": "2024-01-01T00:00:00",
17+
"duration": 120,
18+
"parties": [0, 99],
19+
"mimetype": "audio/x-wav",
20+
"url": "s3://bucket/recording.wav",
21+
"alg": "SHA-512",
22+
"signature": "abc123"
23+
}
24+
],
25+
"analysis": [],
26+
"attachments": []
27+
}
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
{
2+
"uuid": "33333333-3333-3333-3333-333333333333",
3+
"vcon": "0.0.1",
4+
"created_at": "2024-01-01T00:00:00",
5+
"subject": null,
6+
"redacted": {},
7+
"appended": null,
8+
"group": [],
9+
"parties": [
10+
{"name": "Agent", "tel": "+15550001111"},
11+
{"name": "Customer", "tel": "+15550002222"}
12+
],
13+
"dialog": [
14+
{
15+
"type": "recording",
16+
"start": "2024-01-01T00:00:00",
17+
"duration": 120,
18+
"parties": [0, 1],
19+
"mimetype": "audio/x-wav",
20+
"url": "not-a-url",
21+
"alg": "SHA-512",
22+
"signature": "abc123"
23+
}
24+
],
25+
"analysis": [],
26+
"attachments": []
27+
}

common/tests/test_api.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
import json
2+
import os
3+
14
from fastapi.testclient import TestClient
25
from vcon_fixture import generate_mock_vcon
36
import pytest
@@ -87,6 +90,26 @@ def test_create_vcon_with_extra_attribute():
8790
assert response.json()["meta"] == {"foo": "bar"}
8891

8992

93+
# Directory, next to this test module, holding deliberately malformed vCons.
_INVALID_FIXTURES_DIR = os.path.join(os.path.dirname(__file__), "invalid_fixtures")


@pytest.mark.parametrize("filename", [
    "bad_party_ref.json",
    "bad_duration.json",
    "bad_url.json",
    "bad_mimetype.json",
])
def test_invalid_vcon_rejected(filename):
    """Malformed vCons must be rejected with 422, not silently accepted.

    Each parametrized fixture is loaded from ``_INVALID_FIXTURES_DIR`` and
    POSTed to ``/vcon``; the response status must be 422.
    """
    fixture_path = os.path.join(_INVALID_FIXTURES_DIR, filename)
    # Explicit encoding so the fixture is read the same way on every platform.
    with open(fixture_path, encoding="utf-8") as f:
        broken_vcon = json.load(f)
    with TestClient(app=api.app, headers={CONSERVER_HEADER_NAME: CONSERVER_API_TOKEN}) as client:
        response = client.post("/vcon", json=broken_vcon)
    # Report the raw body: decoding it as JSON while building the failure
    # message would raise on a non-JSON response and hide the real status.
    assert response.status_code == 422, (
        f"{filename} was unexpectedly accepted: status={response.status_code}, body={response.text}"
    )
111+
112+
90113
@pytest.mark.anyio
91114
def test_post_vcon_with_ingress_list():
92115
# Generate a mock vCon

common/tests/vcon_fixture.py

Lines changed: 19 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,25 @@ def generate_mock_vcon():
2121
"attachments": [],
2222
}
2323

24+
# Generate parties first so dialog can reference valid indices.
25+
# Minimum 2 parties so the hardcoded [0, 1] party references are always valid.
26+
num_parties = random.randint(2, 5)
27+
for i in range(num_parties):
28+
vcon["parties"].append(
29+
{
30+
"tel": fake.phone_number(),
31+
"meta": {"role": random.choice(["agent", "customer"])},
32+
"name": fake.name(),
33+
"stir": None,
34+
"jcard": None,
35+
"gmlpos": None,
36+
"mailto": fake.email(),
37+
"timezone": None,
38+
"validation": None,
39+
"civicaddress": None,
40+
}
41+
)
42+
2443
num_dialogs = random.randint(1, 5)
2544
for i in range(num_dialogs):
2645
vcon["dialog"].append(
@@ -45,23 +64,6 @@ def generate_mock_vcon():
4564
}
4665
)
4766

48-
num_parties = random.randint(1, 5)
49-
for i in range(num_parties):
50-
vcon["parties"].append(
51-
{
52-
"tel": fake.phone_number(),
53-
"meta": {"role": random.choice(["agent", "customer"])},
54-
"name": fake.name(),
55-
"stir": None,
56-
"jcard": None,
57-
"gmlpos": None,
58-
"mailto": fake.email(),
59-
"timezone": None,
60-
"validation": None,
61-
"civicaddress": None,
62-
}
63-
)
64-
6567
num_analysis = random.randint(1, 5)
6668
for i in range(num_analysis):
6769
vcon["analysis"].append(

0 commit comments

Comments
 (0)