Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 91 additions & 3 deletions api/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
"""

import os
import re
import traceback
from typing import Dict, List, Optional
from uuid import UUID
Expand All @@ -38,7 +39,7 @@
PostgresqlExtDatabase,
UUIDField,
)
from pydantic import BaseModel, ConfigDict
from pydantic import BaseModel, ConfigDict, field_validator, model_validator
from starlette.status import HTTP_403_FORBIDDEN

from config import Configuration
Expand Down Expand Up @@ -278,6 +279,79 @@ async def get_queue_depth(
raise HTTPException(status_code=500, detail="Failed to get queue depth")


# --- vCon field validation constants & helpers ---

# Algorithm identifiers accepted for a dialog entry's ``alg`` field.
_VALID_ALG = frozenset({
    "SHA-256", "SHA-384", "SHA-512",
    "HS256", "HS384", "HS512",
    "RS256", "RS384", "RS512",
    "ES256", "ES384", "ES512",
    "PS256", "PS384", "PS512",
})

# All three patterns are anchored with \Z instead of $ (or no anchor at all):
# "$" also matches just before a trailing newline, so "audio/x-wav\n" used to
# validate, and an unanchored ".+" stops at the first newline, so a URL
# followed by arbitrary extra lines used to validate as well.

# "<type>/<subtype>"; the subtype may additionally contain "." and "+".
_MIME_RE = re.compile(
    r'^[a-zA-Z0-9][a-zA-Z0-9!\#$&\-^_]*/[a-zA-Z0-9][a-zA-Z0-9!\#$&\-^_.+]*\Z'
)
# "<scheme>://<anything non-empty on a single line>".
_URL_RE = re.compile(r'^[a-zA-Z][a-zA-Z0-9+\-.]*://.+\Z')
# A leading "+", digit or "(" followed by at least four dialling characters.
# The character class contains \s, so whitespace (including newlines) is
# still accepted inside the number.
_TEL_RE = re.compile(r'^[+\d(][\d\s\-().+xX#*]{4,}\Z')


class DialogEntry(BaseModel):
    """A single dialog entry within a vCon.

    Only the fields declared here are checked; the model is configured with
    ``extra='allow'`` so additional keys are accepted. Every declared field
    is optional, and each validator lets ``None`` through unchanged.
    """
    model_config = ConfigDict(extra='allow')

    duration: Optional[float] = None
    start: Optional[str] = None
    parties: Optional[List[int]] = None
    url: Optional[str] = None
    mimetype: Optional[str] = None
    alg: Optional[str] = None

    @field_validator("duration")
    @classmethod
    def duration_non_negative(cls, v):
        """Return ``v`` if it is ``None`` or >= 0; raise ``ValueError`` otherwise."""
        # Written as ``not v >= 0`` rather than ``v < 0``: every comparison
        # with NaN is False, so ``v < 0`` would let a NaN duration through
        # even though it is not ">= 0".
        if v is not None and not v >= 0:
            raise ValueError("duration must be >= 0")
        return v

    @field_validator("url")
    @classmethod
    def url_valid(cls, v):
        """Return ``v`` if it is ``None`` or matches ``_URL_RE``; else raise ``ValueError``."""
        if v is not None and not _URL_RE.match(v):
            raise ValueError(f"url does not look like a valid URL: {v!r}")
        return v

    @field_validator("mimetype")
    @classmethod
    def mimetype_valid(cls, v):
        """Return ``v`` if it is ``None`` or matches ``_MIME_RE``; else raise ``ValueError``."""
        if v is not None and not _MIME_RE.match(v):
            raise ValueError(f"mimetype has invalid format: {v!r}")
        return v

    @field_validator("alg")
    @classmethod
    def alg_known(cls, v):
        """Return ``v`` if it is ``None`` or a member of ``_VALID_ALG``; else raise ``ValueError``."""
        if v is not None and v not in _VALID_ALG:
            raise ValueError(f"alg {v!r} is not a recognised algorithm")
        return v


class PartyEntry(BaseModel):
    """One party record of a vCon.

    ``tel`` is the only declared field; with ``extra='allow'`` any other
    keys supplied for the party are accepted as well.
    """
    model_config = ConfigDict(extra='allow')

    tel: Optional[str] = None

    @field_validator("tel")
    @classmethod
    def tel_valid(cls, v):
        """Pass ``v`` through when it is ``None`` or matches ``_TEL_RE``.

        Raises:
            ValueError: if ``v`` is a string that ``_TEL_RE`` does not match.
        """
        # Guard clause: the two acceptable cases return immediately.
        if v is None or _TEL_RE.match(v):
            return v
        raise ValueError(f"tel has invalid format: {v!r}")


# --- end vCon field validation ---


class Vcon(BaseModel):
"""Pydantic model representing a vCon (Voice Conversation) record.

Expand All @@ -302,11 +376,25 @@ class Vcon(BaseModel):
redacted: dict = {}
appended: Optional[dict] = None
group: List[Dict] = []
parties: List[Dict] = []
dialog: List[Dict] = []
parties: List[PartyEntry] = []
dialog: List[DialogEntry] = []
analysis: List[Dict] = []
attachments: List[Dict] = []

@model_validator(mode='after')
def check_party_refs(self) -> 'Vcon':
    """Verify that each index in a dialog's ``parties`` list points at an entry of ``self.parties``.

    Returns:
        The model instance itself, unchanged.

    Raises:
        ValueError: on the first index that is negative or not below
            ``len(self.parties)``.
    """
    party_count = len(self.parties)
    for dialog_idx, entry in enumerate(self.dialog):
        # A missing (None) or empty reference list has nothing to check.
        for party_idx in entry.parties or ():
            if 0 <= party_idx < party_count:
                continue
            raise ValueError(
                f"dialog[{dialog_idx}].parties contains index {party_idx} "
                f"which is out of range (parties has {party_count} entries)"
            )
    return self


if VCON_STORAGE:
class VConPeeWee(Model):
Expand Down
27 changes: 27 additions & 0 deletions common/tests/invalid_fixtures/bad_duration.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
{
"uuid": "22222222-2222-2222-2222-222222222222",
"vcon": "0.0.1",
"created_at": "2024-01-01T00:00:00",
"subject": null,
"redacted": {},
"appended": null,
"group": [],
"parties": [
{"name": "Agent", "tel": "+15550001111"},
{"name": "Customer", "tel": "+15550002222"}
],
"dialog": [
{
"type": "recording",
"start": "2024-01-01T00:00:00",
"duration": -30,
"parties": [0, 1],
"mimetype": "audio/x-wav",
"url": "s3://bucket/recording.wav",
"alg": "SHA-512",
"signature": "abc123"
}
],
"analysis": [],
"attachments": []
}
27 changes: 27 additions & 0 deletions common/tests/invalid_fixtures/bad_mimetype.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
{
"uuid": "44444444-4444-4444-4444-444444444444",
"vcon": "0.0.1",
"created_at": "2024-01-01T00:00:00",
"subject": null,
"redacted": {},
"appended": null,
"group": [],
"parties": [
{"name": "Agent", "tel": "+15550001111"},
{"name": "Customer", "tel": "+15550002222"}
],
"dialog": [
{
"type": "recording",
"start": "2024-01-01T00:00:00",
"duration": 120,
"parties": [0, 1],
"mimetype": "not a valid mimetype!",
"url": "s3://bucket/recording.wav",
"alg": "SHA-512",
"signature": "abc123"
}
],
"analysis": [],
"attachments": []
}
27 changes: 27 additions & 0 deletions common/tests/invalid_fixtures/bad_party_ref.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
{
"uuid": "11111111-1111-1111-1111-111111111111",
"vcon": "0.0.1",
"created_at": "2024-01-01T00:00:00",
"subject": null,
"redacted": {},
"appended": null,
"group": [],
"parties": [
{"name": "Agent", "tel": "+15550001111"},
{"name": "Customer", "tel": "+15550002222"}
],
"dialog": [
{
"type": "recording",
"start": "2024-01-01T00:00:00",
"duration": 120,
"parties": [0, 99],
"mimetype": "audio/x-wav",
"url": "s3://bucket/recording.wav",
"alg": "SHA-512",
"signature": "abc123"
}
],
"analysis": [],
"attachments": []
}
27 changes: 27 additions & 0 deletions common/tests/invalid_fixtures/bad_url.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
{
"uuid": "33333333-3333-3333-3333-333333333333",
"vcon": "0.0.1",
"created_at": "2024-01-01T00:00:00",
"subject": null,
"redacted": {},
"appended": null,
"group": [],
"parties": [
{"name": "Agent", "tel": "+15550001111"},
{"name": "Customer", "tel": "+15550002222"}
],
"dialog": [
{
"type": "recording",
"start": "2024-01-01T00:00:00",
"duration": 120,
"parties": [0, 1],
"mimetype": "audio/x-wav",
"url": "not-a-url",
"alg": "SHA-512",
"signature": "abc123"
}
],
"analysis": [],
"attachments": []
}
23 changes: 23 additions & 0 deletions common/tests/test_api.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import json
import os

from fastapi.testclient import TestClient
from vcon_fixture import generate_mock_vcon
import pytest
Expand Down Expand Up @@ -87,6 +90,26 @@ def test_create_vcon_with_extra_attribute():
assert response.json()["meta"] == {"foo": "bar"}


# Directory, next to this test module, holding deliberately malformed vCon JSON files.
_INVALID_FIXTURES_DIR = os.path.join(os.path.dirname(__file__), "invalid_fixtures")


@pytest.mark.parametrize("filename", [
    "bad_party_ref.json",
    "bad_duration.json",
    "bad_url.json",
    "bad_mimetype.json",
])
def test_invalid_vcon_rejected(filename):
    """Malformed vCons must be rejected with 422, not silently accepted.

    Each parametrized ``filename`` is loaded from ``_INVALID_FIXTURES_DIR``
    and POSTed to ``/vcon``; the test passes only on a 422 response.
    """
    # Read the fixture as UTF-8 explicitly instead of relying on the
    # platform's default text encoding.
    with open(os.path.join(_INVALID_FIXTURES_DIR, filename), encoding="utf-8") as f:
        broken_vcon = json.load(f)
    with TestClient(app=api.app, headers={CONSERVER_HEADER_NAME: CONSERVER_API_TOKEN}) as client:
        response = client.post("/vcon", json=broken_vcon)
        # Report the raw body: response.json() would raise on a non-JSON
        # error body and hide the status-code failure being reported here.
        assert response.status_code == 422, (
            f"{filename} was unexpectedly accepted: status={response.status_code}, body={response.text}"
        )


@pytest.mark.anyio
def test_post_vcon_with_ingress_list():
# Generate a mock vCon
Expand Down
36 changes: 19 additions & 17 deletions common/tests/vcon_fixture.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,25 @@ def generate_mock_vcon():
"attachments": [],
}

# Generate parties first so dialog can reference valid indices.
# Minimum 2 parties so the hardcoded [0, 1] party references are always valid.
num_parties = random.randint(2, 5)
for i in range(num_parties):
vcon["parties"].append(
{
"tel": fake.phone_number(),
"meta": {"role": random.choice(["agent", "customer"])},
"name": fake.name(),
"stir": None,
"jcard": None,
"gmlpos": None,
"mailto": fake.email(),
"timezone": None,
"validation": None,
"civicaddress": None,
}
)

num_dialogs = random.randint(1, 5)
for i in range(num_dialogs):
vcon["dialog"].append(
Expand All @@ -45,23 +64,6 @@ def generate_mock_vcon():
}
)

num_parties = random.randint(1, 5)
for i in range(num_parties):
vcon["parties"].append(
{
"tel": fake.phone_number(),
"meta": {"role": random.choice(["agent", "customer"])},
"name": fake.name(),
"stir": None,
"jcard": None,
"gmlpos": None,
"mailto": fake.email(),
"timezone": None,
"validation": None,
"civicaddress": None,
}
)

num_analysis = random.randint(1, 5)
for i in range(num_analysis):
vcon["analysis"].append(
Expand Down
Loading