Skip to content

Commit cfa5d68

Browse files
committed
feat(parser): add plugin-based parser strategies with ruamel-yaml support
Add comprehensive plugin architecture for OpenAPI parsing with multiple strategy support:

- Add BaseParserStrategy interface for extensible parsing strategies
- Implement DefaultOpenAPIParser, RuamelOpenAPIParser, and RuamelRoundTripOpenAPIParser strategies
- Add plugin discovery system using importlib.metadata entry points
- Support multiple parser strategies with last-successful-wins semantics
- Add ruamel-yaml dependency for enhanced YAML parsing capabilities
- Enhance OpenAPIParser with overloaded parse method supporting custom return types
- Export load_uri function for external URI loading functionality
- Add comprehensive URI resolution with HTTP/HTTPS, file://, and local path support
- Implement proper error handling with UriResolutionError for resolution failures

The parser now supports strategy-based parsing allowing users to choose between different parsing backends and enabling third-party extensions through the plugin system. This provides better flexibility for handling different YAML/JSON parsing requirements while maintaining backward compatibility.

BREAKING CHANGE: OpenAPIParser constructor now accepts strategies parameter, changing the default initialization behavior. The parse method signature has been enhanced with optional return_type and strict parameters.
1 parent 545ab13 commit cfa5d68

File tree

12 files changed

+389
-51
lines changed

12 files changed

+389
-51
lines changed

.github/workflows/ci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ jobs:
2121
run: uv sync
2222
- name: Ruff + Pyright
2323
run: |
24-
uv run ruff --output-format=github .
24+
uv run ruff check --output-format=github .
2525
uv run ruff format --check .
2626
uv run pyright
2727

packages/jentic-openapi-parser/pyproject.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,9 @@ readme = "README.md"
66
authors = [{ name = "Jentic", email = "hello@jentic.com" }]
77
license = { text = "Apache-2.0" }
88
requires-python = ">=3.11"
9-
dependencies = []
9+
# Runtime imports of this package: uri.py imports `requests`, the default
# strategy imports `yaml` (PyYAML), and the ruamel strategies import `ruamel.yaml`.
# NOTE(review): confirm pyyaml/requests are not already supplied by a workspace root.
dependencies = [
    "pyyaml",
    "requests",
    "ruamel-yaml",
]
1012

1113
[project.urls]
1214
Homepage = "https://github.com/jentic/jentic-openapi-tools"
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
from .openapi_parser import OpenAPIParser
2-
from .uri import is_uri_like, resolve_to_absolute, UriResolutionError
2+
from .uri import is_uri_like, load_uri, resolve_to_absolute, UriResolutionError
33

4-
__all__ = ["OpenAPIParser", "is_uri_like", "resolve_to_absolute", "UriResolutionError"]
4+
__all__ = ["OpenAPIParser", "is_uri_like", "load_uri", "resolve_to_absolute", "UriResolutionError"]
Lines changed: 129 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,52 +1,136 @@
1-
from typing import Any, Mapping, Optional
2-
import requests
3-
import yaml
4-
import json
1+
import importlib.metadata
2+
from typing import Any, TypeVar, cast, Optional, overload, Mapping, Sequence
53

6-
from .uri import is_uri_like, resolve_to_absolute
4+
from .uri import is_uri_like, load_uri
5+
from .strategies.base import BaseParserStrategy
6+
from .strategies.default_strategy import DefaultOpenAPIParser
7+
from .strategies.ruamel_strategy import RuamelOpenAPIParser
8+
from .strategies.ruamel_roundtrip_strategy import RuamelRoundTripOpenAPIParser
9+
10+
T = TypeVar("T")
711

812

913
class OpenAPIParser:
10-
def is_uri_like(self, s: Optional[str]) -> bool:
14+
"""
15+
Provides a parser for OpenAPI specifications using customizable strategies.
16+
17+
This class is designed to facilitate the parsing of OpenAPI documents.
18+
It supports multiple strategies and can be extended through plugins.
19+
When multiple strategies are used, the returned value is the result from the last strategy of the list that succeeds.
20+
This mechanism allows multiple strategies to be passed to the parser so that all of them contribute to the validation process
21+
by reporting the errors and success status from all strategies.
22+
23+
Attributes:
24+
strategies (list[BaseParserStrategy]): List of strategies used by the parser,
25+
each implementing the BaseParserStrategy interface.
26+
"""
27+
28+
def __init__(self, strategies: list | None = None):
    """Build the list of strategy instances used by this parser.

    Args:
        strategies: Strategy selectors. Each item may be a strategy name
            (``"default"``, ``"ruamel"``, ``"ruamel-rt"``, or the name of a
            plugin registered under the ``jentic.openapi_parser_strategies``
            entry-point group), a ``BaseParserStrategy`` instance, or a
            ``BaseParserStrategy`` subclass, which is instantiated without
            arguments. ``None`` or an empty list selects ``["default"]``.

    Raises:
        ValueError: If a name matches neither a built-in strategy nor a plugin.
        TypeError: If an item is not a name, a strategy instance or a strategy class.
    """
    # If no strategies specified, use default
    if not strategies:
        strategies = ["default"]
    self.strategies = []  # list of BaseParserStrategy instances

    # Built-in names are checked before plugins, so a plugin cannot shadow them.
    builtin_strategies = {
        "default": DefaultOpenAPIParser,
        "ruamel": RuamelOpenAPIParser,
        "ruamel-rt": RuamelRoundTripOpenAPIParser,
    }

    # Discover entry points for parser plugins
    # (This could be a one-time load stored at class level to avoid doing it every time)
    eps = importlib.metadata.entry_points(group="jentic.openapi_parser_strategies")
    plugin_map = {ep.name: ep for ep in eps}

    for strat in strategies:
        if isinstance(strat, str):
            if strat in builtin_strategies:
                self.strategies.append(builtin_strategies[strat]())
            elif strat in plugin_map:
                plugin_class = plugin_map[strat].load()  # loads the class
                self.strategies.append(plugin_class())
            else:
                raise ValueError(f"No parser plugin named '{strat}' found")
        elif isinstance(strat, BaseParserStrategy):
            self.strategies.append(strat)
        elif isinstance(strat, type) and issubclass(strat, BaseParserStrategy):
            # A class (not an instance) was passed. The isinstance(…, type) guard
            # keeps issubclass() from raising its own opaque TypeError when given
            # a callable that is not a class (function, lambda, callable object).
            self.strategies.append(strat())
        else:
            raise TypeError("Invalid strategy type: must be name or strategy class/instance")
63+
64+
@overload
def parse(self, source: str) -> dict[str, Any]: ...

@overload
def parse(self, source: str, *, return_type: type[T], strict: bool = False) -> T: ...

def parse(
    self, source: str, *, return_type: type[T] | None = None, strict: bool = False
) -> Any:
    """Parse ``source`` with the configured strategies.

    Args:
        source: The document to parse; forwarded unchanged to ``_parse``.
        return_type: When omitted, the raw result is converted with
            ``_to_plain`` before being returned. When given, the raw result
            is returned as-is.
        strict: Only consulted when ``return_type`` is given; if true, the raw
            result must be an instance of ``return_type``.

    Raises:
        TypeError: If ``strict`` is true and the raw result is not an instance
            of ``return_type``.
    """
    parsed = self._parse(source)

    # No explicit type requested: hand back plain containers.
    if return_type is None:
        return self._to_plain(parsed)

    if strict and not isinstance(parsed, return_type):
        expected = getattr(return_type, "__name__", return_type)
        raise TypeError(f"Expected {expected}, got {type(parsed).__name__}")

    return cast(T, parsed)
85+
86+
def _parse(self, source: str) -> Any:
    """Run every applicable strategy on ``source`` and return the last successful result.

    If ``source`` looks like a URI and at least one strategy accepts text but
    not URIs, the content is loaded once up front via ``self.load_uri``.
    Each strategy then receives the URI itself when it accepts ``"uri"`` and
    the source is a URI, otherwise the text when it accepts ``"text"``.
    Strategies that accept neither applicable kind are skipped.

    Raises:
        ValueError: If no strategy produced a result. The most recent strategy
            error, if any, is attached as ``__cause__``.
    """
    import logging  # function-scope import: keeps this method self-contained

    logger = logging.getLogger(__name__)

    is_uri = is_uri_like(source)
    text = source
    if is_uri and self.has_non_uri_strategy():
        text = self.load_uri(source)

    result = None
    last_error: Exception | None = None
    for strat in self.strategies:
        accepted = strat.accepts()  # query once per strategy
        if is_uri and "uri" in accepted:
            document = source
        elif "text" in accepted:
            document = text
        else:
            continue

        try:
            result = strat.parse(document)
        except Exception as exc:
            # TODO add to parser/validation chain result
            # A failing strategy must not stop the others, but the failure is
            # logged (not printed to stdout) and remembered for the final error.
            last_error = exc
            logger.warning(
                "Error parsing document with %s: %s", type(strat).__name__, exc
            )

    if result is None:
        raise ValueError("No valid document found") from last_error
    return result
110+
111+
def has_non_uri_strategy(self) -> bool:
    """Report whether at least one strategy accepts 'text' but not 'uri'."""

    def text_only(kinds) -> bool:
        return "text" in kinds and "uri" not in kinds

    # any() stops at the first match, like the original early return.
    return any(text_only(strategy.accepts()) for strategy in self.strategies)
118+
119+
def _to_plain(self, obj: Any) -> Any:
120+
# Mapping?
121+
if isinstance(obj, Mapping):
122+
return {k: self._to_plain(v) for k, v in obj.items()}
123+
124+
# Sequence but NOT str/bytes
125+
if isinstance(obj, Sequence) and not isinstance(obj, (str, bytes, bytearray)):
126+
return [self._to_plain(x) for x in obj]
127+
128+
# Scalar
129+
return obj
130+
131+
@staticmethod
def is_uri_like(s: Optional[str]) -> bool:
    """Delegate to the module-level ``is_uri_like`` helper and return its answer."""
    looks_like_uri = is_uri_like(s)
    return looks_like_uri
12134

13135
def load_uri(self, uri: str) -> str:
14-
resolved_uri = resolve_to_absolute(uri)
15-
16-
if resolved_uri.startswith("http://") or uri.startswith("https://"):
17-
content = requests.get(resolved_uri).text
18-
elif resolved_uri.startswith("file://"):
19-
with open(resolved_uri, "r", encoding="utf-8") as f:
20-
content = f.read()
21-
else:
22-
# Treat as local file path
23-
with open(resolved_uri, "r", encoding="utf-8") as f:
24-
content = f.read()
25-
return content
26-
27-
def parse(self, source: str) -> dict[str, Any]:
28-
text = source
29-
try:
30-
if is_uri_like(source):
31-
text = self.load_uri(source)
32-
33-
data = self.parse_text(text)
34-
except Exception:
35-
msg = f"Unsupported document type: {type(source)!r}"
36-
raise TypeError(msg)
37-
return data
38-
39-
def parse_uri(self, uri: str) -> dict[str, Any]:
40-
return self.parse_text(self.load_uri(uri))
41-
42-
def parse_text(self, text: str) -> dict[str, Any]:
43-
try:
44-
data = yaml.safe_load(text)
45-
except Exception:
46-
if isinstance(text, (bytes, str)):
47-
text = text.decode() if isinstance(text, bytes) else text
48-
data = json.loads(text)
49-
if isinstance(data, Mapping):
50-
return dict(data)
51-
msg = f"Unsupported document type: {type(text)!r}"
52-
raise TypeError(msg)
136+
return load_uri(uri)
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
from abc import ABC, abstractmethod
2+
from typing import Any
3+
4+
5+
class BaseParserStrategy(ABC):
    """Abstract interface that every parser plugin has to implement."""

    @abstractmethod
    def parse(self, source: str) -> Any:
        """Parse an OpenAPI document supplied as a URI, a file path or raw text.

        Returns the parsed document, described by this interface as a dict.
        """

    @abstractmethod
    def accepts(self) -> list[str]:
        """Return the input kinds this strategy can take.

        ``OpenAPIParser`` checks the returned list for ``"uri"`` and ``"text"``.
        """
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
from typing import Any, Mapping
2+
import yaml
3+
import json
4+
from .base import BaseParserStrategy
5+
from ..uri import is_uri_like, load_uri
6+
7+
8+
class DefaultOpenAPIParser(BaseParserStrategy):
    """Parser strategy that uses ``yaml.safe_load`` with a ``json.loads`` fallback."""

    def parse(self, source: str) -> Any:
        """Parse a document given as a URI-like string or as raw text.

        URI-like sources are loaded with ``load_uri`` first; anything else is
        treated as the document text itself.

        Raises:
            TypeError: If the source cannot be loaded or does not parse to a
                mapping. For load/syntax failures the underlying error is
                attached as ``__cause__``.
        """
        try:
            text = load_uri(source) if is_uri_like(source) else source
            return self.parse_text(text)
        except TypeError:
            # Already the exception type callers expect; keep its more precise message.
            raise
        except Exception as exc:
            # TypeError is kept for backward compatibility; chaining preserves
            # the real reason (I/O failure, syntax error, ...).
            msg = f"Unsupported document type: {type(source)!r}"
            raise TypeError(msg) from exc

    def parse_uri(self, uri: str) -> Any:
        """Load ``uri`` with ``load_uri`` and parse the resulting text."""
        return self.parse_text(load_uri(uri))

    def parse_text(self, text: str) -> Any:
        """Parse YAML (or, failing that, JSON) text into a plain ``dict``.

        Raises:
            TypeError: If the parsed document is not a mapping.
        """
        try:
            data = yaml.safe_load(text)
        except Exception:
            # Fall back to JSON for input the YAML loader rejects. Decoding is
            # done inline so that ``data`` is always bound or json.loads raises
            # (the original could hit UnboundLocalError for non-str/bytes input).
            data = json.loads(text.decode() if isinstance(text, bytes) else text)
        if isinstance(data, Mapping):
            return dict(data)
        # Report the type of what was parsed; the input is always text.
        msg = f"Unsupported document type: {type(data)!r}"
        raise TypeError(msg)

    def accepts(self) -> list[str]:
        """Return the accepted input kinds: ``"uri"`` and ``"text"``."""
        return ["uri", "text"]
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
from .ruamel_strategy import RuamelOpenAPIParser
2+
3+
4+
class RuamelRoundTripOpenAPIParser(RuamelOpenAPIParser):
    """``RuamelOpenAPIParser`` preconfigured with ``typ="rt"``."""

    def __init__(self, pure: bool = True):
        """Initialise the parent strategy with ``typ="rt"`` and the given ``pure`` flag."""
        super().__init__(typ="rt", pure=pure)
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
from typing import Any, Mapping
2+
from ruamel.yaml import YAML
3+
import json
4+
from .base import BaseParserStrategy
5+
from ..uri import is_uri_like, load_uri
6+
7+
8+
class RuamelOpenAPIParser(BaseParserStrategy):
    """Parser strategy that uses ``ruamel.yaml`` with a ``json.loads`` fallback."""

    def __init__(self, typ: str = "safe", pure: bool = True):
        """Create the underlying ``YAML`` object.

        Args:
            typ: Passed to ``YAML(typ=...)``.
            pure: Passed to ``YAML(pure=...)``.
        """
        self.yaml = YAML(typ=typ, pure=pure)
        self.yaml.default_flow_style = False

    def parse(self, source: str) -> Any:
        """Parse a document given as a URI-like string or as raw text.

        URI-like sources are loaded with ``load_uri`` first; anything else is
        treated as the document text itself.

        Raises:
            TypeError: If the source cannot be loaded or does not parse to a
                mapping. For load/syntax failures the underlying error is
                attached as ``__cause__``.
        """
        try:
            text = load_uri(source) if is_uri_like(source) else source
            return self.parse_text(text)
        except TypeError:
            # Already the exception type callers expect; keep its more precise message.
            raise
        except Exception as exc:
            # TypeError is kept for backward compatibility; chaining preserves
            # the real reason (I/O failure, syntax error, ...).
            msg = f"Unsupported document type: {type(source)!r}"
            raise TypeError(msg) from exc

    def parse_uri(self, uri: str) -> Any:
        """Load ``uri`` with ``load_uri`` and parse the resulting text."""
        return self.parse_text(load_uri(uri))

    def parse_text(self, text: str) -> Any:
        """Parse YAML (or, failing that, JSON) text into a mapping.

        The mapping is returned as produced by the loader, without copying it
        into a plain ``dict``.

        Raises:
            TypeError: If the parsed document is not a mapping.
        """
        try:
            data = self.yaml.load(text)
        except Exception:
            # Fall back to JSON for input the YAML loader rejects. Decoding is
            # done inline so that ``data`` is always bound or json.loads raises
            # (the original could hit UnboundLocalError for non-str/bytes input).
            data = json.loads(text.decode() if isinstance(text, bytes) else text)
        if isinstance(data, Mapping):
            return data
        # Report the type of what was parsed; the input is always text.
        msg = f"Unsupported document type: {type(data)!r}"
        raise TypeError(msg)

    def accepts(self) -> list[str]:
        """Return the accepted input kinds: ``"uri"`` and ``"text"``."""
        return ["uri", "text"]

packages/jentic-openapi-parser/src/jentic_openapi_parser/uri.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import os
88
import re
99
import urllib.request
10+
import requests
1011

1112
_WINDOWS_DRIVE_RE = re.compile(r"^[A-Za-z]:[\\/]")
1213
_WINDOWS_UNC_RE = re.compile(r"^(?:\\\\|//)[^\\/]+[\\/][^\\/]+")
@@ -180,3 +181,18 @@ def _resolve_path_like(value: str, base_uri: Optional[str]) -> str:
180181

181182
p = Path(value)
182183
return str(p.resolve() if p.is_absolute() else (base_path / p).resolve())
184+
185+
186+
def load_uri(uri: str, *, timeout: float = 30.0) -> str:
    """Load the text content referenced by a URI or local path.

    The input is first resolved with ``resolve_to_absolute``. HTTP(S) URIs are
    fetched with ``requests``; ``file://`` URIs are converted to a local
    filesystem path; anything else is opened as a local path. Files are read
    as UTF-8.

    Args:
        uri: URI or path of the document to load.
        timeout: Seconds to wait for the HTTP server before giving up. The
            original call had no timeout and could block indefinitely.

    Returns:
        The document content as text.

    Raises:
        requests.RequestException: On network failure, timeout, or a 4xx/5xx
            HTTP status.
        OSError: If a local file cannot be opened.
    """
    # Function-scope imports keep this block self-contained.
    from urllib.parse import urlsplit
    from urllib.request import url2pathname

    resolved_uri = resolve_to_absolute(uri)

    # Test the *resolved* value for both schemes (the original tested ``uri``
    # for https and ``resolved_uri`` for http).
    if resolved_uri.startswith(("http://", "https://")):
        response = requests.get(resolved_uri, timeout=timeout)
        # Fail loudly rather than hand an HTML error page to the parser.
        response.raise_for_status()
        return response.text

    path = resolved_uri
    if resolved_uri.startswith("file://"):
        # open() does not understand file:// URIs; convert to an OS path
        # (also undoes percent-encoding). Non-local hosts are not handled.
        path = url2pathname(urlsplit(resolved_uri).path)

    with open(path, "r", encoding="utf-8") as f:
        return f.read()

0 commit comments

Comments
 (0)