Skip to content

Commit c119e37

Browse files
committed
feat(loader): Selectively parse AIRD fragments
This commit changes the MelodyLoader to parse only the metadata section of AIRD files, instead of keeping all representation data in memory at all times. This dramatically reduces the runtime memory footprint and loading times for Capella models, especially for models that contain many representations.
1 parent 78aeb0b commit c119e37

3 files changed

Lines changed: 117 additions & 10 deletions

File tree

src/capellambse/loader/core.py

Lines changed: 113 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,12 @@ def __missing__(self, key: str) -> t.NoReturn:
182182

183183

184184
class ModelFile:
185-
"""Represents a single file in the model (i.e. a fragment)."""
185+
"""Represents a single file in the model (i.e. a fragment).
186+
187+
This class loads the entire XML tree into memory. This makes it
188+
unsuitable for large trees with only small interesting segments,
189+
like ``.aird`` files. See :class:`VisualFile` for an alternative.
190+
"""
186191

187192
__qtypecache: dict[etree.QName, dict[int, etree._Element]]
188193
__xtypecache: dict[str, dict[int, etree._Element]]
@@ -451,6 +456,99 @@ def unfollow_href(self, element_id: str) -> etree._Element:
451456
return self.__hrefsources[element_id]
452457

453458

459+
class VisualFile:
    """A visual (AIRD) fragment of a model.

    Visual fragments tend to become very large, so keeping them fully
    parsed in memory is impractical. This class offers an interface
    similar to :class:`ModelFile`, but retains only the central
    ``viewpoint:DAnalysis`` index element. All modifying operations are
    rejected with :class:`NotImplementedError`, and the lookup and
    enumeration methods currently report nothing.
    """

    fragment_type: t.Final = FragmentType.VISUAL

    def __init__(
        self,
        filename: pathlib.PurePosixPath,
        handler: filehandler.FileHandler,
    ) -> None:
        """Open ``filename`` via ``handler`` and extract the index element.

        Raises
        ------
        ValueError
            If the suffix of ``filename`` is not in ``VISUAL_EXTS``.
        RuntimeError
            If no top-level ``viewpoint:DAnalysis`` element was found.
        """
        self.filename = filename
        self.filehandler = handler
        if filename.suffix not in VISUAL_EXTS:
            raise ValueError(f"Bad filename for visual fragment: {filename}")

        with handler.open(filename) as stream:
            for _, node in etree.iterparse(stream):
                container = node.getparent()
                # Only direct children of the document root are relevant;
                # the root itself and deeper descendants are passed over.
                if container is None or container.getparent() is not None:
                    continue

                if node.tag != f"{{{_n.NAMESPACES['viewpoint']}}}DAnalysis":
                    # Detach top-level siblings that are not the index
                    # (presumably so they don't accumulate in memory).
                    container.remove(node)
                    continue

                self.__analysis = node
                break
            else:
                raise RuntimeError(
                    "Broken XML: No 'viewpoint:DAnalysis' element found"
                )

        # Detach the index element from the partially parsed document.
        root = self.__analysis.getparent()
        assert root is not None
        root.remove(self.__analysis)

    def __getitem__(self, key: str) -> etree._Element:
        """Look up an element by ID; currently always raises KeyError."""
        # TODO Return a diagram root element if it's found in this fragment
        raise KeyError(key)

    def referenced_files(self) -> cabc.Iterator[str]:
        """Yield the file references listed in the index element.

        ``semanticResources`` children contribute their text content,
        ``referencedAnalysis`` children contribute the part of their
        ``href`` attribute before the first ``#``. Empty values are
        skipped.
        """
        for child in self.__analysis:
            tag = child.tag
            if tag == "semanticResources":
                if child.text:
                    yield child.text
            elif tag == "referencedAnalysis":
                href = child.get("href")
                if href:
                    yield href.partition("#")[0]

    def enumerate_uuids(self) -> set[str]:
        """Enumerate all UUIDs used in this fragment (always empty)."""
        return set()

    def idcache_index(self, subtree: etree._Element) -> None:
        """Index the IDs of ``subtree``; not supported here."""
        raise NotImplementedError("Cannot modify visual fragments")

    def idcache_remove(self, source: str | etree._Element) -> None:
        """Remove ID(s) below ``source`` from the cache; not supported."""
        raise NotImplementedError("Cannot modify visual fragments")

    def idcache_rebuild(self) -> None:
        """Invalidate and rebuild this file's ID cache (a no-op)."""
        # Nothing to do

    def idcache_reserve(self, new_id: str) -> None:
        """Reserve ``new_id`` for later insertion; not supported here."""
        raise NotImplementedError("Cannot modify visual fragments")

    def iterall_xt(
        self, xtypes: cabc.Container[str]
    ) -> cabc.Iterator[etree._Element]:
        """Iterate over elements by ``xsi:type``; yields nothing."""
        del xtypes
        yield from ()

    def write_xml(
        self,
        filename: pathlib.PurePosixPath,
        encoding: str = "utf-8",
    ) -> None:
        """Do nothing; both arguments are ignored."""
        del filename, encoding

    # pylint: disable-next=useless-return
    def unfollow_href(self, element_id: str) -> etree._Element | None:
        """Return None unconditionally; ``element_id`` is ignored."""
        del element_id
        return None
551+
454552
class MelodyLoader:
455553
"""Facilitates extensive access to Polarsys / Capella projects."""
456554

@@ -534,7 +632,7 @@ def __init__(
534632
else:
535633
self.resources[resname] = reshdl
536634

537-
self.trees: dict[pathlib.PurePosixPath, ModelFile] = {}
635+
self.trees: dict[pathlib.PurePosixPath, ModelFile | VisualFile] = {}
538636
self.__load_referenced_files(
539637
pathlib.PurePosixPath("\0", self.entrypoint)
540638
)
@@ -587,11 +685,17 @@ def __load_referenced_files(
587685

588686
handler = self.resources[resource_path.parts[0]]
589687
filename = pathlib.PurePosixPath(*resource_path.parts[1:])
590-
frag = ModelFile(
591-
filename, handler, ignore_uuid_dups=self.__ignore_uuid_dups
592-
)
688+
frag: VisualFile | ModelFile
689+
if filename.suffix in VISUAL_EXTS:
690+
frag = VisualFile(filename, handler)
691+
refs = list(frag.referenced_files())
692+
else:
693+
frag = ModelFile(
694+
filename, handler, ignore_uuid_dups=self.__ignore_uuid_dups
695+
)
696+
refs = []
593697
self.trees[resource_path] = frag
594-
for ref in _find_refs(frag.root):
698+
for ref in refs:
595699
ref_name = helpers.normalize_pure_path(
596700
_unquote_ref(ref), base=resource_path.parent
597701
)
@@ -662,6 +766,7 @@ def update_namespaces(self) -> None:
662766
if fragment.fragment_type != FragmentType.SEMANTIC:
663767
continue
664768

769+
assert isinstance(fragment, ModelFile)
665770
LOGGER.debug("Updating namespaces on fragment %s", fname)
666771
fragment.update_namespaces(vp)
667772

@@ -938,7 +1043,7 @@ def iterall_xt(
9381043
"""
9391044
xtset = self._nonempty_hashset(xtypes)
9401045
if trees is None:
941-
files: cabc.Iterable[ModelFile] = self.trees.values()
1046+
files: cabc.Iterable[ModelFile | VisualFile] = self.trees.values()
9421047
else:
9431048
files = (v for k, v in self.trees.items() if k in trees)
9441049
return itertools.chain.from_iterable(
@@ -1273,7 +1378,7 @@ def follow_links(
12731378

12741379
def _find_fragment(
12751380
self, element: etree._Element
1276-
) -> tuple[pathlib.PurePosixPath, ModelFile]:
1381+
) -> tuple[pathlib.PurePosixPath, ModelFile | VisualFile]:
12771382
root = collections.deque(
12781383
itertools.chain([element], element.iterancestors()), 1
12791384
)[0]

src/capellambse/model/_model.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -416,7 +416,8 @@ class as the superclass of every concrete model element
416416
trees = [
417417
t
418418
for t in self._loader.trees.values()
419-
if t.fragment_type is loader.FragmentType.SEMANTIC
419+
if isinstance(t, loader.ModelFile)
420+
and t.fragment_type is loader.FragmentType.SEMANTIC
420421
]
421422
matches: cabc.Iterable[etree._Element]
422423
if not classes:

src/capellambse/model/_obj.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@
4848
from lxml import etree
4949

5050
import capellambse
51-
from capellambse import helpers
51+
from capellambse import helpers, loader
5252

5353
from . import VIRTUAL_NAMESPACE_PREFIX, T, U, _descriptors, _pods, _styleclass
5454

@@ -736,6 +736,7 @@ def __init__(
736736
ns = self.__capella_namespace__
737737
qtype = model.qualify_classname((ns, type(self).__name__))
738738
assert qtype.namespace is not None
739+
assert isinstance(fragment, loader.ModelFile)
739740
fragment.add_namespace(qtype.namespace, ns.alias)
740741
self._element.set(helpers.ATT_XT, qtype)
741742
for key, val in kw.items():

0 commit comments

Comments
 (0)