Skip to content

Commit 7cc012d

Browse files
committed
Check Assemblies for unrepresented asym IDs
Make sure that each Assembly used by a Model only includes asym IDs that are represented by Atoms or Spheres in at least one Model. Closes #165.
1 parent 66fe7ab commit 7cc012d

File tree

2 files changed

+71
-0
lines changed

2 files changed

+71
-0
lines changed

ihm/dumper.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1797,6 +1797,46 @@ def _check_representation(self, obj, asym, type_check, seq_id_range):
17971797
for x in self.repr_asym_ids[asym._id])))
17981798

17991799

1800+
class _AssemblyChecker:
    """Verify that every asym ID declared by an Assembly has coordinates.

    Feed it each model (together with the set of asym IDs actually seen
    in that model's atoms or spheres) via :meth:`add_model_asyms`, then
    call :meth:`check` once everything has been added.
    """

    def __init__(self):
        # Assembly id -> frozenset of the asym ids the Assembly declares
        self._asmb_asyms = {}
        # Assembly id -> set of asym ids seen in any model that uses
        # that Assembly
        self._asmb_model_asyms = {}

    def add_model_asyms(self, model, seen_asym_ids):
        """Record the asym IDs seen in atoms or spheres of `model`.

        :param model: object whose ``assembly`` attribute is an iterable
               of components and carries an ``_id``.
        :param set seen_asym_ids: asym IDs found in the model's
               coordinates; merged into the running set for the
               model's assembly.
        """
        assembly = model.assembly
        key = assembly._id

        # The declared asym IDs are only gathered the first time a given
        # assembly ID is encountered. Only components that have an
        # 'entity' attribute contribute an ID (presumably this filters
        # out bare entities, keeping asym units -- verify against the
        # Assembly component types).
        if key not in self._asmb_asyms:
            self._asmb_asyms[key] = frozenset(
                component._id for component in assembly
                if hasattr(component, 'entity'))

        # Accumulate the IDs represented in this model
        represented = self._asmb_model_asyms.setdefault(key, set())
        represented |= seen_asym_ids

    def check(self):
        """Raise ValueError if any Assembly declares an asym ID that was
           never seen in the coordinates of a model using that Assembly.

        :raises ValueError: listing, per assembly ID, the sorted asym
                IDs that lack coordinates.
        """
        problems = []
        for asmb_id, declared in self._asmb_asyms.items():
            missing = declared - self._asmb_model_asyms[asmb_id]
            if missing:
                problems.append(
                    "assembly ID %s, asym IDs %s"
                    % (asmb_id, ", ".join(sorted(missing))))

        if problems:
            raise ValueError(
                "The following Assemblies reference asym IDs that don't "
                "have coordinates in any Model: " + "; ".join(problems))
1838+
1839+
18001840
class _ModelDumperBase(Dumper):
18011841

18021842
def finalize(self, system):
@@ -1826,11 +1866,21 @@ def dump_atom_type(self, seen_types, system, writer):
18261866
in atom_site. This table is needed by atom_site. Note that we
18271867
output it *after* atom_site (otherwise we would need to iterate
18281868
through all atoms in the system twice)."""
1869+
# Also check all assemblies, after dumping all atoms/spheres
1870+
if self._check:
1871+
self._assembly_checker.check()
18291872
elements = [x for x in sorted(seen_types.keys()) if x is not None]
18301873
with writer.loop("_atom_type", ["symbol"]) as lp:
18311874
for element in elements:
18321875
lp.write(symbol=element)
18331876

1877+
def __get_assembly_checker(self):
    """Return this object's _AssemblyChecker, creating it on first use."""
    if hasattr(self, '_asmb_check'):
        return self._asmb_check
    # First access: build the checker and cache it on the instance so
    # later accesses share the same accumulated state
    self._asmb_check = checker = _AssemblyChecker()
    return checker

# Read-only attribute giving lazy access to the cached checker
_assembly_checker = property(__get_assembly_checker)
1883+
18341884
def dump_atoms(self, system, writer, add_ihm=True):
18351885
seen_types = {}
18361886
ordinal = itertools.count(1)
@@ -1843,9 +1893,11 @@ def dump_atoms(self, system, writer, add_ihm=True):
18431893
it.append("ihm_model_id")
18441894
with writer.loop("_atom_site", it) as lp:
18451895
for group, model in system._all_models():
1896+
seen_asym_ids = set()
18461897
rngcheck = _RangeChecker(model, self._check)
18471898
for atom in model.get_atoms():
18481899
rngcheck(atom)
1900+
seen_asym_ids.add(atom.asym_unit._id)
18491901
seq_id = 1 if atom.seq_id is None else atom.seq_id
18501902
label_seq_id = atom.seq_id
18511903
if not atom.asym_unit.entity.is_polymeric():
@@ -1871,6 +1923,7 @@ def dump_atoms(self, system, writer, add_ihm=True):
18711923
occupancy=atom.occupancy,
18721924
pdbx_PDB_model_num=model._id,
18731925
ihm_model_id=model._id)
1926+
self._assembly_checker.add_model_asyms(model, seen_asym_ids)
18741927
return seen_types
18751928

18761929

@@ -1919,8 +1972,10 @@ def dump_spheres(self, system, writer):
19191972
"model_id"]) as lp:
19201973
for group, model in system._all_models():
19211974
rngcheck = _RangeChecker(model, self._check)
1975+
seen_asym_ids = set()
19221976
for sphere in model.get_spheres():
19231977
rngcheck(sphere)
1978+
seen_asym_ids.add(sphere.asym_unit._id)
19241979
lp.write(id=next(ordinal),
19251980
entity_id=sphere.asym_unit.entity._id,
19261981
seq_id_begin=sphere.seq_id_range[0],
@@ -1929,6 +1984,7 @@ def dump_spheres(self, system, writer):
19291984
Cartn_x=sphere.x, Cartn_y=sphere.y,
19301985
Cartn_z=sphere.z, object_radius=sphere.radius,
19311986
rmsf=sphere.rmsf, model_id=model._id)
1987+
self._assembly_checker.add_model_asyms(model, seen_asym_ids)
19321988

19331989

19341990
class _NotModeledResidueRangeDumper(Dumper):

test/test_dumper.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2805,6 +2805,21 @@ def test_model_dumper_atoms(self):
28052805
# Should work though if checks are disabled
28062806
_ = _get_dumper_output(dumper, system, check=False)
28072807

2808+
def test_model_dumper_assembly_asym_check(self):
    """Test ModelDumper Assembly asym check"""
    system, model, asym = self._make_test_model()

    model_dumper = ihm.dumper._ModelDumper()
    # finalize() assigns the model/group IDs
    model_dumper.finalize(system)

    # No atoms were added for the assembly's asym, so dumping with
    # checks enabled must fail
    with self.assertRaises(ValueError) as raised:
        _get_dumper_output(model_dumper, system)
    self.assertIn("reference asym IDs that don't have coordinates",
                  str(raised.exception))

    # With checks disabled, the same dump should succeed
    _get_dumper_output(model_dumper, system, check=False)
2822+
28082823
def test_model_dumper_water_atoms(self):
28092824
"""Test ModelDumper with water atoms"""
28102825
system, model, asym = self._make_test_model(water=True)

0 commit comments

Comments
 (0)