Skip to content

Commit 834c676

Browse files
deeenes and claude committed
Add COSMOS PKN client module (cosmos subpackage)
New subpackage omnipath_client.cosmos with:

- get_pkn(organism, categories, resources, format): fetches pre-built PKNs from metabo.omnipathdb.org as DataFrame, dict, or AnnNet Graph
- to_annnet(df): bulk conversion to AnnNet Graph using add_vertices_bulk and add_edges_bulk for efficient construction
- categories(), organisms(), resources(), status(): convenience functions

Features:

- Organism normalisation via utils.ensure_ncbi_tax_id — accepts any form: 9606, 'human', 'Homo sapiens', 'hsapiens', 'hsa', etc.
- Uses shared _download.Downloader (dlmachine + caching)
- DataFrame output via polars (preferred) or pandas
- AnnNet conversion stores entity types as vertex attributes, interaction metadata as edge attributes

Usage:

    import omnipath_client as oc
    df = oc.cosmos.get_pkn('human', categories='transporters')
    g = oc.cosmos.get_pkn('mouse', format='annnet')

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 5e67cf8 commit 834c676

4 files changed

Lines changed: 347 additions & 1 deletion

File tree

omnipath_client/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
'values',
3232
]
3333

34-
from omnipath_client import utils
34+
from omnipath_client import cosmos, utils
3535
from ._client import (
3636
OmniPath,
3737
params,

omnipath_client/cosmos/__init__.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
"""COSMOS PKN client for the OmniPath Metabolomics service.
2+
3+
Fetches pre-built COSMOS prior-knowledge networks from
4+
``metabo.omnipathdb.org`` as DataFrames or AnnNet Graph objects.
5+
6+
Example::
7+
8+
import omnipath_client as oc
9+
10+
# DataFrame (polars by default)
11+
df = oc.cosmos.get_pkn(organism='human', categories='transporters')
12+
13+
# AnnNet Graph
14+
g = oc.cosmos.get_pkn(organism='human', format='annnet')
15+
16+
# Available categories and organisms
17+
oc.cosmos.categories() # ['transporters', 'receptors', ...]
18+
oc.cosmos.organisms() # [9606, 10090, 10116]
19+
"""
20+
21+
from omnipath_client.cosmos._pkn import (
22+
categories,
23+
get_pkn,
24+
organisms,
25+
resources,
26+
status,
27+
)
28+
from omnipath_client.cosmos._annnet import to_annnet
29+
30+
# Public names of the ``omnipath_client.cosmos`` subpackage: the PKN fetch
# and listing helpers from ``_pkn`` plus the graph converter from ``_annnet``.
__all__ = [
    'categories',
    'get_pkn',
    'organisms',
    'resources',
    'status',
    'to_annnet',
]

omnipath_client/cosmos/_annnet.py

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
"""Convert COSMOS PKN DataFrames to AnnNet Graph objects.
2+
3+
Requires the ``annnet`` package (optional dependency).
4+
Uses bulk operations for efficient graph construction.
5+
"""
6+
7+
from __future__ import annotations
8+
9+
__all__ = ['to_annnet']
10+
11+
import logging
12+
from typing import Any
13+
14+
logger = logging.getLogger(__name__)
15+
16+
17+
def to_annnet(df: Any) -> Any:
    """Convert a COSMOS PKN DataFrame to an AnnNet Graph.

    Vertices and edges are handed to the graph in two bulk calls
    (``add_vertices_bulk`` and ``add_edges_bulk``). Entity types are
    stored as the ``entity_type`` vertex attribute; ``interaction_type``
    and ``resource`` are stored as edge attributes and ``mor`` becomes
    the edge weight.

    Args:
        df: COSMOS PKN DataFrame (polars or pandas) with columns:
            ``source``, ``target``, ``source_type``, ``target_type``,
            ``interaction_type``, ``resource``, ``mor``. Only ``source``
            and ``target`` are required; a missing ``source_type`` or
            ``target_type`` column defaults to ``None``, ``mor`` to ``0``
            and ``interaction_type`` / ``resource`` to ``''``.

    Returns:
        ``annnet.Graph`` instance. Every edge is directed. An entity is
        passed to ``add_vertices_bulk`` only if at least one row gives it
        a non-empty type; the first such type wins.

    Raises:
        ImportError: If ``annnet`` is not installed.
        KeyError: If ``source`` is present and non-empty but ``target``
            is missing.

    Example::

        import omnipath_client as oc

        df = oc.cosmos.get_pkn('human', categories='transporters')
        g = oc.cosmos.to_annnet(df)
    """

    import math

    try:
        from annnet import Graph
    except ImportError as exc:
        raise ImportError(
            'annnet is required for graph conversion. '
            'Install with: pip install annnet'
        ) from exc

    # Normalise to dict-of-lists for column access
    # (works with both polars and pandas)
    try:
        cols = {c: df[c].to_list() for c in df.columns}
    except AttributeError:
        # Series objects that only provide the older ``tolist`` spelling
        cols = {c: df[c].tolist() for c in df.columns}

    sources = cols.get('source', [])
    n_rows = len(sources)
    # ``target`` is only required when there is at least one row.
    targets = cols['target'] if n_rows else []

    def column(name: str, default: Any) -> list:
        """Return column *name*, or a constant column if it is absent."""

        found = cols.get(name)
        return found if found is not None else [default] * n_rows

    def weight(mor: Any) -> float:
        """Edge weight from ``mor``; missing values (None/NaN) give 0.0."""

        if mor is None:
            return 0.0
        value = float(mor)
        # pandas represents missing numeric values as NaN, not None
        return 0.0 if math.isnan(value) else value

    # -- Collect unique entities with types -----------------------------------
    # Optional columns are resolved once, outside the row loop.
    entity_types: dict[str, str] = {}
    typed_endpoints = zip(
        sources,
        column('source_type', None),
        targets,
        column('target_type', None),
    )

    for src, src_type, tgt, tgt_type in typed_endpoints:
        if src not in entity_types and src_type:
            entity_types[src] = src_type
        if tgt not in entity_types and tgt_type:
            entity_types[tgt] = tgt_type

    # -- Build graph ----------------------------------------------------------
    g = Graph()

    # Bulk add vertices
    vertices = [
        {'vertex_id': vid, 'entity_type': etype}
        for vid, etype in entity_types.items()
    ]
    g.add_vertices_bulk(vertices)

    logger.info(
        'AnnNet: added %d vertices (%d protein, %d small_molecule)',
        len(vertices),
        sum(1 for v in entity_types.values() if v == 'protein'),
        sum(1 for v in entity_types.values() if v == 'small_molecule'),
    )

    # Bulk add edges
    edge_rows = zip(
        sources,
        targets,
        column('mor', 0),
        column('interaction_type', ''),
        column('resource', ''),
    )
    edges = [
        {
            'source': src,
            'target': tgt,
            'weight': weight(mor),
            'edge_directed': True,
            'attributes': {
                'interaction_type': itype,
                'resource': resource,
            },
        }
        for src, tgt, mor, itype, resource in edge_rows
    ]

    g.add_edges_bulk(edges)

    logger.info('AnnNet: added %d edges', len(edges))

    return g

omnipath_client/cosmos/_pkn.py

Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,194 @@
1+
"""COSMOS PKN fetch and convenience functions."""
2+
3+
from __future__ import annotations
4+
5+
import logging
6+
from typing import Any
7+
8+
from omnipath_client._download import Downloader
9+
from omnipath_client._session import get_logger
10+
11+
# Module logger, obtained through the package's session helper.
logger = get_logger(__name__)

# Base URL of the metabo service used unless ``set_url`` replaces it.
DEFAULT_METABO_URL = 'https://metabo.omnipathdb.org'

# Mutable module state: the base URL currently in effect, and the shared
# downloader, which stays ``None`` until ``_dl`` creates it on first use.
_metabo_url: str = DEFAULT_METABO_URL
_downloader: Downloader | None = None
17+
18+
19+
def set_url(url: str) -> None:
    """Override the metabo service base URL.

    Trailing slashes are removed before the value is stored: endpoint
    paths are appended to the base as ``'/cosmos/...'``, so a base ending
    in ``/`` would otherwise produce request URLs with a double slash.

    Args:
        url: New base URL, e.g. ``'https://metabo.omnipathdb.org'``.
    """

    global _metabo_url
    _metabo_url = url.rstrip('/')
24+
25+
26+
def _dl() -> Downloader:
    """Return the module-wide downloader, creating it on first call.

    The instance is built with ``use_cache=False`` and kept in the
    module-level ``_downloader`` variable for later calls.
    """

    global _downloader

    if _downloader is not None:
        return _downloader

    _downloader = Downloader(use_cache=False)
    return _downloader
35+
36+
37+
def _resolve_organism(organism: int | str) -> int:
    """Turn an organism identifier into an NCBI taxonomy ID.

    Integers are returned unchanged and values that ``int()`` can parse
    (e.g. ``'9606'``) are converted directly. Anything else is handed to
    ``omnipath_client.utils.ensure_ncbi_tax_id``.

    Raises:
        ValueError: If ``ensure_ncbi_tax_id`` returns ``None``.
    """

    if isinstance(organism, int):
        return organism

    # Numeric strings need no lookup
    try:
        numeric = int(organism)
    except (ValueError, TypeError):
        pass
    else:
        return numeric

    from omnipath_client.utils import ensure_ncbi_tax_id

    tax_id = ensure_ncbi_tax_id(organism)

    if tax_id is not None:
        return tax_id

    raise ValueError(f'Could not resolve organism: {organism!r}')
57+
58+
59+
def _csv_param(value: Any) -> Any:
    """Join a list/tuple/set of names with commas; pass other values through."""

    if isinstance(value, (set, frozenset)):
        # Sets have no defined order; sort so the request is reproducible.
        return ','.join(sorted(value))

    if isinstance(value, (list, tuple)):
        return ','.join(value)

    return value


def get_pkn(
    organism: int | str = 9606,
    categories: str | list[str] = 'all',
    resources: str | list[str] | None = None,
    format: str = 'dataframe',
) -> Any:
    """Fetch COSMOS PKN from the metabo service.

    Args:
        organism:
            Organism identifier — any form accepted by omnipath-utils
            taxonomy: NCBI ID (``9606``), common name (``'human'``),
            Latin (``'Homo sapiens'``), Ensembl (``'hsapiens'``),
            KEGG (``'hsa'``), etc.
        categories:
            Category names or ``'all'``. Available categories:
            ``transporters``, ``receptors``, ``allosteric``,
            ``enzyme_metabolite``, ``ppi``, ``grn``. A list, tuple or
            set of names is sent as one comma-separated string.
        resources:
            Optional resource filter (comma-separated string, or a
            list, tuple or set of names). Omitted from the request when
            empty or ``None``.
        format:
            Output format: ``'dataframe'`` (default), ``'parquet'``,
            ``'dict'``, or ``'annnet'``. Any other value is treated like
            ``'dataframe'``.

    Returns:
        The decoded JSON response for ``'dict'``, an AnnNet Graph for
        ``'annnet'``, otherwise a DataFrame (polars/pandas) built from
        the ``'network'`` entry of the response.

    Raises:
        ValueError: If *organism* cannot be resolved.
        ImportError: If the DataFrame or AnnNet backend is missing.

    Example::

        import omnipath_client as oc

        # All human transporters
        df = oc.cosmos.get_pkn('human', categories='transporters')

        # Full mouse PKN as AnnNet graph
        g = oc.cosmos.get_pkn('mouse', format='annnet')
    """

    ncbi_tax_id = _resolve_organism(organism)

    params: dict[str, Any] = {
        'organism': ncbi_tax_id,
        'categories': _csv_param(categories),
    }

    resources = _csv_param(resources)

    if resources:
        params['resources'] = resources

    if format == 'parquet':
        # NOTE(review): the response is still read with ``fetch_json`` and
        # then converted like the default format, so raw parquet bytes are
        # not returned here — confirm the intended handling against the
        # Downloader API and the service.
        params['format'] = 'parquet'

    url = f'{_metabo_url}/cosmos/pkn'
    data = _dl().fetch_json(url, params=params)

    if format == 'dict':
        return data

    df = _network_to_dataframe(data['network'])

    if format == 'annnet':
        from omnipath_client.cosmos._annnet import to_annnet
        return to_annnet(df)

    # Default: DataFrame
    return df
130+
131+
132+
def _network_to_dataframe(records: list[dict]) -> Any:
    """Convert network records to a DataFrame.

    Uses polars if available, falls back to pandas.

    Args:
        records: One dict per interaction. An empty or ``None`` value
            yields an empty DataFrame.

    Returns:
        ``polars.DataFrame`` when polars can be imported, otherwise
        ``pandas.DataFrame``.

    Raises:
        ImportError: If neither polars nor pandas is installed. This
            applies to empty input too, so the install hint is always
            shown.
    """

    # Keep the ``try`` bodies to the import itself, so an error raised
    # while building the frame is never mistaken for a missing library.
    try:
        import polars as pl
    except ImportError:
        pl = None

    if pl is not None:
        return pl.DataFrame(records) if records else pl.DataFrame()

    try:
        import pandas as pd
    except ImportError as exc:
        raise ImportError(
            'Either polars or pandas is required for DataFrame output. '
            'Install with: pip install polars'
        ) from exc

    return pd.DataFrame(records) if records else pd.DataFrame()
162+
163+
164+
def categories() -> list[str]:
    """Return the PKN categories reported by the metabo service.

    Fetches the JSON document at ``/cosmos/categories`` under the
    currently configured base URL.
    """

    return _dl().fetch_json(f'{_metabo_url}/cosmos/categories')
169+
170+
171+
def organisms() -> list[int]:
    """Return the organisms for which pre-built PKNs exist.

    Fetches the JSON document at ``/cosmos/organisms`` under the
    currently configured base URL.
    """

    return _dl().fetch_json(f'{_metabo_url}/cosmos/organisms')
176+
177+
178+
def resources(organism: int | str = 9606) -> dict[str, list[str]]:
    """Return the resources available per category for one organism.

    Args:
        organism: Organism identifier (any form); it is resolved to an
            NCBI taxonomy ID before the request is made.
    """

    query = {'organism': _resolve_organism(organism)}
    endpoint = f'{_metabo_url}/cosmos/resources'

    return _dl().fetch_json(endpoint, params=query)
188+
189+
190+
def status() -> dict:
    """Return the cache status reported by the metabo service.

    Fetches the JSON document at ``/cosmos/status`` under the currently
    configured base URL.
    """

    return _dl().fetch_json(f'{_metabo_url}/cosmos/status')

0 commit comments

Comments
 (0)