Skip to content

Commit b6ec756

Browse files
first draft initial layout - everything re-orged and functional
1 parent 93de4bf commit b6ec756

File tree

3 files changed

+41
-33
lines changed

3 files changed

+41
-33
lines changed

mpcontribs-client/mpcontribs/client/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66
from mpcontribs.client.exceptions import MPContribsClientError
77
from mpcontribs.client.settings import MPCC_SETTINGS
88

9+
__all__ = ["Client", "MPContribsClientError", "MPCC_SETTINGS"]
910

1011
try:
1112
__version__ = importlib.metadata.version("mpcontribs-client")

mpcontribs-client/mpcontribs/client/core.py

Lines changed: 39 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
"""Define core client functionality."""
22

33
import gzip
4+
import importlib.metadata
45
import sys
56
import time
67
import pandas as pd
@@ -25,14 +26,17 @@
2526
from pyisemail.diagnosis import BaseDiagnosis
2627
from swagger_spec_validator.common import SwaggerValidationError
2728
from jsonschema.exceptions import ValidationError
29+
from bravado.client import SwaggerClient
2830
from bravado_core.formatter import SwaggerFormat
2931
from bravado.swagger_model import Loader
3032
from bravado.config import bravado_config_from_config_dict
3133
from bravado_core.spec import Spec, build_api_serving_url, _identity
3234
from bravado_core.model import model_discovery
35+
from bravado.requests_client import RequestsClient
3336
from bravado_core.resource import build_resources
3437
from bravado.exception import HTTPNotFound
3538
from bravado_core.validate import validate_object
39+
from concurrent.futures import as_completed
3640
from pymatgen.core import Structure as PmgStructure
3741
from requests_futures.sessions import FuturesSession
3842
from urllib3.util.retry import Retry
@@ -45,6 +49,8 @@
4549
from mpcontribs.client.logger import MPCC_LOGGER, TqdmToLogger
4650
from mpcontribs.client.settings import MPCC_SETTINGS
4751
from mpcontribs.client.types import PrettyDict, PrettyStructure, Table, Attachment
52+
from mpcontribs.client.units import ureg
53+
from mpcontribs.client.utils import get_md5
4854

4955
classes_map = {
5056
"structures": PrettyStructure,
@@ -206,13 +212,7 @@ def _run_futures(futures, total: int = 0, timeout: int = -1, desc=None, disable=
206212
@functools.lru_cache(maxsize=1000)
207213
def _load(protocol, host, headers_json, project, version):
208214
spec_dict = _raw_specs(protocol, host, version)
209-
headers = (
210-
orjson.loads(headers_json)
211-
if isinstance(headers_json, str | bytes)
212-
else headers_json
213-
)
214-
if isinstance(headers, bytes):
215-
headers = headers.decode(encoding="utf-8")
215+
headers = orjson.loads(headers_json)
216216

217217
if not spec_dict["paths"]:
218218
url = f"{protocol}://{host}"
@@ -240,7 +240,11 @@ def _load(protocol, host, headers_json, project, version):
240240
projects = sorted(d["name"] for d in resp["data"])
241241
# expand regex-based query parameters for `data` columns
242242
spec = _expand_params(
243-
protocol, host, version, projects, api_key=headers.get("x-api-key")
243+
protocol,
244+
host,
245+
version,
246+
orjson.dumps(projects),
247+
api_key=headers.get("x-api-key"),
244248
)
245249
spec.http_client.session.headers.update(headers)
246250
return spec
@@ -289,11 +293,7 @@ def _raw_specs(protocol, host, version):
289293
)
290294
def _expand_params(protocol, host, version, projects_json, api_key=None):
291295
columns = {"string": [], "number": []}
292-
projects = (
293-
orjson.loads(projects_json)
294-
if isinstance(projects_json, str | bytes)
295-
else projects_json
296-
)
296+
projects = orjson.loads(projects_json)
297297
query = {"project__in": ",".join(projects)}
298298
query["_fields"] = "columns"
299299
url = f"{protocol}://{host}"
@@ -365,7 +365,7 @@ def _version(url):
365365
retries, max_retries = 0, 3
366366
protocol = urlparse(url).scheme
367367
if "pytest" in sys.modules and protocol == "http":
368-
return __version__
368+
return importlib.metadata.version("mpcontribs-client")
369369

370370
while retries < max_retries:
371371
try:
@@ -635,7 +635,9 @@ def available_query_params(
635635

636636
return [param for param in params if param.startswith(startswith)]
637637

638-
def get_project(self, name: str | None = None, fields: list | None = None) -> Dict:
638+
def get_project(
639+
self, name: str | None = None, fields: list | None = None
640+
) -> PrettyDict:
639641
"""Retrieve a project entry
640642
641643
Args:
@@ -851,7 +853,7 @@ def delete_project(self, name: str | None = None):
851853
if resp and "error" in resp:
852854
raise MPContribsClientError(resp["error"])
853855

854-
def get_contribution(self, cid: str, fields: list | None = None) -> Dict:
856+
def get_contribution(self, cid: str, fields: list | None = None) -> PrettyDict:
855857
"""Retrieve a contribution
856858
857859
Args:
@@ -907,7 +909,7 @@ def get_table(self, tid_or_md5: str) -> Table:
907909

908910
return Table.from_dict(table)
909911

910-
def get_structure(self, sid_or_md5: str) -> Structure:
912+
def get_structure(self, sid_or_md5: str) -> PrettyStructure:
911913
"""Retrieve pymatgen structure
912914
913915
Args:
@@ -933,7 +935,7 @@ def get_structure(self, sid_or_md5: str) -> Structure:
933935

934936
fields = list(self.get_model("StructuresSchema")._properties.keys())
935937
resp = self.structures.getStructureById(pk=sid, _fields=fields).result()
936-
return Structure.from_dict(resp)
938+
return PrettyStructure.from_dict(resp)
937939

938940
def get_attachment(self, aid_or_md5: str) -> Attachment:
939941
"""Retrieve an attachment
@@ -1659,7 +1661,7 @@ def submit_contributions(
16591661
tic = time.perf_counter()
16601662
project_names = set()
16611663
collect_ids = []
1662-
require_one_of = {"data"} | set(COMPONENTS)
1664+
require_one_of = {"data"} | set(MPCC_SETTINGS.COMPONENTS)
16631665

16641666
for idx, c in enumerate(contributions):
16651667
has_keys = require_one_of & c.keys()
@@ -1714,7 +1716,10 @@ def submit_contributions(
17141716
else {"project": project_names[0]}
17151717
)
17161718
existing = defaultdict(
1717-
dict, self.get_all_ids(query, include=COMPONENTS, timeout=timeout)
1719+
dict,
1720+
self.get_all_ids(
1721+
query, include=MPCC_SETTINGS.COMPONENTS, timeout=timeout
1722+
),
17181723
)
17191724

17201725
# prepare contributions
@@ -1723,7 +1728,7 @@ def submit_contributions(
17231728
fields = [
17241729
comp
17251730
for comp in self.get_model("ContributionsSchema")._properties.keys()
1726-
if comp not in COMPONENTS
1731+
if comp not in MPCC_SETTINGS.COMPONENTS
17271732
]
17281733
fields.remove("needs_build") # internal field
17291734

@@ -1760,13 +1765,13 @@ def submit_contributions(
17601765

17611766
contribs[project_name].append(contrib_copy)
17621767

1763-
for component in COMPONENTS:
1768+
for component in MPCC_SETTINGS.COMPONENTS:
17641769
elements = contrib.get(component, [])
17651770
nelems = len(elements)
17661771

1767-
if nelems > MAX_ELEMS:
1772+
if nelems > MPCC_SETTINGS.MAX_ELEMS:
17681773
raise MPContribsClientError(
1769-
f"Too many {component} ({nelems} > {MAX_ELEMS})!"
1774+
f"Too many {component} ({nelems} > {MPCC_SETTINGS.MAX_ELEMS})!"
17701775
)
17711776

17721777
if update and not nelems:
@@ -1894,7 +1899,7 @@ def put_future(pk, payload):
18941899
)
18951900

18961901
payload = orjson.dumps(c)
1897-
if len(payload) < MAX_PAYLOAD:
1902+
if len(payload) < MPCC_SETTINGS.MAX_PAYLOAD:
18981903
futures.append(put_future(pk, payload))
18991904
else:
19001905
MPCC_LOGGER.error(
@@ -1905,7 +1910,7 @@ def put_future(pk, payload):
19051910
next_payload = orjson.dumps(next_post_chunk)
19061911
if (
19071912
len(next_post_chunk) > nmax
1908-
or len(next_payload) >= MAX_PAYLOAD
1913+
or len(next_payload) >= MPCC_SETTINGS.MAX_PAYLOAD
19091914
):
19101915
if post_chunk:
19111916
payload = orjson.dumps(post_chunk)
@@ -1938,15 +1943,15 @@ def put_future(pk, payload):
19381943

19391944
if (
19401945
total_processed != ncontribs
1941-
and retries < RETRIES
1946+
and retries < MPCC_SETTINGS.RETRIES
19421947
and unique_identifiers.get(project_name)
19431948
):
19441949
MPCC_LOGGER.info(
19451950
f"{total_processed}/{ncontribs} processed -> retrying ..."
19461951
)
19471952
existing[project_name] = self.get_all_ids(
19481953
dict(project=project_name),
1949-
include=COMPONENTS,
1954+
include=MPCC_SETTINGS.COMPONENTS,
19501955
timeout=timeout,
19511956
).get(project_name, {"identifiers": set()})
19521957
unique_identifiers[project_name] = (
@@ -1966,9 +1971,9 @@ def put_future(pk, payload):
19661971
else:
19671972
contribs[project_name] = [] # abort retrying
19681973
if total_processed != ncontribs:
1969-
if retries >= RETRIES:
1974+
if retries >= MPCC_SETTINGS.RETRIES:
19701975
MPCC_LOGGER.error(
1971-
f"{project_name}: Tried {RETRIES} times - abort."
1976+
f"{project_name}: Tried {MPCC_SETTINGS.RETRIES} times - abort."
19721977
)
19731978
elif not unique_identifiers.get(project_name):
19741979
MPCC_LOGGER.info(
@@ -2010,9 +2015,11 @@ def download_contributions(
20102015
include = include or []
20112016
outdir = Path(outdir) or Path(".")
20122017
outdir.mkdir(parents=True, exist_ok=True)
2013-
components = {x for x in include if x in COMPONENTS}
2018+
components = {x for x in include if x in MPCC_SETTINGS.COMPONENTS}
20142019
if include and not components:
2015-
raise MPContribsClientError(f"`include` must be subset of {COMPONENTS}!")
2020+
raise MPContribsClientError(
2021+
f"`include` must be subset of {MPCC_SETTINGS.COMPONENTS}!"
2022+
)
20162023

20172024
all_ids = self.get_all_ids(query, include=list(components), timeout=timeout)
20182025
fmt = query.get("format", "json")

mpcontribs-client/tests/test_client.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
import logging
33

44
from unittest.mock import patch, MagicMock
5-
from mpcontribs.client import validate_email, Client, email_format
5+
from mpcontribs.client.core import validate_email, Client, email_format
66
from swagger_spec_validator.common import SwaggerValidationError
77

88
logger = logging.Logger(__name__)

0 commit comments

Comments
 (0)