11"""Define core client functionality."""
22
33import gzip
4+ import importlib .metadata
45import sys
56import time
67import pandas as pd
2526from pyisemail .diagnosis import BaseDiagnosis
2627from swagger_spec_validator .common import SwaggerValidationError
2728from jsonschema .exceptions import ValidationError
29+ from bravado .client import SwaggerClient
2830from bravado_core .formatter import SwaggerFormat
2931from bravado .swagger_model import Loader
3032from bravado .config import bravado_config_from_config_dict
3133from bravado_core .spec import Spec , build_api_serving_url , _identity
3234from bravado_core .model import model_discovery
35+ from bravado .requests_client import RequestsClient
3336from bravado_core .resource import build_resources
3437from bravado .exception import HTTPNotFound
3538from bravado_core .validate import validate_object
39+ from concurrent .futures import as_completed
3640from pymatgen .core import Structure as PmgStructure
3741from requests_futures .sessions import FuturesSession
3842from urllib3 .util .retry import Retry
4549from mpcontribs .client .logger import MPCC_LOGGER , TqdmToLogger
4650from mpcontribs .client .settings import MPCC_SETTINGS
4751from mpcontribs .client .types import PrettyDict , PrettyStructure , Table , Attachment
52+ from mpcontribs .client .units import ureg
53+ from mpcontribs .client .utils import get_md5
4854
4955classes_map = {
5056 "structures" : PrettyStructure ,
@@ -206,13 +212,7 @@ def _run_futures(futures, total: int = 0, timeout: int = -1, desc=None, disable=
206212@functools .lru_cache (maxsize = 1000 )
207213def _load (protocol , host , headers_json , project , version ):
208214 spec_dict = _raw_specs (protocol , host , version )
209- headers = (
210- orjson .loads (headers_json )
211- if isinstance (headers_json , str | bytes )
212- else headers_json
213- )
214- if isinstance (headers , bytes ):
215- headers = headers .decode (encoding = "utf-8" )
215+ headers = orjson .loads (headers_json )
216216
217217 if not spec_dict ["paths" ]:
218218 url = f"{ protocol } ://{ host } "
@@ -240,7 +240,11 @@ def _load(protocol, host, headers_json, project, version):
240240 projects = sorted (d ["name" ] for d in resp ["data" ])
241241 # expand regex-based query parameters for `data` columns
242242 spec = _expand_params (
243- protocol , host , version , projects , api_key = headers .get ("x-api-key" )
243+ protocol ,
244+ host ,
245+ version ,
246+ orjson .dumps (projects ),
247+ api_key = headers .get ("x-api-key" ),
244248 )
245249 spec .http_client .session .headers .update (headers )
246250 return spec
@@ -289,11 +293,7 @@ def _raw_specs(protocol, host, version):
289293)
290294def _expand_params (protocol , host , version , projects_json , api_key = None ):
291295 columns = {"string" : [], "number" : []}
292- projects = (
293- orjson .loads (projects_json )
294- if isinstance (projects_json , str | bytes )
295- else projects_json
296- )
296+ projects = orjson .loads (projects_json )
297297 query = {"project__in" : "," .join (projects )}
298298 query ["_fields" ] = "columns"
299299 url = f"{ protocol } ://{ host } "
@@ -365,7 +365,7 @@ def _version(url):
365365 retries , max_retries = 0 , 3
366366 protocol = urlparse (url ).scheme
367367 if "pytest" in sys .modules and protocol == "http" :
368- return __version__
368+ return importlib . metadata . version ( "mpcontribs-client" )
369369
370370 while retries < max_retries :
371371 try :
@@ -635,7 +635,9 @@ def available_query_params(
635635
636636 return [param for param in params if param .startswith (startswith )]
637637
638- def get_project (self , name : str | None = None , fields : list | None = None ) -> Dict :
638+ def get_project (
639+ self , name : str | None = None , fields : list | None = None
640+ ) -> PrettyDict :
639641 """Retrieve a project entry
640642
641643 Args:
@@ -851,7 +853,7 @@ def delete_project(self, name: str | None = None):
851853 if resp and "error" in resp :
852854 raise MPContribsClientError (resp ["error" ])
853855
854- def get_contribution (self , cid : str , fields : list | None = None ) -> Dict :
856+ def get_contribution (self , cid : str , fields : list | None = None ) -> PrettyDict :
855857 """Retrieve a contribution
856858
857859 Args:
@@ -907,7 +909,7 @@ def get_table(self, tid_or_md5: str) -> Table:
907909
908910 return Table .from_dict (table )
909911
910- def get_structure (self , sid_or_md5 : str ) -> Structure :
912+ def get_structure (self , sid_or_md5 : str ) -> PrettyStructure :
911913 """Retrieve pymatgen structure
912914
913915 Args:
@@ -933,7 +935,7 @@ def get_structure(self, sid_or_md5: str) -> Structure:
933935
934936 fields = list (self .get_model ("StructuresSchema" )._properties .keys ())
935937 resp = self .structures .getStructureById (pk = sid , _fields = fields ).result ()
936- return Structure .from_dict (resp )
938+ return PrettyStructure .from_dict (resp )
937939
938940 def get_attachment (self , aid_or_md5 : str ) -> Attachment :
939941 """Retrieve an attachment
@@ -1659,7 +1661,7 @@ def submit_contributions(
16591661 tic = time .perf_counter ()
16601662 project_names = set ()
16611663 collect_ids = []
1662- require_one_of = {"data" } | set (COMPONENTS )
1664+ require_one_of = {"data" } | set (MPCC_SETTINGS . COMPONENTS )
16631665
16641666 for idx , c in enumerate (contributions ):
16651667 has_keys = require_one_of & c .keys ()
@@ -1714,7 +1716,10 @@ def submit_contributions(
17141716 else {"project" : project_names [0 ]}
17151717 )
17161718 existing = defaultdict (
1717- dict , self .get_all_ids (query , include = COMPONENTS , timeout = timeout )
1719+ dict ,
1720+ self .get_all_ids (
1721+ query , include = MPCC_SETTINGS .COMPONENTS , timeout = timeout
1722+ ),
17181723 )
17191724
17201725 # prepare contributions
@@ -1723,7 +1728,7 @@ def submit_contributions(
17231728 fields = [
17241729 comp
17251730 for comp in self .get_model ("ContributionsSchema" )._properties .keys ()
1726- if comp not in COMPONENTS
1731+ if comp not in MPCC_SETTINGS . COMPONENTS
17271732 ]
17281733 fields .remove ("needs_build" ) # internal field
17291734
@@ -1760,13 +1765,13 @@ def submit_contributions(
17601765
17611766 contribs [project_name ].append (contrib_copy )
17621767
1763- for component in COMPONENTS :
1768+ for component in MPCC_SETTINGS . COMPONENTS :
17641769 elements = contrib .get (component , [])
17651770 nelems = len (elements )
17661771
1767- if nelems > MAX_ELEMS :
1772+ if nelems > MPCC_SETTINGS . MAX_ELEMS :
17681773 raise MPContribsClientError (
1769- f"Too many { component } ({ nelems } > { MAX_ELEMS } )!"
1774+ f"Too many { component } ({ nelems } > { MPCC_SETTINGS . MAX_ELEMS } )!"
17701775 )
17711776
17721777 if update and not nelems :
@@ -1894,7 +1899,7 @@ def put_future(pk, payload):
18941899 )
18951900
18961901 payload = orjson .dumps (c )
1897- if len (payload ) < MAX_PAYLOAD :
1902+ if len (payload ) < MPCC_SETTINGS . MAX_PAYLOAD :
18981903 futures .append (put_future (pk , payload ))
18991904 else :
19001905 MPCC_LOGGER .error (
@@ -1905,7 +1910,7 @@ def put_future(pk, payload):
19051910 next_payload = orjson .dumps (next_post_chunk )
19061911 if (
19071912 len (next_post_chunk ) > nmax
1908- or len (next_payload ) >= MAX_PAYLOAD
1913+ or len (next_payload ) >= MPCC_SETTINGS . MAX_PAYLOAD
19091914 ):
19101915 if post_chunk :
19111916 payload = orjson .dumps (post_chunk )
@@ -1938,15 +1943,15 @@ def put_future(pk, payload):
19381943
19391944 if (
19401945 total_processed != ncontribs
1941- and retries < RETRIES
1946+ and retries < MPCC_SETTINGS . RETRIES
19421947 and unique_identifiers .get (project_name )
19431948 ):
19441949 MPCC_LOGGER .info (
19451950 f"{ total_processed } /{ ncontribs } processed -> retrying ..."
19461951 )
19471952 existing [project_name ] = self .get_all_ids (
19481953 dict (project = project_name ),
1949- include = COMPONENTS ,
1954+ include = MPCC_SETTINGS . COMPONENTS ,
19501955 timeout = timeout ,
19511956 ).get (project_name , {"identifiers" : set ()})
19521957 unique_identifiers [project_name ] = (
@@ -1966,9 +1971,9 @@ def put_future(pk, payload):
19661971 else :
19671972 contribs [project_name ] = [] # abort retrying
19681973 if total_processed != ncontribs :
1969- if retries >= RETRIES :
1974+ if retries >= MPCC_SETTINGS . RETRIES :
19701975 MPCC_LOGGER .error (
1971- f"{ project_name } : Tried { RETRIES } times - abort."
1976+ f"{ project_name } : Tried { MPCC_SETTINGS . RETRIES } times - abort."
19721977 )
19731978 elif not unique_identifiers .get (project_name ):
19741979 MPCC_LOGGER .info (
@@ -2010,9 +2015,11 @@ def download_contributions(
20102015 include = include or []
20112016 outdir = Path (outdir ) or Path ("." )
20122017 outdir .mkdir (parents = True , exist_ok = True )
2013- components = {x for x in include if x in COMPONENTS }
2018+ components = {x for x in include if x in MPCC_SETTINGS . COMPONENTS }
20142019 if include and not components :
2015- raise MPContribsClientError (f"`include` must be subset of { COMPONENTS } !" )
2020+ raise MPContribsClientError (
2021+ f"`include` must be subset of { MPCC_SETTINGS .COMPONENTS } !"
2022+ )
20162023
20172024 all_ids = self .get_all_ids (query , include = list (components ), timeout = timeout )
20182025 fmt = query .get ("format" , "json" )
0 commit comments