Skip to content

Commit 35e0b30

Browse files
committed
Distinguish peer-reviewed and machine-learning MPDS data
1 parent f194ac1 commit 35e0b30

File tree

3 files changed

+28
-7
lines changed

3 files changed

+28
-7
lines changed

mpds_client/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,3 @@
11

2-
from .retrieve_MPDS import MPDSDataRetrieval, APIError
2+
from .retrieve_MPDS import MPDSDataTypes, APIError, MPDSDataRetrieval
33
from .export_MPDS import MPDSExport

mpds_client/retrieve_MPDS.py

Lines changed: 26 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -36,6 +36,11 @@
3636
__copyright__ = 'Copyright (c) 2017-2018, Evgeny Blokhin, Tilde Materials Informatics'
3737
__license__ = 'MIT'
3838

39+
class MPDSDataTypes(object):
    """
    Numeric codes selecting which kind of MPDS data a request should return.

    One of these values is handed to ``MPDSDataRetrieval(dtype=...)`` and is
    forwarded unchanged as the ``dtype`` request parameter; when no ``dtype``
    is given, the client falls back to ``PEER_REVIEWED``.
    """
    PEER_REVIEWED = 1     # peer-reviewed data (the client's default)
    MACHINE_LEARNING = 2  # machine-learning data
    # NOTE(review): 7 looks like a bitmask (1 | 2 | 4) that leaves room for
    # a flag not defined in this class -- confirm against the MPDS API.
    ALL = 7
43+
3944
class APIError(Exception):
4045
"""
4146
Simple error handling
@@ -44,6 +49,7 @@ def __init__(self, msg, code=0):
4449
Exception.__init__(self)
4550
self.msg = msg
4651
self.code = code
52+
4753
def __str__(self):
4854
return repr(self.msg)
4955

@@ -55,6 +61,7 @@ def _massage_atsymb(sequence):
5561
"""
5662
if sys.version_info[0] < 3:
5763
return [i.encode('ascii') for i in sequence]
64+
5865
return sequence
5966

6067
class MPDSDataRetrieval(object):
@@ -118,7 +125,7 @@ class MPDSDataRetrieval(object):
118125
maxnphases = 1500 # more phases require additional requests
119126
chillouttime = 2 # please, do not use values < 2, because the server may burn out
120127

121-
def __init__(self, api_key=None, endpoint=None):
128+
def __init__(self, api_key=None, endpoint=None, dtype=None):
122129
"""
123130
MPDS API consumer constructor
124131
@@ -131,6 +138,7 @@ def __init__(self, api_key=None, endpoint=None):
131138
self.api_key = api_key if api_key else os.environ['MPDS_KEY']
132139
self.network = httplib2.Http()
133140
self.endpoint = endpoint or MPDSDataRetrieval.endpoint
141+
self.dtype = dtype or MPDSDataTypes.PEER_REVIEWED
134142

135143
def _request(self, query, phases=(), page=0, pagesize=None):
136144
phases = ','.join([str(int(x)) for x in phases]) if phases else ''
@@ -140,20 +148,24 @@ def _request(self, query, phases=(), page=0, pagesize=None):
140148
'q': json.dumps(query),
141149
'phases': phases,
142150
'page': page,
143-
'pagesize': pagesize or self.pagesize
151+
'pagesize': pagesize or self.pagesize,
152+
'dtype': self.dtype
144153
}),
145154
method='GET',
146155
headers={'Key': self.api_key}
147156
)
148157

149158
if response.status != 200:
150159
return {'error': 'HTTP error code %s' % response.status, 'code': response.status}
160+
151161
try:
152162
content = json.loads(content)
153163
except:
154164
return {'error': 'Unreadable data obtained'}
165+
155166
if content.get('error'):
156167
return {'error': content['error']}
168+
157169
if not content['out']:
158170
return {'error': 'No hits', 'code': 1}
159171

@@ -167,6 +179,7 @@ def _massage(self, array, fields):
167179

168180
for item in array:
169181
filtered = []
182+
170183
for object_type in ['S', 'P', 'C']:
171184
if item['object_type'] == object_type:
172185
for expr in fields.get(object_type, []):
@@ -176,7 +189,7 @@ def _massage(self, array, fields):
176189
filtered.append(expr)
177190
break
178191
else:
179-
raise APIError("API error: unknown data type")
192+
raise APIError("API error: unknown entry type")
180193

181194
output.append(filtered)
182195

@@ -199,11 +212,13 @@ def count_data(self, search, phases=(), **kwargs):
199212

200213
if result['error']:
201214
raise APIError(result['error'], result.get('code', 0))
215+
202216
if result['npages'] > self.maxnpages:
203217
warnings.warn(
204218
"\r\nDataset is too big, to retrieve it you may risk to change maxnpages from %s to %s" % \
205219
(self.maxnpages, int(math.ceil(result['count']/self.pagesize)))
206220
)
221+
207222
return result['count']
208223

209224
def get_data(self, search, phases=(), fields=default_fields):
@@ -229,9 +244,11 @@ def get_data(self, search, phases=(), fields=default_fields):
229244
key: [jmespath.compile(item) if isinstance(item, str) else item() for item in value]
230245
for key, value in fields.items()
231246
} if fields else None
247+
232248
tot_count = 0
233249

234250
phases = list(set(phases))
251+
235252
if len(phases) > self.maxnphases:
236253
all_phases = array_split(phases, int(math.ceil(
237254
len(phases)/self.maxnphases
@@ -243,6 +260,7 @@ def get_data(self, search, phases=(), fields=default_fields):
243260
for step, current_phases in enumerate(all_phases, start=1):
244261

245262
counter, hits_count = 0, 0
263+
246264
while True:
247265
result = self._request(search, phases=list(current_phases), page=counter)
248266
if result['error']:
@@ -252,12 +270,13 @@ def get_data(self, search, phases=(), fields=default_fields):
252270
raise APIError(
253271
"Too many hits (%s > %s), please, be more specific" % \
254272
(result['count'], MPDSDataRetrieval.maxnpages*MPDSDataRetrieval.pagesize),
255-
1
273+
2
256274
)
257275
output.extend(self._massage(result['out'], fields))
258276

259277
if hits_count and hits_count != result['count']:
260278
raise APIError("API error: hits count has been changed during the query")
279+
261280
hits_count = result['count']
262281

263282
time.sleep(MPDSDataRetrieval.chillouttime)
@@ -277,6 +296,7 @@ def get_data(self, search, phases=(), fields=default_fields):
277296

278297
sys.stdout.write("\r\nGot %s hits\r\n" % tot_count)
279298
sys.stdout.flush()
299+
280300
return output
281301

282302
def get_dataframe(self, *args, **kwargs):
@@ -307,6 +327,7 @@ def get_crystals(self, search={}, phases=(), flavor='pmg'):
307327

308328
crystals = []
309329
for crystal_struct in self.get_data(search, phases, fields={'S':['cell_abc', 'sg_n', 'setting', 'basis_noneq', 'els_noneq']}):
330+
310331
crobj = self.compile_crystal(crystal_struct, flavor)
311332
if crobj is not None:
312333
crystals.append(crobj)
@@ -319,7 +340,7 @@ def compile_crystal(datarow, flavor='pmg'):
319340
Helper method for representing the MPDS crystal structures in two flavors:
320341
either as a Pymatgen Structure object, or as an ASE Atoms object.
321342
322-
Attention #1. Disordered structures (i.e. fractional indices in the chemical formulae)
343+
Attention #1. Disordered structures (e.g. fractional indices in the chemical formulae)
323344
are not supported by this method, and hence the occupancies are not retrieved.
324345
Currently it's up to the user to take care of that (see e.g.
325346
https://doi.org/10.1186/s13321-016-0129-3 etc.).

setup.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@
1212

1313
setup(
1414
name='mpds_client',
15-
version='0.0.15',
15+
version='0.0.16',
1616
author='Evgeny Blokhin',
1717
author_email='eb@tilde.pro',
1818
description='MPDS platform API client',

0 commit comments

Comments
 (0)