3636__copyright__ = 'Copyright (c) 2017-2018, Evgeny Blokhin, Tilde Materials Informatics'
3737__license__ = 'MIT'
3838
class MPDSDataTypes(object):
    """
    Named constants for the ``dtype`` option of the MPDS API client.

    One of these values can be given to the ``MPDSDataRetrieval``
    constructor as ``dtype``; the client then sends it unchanged as the
    ``dtype`` query parameter of every request. When no ``dtype`` is given,
    the client falls back to ``PEER_REVIEWED``.

    NOTE(review): the meaning of each constant is inferred from its name
    only (peer-reviewed data, machine-learning data, everything);
    confirm the exact semantics against the MPDS API documentation.
    """
    PEER_REVIEWED = 1     # default used by MPDSDataRetrieval
    MACHINE_LEARNING = 2
    ALL = 7
43+
class APIError(Exception):
    """
    Simple error handling

    Exception raised by the MPDS client when a request cannot be fulfilled.

    Attributes:
        msg: human-readable description of the failure
        code: numeric error code, 0 when no specific code is known
    """
    def __init__(self, msg, code=0):
        # Forward the message to the base class so that ``args`` is
        # populated (needed for pickling and for generic handlers that
        # inspect ``e.args``); previously the message was dropped there.
        Exception.__init__(self, msg)
        self.msg = msg
        self.code = code

    def __str__(self):
        # repr() keeps the output unambiguous for non-ASCII / non-string messages
        return repr(self.msg)
4955
@@ -55,6 +61,7 @@ def _massage_atsymb(sequence):
5561 """
5662 if sys .version_info [0 ] < 3 :
5763 return [i .encode ('ascii' ) for i in sequence ]
64+
5865 return sequence
5966
6067class MPDSDataRetrieval (object ):
@@ -118,7 +125,7 @@ class MPDSDataRetrieval(object):
118125 maxnphases = 1500 # more phases require additional requests
119126 chillouttime = 2 # please, do not use values < 2, because the server may burn out
120127
121- def __init__ (self , api_key = None , endpoint = None ):
128+ def __init__ (self , api_key = None , endpoint = None , dtype = None ):
122129 """
123130 MPDS API consumer constructor
124131
@@ -131,6 +138,7 @@ def __init__(self, api_key=None, endpoint=None):
131138 self .api_key = api_key if api_key else os .environ ['MPDS_KEY' ]
132139 self .network = httplib2 .Http ()
133140 self .endpoint = endpoint or MPDSDataRetrieval .endpoint
141+ self .dtype = dtype or MPDSDataTypes .PEER_REVIEWED
134142
135143 def _request (self , query , phases = (), page = 0 , pagesize = None ):
136144 phases = ',' .join ([str (int (x )) for x in phases ]) if phases else ''
@@ -140,20 +148,24 @@ def _request(self, query, phases=(), page=0, pagesize=None):
140148 'q' : json .dumps (query ),
141149 'phases' : phases ,
142150 'page' : page ,
143- 'pagesize' : pagesize or self .pagesize
151+ 'pagesize' : pagesize or self .pagesize ,
152+ 'dtype' : self .dtype
144153 }),
145154 method = 'GET' ,
146155 headers = {'Key' : self .api_key }
147156 )
148157
149158 if response .status != 200 :
150159 return {'error' : 'HTTP error code %s' % response .status , 'code' : response .status }
160+
151161 try :
152162 content = json .loads (content )
153163 except :
154164 return {'error' : 'Unreadable data obtained' }
165+
155166 if content .get ('error' ):
156167 return {'error' : content ['error' ]}
168+
157169 if not content ['out' ]:
158170 return {'error' : 'No hits' , 'code' : 1 }
159171
@@ -167,6 +179,7 @@ def _massage(self, array, fields):
167179
168180 for item in array :
169181 filtered = []
182+
170183 for object_type in ['S' , 'P' , 'C' ]:
171184 if item ['object_type' ] == object_type :
172185 for expr in fields .get (object_type , []):
@@ -176,7 +189,7 @@ def _massage(self, array, fields):
176189 filtered .append (expr )
177190 break
178191 else :
179- raise APIError ("API error: unknown data type" )
192+ raise APIError ("API error: unknown entry type" )
180193
181194 output .append (filtered )
182195
@@ -199,11 +212,13 @@ def count_data(self, search, phases=(), **kwargs):
199212
200213 if result ['error' ]:
201214 raise APIError (result ['error' ], result .get ('code' , 0 ))
215+
202216 if result ['npages' ] > self .maxnpages :
203217 warnings .warn (
204218 "\r \n Dataset is too big, to retrieve it you may risk to change maxnpages from %s to %s" % \
205219 (self .maxnpages , int (math .ceil (result ['count' ]/ self .pagesize )))
206220 )
221+
207222 return result ['count' ]
208223
209224 def get_data (self , search , phases = (), fields = default_fields ):
@@ -229,9 +244,11 @@ def get_data(self, search, phases=(), fields=default_fields):
229244 key : [jmespath .compile (item ) if isinstance (item , str ) else item () for item in value ]
230245 for key , value in fields .items ()
231246 } if fields else None
247+
232248 tot_count = 0
233249
234250 phases = list (set (phases ))
251+
235252 if len (phases ) > self .maxnphases :
236253 all_phases = array_split (phases , int (math .ceil (
237254 len (phases )/ self .maxnphases
@@ -243,6 +260,7 @@ def get_data(self, search, phases=(), fields=default_fields):
243260 for step , current_phases in enumerate (all_phases , start = 1 ):
244261
245262 counter , hits_count = 0 , 0
263+
246264 while True :
247265 result = self ._request (search , phases = list (current_phases ), page = counter )
248266 if result ['error' ]:
@@ -252,12 +270,13 @@ def get_data(self, search, phases=(), fields=default_fields):
252270 raise APIError (
253271 "Too many hits (%s > %s), please, be more specific" % \
254272 (result ['count' ], MPDSDataRetrieval .maxnpages * MPDSDataRetrieval .pagesize ),
255- 1
273+ 2
256274 )
257275 output .extend (self ._massage (result ['out' ], fields ))
258276
259277 if hits_count and hits_count != result ['count' ]:
260278 raise APIError ("API error: hits count has been changed during the query" )
279+
261280 hits_count = result ['count' ]
262281
263282 time .sleep (MPDSDataRetrieval .chillouttime )
@@ -277,6 +296,7 @@ def get_data(self, search, phases=(), fields=default_fields):
277296
278297 sys .stdout .write ("\r \n Got %s hits\r \n " % tot_count )
279298 sys .stdout .flush ()
299+
280300 return output
281301
282302 def get_dataframe (self , * args , ** kwargs ):
@@ -307,6 +327,7 @@ def get_crystals(self, search={}, phases=(), flavor='pmg'):
307327
308328 crystals = []
309329 for crystal_struct in self .get_data (search , phases , fields = {'S' :['cell_abc' , 'sg_n' , 'setting' , 'basis_noneq' , 'els_noneq' ]}):
330+
310331 crobj = self .compile_crystal (crystal_struct , flavor )
311332 if crobj is not None :
312333 crystals .append (crobj )
@@ -319,7 +340,7 @@ def compile_crystal(datarow, flavor='pmg'):
319340 Helper method for representing the MPDS crystal structures in two flavors:
320341 either as a Pymatgen Structure object, or as an ASE Atoms object.
321342
322- Attention #1. Disordered structures (i.e . fractional indices in the chemical formulae)
343+ Attention #1. Disordered structures (e.g . fractional indices in the chemical formulae)
323344 are not supported by this method, and hence the occupancies are not retrieved.
324345 Currently it's up to the user to take care of that (see e.g.
325346 https://doi.org/10.1186/s13321-016-0129-3 etc.).
0 commit comments