4
4
)
5
5
from collections .abc import (
6
6
Mapping ,
7
- Sequence ,
8
7
)
9
8
from datetime import (
10
9
datetime ,
18
17
ClassVar ,
19
18
Generic ,
20
19
Optional ,
20
+ Sequence ,
21
+ Tuple ,
22
+ Type ,
21
23
TypeVar ,
22
24
Union ,
25
+ get_args ,
23
26
)
24
27
25
28
import attr
40
43
SimpleSourceSpec ,
41
44
SourceRef ,
42
45
)
46
+ from azul .openapi import (
47
+ schema ,
48
+ )
43
49
from azul .time import (
44
50
format_dcp2_datetime ,
45
51
parse_dcp2_datetime ,
@@ -228,15 +234,24 @@ def __str__(self) -> str:
228
234
return f'aggregate for { self .entity } '
229
235
230
236
237
+ # The native type of the field in documents as they are being created by a
238
+ # transformer or processed by an aggregator.
231
239
N = TypeVar ('N' )
232
240
241
+ # The type of the field in a document just before it is written to the
242
+ # index. Think "translated type".
233
243
T = TypeVar ('T' , bound = AnyJSON )
234
244
235
245
236
246
class FieldType (Generic [N , T ], metaclass = ABCMeta ):
237
247
shadowed : bool = False
238
248
es_sort_mode : str = 'min'
239
249
allow_sorting_by_empty_lists : bool = True
250
+ operators : Tuple [str , ...] = ('is' ,)
251
+
252
+ def __init__ (self , native_type : Type [N ], translated_type : Type [T ]):
253
+ self .native_type = native_type
254
+ self .translated_type = translated_type
240
255
241
256
@property
242
257
@abstractmethod
@@ -254,12 +269,16 @@ def from_index(self, value: T) -> N:
254
269
def to_tsv (self , value : N ) -> str :
255
270
return '' if value is None else str (value )
256
271
272
+ @property
273
+ def api_type (self ) -> JSON :
274
+ return schema .make_type (self .native_type )
275
+
257
276
258
277
class PassThrough (Generic [T ], FieldType [T , T ]):
259
278
allow_sorting_by_empty_lists = False
260
279
261
- def __init__ (self , * , es_type : Optional [str ]):
262
- super ().__init__ ()
280
+ def __init__ (self , translated_type , * , es_type : Optional [str ]):
281
+ super ().__init__ (translated_type , translated_type )
263
282
self ._es_type = es_type
264
283
265
284
@property
@@ -272,21 +291,78 @@ def to_index(self, value: T) -> T:
272
291
def from_index (self , value : T ) -> T :
273
292
return value
274
293
294
+ @property
295
+ def operators (self ) -> Tuple [str , ...]:
296
+ if self .native_type == int :
297
+ return 'is' , 'within'
298
+ else :
299
+ return 'is' ,
300
+
275
301
276
- pass_thru_str : PassThrough [str ] = PassThrough (es_type = 'keyword' )
277
- pass_thru_int : PassThrough [int ] = PassThrough (es_type = 'long' )
278
- pass_thru_bool : PassThrough [bool ] = PassThrough (es_type = 'boolean' )
302
+ pass_thru_str : PassThrough [str ] = PassThrough (str , es_type = 'keyword' )
303
+ pass_thru_int : PassThrough [int ] = PassThrough (int , es_type = 'long' )
304
+ pass_thru_bool : PassThrough [bool ] = PassThrough (bool , es_type = 'boolean' )
279
305
# FIXME: change the es_type for JSON to `nested`
280
306
# https://github.com/DataBiosphere/azul/issues/2621
281
- pass_thru_json : PassThrough [JSON ] = PassThrough (es_type = None )
307
+ pass_thru_json : PassThrough [JSON ] = PassThrough (JSON , es_type = None )
308
+
309
+
310
class ClosedRange(PassThrough[JSON]):
    """
    Pass-through field type for JSON objects that have exactly the keys
    `gte` and `lte`.

    Values are handed to and from the index unchanged; both directions only
    assert that the value has the expected set of keys before delegating to
    the inherited pass-through implementation.
    """
    # A plain class attribute; it takes the place of the `operators` property
    # that PassThrough defines.
    operators = ('is', 'within', 'contains', 'intersects')

    # The exact set of keys every value of this type must have.
    valid_keys = {'gte', 'lte'}

    def __init__(self, translated_type):
        # `es_type` is always left as None for this field type.
        super().__init__(translated_type, es_type=None)

    def _checked(self, value: T) -> T:
        """
        Return `value` after asserting that its keys are exactly
        `valid_keys`. The offending value is the assertion message.
        """
        expected_keys = self.valid_keys
        assert expected_keys == value.keys(), value
        return value

    def to_index(self, value: T) -> T:
        """
        Check the keys of `value`, then pass it through to the index.
        """
        checked = self._checked(value)
        return super().to_index(checked)

    def from_index(self, value: T) -> T:
        """
        Check the keys of `value`, then pass it through from the index.
        """
        checked = self._checked(value)
        return super().from_index(checked)

    @property
    def api_type(self) -> JSON:
        """
        The API type of this field, built by `schema.make_type` from `int`.
        """
        api_type = schema.make_type(int)
        return api_type
+
329
+
330
+ closed_range = ClosedRange (JSON )
331
+
332
+
333
class Nullable(FieldType[Optional[N], T]):
    """
    Abstract base class for field types whose native value may be `None`.

    The native type registered with the base class is `Optional[...]` of the
    given type, while the unwrapped type remains available as `native_type_`.
    Subclasses must provide `es_type`, `to_index` and `from_index`.
    """

    def __init__(self, native_type_: Type[N], translated_type: Type[T]) -> None:
        """
        :param native_type_: the native type of the field, without the
                             `Optional` wrapper

        :param translated_type: the type of the field as written to the index
        """
        super().__init__(Optional[native_type_], translated_type)
        # Keep the unwrapped type, too. `api_type` below is derived from it
        # rather than from the Optional-wrapped `native_type`.
        self.native_type_ = native_type_

    @property
    @abstractmethod
    def es_type(self) -> Optional[str]:
        raise NotImplementedError

    # The native side of both conversions is annotated as `Optional[N]` so as
    # to agree with the `FieldType[Optional[N], T]` base declared above.

    @abstractmethod
    def to_index(self, value: Optional[N]) -> T:
        """
        Translate a native value, which may be `None`, to its index form.
        """
        raise NotImplementedError

    @abstractmethod
    def from_index(self, value: T) -> Optional[N]:
        """
        Translate an index value back to its native form, which may be
        `None`.
        """
        raise NotImplementedError

    @property
    def api_type(self) -> JSON:
        """
        The API type of this field, built by `schema.make_type` from the
        unwrapped native type.
        """
        return schema.make_type(self.native_type_)
+
356
+
357
+ class NullableString (Nullable [str , str ]):
285
358
# Note that the replacement values for `None` used for each data type
286
359
# ensure that `None` values are placed at the end of a sorted list.
287
360
null_string = '~null'
288
361
es_type = 'keyword'
289
362
363
+ def __init__ (self ):
364
+ super ().__init__ (str , str )
365
+
290
366
def to_index (self , value : Optional [str ]) -> str :
291
367
return self .null_string if value is None else value
292
368
@@ -298,16 +374,25 @@ def from_index(self, value: str) -> Optional[str]:
298
374
299
375
Number = Union [float , int ]
300
376
301
- N_ = TypeVar ('N_' , bound = Number )
377
+ # `N_` is the same as `N`, except for numeric types. We would specify a bound
378
+ # for this type variable if it weren't for a limitation of the PyCharm type
379
+ # checker: with the bound set, PyCharm does not emit a warning when passing an
380
+ # int to a method of NullableNumber(float).
381
+ N_ = TypeVar ('N_' )
302
382
303
383
304
- class NullableNumber (Generic [N_ ], FieldType [ Optional [ N_ ] , Number ]):
384
+ class NullableNumber (Generic [N_ ], Nullable [ N_ , Number ]):
305
385
shadowed = True
306
386
# Maximum int that can be represented as a 64-bit int and double IEEE
307
387
# floating point number. This prevents loss when converting between the two.
308
388
null_int = sys .maxsize - 1023
309
389
assert null_int == int (float (null_int ))
310
390
es_type = 'long'
391
+ operators = ('is' , 'within' )
392
+
393
+ def __init__ (self , native_type_ : Type [N_ ]) -> None :
394
+ assert issubclass (native_type_ , get_args (Number ))
395
+ super ().__init__ (native_type_ , Number )
311
396
312
397
def to_index (self , value : Optional [N_ ]) -> Number :
313
398
return self .null_int if value is None else value
@@ -316,14 +401,18 @@ def from_index(self, value: Number) -> Optional[N_]:
316
401
return None if value == self .null_int else value
317
402
318
403
319
- null_int : NullableNumber [ int ] = NullableNumber ()
404
+ null_int = NullableNumber (int )
320
405
321
- null_float : NullableNumber [ float ] = NullableNumber ()
406
+ null_float = NullableNumber (float )
322
407
323
408
324
409
class NullableBool (NullableNumber [bool ]):
325
410
shadowed = False
326
411
es_type = 'boolean'
412
+ operators = ('is' ,)
413
+
414
+ def __init__ (self ):
415
+ super ().__init__ (bool )
327
416
328
417
def to_index (self , value : Optional [bool ]) -> Number :
329
418
value = {False : 0 , True : 1 , None : None }[value ]
@@ -334,10 +423,10 @@ def from_index(self, value: Number) -> Optional[bool]:
334
423
return {0 : False , 1 : True , None : None }[value ]
335
424
336
425
337
- null_bool : NullableBool = NullableBool ()
426
+ null_bool = NullableBool ()
338
427
339
428
340
- class NullableDateTime (FieldType [ Optional [ str ] , str ]):
429
+ class NullableDateTime (Nullable [ str , str ]):
341
430
es_type = 'date'
342
431
null = format_dcp2_datetime (datetime (9999 , 1 , 1 , tzinfo = timezone .utc ))
343
432
@@ -355,16 +444,26 @@ def from_index(self, value: str) -> Optional[str]:
355
444
return value
356
445
357
446
358
- null_datetime : NullableDateTime = NullableDateTime ()
447
+ null_datetime : NullableDateTime = NullableDateTime (str , str )
359
448
360
449
361
450
class Nested(PassThrough[JSON]):
    """
    Pass-through JSON field type with `es_type` set to `nested`, described by
    a mapping from property name to the field type of that property.
    """
    properties: Mapping[str, FieldType]

    def __init__(self, **properties):
        """
        :param properties: the field type of each property, passed as keyword
                           arguments named after the properties
        """
        super().__init__(JSON, es_type='nested')
        self.properties = properties

    @property
    def api_type(self) -> JSON:
        """
        The API type of this field: the result of `schema.object` called with
        one keyword argument per property.
        """

        def member_type(field_type):
            # Nullable field types contribute their unwrapped native type,
            # marked as optional. All others contribute their native type.
            if isinstance(field_type, Nullable):
                return schema.optional(field_type.native_type_)
            return field_type.native_type

        members = {
            name: member_type(field_type)
            for name, field_type in self.properties.items()
        }
        return schema.object(**members)
466
+
368
467
369
468
FieldTypes4 = Union [Mapping [str , FieldType ], Sequence [FieldType ], FieldType ]
370
469
FieldTypes3 = Union [Mapping [str , FieldTypes4 ], Sequence [FieldType ], FieldType ]
0 commit comments