Skip to content

Commit bcc78d6

Browse files
amarjandu authored and achave11-ucsc committed
[1/2] Spec for filters argument should be driven by field types (#2254)
This commit breaks azul.openapi.schema.make_type and everything that depends on it. (@hannes-ucsc)
1 parent 034f4fd commit bcc78d6

File tree

5 files changed

+161
-36
lines changed

5 files changed

+161
-36
lines changed

lambdas/service/app.py

Lines changed: 26 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@
6161
HealthController,
6262
)
6363
from azul.indexer.document import (
64+
FieldType,
6465
Nested,
6566
)
6667
from azul.logging import (
@@ -1087,28 +1088,41 @@ def list_catalogs():
10871088
termFacets=generic_object_spec
10881089
)
10891090

1091+
1092+
def generate_operator_spec(field_type: FieldType) -> Sequence[JSON]:
    """
    Build the alternative schemas for filtering on a field of the given type.

    One schema is produced per entry in ``field_type.operators``, in the same
    order. The result is intended to be used as the value of a ``oneOf``.

    For the ``is`` operator, the schema is made by ``schema.object`` with a
    single ``is_`` keyword whose value is an array of the field's API type
    (presumably rendered as the ``is`` property; confirm against
    ``schema.object``).

    For any other operator, the schema is an object type that disallows
    additional properties and whose only property is named after the
    operator. The value of that property is an array of one to sixteen items,
    each of which is a two-element array of the field's API type.

    :param field_type: the field type whose ``operators`` and ``api_type``
                       drive the generated schemas

    :return: a list with one schema per supported operator
    """
    # A single pair of values of the field's API type
    pair_schema = schema.array(field_type.api_type,
                               minItems=2,
                               maxItems=2)
    # Between one and sixteen such pairs. The same schema instance is shared
    # by every operator other than `is`.
    pairs_schema = schema.array({},
                                items=pair_schema,
                                minItems=1,
                                maxItems=16)

    def spec_for(operator: str) -> JSON:
        if operator != 'is':
            return schema.object_type(additionalProperties=False,
                                      properties={operator: pairs_schema})
        return schema.object(is_=schema.array(field_type.api_type))

    return [spec_for(operator) for operator in field_type.operators]
1111+
1112+
10901113
filters_param_spec = params.query(
10911114
'filters',
10921115
schema.optional(application_json(schema.object_type(
10931116
default='{}',
10941117
example={'cellCount': {'within': [[10000, 1000000000]]}},
10951118
properties={
10961119
field: {
1097-
'oneOf': [
1098-
schema.object(is_=schema.array({})),
1099-
*(
1100-
schema.object_type({
1101-
op: schema.array({}, minItems=2, maxItems=2)
1102-
})
1103-
for op in ['contains', 'within', 'intersects']
1104-
)
1105-
]
1120+
'oneOf': generate_operator_spec(field_type)
11061121
}
1107-
for field in app.fields
1122+
for field, field_type
1123+
in app.repository_controller.field_types(app.catalog).items()
11081124
}
11091125
))),
1110-
# FIXME: Spec for `filters` argument should be driven by field types
1111-
# https://github.com/DataBiosphere/azul/issues/2254
11121126
description=format_description('''
11131127
Criteria to filter entities from the search results.
11141128

src/azul/indexer/document.py

Lines changed: 115 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
)
55
from collections.abc import (
66
Mapping,
7-
Sequence,
87
)
98
from datetime import (
109
datetime,
@@ -18,8 +17,12 @@
1817
ClassVar,
1918
Generic,
2019
Optional,
20+
Sequence,
21+
Tuple,
22+
Type,
2123
TypeVar,
2224
Union,
25+
get_args,
2326
)
2427

2528
import attr
@@ -40,6 +43,9 @@
4043
SimpleSourceSpec,
4144
SourceRef,
4245
)
46+
from azul.openapi import (
47+
schema,
48+
)
4349
from azul.time import (
4450
format_dcp2_datetime,
4551
parse_dcp2_datetime,
@@ -228,15 +234,24 @@ def __str__(self) -> str:
228234
return f'aggregate for {self.entity}'
229235

230236

237+
# The native type of the field in documents as they are being created by a
238+
# transformer or processed by an aggregator.
231239
N = TypeVar('N')
232240

241+
# The type of the field in a document just before it's being written to the
242+
# index. Think "translated type".
233243
T = TypeVar('T', bound=AnyJSON)
234244

235245

236246
class FieldType(Generic[N, T], metaclass=ABCMeta):
237247
shadowed: bool = False
238248
es_sort_mode: str = 'min'
239249
allow_sorting_by_empty_lists: bool = True
250+
operators: Tuple[str, ...] = ('is',)
251+
252+
def __init__(self, native_type: Type[N], translated_type: Type[T]):
253+
self.native_type = native_type
254+
self.translated_type = translated_type
240255

241256
@property
242257
@abstractmethod
@@ -254,12 +269,16 @@ def from_index(self, value: T) -> N:
254269
def to_tsv(self, value: N) -> str:
255270
return '' if value is None else str(value)
256271

272+
@property
273+
def api_type(self) -> JSON:
274+
return schema.make_type(self.native_type)
275+
257276

258277
class PassThrough(Generic[T], FieldType[T, T]):
259278
allow_sorting_by_empty_lists = False
260279

261-
def __init__(self, *, es_type: Optional[str]):
262-
super().__init__()
280+
def __init__(self, translated_type, *, es_type: Optional[str]):
281+
super().__init__(translated_type, translated_type)
263282
self._es_type = es_type
264283

265284
@property
@@ -272,21 +291,78 @@ def to_index(self, value: T) -> T:
272291
def from_index(self, value: T) -> T:
273292
return value
274293

294+
@property
295+
def operators(self) -> Tuple[str, ...]:
296+
if self.native_type == int:
297+
return 'is', 'within'
298+
else:
299+
return 'is',
300+
275301

276-
pass_thru_str: PassThrough[str] = PassThrough(es_type='keyword')
277-
pass_thru_int: PassThrough[int] = PassThrough(es_type='long')
278-
pass_thru_bool: PassThrough[bool] = PassThrough(es_type='boolean')
302+
pass_thru_str: PassThrough[str] = PassThrough(str, es_type='keyword')
303+
pass_thru_int: PassThrough[int] = PassThrough(int, es_type='long')
304+
pass_thru_bool: PassThrough[bool] = PassThrough(bool, es_type='boolean')
279305
# FIXME: change the es_type for JSON to `nested`
280306
# https://github.com/DataBiosphere/azul/issues/2621
281-
pass_thru_json: PassThrough[JSON] = PassThrough(es_type=None)
307+
pass_thru_json: PassThrough[JSON] = PassThrough(JSON, es_type=None)
308+
309+
310+
class ClosedRange(PassThrough[JSON]):
    """
    A pass-through field type for JSON objects whose keys must be exactly
    ``gte`` and ``lte``. Values are passed to and from the index unchanged,
    after asserting that they have exactly those keys.
    """
    # The filter operators supported for fields of this type
    operators = ('is', 'within', 'contains', 'intersects')
    # The exact set of keys every value of this type must have
    valid_keys = {'gte', 'lte'}

    def __init__(self, translated_type):
        # No Elasticsearch type is specified for fields of this type
        super().__init__(translated_type, es_type=None)

    def _check_keys(self, value: T) -> None:
        """
        Assert that the given value has exactly the keys in `valid_keys`. The
        offending value is attached to the assertion error.
        """
        assert self.valid_keys == value.keys(), value

    def to_index(self, value: T) -> T:
        self._check_keys(value)
        return super().to_index(value)

    def from_index(self, value: T) -> T:
        self._check_keys(value)
        return super().from_index(value)

    @property
    def api_type(self) -> JSON:
        # The API type is derived from `int`, not from this field type's
        # native type.
        return schema.make_type(int)
328+
329+
330+
closed_range = ClosedRange(JSON)
331+
332+
333+
class Nullable(FieldType[Optional[N], T]):
    """
    Abstract base class for field types whose native value may be ``None``.

    The native type registered with the superclass is ``Optional`` of the
    given type, while the unwrapped type is retained in ``native_type_`` and
    used to derive the API type.
    """

    def __init__(self, native_type_: Type[N], translated_type: Type[T]) -> None:
        """
        :param native_type_: the native type of non-null values

        :param translated_type: the type of values as written to the index
        """
        super().__init__(Optional[native_type_], translated_type)
        # Keep the unwrapped type, `self.native_type` holds the Optional
        self.native_type_ = native_type_

    @property
    @abstractmethod
    def es_type(self) -> Optional[str]:
        raise NotImplementedError

    @abstractmethod
    def to_index(self, value: N) -> T:
        raise NotImplementedError

    @abstractmethod
    def from_index(self, value: T) -> N:
        raise NotImplementedError

    @property
    def api_type(self) -> JSON:
        # Derive the API type from the unwrapped type, not from the Optional
        unwrapped_type = self.native_type_
        return schema.make_type(unwrapped_type)
355+
356+
357+
class NullableString(Nullable[str, str]):
285358
# Note that the replacement values for `None` used for each data type
286359
# ensure that `None` values are placed at the end of a sorted list.
287360
null_string = '~null'
288361
es_type = 'keyword'
289362

363+
def __init__(self):
364+
super().__init__(str, str)
365+
290366
def to_index(self, value: Optional[str]) -> str:
291367
return self.null_string if value is None else value
292368

@@ -298,16 +374,25 @@ def from_index(self, value: str) -> Optional[str]:
298374

299375
Number = Union[float, int]
300376

301-
N_ = TypeVar('N_', bound=Number)
377+
# `N_` is the same as `N`, except for numeric types. We would specify a bound
378+
# for this type variable if it weren't for a limitation of the PyCharm type
379+
# checker: with the bound set, PyCharm does not emit a warning when passing an
380+
# int to a method of NullableNumber(float).
381+
N_ = TypeVar('N_')
302382

303383

304-
class NullableNumber(Generic[N_], FieldType[Optional[N_], Number]):
384+
class NullableNumber(Generic[N_], Nullable[N_, Number]):
305385
shadowed = True
306386
# Maximum int that can be represented as a 64-bit int and double IEEE
307387
# floating point number. This prevents loss when converting between the two.
308388
null_int = sys.maxsize - 1023
309389
assert null_int == int(float(null_int))
310390
es_type = 'long'
391+
operators = ('is', 'within')
392+
393+
def __init__(self, native_type_: Type[N_]) -> None:
394+
assert issubclass(native_type_, get_args(Number))
395+
super().__init__(native_type_, Number)
311396

312397
def to_index(self, value: Optional[N_]) -> Number:
313398
return self.null_int if value is None else value
@@ -316,14 +401,18 @@ def from_index(self, value: Number) -> Optional[N_]:
316401
return None if value == self.null_int else value
317402

318403

319-
null_int: NullableNumber[int] = NullableNumber()
404+
null_int = NullableNumber(int)
320405

321-
null_float: NullableNumber[float] = NullableNumber()
406+
null_float = NullableNumber(float)
322407

323408

324409
class NullableBool(NullableNumber[bool]):
325410
shadowed = False
326411
es_type = 'boolean'
412+
operators = ('is',)
413+
414+
def __init__(self):
415+
super().__init__(bool)
327416

328417
def to_index(self, value: Optional[bool]) -> Number:
329418
value = {False: 0, True: 1, None: None}[value]
@@ -334,10 +423,10 @@ def from_index(self, value: Number) -> Optional[bool]:
334423
return {0: False, 1: True, None: None}[value]
335424

336425

337-
null_bool: NullableBool = NullableBool()
426+
null_bool = NullableBool()
338427

339428

340-
class NullableDateTime(FieldType[Optional[str], str]):
429+
class NullableDateTime(Nullable[str, str]):
341430
es_type = 'date'
342431
null = format_dcp2_datetime(datetime(9999, 1, 1, tzinfo=timezone.utc))
343432

@@ -355,16 +444,26 @@ def from_index(self, value: str) -> Optional[str]:
355444
return value
356445

357446

358-
null_datetime: NullableDateTime = NullableDateTime()
447+
null_datetime: NullableDateTime = NullableDateTime(str, str)
359448

360449

361450
class Nested(PassThrough[JSON]):
    """
    A pass-through field type for JSON objects, using the ``nested``
    Elasticsearch type. The field type of each property of the object is
    given as a keyword argument to the constructor.
    """
    # Maps the name of each property to the field type of that property
    properties: Mapping[str, FieldType]

    def __init__(self, **properties):
        super().__init__(JSON, es_type='nested')
        self.properties = properties

    @property
    def api_type(self) -> JSON:
        """
        An object schema with one property per entry in `properties`. The
        native type of a nullable property is wrapped with `schema.optional`.
        """

        def property_type(field_type):
            if isinstance(field_type, Nullable):
                # Use the unwrapped type, `native_type` would be an Optional
                return schema.optional(field_type.native_type_)
            return field_type.native_type

        return schema.object(**{
            name: property_type(field_type)
            for name, field_type in self.properties.items()
        })
466+
368467

369468
FieldTypes4 = Union[Mapping[str, FieldType], Sequence[FieldType], FieldType]
370469
FieldTypes3 = Union[Mapping[str, FieldTypes4], Sequence[FieldType], FieldType]

src/azul/openapi/schema.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -409,6 +409,9 @@ def make_type(t: TYPE) -> JSON:
409409
>>> make_type(str)
410410
{'type': 'string'}
411411
412+
>>> make_type(JSON)
413+
{'type': 'object'}
414+
412415
A JSON schema type name may be used instead:
413416
414417
>>> make_type('string')
@@ -427,8 +430,8 @@ def make_type(t: TYPE) -> JSON:
427430
"""
428431
if t is None or isinstance(t, type):
429432
return _primitive_types[t]
430-
# We can't use `JSON` directly because it is generic and parameterized
431-
# but __origin__ yields the unparameterized generic type.
433+
elif t == JSON:
434+
return {'type': 'object'}
432435
elif isinstance(t, str):
433436
return {'type': t}
434437
elif isinstance(t, get_origin(JSON)):

0 commit comments

Comments
 (0)