Skip to content

Commit ad75603

Browse files
apronchenkov authored and copybara-github committed
Change the default value kd.map_py(fn, ..., include_missing=False)
Set the default value of `include_missing` to `False`, so that, by default, the function `fn` is applied only to items present in all `args` and `kwargs`.

PiperOrigin-RevId: 714964283
Change-Id: Ic82580783c00c4df2cd03a405a3eaa44885d4110
1 parent 78049c6 commit ad75603

File tree

5 files changed: +31 -54 lines changed

py/koladata/ext/py_cloudpickle_test.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -56,7 +56,7 @@ def test_apply_py(self):
5656

5757
def test_map_py(self):
5858
def f(x):
59-
return x + 1 if x is not None else None
59+
return x + 1
6060

6161
x = ds([[1, 2, None, 4], [None, None], [7, 8, 9]])
6262
res = kd.map_py(py_cloudpickle.py_cloudpickle(f), x)

py/koladata/functor/functor_factories.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -448,9 +448,9 @@ def map_py_fn(
448448
schema: The schema to use for resulting DataSlice.
449449
max_threads: maximum number of threads to use.
450450
ndim: Dimensionality of items to pass to `f`.
451-
include_missing: Specifies whether `f` should be applied to the missing
452-
items. By default, the function is applied to all items including the
453-
missing. `include_missing=False` can only be used with `ndim=0`.
451+
include_missing: Specifies whether `f` applies to all items (`=True`) or
452+
only to items present in all `args` and `kwargs` (`=False`, valid only
453+
when `ndim=0`); defaults to `False` when `ndim=0`.
454454
**defaults: Keyword defaults to pass to the function. The values in this map
455455
may be kde expressions, format strings, or 0-dim DataSlices. See the
456456
docstring for py_fn for more details.

py/koladata/functor/functor_factories_test.py

Lines changed: 6 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -459,16 +459,12 @@ def test_py_fn_explicit_return_type(self):
459459

460460
def test_fstr_fn_simple(self):
461461
testing.assert_equal(
462-
kd.call(
463-
functor_factories.fstr_fn(f'{I.x:s} {I.y:s}'), x=1, y=2
464-
),
462+
kd.call(functor_factories.fstr_fn(f'{I.x:s} {I.y:s}'), x=1, y=2),
465463
ds('1 2'),
466464
)
467465

468466
testing.assert_equal(
469-
kd.call(
470-
functor_factories.fstr_fn(f'{(I.x + I.y):s}'), x=1, y=2
471-
),
467+
kd.call(functor_factories.fstr_fn(f'{(I.x + I.y):s}'), x=1, y=2),
472468
ds('3'),
473469
)
474470

@@ -483,21 +479,15 @@ def test_fstr_fn_expr(self):
483479

484480
def test_fstr_fn_variable(self):
485481
testing.assert_equal(
486-
kd.call(
487-
functor_factories.fstr_fn(f'{V.x:s} {I.y:s}', x=1), y=2
488-
),
482+
kd.call(functor_factories.fstr_fn(f'{V.x:s} {I.y:s}', x=1), y=2),
489483
ds('1 2'),
490484
)
491485

492486
def test_fstr_fn_no_substitutions(self):
493-
with self.assertRaisesRegex(
494-
ValueError, 'FString has nothing to format'
495-
):
487+
with self.assertRaisesRegex(ValueError, 'FString has nothing to format'):
496488
_ = kd.call(functor_factories.fstr_fn('abc'))
497489

498-
with self.assertRaisesRegex(
499-
ValueError, 'FString has nothing to format'
500-
):
490+
with self.assertRaisesRegex(ValueError, 'FString has nothing to format'):
501491
# we need to use fstring to avoid the error (f'{I.x}')
502492
_ = kd.call(functor_factories.fstr_fn('{I.x}'), x=1)
503493

@@ -779,7 +769,7 @@ def fn(x):
779769
kd.call(
780770
functor_factories.map_py_fn(fn), x=kdi.slice([1, None, None])
781771
).to_py(),
782-
[2, -1, -1],
772+
[2, None, None],
783773
)
784774
self.assertEqual(
785775
kd.call(

py/koladata/operators/py.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -552,9 +552,9 @@ def my_expansion(x):
552552
schema: The schema to use for resulting DataSlice.
553553
max_threads: maximum number of threads to use.
554554
ndim: Dimensionality of items to pass to `fn`.
555-
include_missing: Specifies whether `fn` should be computed to the missing
556-
items. By default, the function is applied to all items including the
557-
missing. `include_missing=False` can only be used with `ndim=0`.
555+
include_missing: Specifies whether `fn` applies to all items (`=True`) or
556+
only to items present in all `args` and `kwargs` (`=False`, valid only
557+
when `ndim=0`); defaults to `False` when `ndim=0`.
558558
item_completed_callback: A callback that will be called after each item is
559559
processed. It will be called in the original thread that called `map_py`
560560
in case `max_threads` is greater than 1, as we rely on this property for
@@ -573,8 +573,8 @@ def my_expansion(x):
573573
include_missing, param_name='include_missing'
574574
)
575575
if include_missing is None:
576-
include_missing = True
577-
if not include_missing and ndim != 0:
576+
include_missing = ndim != 0
577+
elif not include_missing and ndim != 0:
578578
raise ValueError('`include_missing=False` can only be used with `ndim=0`')
579579
if not args and not kwargs:
580580
raise TypeError('expected at least one input DataSlice, got none')

py/koladata/operators/tests/py_map_py_test.py

Lines changed: 16 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -42,7 +42,7 @@ class PyMapPyTest(parameterized.TestCase):
4242

4343
def test_map_py_single_arg(self):
4444
def add_one(x):
45-
return x + 1 if x is not None else None
45+
return x + 1
4646

4747
x = ds([[1, 2, None, 4], [None, None], [7, 8, 9]])
4848
res = expr_eval.eval(kde.py.map_py(add_one, x))
@@ -118,8 +118,6 @@ def add_one(x):
118118

119119
def test_map_py_multi_args(self):
120120
def add_all(x, y, z):
121-
if x is None or y is None or z is None:
122-
return None
123121
return x + y + z
124122

125123
val1 = ds([[1, 2, None, 4], [None, None], [7, 8, 9]])
@@ -132,7 +130,7 @@ def add_all(x, y, z):
132130

133131
def test_map_py_texting_output(self):
134132
def as_string(x):
135-
return str(x) if x is not None else None
133+
return str(x)
136134

137135
val = ds([[1, 2, None, 4], [None, None], [7, 8, 9]])
138136
res = expr_eval.eval(kde.py.map_py(as_string, val))
@@ -142,7 +140,7 @@ def as_string(x):
142140

143141
def test_map_py_texting_input(self):
144142
def as_string(x):
145-
return int(x) if x is not None else None
143+
return int(x)
146144

147145
val = ds([['1', '2', None, '4'], [None, None], ['7', '8', '9']])
148146
res = expr_eval.eval(kde.py.map_py(as_string, val))
@@ -152,7 +150,7 @@ def as_string(x):
152150

153151
def test_map_py_with_qtype(self):
154152
def add_one(x):
155-
return x + 1 if x is not None else None
153+
return x + 1
156154

157155
val = ds([[1, 2, None, 4], [None, None], [7, 8, 9]])
158156
res = expr_eval.eval(
@@ -175,17 +173,13 @@ def test_map_py_with_schema(self):
175173
)
176174

177175
def my_func_dynamic_schema(x):
178-
return None if x is None else functions.new(u=x, v=x + 1)
176+
return functions.new(u=x, v=x + 1)
179177

180178
def my_func_any_schema(x):
181-
return (
182-
None
183-
if x is None
184-
else functions.new(u=x, v=x + 1, schema=schema_constants.ANY)
185-
)
179+
return functions.new(u=x, v=x + 1, schema=schema_constants.ANY)
186180

187181
def my_func_correct_schema(x):
188-
return None if x is None else functions.new(u=x, v=x + 1, schema=schema)
182+
return functions.new(u=x, v=x + 1, schema=schema)
189183

190184
res = expr_eval.eval(
191185
kde.py.map_py(my_func_correct_schema, val, schema=schema)
@@ -246,7 +240,7 @@ def my_func_correct_schema(x):
246240
)
247241

248242
def my_func_same_bag(schema, x):
249-
return None if x is None else db.new(u=x, v=x + 1, schema=schema)
243+
return db.new(u=x, v=x + 1, schema=schema)
250244

251245
res = expr_eval.eval(
252246
kde.py.map_py(
@@ -368,16 +362,14 @@ def my_fn(x):
368362

369363
def test_map_py_scalar_input(self):
370364
def add_one(x):
371-
return x + 1 if x is not None else None
365+
return x + 1
372366

373367
val = ds(5)
374368
res = expr_eval.eval(kde.py.map_py(add_one, val))
375369
testing.assert_equal(res.no_bag(), ds(6))
376370

377371
def test_map_py_auto_expand(self):
378372
def my_add(x, y):
379-
if x is None or y is None:
380-
return None
381373
return x + y
382374

383375
val1 = ds(1)
@@ -387,8 +379,6 @@ def my_add(x, y):
387379

388380
def test_map_py_raw_input(self):
389381
def my_add(x, y):
390-
if x is None or y is None:
391-
return None
392382
return x + y
393383

394384
res = expr_eval.eval(
@@ -398,7 +388,7 @@ def my_add(x, y):
398388

399389
def test_map_py_dict(self):
400390
def as_dict(x):
401-
return {'x': x, 'y': x + 1 if x is not None else 57}
391+
return {'x': x, 'y': x + 1}
402392

403393
val = ds([[1, 2, None, 4], [None, None], [7, 8, 9]])
404394
res = expr_eval.eval(kde.py.map_py(as_dict, val))
@@ -408,7 +398,9 @@ def as_dict(x):
408398
)
409399
testing.assert_equal(
410400
res['y'].no_bag(),
411-
ds([[2, 3, 57, 5], [57, 57], [8, 9, 10]], schema_constants.OBJECT),
401+
ds(
402+
[[2, 3, None, 5], [None, None], [8, 9, 10]], schema_constants.OBJECT
403+
),
412404
)
413405

414406
def test_map_py_invalid_qtype(self):
@@ -422,8 +414,6 @@ def as_set(x):
422414

423415
def test_map_py_incompatible_inputs(self):
424416
def add_x_y(x, y):
425-
if x is None or y is None:
426-
return None
427417
return x + y
428418

429419
val1 = ds([[1, 2, None, 4], [None, None], [7, 8, 9]])
@@ -439,8 +429,6 @@ def add_x_y(x, y):
439429

440430
def test_map_py_kwargs(self):
441431
def my_fn(x, y, z=2, **kwargs):
442-
if x is None or y is None or z is None:
443-
return None
444432
return x + y + z + kwargs.get('w', 5)
445433

446434
x = ds([[0, 0, 1], [None, 1, 0]])
@@ -531,13 +519,12 @@ def test_map_py_expanded_results(self):
531519
with self.subTest('expand_one_dim'):
532520

533521
def ranges(x):
534-
return list(range(x or 0))
522+
return list(range(x))
535523

536524
res = expr_eval.eval(kde.py.map_py(ranges, val))
537525
self.assertEqual(res.get_ndim(), 2)
538526
self.assertEqual(
539-
res.to_py(),
540-
[[[0], [0, 1], []], [[0, 1, 2, 3], [0, 1, 2, 3, 4]]],
527+
res.to_py(), [[[0], [0, 1], None], [[0, 1, 2, 3], [0, 1, 2, 3, 4]]]
541528
)
542529

543530
with self.subTest('expand_several_dims'):
@@ -549,7 +536,7 @@ def expnd(x):
549536
self.assertEqual(res.get_ndim(), 2)
550537
self.assertEqual(
551538
res.to_py(),
552-
[[[[1, -1]], [[2, -1]], [[None, -1]]], [[[4, -1]], [[5, -1]]]],
539+
[[[[1, -1]], [[2, -1]], None], [[[4, -1]], [[5, -1]]]],
553540
)
554541

555542
with self.subTest('agg_and_expand'):

0 commit comments

Comments
 (0)