Skip to content

Commit 8693d33

Browse files
abhinavmuk04meta-codesync[bot]
authored andcommitted
Add map_trim_values UDF (facebookincubator#15973)
Summary: Pull Request resolved: facebookincubator#15973 Implements a new Velox-only function `map_trim_values(map(K, array(V)), n)` that trims the value arrays in a map to a specified maximum size. This is useful for optimizing memory usage and performance for large feature maps where value arrays may grow unbounded. Behavior: - If n >= 0: Each value array is trimmed to at most n elements - If n < 0: The original map is returned unchanged - Null elements in arrays are preserved in the output The implementation follows the simple function pattern used by other map functions like `map_subset`. Reviewed By: zacw7 Differential Revision: D90476691 fbshipit-source-id: 35ee713ca0dbf55857f189086311cea2919d3911
1 parent 15c37e9 commit 8693d33

File tree

6 files changed

+567
-0
lines changed

6 files changed

+567
-0
lines changed

velox/docs/functions/presto/map.rst

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,24 @@ Map Functions
192192
SELECT map_top_n(map(ARRAY['a', 'b', 'c'], ARRAY[2, 3, 1]), 2) --- {'b' -> 3, 'a' -> 2}
193193
SELECT map_top_n(map(ARRAY['a', 'b', 'c'], ARRAY[NULL, 3, NULL]), 2) --- {'b' -> 3, 'c' -> NULL}
194194

195+
.. function:: map_trim_values(map(K, array(V)), n) -> map(K, array(V))
196+
197+
Trims the value arrays in a map to a specified maximum size.
198+
This function is useful for optimizing memory usage and performance for large feature maps
199+
where the value arrays may grow unbounded.
200+
201+
Returns a map where each value array is trimmed to at most n elements.
202+
If n is negative, returns the original map unchanged.
203+
If n is 0, returns a map where all values are empty arrays.
204+
If a value array has n or fewer elements, it is left unchanged.
205+
Null map values and null elements within the value arrays are preserved in the output. ::
206+
207+
SELECT map_trim_values(MAP(ARRAY['a', 'b'], ARRAY[ARRAY[1, 2, 3], ARRAY[4, 5, 6, 7]]), 2); -- {a -> [1, 2], b -> [4, 5]}
208+
SELECT map_trim_values(MAP(ARRAY['a'], ARRAY[ARRAY[1, 2]]), 5); -- {a -> [1, 2]}
209+
SELECT map_trim_values(MAP(ARRAY['a'], ARRAY[ARRAY[1, NULL, 3]]), 2); -- {a -> [1, NULL]}
210+
SELECT map_trim_values(MAP(ARRAY['a'], ARRAY[ARRAY[1, 2, 3]]), 0); -- {a -> []}
211+
SELECT map_trim_values(MAP(ARRAY['a'], ARRAY[ARRAY[1, 2, 3]]), -1); -- {a -> [1, 2, 3]}
212+
195213
.. function:: map_keys_by_top_n_values(map(K,V), n) -> array(K)
196214

197215
Returns an array of the top N keys from a map. Keeps only the top N elements by value. Keys are used to break ties with the max key being chosen. Both keys and values should be orderable.

velox/expression/fuzzer/ExpressionFuzzerTest.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,7 @@ std::unordered_set<std::string> skipFunctionsSOT = {
304304
"map_keys_overlap", // Velox-only function, not available in Presto
305305
"map_append", // Velox-only function, not available in Presto
306306
"map_update", // Velox-only function, not available in Presto
307+
"map_trim_values", // Velox-only function, not available in Presto
307308
"noisy_empty_approx_set_sfm", // non-deterministic because of privacy.
308309
// https://github.com/facebookincubator/velox/issues/11034
309310
"cast(real) -> varchar",
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
/*
2+
* Copyright (c) Facebook, Inc. and its affiliates.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
#pragma once
17+
18+
#include <algorithm>
19+
#include <cstddef>
20+
#include <cstdint>
21+
22+
#include "velox/expression/ComplexViewTypes.h"
23+
#include "velox/functions/Macros.h"
24+
25+
namespace facebook::velox::functions {
26+
27+
/// map_trim_values(map(K, array(V)), n) -> map(K, array(V))
28+
///
29+
/// Trims the value arrays in a map to a specified maximum size.
30+
/// This function is useful for optimizing memory usage and performance
31+
/// for large feature maps where the value arrays may grow unbounded.
32+
///
33+
/// Returns a map where each value array is trimmed to at most n elements.
34+
/// If n is negative, returns the original map unchanged.
35+
/// If n is 0, returns a map where all values are empty arrays.
36+
/// If a value array has fewer than n elements, it is left unchanged.
37+
/// Null map values are preserved in the output.
38+
/// Null elements within arrays are also preserved.
39+
template <typename TExec>
40+
struct MapTrimValuesFunction {
41+
VELOX_DEFINE_FUNCTION_TYPES(TExec);
42+
43+
void call(
44+
out_type<Map<Generic<T1>, Array<Generic<T2>>>>& out,
45+
const arg_type<Map<Generic<T1>, Array<Generic<T2>>>>& inputMap,
46+
int64_t n) {
47+
// If n is negative, preserve the original map as-is
48+
if (n < 0) {
49+
out.copy_from(inputMap);
50+
return;
51+
}
52+
53+
for (const auto& entry : inputMap) {
54+
if (!entry.second.has_value()) {
55+
auto& keyWriter = out.add_null();
56+
keyWriter.copy_from(entry.first);
57+
} else {
58+
auto [keyWriter, valueWriter] = out.add_item();
59+
keyWriter.copy_from(entry.first);
60+
61+
const auto& valueArray = entry.second.value();
62+
const auto arraySize = static_cast<size_t>(valueArray.size());
63+
const auto trimSize = std::min(static_cast<size_t>(n), arraySize);
64+
65+
size_t count = 0;
66+
for (const auto& element : valueArray) {
67+
if (count >= trimSize) {
68+
break;
69+
}
70+
if (element.has_value()) {
71+
auto& elementWriter = valueWriter.add_item();
72+
elementWriter.copy_from(element.value());
73+
} else {
74+
valueWriter.add_null();
75+
}
76+
++count;
77+
}
78+
}
79+
}
80+
}
81+
};
82+
83+
} // namespace facebook::velox::functions

velox/functions/prestosql/registration/MapFunctionsRegistration.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#include "velox/functions/prestosql/MapTopN.h"
3131
#include "velox/functions/prestosql/MapTopNKeys.h"
3232
#include "velox/functions/prestosql/MapTopNValues.h"
33+
#include "velox/functions/prestosql/MapTrimValues.h"
3334
#include "velox/functions/prestosql/MapUpdate.h"
3435
#include "velox/functions/prestosql/MapValuesInRange.h"
3536
#include "velox/functions/prestosql/MultimapFromEntries.h"
@@ -355,6 +356,12 @@ void registerMapFunctions(const std::string& prefix) {
355356
Map<Generic<T1>, double>,
356357
double,
357358
double>({prefix + "map_values_in_range"});
359+
360+
registerFunction<
361+
MapTrimValuesFunction,
362+
Map<Generic<T1>, Array<Generic<T2>>>,
363+
Map<Generic<T1>, Array<Generic<T2>>>,
364+
int64_t>({prefix + "map_trim_values"});
358365
}
359366

360367
void registerMapAllowingDuplicates(

velox/functions/prestosql/tests/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ add_executable(
9393
MapTopNTest.cpp
9494
MapTopNKeysTest.cpp
9595
MapTopNValuesTest.cpp
96+
MapTrimValuesTest.cpp
9697
MapKeysAndValuesTest.cpp
9798
MapKeysByTopNValuesTest.cpp
9899
MapMatchTest.cpp

0 commit comments

Comments
 (0)