Skip to content

Commit de6b3ce

Browse files
authored
Handle large text strings as binary attributes (#122)
1 parent 1ceb24d commit de6b3ce

File tree

4 files changed

+56
-11
lines changed

4 files changed

+56
-11
lines changed

tests/test_client.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import v3io.dataplane.output
2828
import v3io.dataplane.response
2929
import v3io.logger
30+
from v3io.dataplane.kv_large_string import LARGE_STRING_MIN_SIZE
3031

3132

3233
class Test(unittest.TestCase):
@@ -431,6 +432,7 @@ def _get_float_array():
431432
"array_with_ints": _get_int_array(),
432433
"array_with_floats": _get_float_array(),
433434
"now": datetime.datetime.utcnow().replace(tzinfo=datetime.timezone.utc),
435+
"large_string": "a" * 10 * LARGE_STRING_MIN_SIZE,
434436
}
435437
}
436438

v3io/dataplane/kv_large_string.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# Copyright 2024 Iguazio
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
#
15+
16+
import zlib
17+
18+
# Length threshold for strings: the request encoder compares len(value) against
# this and, when the string is longer, writes it as a compressed binary ("B")
# attribute instead of a plain string ("S") attribute.
LARGE_STRING_MIN_SIZE = 60000
19+
20+
# Marker bytes placed in front of the zlib-compressed payload so that a binary
# attribute can be recognised as an encoded large string when it is read back.
prefix = b"_v3io_large_string"
21+
22+
23+
def is_large_bstring(attribute_value):
    """Report whether a binary attribute value starts with the large-string marker.

    The leading ``len(prefix)`` bytes of ``attribute_value`` are compared
    against the module-level ``prefix``; the result of that comparison is
    returned.
    """
    marker_length = len(prefix)
    leading_bytes = attribute_value[:marker_length]
    return leading_bytes == prefix
25+
26+
27+
def large_bstring_to_string(attribute_value):
    """Recover the text stored in a marker-prefixed binary attribute value.

    The first ``len(prefix)`` bytes are dropped without being inspected, the
    remainder is zlib-decompressed, and the resulting bytes are decoded as
    UTF-8 and returned.
    """
    payload_start = len(prefix)
    raw_text = zlib.decompress(attribute_value[payload_start:])
    return raw_text.decode("utf-8")
30+
31+
32+
def string_to_large_bstring(attribute_value):
    """Pack a text value into its marker-prefixed, compressed binary form.

    The string is encoded as UTF-8, compressed with zlib, and returned with
    the module-level ``prefix`` prepended.
    """
    utf8_bytes = attribute_value.encode("utf-8")
    return prefix + zlib.compress(utf8_bytes)

v3io/dataplane/output.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
import v3io.dataplane.kv_array
2020
import v3io.dataplane.kv_timestamp
21+
from v3io.dataplane.kv_large_string import is_large_bstring, large_bstring_to_string
2122

2223

2324
class Output(object):
@@ -33,12 +34,14 @@ def _decode_typed_attributes(self, typed_attributes):
3334
decoded_attribute = float(attribute_value)
3435
elif attribute_type == "B":
3536
decoded_attribute = base64.b64decode(attribute_value)
36-
37-
# try to decode as an array
38-
try:
39-
decoded_attribute = v3io.dataplane.kv_array.decode(decoded_attribute)
40-
except BaseException:
41-
pass
37+
if is_large_bstring(decoded_attribute):
38+
decoded_attribute = large_bstring_to_string(decoded_attribute)
39+
else:
40+
# try to decode as an array
41+
try:
42+
decoded_attribute = v3io.dataplane.kv_array.decode(decoded_attribute)
43+
except BaseException:
44+
pass
4245

4346
elif attribute_type == "S":
4447
if type(attribute_value) in [float, int]:

v3io/dataplane/request.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,10 @@
2929
import v3io.common.helpers
3030
import v3io.dataplane.kv_array
3131
import v3io.dataplane.kv_timestamp
32+
from v3io.dataplane.kv_large_string import (
33+
LARGE_STRING_MIN_SIZE,
34+
string_to_large_bstring,
35+
)
3236

3337
#
3438
# Request
@@ -419,12 +423,14 @@ def _dict_to_typed_attributes(d):
419423
attribute_type = type(value)
420424
type_value = None
421425

422-
if isinstance(value, future.utils.text_type):
423-
type_key = "S"
424-
type_value = value
425-
elif isinstance(value, future.utils.string_types):
426-
type_key = "S"
426+
if isinstance(value, future.utils.text_type) or isinstance(value, future.utils.string_types):
427427
type_value = str(value)
428+
if len(value) > LARGE_STRING_MIN_SIZE:
429+
type_key = "B"
430+
type_value = string_to_large_bstring(type_value)
431+
type_value = base64.b64encode(type_value)
432+
else:
433+
type_key = "S"
428434
elif attribute_type in [int, float]:
429435
type_key = "N"
430436
type_value = str(value)

0 commit comments

Comments
 (0)