Skip to content
7 changes: 7 additions & 0 deletions metaflow/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,13 @@ def compress_list(lst, separator=",", rangedelim=":", zlibmarker="!", zlibmin=50


def decompress_list(lststr, separator=",", rangedelim=":", zlibmarker="!"):
# Handle the empty-list round-trip: compress_list([]) == "" so we must
# return [] without touching lststr[0] (which would raise IndexError).
# Note: This introduces a minor ambiguity where both compress_list([])
# and compress_list([""]) return "". Decompressing "" now consistently
# returns [], favoring the empty list.
if lststr == "":
return []
Comment thread
greptile-apps[bot] marked this conversation as resolved.
# Three input modes:
if lststr[0] == zlibmarker:
# 3. zlib-compressed, base64-encoded
Expand Down
51 changes: 51 additions & 0 deletions test/unit/test_util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
from metaflow.util import compress_list, decompress_list


def test_compress_decompress_empty_list():
    """An empty list must round-trip to [] without raising."""
    encoded = compress_list([])
    assert decompress_list(encoded) == []


def test_decompress_empty_string():
    """Passing the empty string directly must yield an empty list."""
    decoded = decompress_list("")
    assert decoded == []
Comment thread
greptile-apps[bot] marked this conversation as resolved.


def test_compress_decompress_single_element():
    """A one-item list must come back unchanged from a round trip."""
    original = ["abc"]
    restored = decompress_list(compress_list(original))
    assert restored == original


def test_compress_decompress_plain_csv():
    """A short list of distinct items must come back unchanged from a round trip."""
    original = ["a", "b", "c"]
    restored = decompress_list(compress_list(original))
    assert restored == original


def test_compress_decompress_prefix_encoded():
    """Items sharing the prefix "test_" must come back unchanged from a round trip.

    This only checks the round-trip result; it does not inspect the encoded
    string, so the prefix-encoded form (mode 2) is covered indirectly.
    """
    original = ["test_1", "test_2", "test_3"]
    encoded = compress_list(original)
    restored = decompress_list(encoded)
    assert restored == original
Comment thread
greptile-apps[bot] marked this conversation as resolved.


def test_compress_decompress_zlib():
    """A 1000-item list must use the zlib form (mode 3) and still round-trip."""
    marker = "!"
    original = list(map(str, range(1000)))
    encoded = compress_list(original)
    # The leading marker is what identifies the zlib-compressed form.
    assert encoded.startswith(marker)
    restored = decompress_list(encoded)
    assert restored == original


def test_compress_empty_string_element_ambiguity():
    """Pin the known limitation that [] and [""] share the encoding "".

    Because both lists compress to the empty string and decompressing ""
    returns [], a round trip of [""] loses its single empty element.
    """
    # Both inputs collapse to the same encoded form.
    assert compress_list([]) == ""
    assert compress_list([""]) == ""
    # The empty string always decodes to the empty list.
    assert decompress_list("") == []
    # Known limitation: [""] comes back as [] rather than [""].
    lossy = decompress_list(compress_list([""]))
    assert lossy == []
Loading