Skip to content

Commit 59cf052

Browse files
committed
[evaluation] fix comments for Video-mme dataset support
This commit fixes comments for Video-mme dataset support.

TICO-DCO-1.0-Signed-off-by: Evgenii Maltsev <e.maltsev@samsung.com>
1 parent 7359f32 commit 59cf052

7 files changed

Lines changed: 318 additions & 92 deletions

File tree

test/quantization/recipes/test_video_mme_evaluation.py

Lines changed: 159 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -90,11 +90,9 @@ def fake_print_lmms_eval_results(results):
9090
processor=MagicMock(),
9191
device="cpu",
9292
use_cache="/tmp/cache",
93-
cache_dir="/tmp/hf_cache",
9493
)
9594

9695
self.assertEqual(captured["use_cache"], "/tmp/cache")
97-
self.assertEqual(captured["cache_dir"], "/tmp/hf_cache")
9896

9997

10098
class TestLmmsEvalUtils(unittest.TestCase):
@@ -109,22 +107,10 @@ def test_build_model_args_infers_qwen3_vl(self):
109107
model_name_str = "Qwen/Qwen3-VL-2B-Instruct"
110108
model.config._name_or_path = model_name_str
111109

112-
processor = MagicMock()
113-
processor.tokenizer.name_or_path = model_name_str
114-
115-
model_name, model_args = _build_model_args(
116-
model, processor, device="cuda", batch_size=2, max_new_tokens=16
117-
)
110+
model_name, model_args = _build_model_args(model)
118111

119112
self.assertEqual(model_name, "qwen3_vl")
120113
self.assertEqual(model_args["pretrained"], model_name_str)
121-
# Only 'pretrained' is in model_args; batch_size, device,
122-
# max_new_tokens, and tokenizer are NOT (they'd cause errors
123-
# with lmms-eval model constructors like Qwen3_VL).
124-
self.assertNotIn("batch_size", model_args)
125-
self.assertNotIn("device", model_args)
126-
self.assertNotIn("max_new_tokens", model_args)
127-
self.assertNotIn("tokenizer", model_args)
128114

129115
def test_build_model_args_passes_max_num_frames(self):
130116
"""_build_model_args should include max_num_frames in model_args."""
@@ -134,14 +120,8 @@ def test_build_model_args_passes_max_num_frames(self):
134120
type(model).__name__ = "Qwen3VLForConditionalGeneration"
135121
model.config._name_or_path = "Qwen/Qwen3-VL-2B-Instruct"
136122

137-
processor = MagicMock()
138-
139123
model_name, model_args = _build_model_args(
140124
model,
141-
processor,
142-
device="cuda",
143-
batch_size=1,
144-
max_new_tokens=16,
145125
max_num_frames=5,
146126
)
147127

@@ -164,7 +144,7 @@ def test_get_custom_tasks_dir_finds_lmms_tasks(self):
164144

165145
tasks_dir = _get_custom_tasks_dir()
166146
self.assertIsNotNone(tasks_dir)
167-
self.assertTrue(tasks_dir.endswith("lmms_tasks"))
147+
self.assertTrue(tasks_dir.endswith("lmms_tasks")) # type: ignore[union-attr]
168148

169149
def test_print_results_fallback(self):
170150
"""Fallback printer should handle float and non-float values."""
@@ -205,16 +185,18 @@ def test_limit_1_downloads_1_chunk(self):
205185
self.assertIn("videos_chunked_01.zip", patterns)
206186
self.assertNotIn("videos_chunked_02.zip", patterns)
207187

208-
def test_limit_41_downloads_2_chunks(self):
209-
"""A limit of 41 (> _SAMPLES_PER_CHUNK) should download 2 chunks."""
188+
def test_limit_61_downloads_3_chunks(self):
189+
"""A limit of 61 (> 2*_SAMPLES_PER_CHUNK) should download 3 chunks."""
210190
from tico.quantization.evaluation.lmms_eval_utils import (
211191
_compute_video_chunk_patterns,
212192
)
213193

214-
patterns = _compute_video_chunk_patterns(limit=41)
194+
# _SAMPLES_PER_CHUNK = 30, so ceil(61/30) = 3 chunks
195+
patterns = _compute_video_chunk_patterns(limit=61)
215196
self.assertIn("videos_chunked_01.zip", patterns)
216197
self.assertIn("videos_chunked_02.zip", patterns)
217-
self.assertNotIn("videos_chunked_03.zip", patterns)
198+
self.assertIn("videos_chunked_03.zip", patterns)
199+
self.assertNotIn("videos_chunked_04.zip", patterns)
218200

219201
def test_limit_none_downloads_all_chunks(self):
220202
"""No limit (None) should download all 20 chunks."""
@@ -268,6 +250,144 @@ def test_non_videomme_repo_passes_through(self):
268250
pass
269251

270252

253+
class TestGetDownloadedVideommeChunks(unittest.TestCase):
254+
"""Tests for _get_downloaded_videomme_chunks."""
255+
256+
def test_returns_empty_set_when_no_cache(self):
257+
"""Should return empty set when HF cache dir doesn't exist."""
258+
# With a non-existent HF_HOME, should return empty set
259+
import os
260+
261+
from tico.quantization.evaluation.lmms_eval_utils import (
262+
_get_downloaded_videomme_chunks,
263+
)
264+
265+
original_hf = os.environ.get("HF_HOME")
266+
try:
267+
os.environ["HF_HOME"] = "/tmp/nonexistent_hf_cache_12345"
268+
result = _get_downloaded_videomme_chunks()
269+
self.assertIsInstance(result, set)
270+
self.assertEqual(len(result), 0)
271+
finally:
272+
if original_hf is not None:
273+
os.environ["HF_HOME"] = original_hf
274+
else:
275+
os.environ.pop("HF_HOME", None)
276+
277+
def test_finds_chunks_in_fake_cache(self):
278+
"""Should find chunk zips in a fake cache directory."""
279+
import os
280+
import tempfile
281+
282+
from tico.quantization.evaluation.lmms_eval_utils import (
283+
_get_downloaded_videomme_chunks,
284+
)
285+
286+
with tempfile.TemporaryDirectory() as tmpdir:
287+
# Create the expected directory structure
288+
repo_dir = os.path.join(tmpdir, "hub", "datasets--lmms-lab--Video-MME")
289+
snap_dir = os.path.join(repo_dir, "snapshots", "abc123")
290+
os.makedirs(snap_dir)
291+
292+
# Create fake chunk files
293+
for name in ["videos_chunked_01.zip", "videos_chunked_02.zip"]:
294+
with open(os.path.join(snap_dir, name), "w") as f:
295+
f.write("fake zip content")
296+
297+
# Create a non-chunk file that should be ignored
298+
with open(os.path.join(snap_dir, "subtitle.zip"), "w") as f:
299+
f.write("fake subtitle")
300+
301+
original_hf = os.environ.get("HF_HOME")
302+
try:
303+
os.environ["HF_HOME"] = tmpdir
304+
result = _get_downloaded_videomme_chunks()
305+
self.assertIn("videos_chunked_01.zip", result)
306+
self.assertIn("videos_chunked_02.zip", result)
307+
self.assertNotIn("subtitle.zip", result)
308+
self.assertEqual(len(result), 2)
309+
finally:
310+
if original_hf is not None:
311+
os.environ["HF_HOME"] = original_hf
312+
else:
313+
os.environ.pop("HF_HOME", None)
314+
315+
316+
class TestEnsureVideommeChunksDownloaded(unittest.TestCase):
317+
"""Tests for _ensure_videomme_chunks_downloaded."""
318+
319+
def test_skips_download_when_all_chunks_cached(self):
320+
"""Should not call snapshot_download when all needed chunks are cached."""
321+
import os
322+
import tempfile
323+
from unittest.mock import MagicMock, patch
324+
325+
from tico.quantization.evaluation.lmms_eval_utils import (
326+
_ensure_videomme_chunks_downloaded,
327+
)
328+
329+
with tempfile.TemporaryDirectory() as tmpdir:
330+
# Create the cache with chunk 01 already present
331+
repo_dir = os.path.join(tmpdir, "hub", "datasets--lmms-lab--Video-MME")
332+
snap_dir = os.path.join(repo_dir, "snapshots", "abc123")
333+
os.makedirs(snap_dir)
334+
with open(os.path.join(snap_dir, "videos_chunked_01.zip"), "w") as f:
335+
f.write("fake")
336+
337+
original_hf = os.environ.get("HF_HOME")
338+
try:
339+
os.environ["HF_HOME"] = tmpdir
340+
341+
with patch("huggingface_hub.snapshot_download") as mock_dl:
342+
_ensure_videomme_chunks_downloaded(limit=1)
343+
# Should NOT call snapshot_download since chunk 01 is cached
344+
mock_dl.assert_not_called()
345+
finally:
346+
if original_hf is not None:
347+
os.environ["HF_HOME"] = original_hf
348+
else:
349+
os.environ.pop("HF_HOME", None)
350+
351+
def test_downloads_missing_chunks(self):
352+
"""Should download only missing chunks when some are cached."""
353+
import os
354+
import tempfile
355+
from unittest.mock import patch
356+
357+
from tico.quantization.evaluation.lmms_eval_utils import (
358+
_ensure_videomme_chunks_downloaded,
359+
)
360+
361+
with tempfile.TemporaryDirectory() as tmpdir:
362+
# Create the cache with chunk 01 already present
363+
repo_dir = os.path.join(tmpdir, "hub", "datasets--lmms-lab--Video-MME")
364+
snap_dir = os.path.join(repo_dir, "snapshots", "abc123")
365+
os.makedirs(snap_dir)
366+
with open(os.path.join(snap_dir, "videos_chunked_01.zip"), "w") as f:
367+
f.write("fake")
368+
369+
original_hf = os.environ.get("HF_HOME")
370+
try:
371+
os.environ["HF_HOME"] = tmpdir
372+
373+
with patch("huggingface_hub.snapshot_download") as mock_dl:
374+
# limit=31 needs 2 chunks (01 and 02), but 01 is cached
375+
_ensure_videomme_chunks_downloaded(limit=31)
376+
mock_dl.assert_called_once()
377+
call_kwargs = mock_dl.call_args
378+
allow_patterns = call_kwargs.kwargs.get(
379+
"allow_patterns"
380+
) or call_kwargs[1].get("allow_patterns")
381+
# Should only download chunk 02 (01 is cached)
382+
self.assertIn("videos_chunked_02.zip", allow_patterns)
383+
self.assertNotIn("videos_chunked_01.zip", allow_patterns)
384+
finally:
385+
if original_hf is not None:
386+
os.environ["HF_HOME"] = original_hf
387+
else:
388+
os.environ.pop("HF_HOME", None)
389+
390+
271391
class TestVerboseFlagPropagation(unittest.TestCase):
272392
"""Tests for verbose flag propagation via LMMS_VERBOSE env var."""
273393

@@ -386,20 +506,25 @@ def test_process_docs_filters_by_available_videos(self):
386506
finally:
387507
vm_utils._data_dir = original_data_dir
388508

389-
def test_verbose_flag_controls_print(self):
390-
"""Print statements should be suppressed when LMMS_VERBOSE is not set."""
509+
def test_is_verbose_reflects_runtime_env_changes(self):
510+
"""_is_verbose() should reflect runtime changes to LMMS_VERBOSE env var."""
391511
import os
392512

393-
from tico.quantization.evaluation.lmms_tasks.videomme_mini import (
394-
utils as vm_utils,
513+
from tico.quantization.evaluation.lmms_tasks.videomme_mini.utils import (
514+
_is_verbose,
395515
)
396516

397517
# Ensure verbose is off
398518
os.environ.pop("LMMS_VERBOSE", None)
399-
# Re-evaluate _VERBOSE by reimporting the module is tricky,
400-
# so we just test the _VERBOSE flag directly.
401-
# Since _VERBOSE is evaluated at import time, we test the env var logic.
402-
self.assertFalse(os.getenv("LMMS_VERBOSE", "").lower() in ("1", "true", "yes"))
519+
self.assertFalse(_is_verbose())
520+
521+
# Set verbose on at runtime – _is_verbose() should pick it up immediately
522+
os.environ["LMMS_VERBOSE"] = "1"
523+
self.assertTrue(_is_verbose())
524+
525+
# Turn it off again
526+
os.environ.pop("LMMS_VERBOSE", None)
527+
self.assertFalse(_is_verbose())
403528

404529

405530
if __name__ == "__main__":

0 commit comments

Comments
 (0)