Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .github/workflows/cicd-main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ permissions:

env:
UV_HTTP_TIMEOUT: 60
PYTHONFAULTHANDLER: 1

jobs:
pre-flight:
Expand Down Expand Up @@ -70,6 +71,7 @@ jobs:
pyproject.toml
docker/**
tests/**
uv.lock
base_sha: ${{ steps.base-ref.outputs.base }}

- name: Check if docs only
Expand Down Expand Up @@ -183,7 +185,7 @@ jobs:
uv add InternVideo/InternVideo2/multi_modality
FOLDER="${{ matrix.folder }}"
FOLDER="${FOLDER/stages-/stages/}"
uv run coverage run --branch --source=nemo_curator -m pytest -v "tests/$FOLDER" -m "not gpu"
uv run coverage run --branch --source=nemo_curator -m pytest -vv -s "tests/$FOLDER" -m "not gpu"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change originates from commit bb2daef, whose message is "add verbose logging in pytest; DO NOT MERGE". The -vv -s flags increase pytest verbosity and disable output capture, which may significantly increase CI log volume. Please confirm whether these verbose pytest flags are intended to land on main, or whether they were added only for debugging and should be reverted before merge.


- name: Generate report
id: check
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ deduplication_cuda12 = [
"pylibraft-cu12==25.10.*",
"raft-dask-cu12==25.10.*",
"rapidsmpf-cu12==25.10.*",
"scikit-learn<1.8.0", # cuml 25.10 is not compatible with sklearn 1.8+
]

audio_cpu = [
Expand Down Expand Up @@ -166,7 +167,7 @@ test = [
"pytest-asyncio",
"pytest-cov",
"pytest-loguru",
"scikit-learn",
"scikit-learn<1.8.0", # cuml 25.10 is not compatible with sklearn 1.8+
"s3fs", # added for testing cloud fs
]

Expand Down
10 changes: 8 additions & 2 deletions tests/backends/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,12 @@ def test_ray_data_execution_plan(self):
"""Test that Ray Data creates the expected execution plan with correct stage organization."""
if self.backend_cls != RayDataExecutor:
pytest.skip("Execution plan test only applies to RayDataExecutor")
from packaging import version
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Move this import to the top of the file, alongside the other imports (around line 24).

Suggested change
from packaging import version
import ray

Then add at line 24 after other imports:

from packaging import version

Note: If this suggestion doesn't match your team's coding style, reply to this and let me know. I'll remember it for next time!


if version.parse(ray.__version__) >= version.parse("2.53.0"):
streaming_partitioning_stage = "StreamingRepartition[num_rows_per_block=1]"
else:
streaming_partitioning_stage = "StreamingRepartition"

# Look for execution plan in logs with multiple possible patterns
matches = re.findall(r"Execution plan of Dataset.*?:\s*(.+)", self.all_logs, re.MULTILINE)
Expand All @@ -205,10 +211,10 @@ def test_ray_data_execution_plan(self):
expected_stages = [
"InputDataBuffer[Input]",
"TaskPoolMapOperator[MapBatches(FilePartitioningStageTask)]",
"TaskPoolMapOperator[StreamingRepartition]",
f"TaskPoolMapOperator[{streaming_partitioning_stage}]",
"ActorPoolMapOperator[MapBatches(JsonlReaderStageTask)->MapBatches(AddLengthStageActor)]",
"ActorPoolMapOperator[MapBatches(SplitIntoRowsStageActor)]",
"TaskPoolMapOperator[StreamingRepartition]",
f"TaskPoolMapOperator[{streaming_partitioning_stage}]",
"ActorPoolMapOperator[MapBatches(AddLengthStageActor)]",
"ActorPoolMapOperator[MapBatches(StageWithSetupActor)]",
"TaskPoolMapOperator[MapBatches(JsonlWriterTask)]",
Expand Down
Loading
Loading