finish

hiyouga · hiyouga · commit ca02c5d0182e · 2025-06-12T11:56:56.000Z
diff --git a/.github/requirements-test.txt b/.github/requirements-test.txt
@@ -0,0 +1,6 @@
+datasets
+pillow
+pytest
+ruff
+torch
+transformers
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -1,6 +1,7 @@
 name: tests
 
 on:
+  workflow_dispatch:
   push:
     branches:
       - "main"
@@ -28,6 +29,10 @@ jobs:
 
     runs-on: ${{ matrix.os }}
 
+    concurrency:
+      group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.os }}-${{ matrix.python-version }}
+      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
+
     steps:
       - name: Checkout
         uses: actions/checkout@v4
@@ -37,13 +42,22 @@ jobs:
         with:
           python-version: ${{ matrix.python-version }}
           cache: "pip"
-          cache-dependency-path: "setup.py"
+          cache-dependency-path: "**/requirements*.txt"
 
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          python -m pip install ruff
+          # Keep PyPI as the primary index and add the PyTorch CPU wheel index only as an
+          # extra source; --index-url would replace PyPI for every listed requirement.
+          python -m pip install -r .github/requirements-test.txt --extra-index-url https://download.pytorch.org/whl/cpu
+          python -m pip install --no-deps .
 
       - name: Check quality
         run: |
           make style && make quality
+
+      - name: Check license
+        run: |
+          make license
+
+      - name: Test with pytest
+        run: |
+          make test
diff --git a/Makefile b/Makefile
@@ -1,6 +1,6 @@
-.PHONY: build commit quality style
+.PHONY: build commit license quality style test
 
-check_dirs := examples scripts verl setup.py
+check_dirs := examples scripts tests verl setup.py
 
 build:
 	python3 setup.py sdist bdist_wheel
@@ -9,10 +9,16 @@ commit:
 	pre-commit install
 	pre-commit run --all-files
 
+license:
+	python3 tests/check_license.py $(check_dirs)
+
 quality:
 	ruff check $(check_dirs)
 	ruff format --check $(check_dirs)
 
 style:
 	ruff check $(check_dirs) --fix
 	ruff format $(check_dirs)
+
+test:
+	pytest -vv tests/
diff --git a/examples/config.yaml b/examples/config.yaml
@@ -13,8 +13,8 @@ data:
   override_chat_template: null
   shuffle: true
   seed: 1
-  max_pixels: 4194304
   min_pixels: 262144
+  max_pixels: 4194304
   filter_overlong_prompts: true
 
 algorithm:
diff --git a/tests/check_license.py b/tests/check_license.py
@@ -0,0 +1,39 @@
+# Copyright 2024 Bytedance Ltd. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+from pathlib import Path
+
+
+KEYWORDS = ("Copyright", "2024", "Bytedance")
+
+
+def main():
+    """Check that Python files under the given paths start with the license header.
+
+    Every command-line argument is either a directory, which is searched
+    recursively for ``*.py`` files, or a single ``.py`` file. The first five
+    lines of each non-empty file must contain every entry of ``KEYWORDS``.
+
+    Raises:
+        AssertionError: if a checked file lacks any keyword in its first five lines.
+    """
+    path_list: list[Path] = []
+    for check_path in map(Path, sys.argv[1:]):
+        if check_path.is_file():
+            # Path.glob() yields nothing when called on a regular file, so file
+            # arguments (e.g. "setup.py" in the Makefile's check_dirs) are added directly.
+            if check_path.suffix == ".py":
+                path_list.append(check_path)
+        else:
+            path_list.extend(check_path.glob("**/*.py"))
+
+    for path in path_list:
+        with open(path.absolute(), encoding="utf-8") as f:
+            file_content = f.read().strip().split("\n")
+
+        header = "\n".join(file_content[:5])
+        if not header:  # nothing to check in an empty file
+            continue
+
+        print(f"Check license: {path}")
+        # Raise explicitly: a bare `assert` is removed when Python runs with -O.
+        if not all(keyword in header for keyword in KEYWORDS):
+            raise AssertionError(f"File {path} does not contain license.")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/test_dataset.py b/tests/test_dataset.py
@@ -0,0 +1,60 @@
+# Copyright 2024 Bytedance Ltd. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+from PIL.Image import Image
+
+from verl.utils.dataset import RLHFDataset
+from verl.utils.tokenizer import get_processor, get_tokenizer
+
+
+def test_image_dataset():
+    """Check the first sample produced by ``RLHFDataset`` for an image dataset.
+
+    Builds the dataset from ``hiyouga/geometry3k@test`` with the
+    ``Qwen/Qwen2.5-VL-7B-Instruct`` tokenizer and processor, truncating prompts
+    on the right to 16 tokens, then verifies the keys, text fields, tensors and
+    image object of sample 0.
+
+    NOTE(review): presumably needs network access to fetch the tokenizer,
+    processor and dataset -- confirm before running offline.
+    """
+    tokenizer = get_tokenizer("Qwen/Qwen2.5-VL-7B-Instruct", use_fast=True)
+    processor = get_processor("Qwen/Qwen2.5-VL-7B-Instruct", use_fast=True)
+    dataset = RLHFDataset(
+        data_path="hiyouga/geometry3k@test",
+        tokenizer=tokenizer,
+        processor=processor,
+        prompt_key="problem",
+        answer_key="answer",
+        image_key="images",
+        max_prompt_length=16,
+        truncation="right",
+        filter_overlong_prompts=False,
+    )
+    token_ids = [151644, 8948, 198, 2610, 525, 264, 10950, 17847, 13, 151645, 198, 151644, 872, 198, 151652, 151655]
+    # Index once: every dataset[0] access re-runs __getitem__ for the same sample.
+    sample = dataset[0]
+    assert set(sample.keys()) == {
+        "problem",
+        "ground_truth",
+        "input_ids",
+        "attention_mask",
+        "position_ids",
+        "raw_prompt_ids",
+        "multi_modal_data",
+    }
+    assert sample["problem"] == (
+        "<image>Chords $\\overline{A C}$ and $\\overline{D F}$ are equidistant from the center. "
+        "If the radius of $\\odot G$ is 26 find $A C$"
+    )
+    assert sample["ground_truth"] == "48"
+    assert torch.all(sample["input_ids"] == torch.tensor(token_ids))
+    assert torch.all(sample["attention_mask"] == torch.ones(16))
+    assert torch.all(sample["position_ids"] == torch.arange(16).unsqueeze(0).expand(3, -1))
+    assert list(sample["position_ids"].size()) == [3, 16]  # avoid fake positive caused by broadcasting
+    assert sample["raw_prompt_ids"] == token_ids
+    assert isinstance(sample["multi_modal_data"]["images"][0], Image)
+
+
+if __name__ == "__main__":
+    test_image_dataset()
diff --git a/verl/protocol.py b/verl/protocol.py
@@ -384,11 +384,11 @@ def pop(
         meta_info_keys = meta_info_keys or []
 
         tensors = {}
-        for key in batch_keys and key in self.batch:
+        for key in filter(lambda k: k in self.batch, batch_keys):
             tensors[key] = self.batch.pop(key)
 
         non_tensors = {}
-        for key in non_tensor_batch_keys and key in self.non_tensor_batch:
+        for key in filter(lambda k: k in self.non_tensor_batch, non_tensor_batch_keys):
             non_tensors[key] = self.non_tensor_batch.pop(key)
 
         meta_info = {}
diff --git a/verl/trainer/config.py b/verl/trainer/config.py
@@ -47,8 +47,8 @@ class DataConfig:
     override_chat_template: Optional[str] = None
     shuffle: bool = True
     seed: int = 1
-    max_pixels: int = 4194304
-    min_pixels: int = 262144
+    min_pixels: Optional[int] = 262144
+    max_pixels: Optional[int] = 4194304
     filter_overlong_prompts: bool = True
 
     def post_init(self):
diff --git a/verl/trainer/ray_trainer.py b/verl/trainer/ray_trainer.py
@@ -475,16 +475,14 @@ def fit(self):
                     break
 
                 metrics, timing_raw = {}, {}
-                batch: DataProto = DataProto.from_single_dict(batch_dict)
-                batch.meta_info = {
-                    "min_pixels": self.config.data.min_pixels,
-                    "max_pixels": self.config.data.max_pixels,
-                }
+                meta_info = {"min_pixels": self.config.data.min_pixels, "max_pixels": self.config.data.max_pixels}
+                batch: DataProto = DataProto.from_single_dict(batch_dict, meta_info=meta_info)
 
                 # pop those keys for generation
                 gen_batch = batch.pop(
                     batch_keys=["input_ids", "attention_mask", "position_ids"],
                     non_tensor_batch_keys=["raw_prompt_ids", "multi_modal_data"],
+                    meta_info_keys=["min_pixels", "max_pixels"],
                 )
                 with timer("step", timing_raw):
                     # generate a batch
diff --git a/verl/utils/dataset.py b/verl/utils/dataset.py
@@ -50,7 +50,9 @@ def collate_fn(features: List[Dict[str, Any]]) -> Dict[str, Any]:
     return {**tensors, **non_tensors}
 
 
-def process_image(image: Union[Dict[str, Any], ImageObject, str], min_pixels: int, max_pixels: int) -> ImageObject:
+def process_image(
+    image: Union[Dict[str, Any], ImageObject, str], min_pixels: Optional[int], max_pixels: Optional[int]
+) -> ImageObject:
     if isinstance(image, str):
         image = Image.open(image)
     elif isinstance(image, dict):
@@ -59,12 +61,12 @@ def process_image(image: Union[Dict[str, Any], ImageObject, str], min_pixels: in
         image = Image.open(BytesIO(image))
 
     image.load()  # avoid "Too many open files" errors
-    if (image.width * image.height) > max_pixels:
+    if max_pixels is not None and (image.width * image.height) > max_pixels:
         resize_factor = math.sqrt(max_pixels / (image.width * image.height))
         width, height = int(image.width * resize_factor), int(image.height * resize_factor)
         image = image.resize((width, height))
 
-    if (image.width * image.height) < min_pixels:
+    if min_pixels is not None and (image.width * image.height) < min_pixels:
         resize_factor = math.sqrt(min_pixels / (image.width * image.height))
         width, height = int(image.width * resize_factor), int(image.height * resize_factor)
         image = image.resize((width, height))
@@ -92,8 +94,8 @@ def __init__(
         max_prompt_length: int = 1024,
         truncation: str = "error",
         format_prompt: Optional[str] = None,
-        max_pixels: Optional[int] = None,
         min_pixels: Optional[int] = None,
+        max_pixels: Optional[int] = None,
         filter_overlong_prompts: bool = True,
     ):
         self.tokenizer = tokenizer
@@ -104,8 +106,8 @@ def __init__(
         self.image_dir = image_dir
         self.max_prompt_length = max_prompt_length
         self.truncation = truncation
-        self.max_pixels = max_pixels
         self.min_pixels = min_pixels
+        self.max_pixels = max_pixels
         self.filter_overlong_prompts = filter_overlong_prompts
 
         if "@" in data_path:
@@ -169,17 +171,16 @@ def __getitem__(self, index):
         if self.image_key in example:
             prompt = self.processor.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)
             images = example.pop(self.image_key)
-            if self.image_dir is not None and len(images) != 0 and isinstance(images[0], str): # image paths
+            if self.image_dir is not None and len(images) != 0 and isinstance(images[0], str):  # image paths
                 images = [os.path.join(self.image_dir, image) for image in images]
 
             resized_images = [
-                process_image(image, min_pixels=self.min_pixels, max_pixels=self.max_pixels)
-                for image in images
+                process_image(image, min_pixels=self.min_pixels, max_pixels=self.max_pixels) for image in images
             ]
             model_inputs = self.processor(resized_images, [prompt], add_special_tokens=False, return_tensors="pt")
             input_ids = model_inputs.pop("input_ids")[0]
             attention_mask = model_inputs.pop("attention_mask")[0]
-            example["multi_modal_inputs"] = {"images": images}
+            example["multi_modal_data"] = {"images": images}
         else:
             prompt = self.tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)
             model_inputs = self.tokenizer([prompt], add_special_tokens=False, return_tensors="pt")
diff --git a/verl/workers/fsdp_workers.py b/verl/workers/fsdp_workers.py
diff --git a/verl/workers/reward/__init__.py b/verl/workers/reward/__init__.py
diff --git a/verl/workers/rollout/vllm_rollout_spmd.py b/verl/workers/rollout/vllm_rollout_spmd.py

-Original file line number
+Diff line change
@@ -0,0 +1,6 @@
 +datasets
 +pillow
 +pytest
 +ruff
 +torch
 +transformers