Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ All notable changes to this project will be documented in this file.
- Added the ability to set active tools on a per-sample basis. See the PR for more details: https://github.com/allenai/open-instruct/pull/1382
- Added a new changelog GitHub Action that makes sure you contribute to the changelog! https://github.com/allenai/open-instruct/pull/1276
- Now, we type check `open_instruct/dataset_transformation.py` (https://github.com/allenai/open-instruct/pull/1390).
- Added a GRPO implementation that uses olmo-core (https://github.com/allenai/open-instruct/pull/1389).
- Added a linter rule that enforces that imports go at the top of the file (https://github.com/allenai/open-instruct/pull/1394).

### Changed
Expand Down
14 changes: 7 additions & 7 deletions open_instruct/benchmark_generators.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
import vllm
from ray.util import queue as ray_queue

from open_instruct import data_loader, dataset_transformation, grpo_fast, logger_utils, model_utils, utils, vllm_utils
from open_instruct import data_loader, dataset_transformation, grpo_utils, logger_utils, model_utils, utils, vllm_utils
from open_instruct.actor_manager import ActorManager
from open_instruct.data_types import PromptRequest

Expand Down Expand Up @@ -211,7 +211,7 @@ def free_all_gpu_memory(device: int | str = 0) -> None:


def setup_dataset(
args: grpo_fast.Args,
args: grpo_utils.ExperimentConfig,
streaming_config: data_loader.StreamingDataLoaderConfig,
tokenizer_config: dataset_transformation.TokenizerConfig,
) -> datasets.Dataset:
Expand Down Expand Up @@ -244,7 +244,7 @@ def setup_dataset(


def setup_vllm_engines(
args: grpo_fast.Args,
args: grpo_utils.ExperimentConfig,
streaming_config: data_loader.StreamingDataLoaderConfig,
vllm_config: data_loader.VLLMConfig,
tokenizer_config: dataset_transformation.TokenizerConfig,
Expand Down Expand Up @@ -292,7 +292,7 @@ def setup_vllm_engines(


def simulate_weight_sync(
actor_manager: ray.actor.ActorHandle, vllm_engines: list[ray.actor.ActorHandle], args: grpo_fast.Args
actor_manager: ray.actor.ActorHandle, vllm_engines: list[ray.actor.ActorHandle], args: grpo_utils.ExperimentConfig
) -> float:
"""Simulate weight sync by pausing all actors.

Expand Down Expand Up @@ -363,7 +363,7 @@ def run_benchmark(
param_prompt_Q: ray_queue.Queue,
inference_results_Q: ray_queue.Queue,
actor_manager: ray.actor.ActorHandle,
args: grpo_fast.Args,
args: grpo_utils.ExperimentConfig,
streaming_config: data_loader.StreamingDataLoaderConfig,
vllm_config: data_loader.VLLMConfig,
model_config: model_utils.ModelConfig,
Expand Down Expand Up @@ -670,7 +670,7 @@ def main() -> None:
# Parse arguments using ArgumentParserPlus
parser = utils.ArgumentParserPlus(
(
grpo_fast.Args,
grpo_utils.ExperimentConfig,
dataset_transformation.TokenizerConfig,
model_utils.ModelConfig,
data_loader.StreamingDataLoaderConfig,
Expand All @@ -680,7 +680,7 @@ def main() -> None:

args, tokenizer_config, model_config, streaming_config, vllm_config = cast(
tuple[
grpo_fast.Args,
grpo_utils.ExperimentConfig,
dataset_transformation.TokenizerConfig,
model_utils.ModelConfig,
data_loader.StreamingDataLoaderConfig,
Expand Down
2 changes: 1 addition & 1 deletion open_instruct/ground_truth_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1055,7 +1055,7 @@ class RewardConfig:
apply_r1_style_format_reward: bool = False
r1_style_format_reward: float = 1.0
apply_verifiable_reward: bool = True
verification_reward: int = 10
verification_reward: float = 10.0
non_stop_penalty: bool = False
non_stop_penalty_value: float = -10.0
only_reward_good_outputs: bool = False
Expand Down
Loading
Loading