-
Notifications
You must be signed in to change notification settings - Fork 220
Open
Labels
Labels: bug (Something isn't working) · help wanted (Extra attention is needed) · skyrl-train
Description
Currently we are skipping the verifiers tests, since there were some major refactors to the verifiers API that changed the way we should use the library. We should update our integration so that the verifiers tests keep running!
Example of the error that happens when running:
uv run --isolated --extra dev --extra vllm --with verifiers pytest tests/gpu/gpu_ci/test_verifiers_generator.py
Processing 2 groups (2 total rollouts): 0%| | 0/2 [00:00<?, ?it/s, reward=?]2026-01-07 23:08:15.054 | DEBUG | skyrl_train.inference_engines.inference_engine_client:_chat_completion_with_retry:324 - /chat/completions request sent (including potential retries): {'messages': [{'role': 'user', 'content': 'You are playing Wordle. Think step-by-step and propose the next guess based on previous feedback.'}], 'model': 'Qwen/Qwen2.5-1.5B-Instruct', 'logprobs': True, 'max_completion_tokens': 256, 'n': 1, 'temperature': 0.7, 'top_logprobs': 1, 'top_p': 0.95, 'return_tokens_as_token_ids': True, 'min_tokens': 1, 'skip_special_tokens': True, 'include_stop_str_in_output': True, 'top_k': -1, 'min_p': 0.0}
2026-01-07 23:08:15.055 | DEBUG | skyrl_train.inference_engines.inference_engine_client:_chat_completion_with_retry:324 - /chat/completions request sent (including potential retries): {'messages': [{'role': 'user', 'content': 'You are playing Wordle. Think step-by-step and propose the next guess based on previous feedback.'}], 'model': 'Qwen/Qwen2.5-1.5B-Instruct', 'logprobs': True, 'max_completion_tokens': 256, 'n': 1, 'temperature': 0.7, 'top_logprobs': 1, 'top_p': 0.95, 'return_tokens_as_token_ids': True, 'min_tokens': 1, 'skip_special_tokens': True, 'include_stop_str_in_output': True, 'top_k': -1, 'min_p': 0.0}
(AsyncVLLMInferenceEngine pid=137452) INFO 01-07 23:08:15 [chat_utils.py:560] Detected the chat template content format to be 'string'. You can set `--chat-template-content-format` to override this.
Processing 2 groups (2 total rollouts): 0%| | 0/2 [00:05<?, ?it/s, reward=?]
FAILED
=================================================================== FAILURES ====================================================================
________________________________________________________ test_verifiers_e2e_wordle_http _________________________________________________________
verifiers_runtime = {'client': <skyrl_train.inference_engines.inference_engine_client.InferenceEngineClient object at 0x779d37984d10>, 'ht...51664: AddedToken("<|file_sep|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
}
)}
@pytest.mark.asyncio
async def test_verifiers_e2e_wordle_http(verifiers_runtime):
rt = verifiers_runtime
> out = await _run_verifiers_end_to_end(
existing_client=rt["client"],
model=rt["model"],
num_prompts=2,
max_input_length=2048,
max_generate_length=256,
http_host="127.0.0.1",
http_port=rt["http_port"],
existing_tokenizer=rt["tokenizer"],
)
tests/gpu/gpu_ci/test_verifiers_generator.py:145:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
tests/gpu/gpu_ci/test_verifiers_generator.py:138: in _run_verifiers_end_to_end
output = await generator.generate(input_batch)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
integrations/verifiers/verifiers_generator.py:88: in generate
generate_outputs: GenerateOutputs = await vf_env.generate(
../../../.cache/uv/archive-v0/0f5ExAcj7AVy1HvdV3H9I/lib/python3.12/site-packages/verifiers/envs/environment.py:878: in generate
group_states = await coro
^^^^^^^^^^
../../../anaconda3/lib/python3.12/asyncio/tasks.py:631: in _wait_for_one
return f.result() # May raise f.exception().
^^^^^^^^^^
../../../.cache/uv/archive-v0/0f5ExAcj7AVy1HvdV3H9I/lib/python3.12/site-packages/verifiers/envs/environment.py:717: in run_group
group_states = await asyncio.gather(*rollout_tasks)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
../../../.cache/uv/archive-v0/0f5ExAcj7AVy1HvdV3H9I/lib/python3.12/site-packages/verifiers/envs/environment.py:687: in run_rollout
state = await self.rollout(
../../../.cache/uv/archive-v0/0f5ExAcj7AVy1HvdV3H9I/lib/python3.12/site-packages/verifiers/envs/multiturn_env.py:147: in rollout
await self.add_model_response(state, prompt_messages, response)
../../../.cache/uv/archive-v0/0f5ExAcj7AVy1HvdV3H9I/lib/python3.12/site-packages/verifiers/envs/multiturn_env.py:109: in add_model_response
tokens = await parse_response_tokens(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
response = ChatCompletion(id='chatcmpl-8f705375c79f48bd9246c7ffcf68ac32', choices=[Choice(finish_reason='length', index=0, logpro...tokens_details=None, prompt_tokens_details=None), prompt_logprobs=None, prompt_token_ids=None, kv_transfer_params=None)
message_type = 'chat', max_seq_len = None
async def parse_response_tokens(
response: ModelResponse, message_type: MessageType, max_seq_len: int | None = None
) -> TrajectoryStepTokens | None:
if message_type == "chat":
assert isinstance(response, ChatCompletion)
assert len(response.choices) == 1, "Response should always have one choice"
if not hasattr(response.choices[0], "token_ids"):
return None
if not hasattr(response, "prompt_token_ids"):
return None
if not hasattr(response.choices[0], "logprobs"):
return None
if response.choices[0].logprobs is None:
return None
has_logprobs_obj = (
hasattr(response.choices[0].logprobs, "content")
and response.choices[0].logprobs.content is not None
)
has_logprobs_dict = (
isinstance(response.choices[0].logprobs, dict)
and "content" in response.choices[0].logprobs.keys()
and response.choices[0].logprobs["content"] is not None
)
if not (has_logprobs_obj or has_logprobs_dict):
return None
prompt_ids = getattr(response, "prompt_token_ids")
> prompt_mask = [0] * len(prompt_ids)
^^^^^^^^^^^^^^^
E TypeError: object of type 'NoneType' has no len()
Metadata
Assignees
Labels
Labels: bug (Something isn't working) · help wanted (Extra attention is needed) · skyrl-train