
Commit 5c05fba

Add alignment vLLM metrics and opposite-mode runs
1 parent 4f5c1de commit 5c05fba

14 files changed

Lines changed: 1627 additions & 55 deletions

.agents/logbooks/alignment_function.md

Lines changed: 611 additions & 0 deletions
Large diffs are not rendered by default.
experiments/align_debug_vllm_70b_mixtral_rejected_opposite.py

Lines changed: 109 additions & 0 deletions
@@ -0,0 +1,109 @@
# Copyright The Marin Authors
# SPDX-License-Identifier: Apache-2.0

"""
Debug heterogeneous alignment pipeline with rejected-only opposite prompting.

Chosen / infrastructure roles use Llama 3.3 70B Instruct.
Rejected responses use Mixtral 8x7B Instruct with rejected_prompt_strategy=opposite.

This is the F2 heterogeneous local-local `auto` validation with opposite-mode
rejected responses. The expected response-orchestration shape is:

- separate `chosen` and `rejected` child jobs
- no forced dependency between them
- rejected prompt construction uses the explicit opposite-mode system prompt

Submit to Iris:

uv run iris --config lib/iris/examples/marin.yaml job run \
    --no-wait \
    --job-name align-debug-vllm-70b-mixtral-rejected-opposite-auto \
    --cpu 4 \
    --memory 16GB \
    --disk 10GB \
    --region us-central1 \
    -- python experiments/align_debug_vllm_70b_mixtral_rejected_opposite.py
"""

from pathlib import Path

from experiments.llama import llama_70b
from experiments.models import llama_3_3_70b_instruct
from marin.alignment.align import AlignConfig, align
from marin.alignment.generate_responses import RejectedPromptStrategy
from marin.alignment.inference_config import VLLMConfig
from marin.execution.executor import executor_main

SPEC_PATH = str(Path(__file__).parent / "posttrain" / "specs" / "openai_model_spec.jsonl")
LLAMA_70B_GCS_PATH = "gs://marin-us-central1/models/meta-llama--Llama-3-3-70B-Instruct--6f6073b"
MIXTRAL_8X7B_INSTRUCT_GCS_PATH = "gs://marin-us-central1/models/mistralai--Mixtral-8x7B-Instruct-v0-1--eba9230"

llama_vllm = VLLMConfig(
    model=LLAMA_70B_GCS_PATH,
    tensor_parallel_size=4,
    max_model_len=4096,
    gpu_memory_utilization=0.9,
    tpu_type="v5p-8",
    disk="10g",
    ram="256g",
)

mixtral_vllm = VLLMConfig(
    model=MIXTRAL_8X7B_INSTRUCT_GCS_PATH,
    tensor_parallel_size=4,
    max_model_len=4096,
    gpu_memory_utilization=0.9,
    tpu_type="v5p-8",
    disk="10g",
    ram="256g",
)

align_config = AlignConfig(
    ideation_model=llama_vllm,
    extract_model=llama_vllm,
    judge_model=llama_vllm,
    covering_strength=2,
    covering_seed=42,
    ideation_workers=1,
    concretize_workers=1,
    extract_workers=1,
    prompt_batch_size=4,
    understanding_max_tokens=1024,
    understanding_temperature=1.0,
    concretize_max_tokens=1536,
    concretize_temperature=1.0,
    concretize_max_attempts=5,
    extract_max_tokens=1024,
    judge_workers=1,
    judge_batch_size=4,
    teacher_n=1,
    teacher_temperature=0.7,
    teacher_max_tokens=512,
    rejected_n=1,
    rejected_temperature=0.7,
    rejected_max_tokens=512,
    rejected_prompt_strategy=RejectedPromptStrategy.OPPOSITE,
    judge_min_chosen_score=1.0,
    judge_min_gap=0.0,
    tokenizer="meta-llama/Llama-3.3-70B-Instruct",
    statement_ids=["ask_clarifying_questions"],
)

dataset_steps = align(
    name="debug_vllm_70b_mixtral_rejected_opposite_auto_smoke",
    pretrained_model=llama_3_3_70b_instruct,
    spec=SPEC_PATH,
    model_config=llama_70b,
    teacher_model=llama_vllm,
    align_config=align_config,
    dpo_config=None,
    rejected_model=mixtral_vllm,
    tags=["debug", "vllm", "70b", "mixtral-rejected", "opposite-mode"],
)

if __name__ == "__main__":
    executor_main(
        steps=dataset_steps,
        description="Debug heterogeneous auto alignment with Mixtral rejected opposite-mode responses",
    )
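
The docstring above pins down the orchestration contract but not the prompt construction itself. As a rough illustration of what opposite-mode rejected prompting might look like, here is a minimal sketch; the helper name build_opposite_messages and the system-prompt wording are assumptions for illustration, not the actual marin.alignment implementation:

# Hypothetical sketch of opposite-mode message construction. The real
# logic lives in marin.alignment.generate_responses; this wording is
# illustrative only.
def build_opposite_messages(prompt: str, behavior_statement: str) -> list[dict[str, str]]:
    # Explicit system prompt instructing the model to act against the
    # behavior statement, so the "rejected" side is deliberately bad.
    system = (
        "You are generating a deliberately non-compliant response. "
        f"Act contrary to the following behavior statement: {behavior_statement}"
    )
    return [
        {"role": "system", "content": system},
        {"role": "user", "content": prompt},
    ]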
experiments/align_debug_vllm_70b_mixtral_rejected_opposite_metrics.py

Lines changed: 104 additions & 0 deletions

@@ -0,0 +1,104 @@
# Copyright The Marin Authors
# SPDX-License-Identifier: Apache-2.0

"""
Debug heterogeneous alignment pipeline with rejected-only opposite prompting.

This is a fresh metrics-focused one-statement end-to-end rerun of the validated
opposite-mode smoke so we can materialize standardized `vllm_metrics.json`
artifacts without reusing the earlier cached outputs.

Submit to Iris:

uv run iris --config lib/iris/examples/marin.yaml job run \
    --no-wait \
    --job-name align-debug-vllm-70b-mixtral-rejected-opposite-auto-metrics-fresh \
    --cpu 4 \
    --memory 16GB \
    --disk 10GB \
    --region us-central1 \
    -- python experiments/align_debug_vllm_70b_mixtral_rejected_opposite_metrics.py
"""

from pathlib import Path

from experiments.llama import llama_70b
from experiments.models import llama_3_3_70b_instruct
from marin.alignment.align import AlignConfig, align
from marin.alignment.generate_responses import RejectedPromptStrategy
from marin.alignment.inference_config import VLLMConfig
from marin.execution.executor import executor_main

SPEC_PATH = str(Path(__file__).parent / "posttrain" / "specs" / "openai_model_spec.jsonl")
LLAMA_70B_GCS_PATH = "gs://marin-us-central1/models/meta-llama--Llama-3-3-70B-Instruct--6f6073b"
MIXTRAL_8X7B_INSTRUCT_GCS_PATH = "gs://marin-us-central1/models/mistralai--Mixtral-8x7B-Instruct-v0-1--eba9230"
DESCRIPTION = "Debug heterogeneous auto alignment with opposite-mode rejected responses and standardized vLLM metrics"

llama_vllm = VLLMConfig(
    model=LLAMA_70B_GCS_PATH,
    tensor_parallel_size=4,
    max_model_len=4096,
    gpu_memory_utilization=0.9,
    tpu_type="v5p-8",
    disk="10g",
    ram="256g",
)

mixtral_vllm = VLLMConfig(
    model=MIXTRAL_8X7B_INSTRUCT_GCS_PATH,
    tensor_parallel_size=4,
    max_model_len=4096,
    gpu_memory_utilization=0.9,
    tpu_type="v5p-8",
    disk="10g",
    ram="256g",
)

align_config = AlignConfig(
    ideation_model=llama_vllm,
    extract_model=llama_vllm,
    judge_model=llama_vllm,
    covering_strength=2,
    covering_seed=42,
    ideation_workers=1,
    concretize_workers=1,
    extract_workers=1,
    prompt_batch_size=4,
    understanding_max_tokens=1024,
    understanding_temperature=1.0,
    concretize_max_tokens=1536,
    concretize_temperature=1.0,
    concretize_max_attempts=5,
    extract_max_tokens=1024,
    judge_workers=1,
    judge_batch_size=4,
    teacher_n=1,
    teacher_temperature=0.7,
    teacher_max_tokens=512,
    rejected_n=1,
    rejected_temperature=0.7,
    rejected_max_tokens=512,
    rejected_prompt_strategy=RejectedPromptStrategy.OPPOSITE,
    judge_min_chosen_score=1.0,
    judge_min_gap=0.0,
    tokenizer="meta-llama/Llama-3.3-70B-Instruct",
    statement_ids=["ask_clarifying_questions"],
)

dataset_steps = align(
    name="debug_vllm_70b_mixtral_rejected_opposite_auto_smoke_metrics",
    pretrained_model=llama_3_3_70b_instruct,
    spec=SPEC_PATH,
    model_config=llama_70b,
    teacher_model=llama_vllm,
    align_config=align_config,
    dpo_config=None,
    rejected_model=mixtral_vllm,
    tags=["debug", "vllm", "70b", "mixtral-rejected", "opposite-mode", "metrics"],
)

if __name__ == "__main__":
    executor_main(
        steps=dataset_steps,
        description=DESCRIPTION,
    )
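
Once this rerun completes, each generation step should leave a standardized vllm_metrics.json alongside its outputs. The commit does not show the artifact schema, so the sketch below only lists and prints whatever is there; OUTPUT_PREFIX is a placeholder to point at the step's actual output path:

# Sketch: enumerate and load vllm_metrics.json artifacts from GCS.
# The prefix and the artifact's field names are assumptions; check the
# real output path and schema once the job finishes.
import json

import fsspec

OUTPUT_PREFIX = "gs://marin-us-central1/align"  # placeholder; adjust to the step's output path

fs = fsspec.filesystem("gs")
for path in fs.glob(f"{OUTPUT_PREFIX}/**/vllm_metrics.json"):
    with fs.open(path) as f:
        metrics = json.load(f)  # schema not shown in this commit
    print(path, metrics)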
experiments/generate_rejected_opposite_llama_3_3_70b_existing_prompts.py

Lines changed: 84 additions & 0 deletions

@@ -0,0 +1,84 @@
# Copyright The Marin Authors
# SPDX-License-Identifier: Apache-2.0

"""
Run standalone rejected-side opposite-mode response generation on a known-good prompts artifact.

This validates the new fail-closed rejected-only `opposite` prompt strategy
without involving the full alignment pipeline. The run uses the staged
`us-central1` Llama 3.3 70B checkpoint and an existing prompt artifact that was
generated successfully from the OpenAI model-spec smoke statement.

Submit to Iris:

uv run iris --config lib/iris/examples/marin.yaml job run \
    --no-wait \
    --job-name generate-rejected-opposite-llama-3-3-70b-existing-prompts \
    --cpu 4 \
    --memory 16GB \
    --disk 10GB \
    --region us-central1 \
    -- python experiments/generate_rejected_opposite_llama_3_3_70b_existing_prompts.py
"""

from __future__ import annotations

from pathlib import Path

from experiments.models import llama_3_3_70b_instruct
from marin.alignment.generate_responses import (
    RejectedPromptStrategy,
    ResponseGenConfig,
    ResponseRole,
    generate_responses,
)
from marin.alignment.inference_config import VLLMConfig
from marin.execution.executor import ExecutorStep, executor_main, output_path_of, this_output_path
from marin.execution.remote import remote

PROMPTS_PATH = "gs://marin-us-central1/align/debug_generate_prompts_llama_3_3_70b_refactored/prompts-f29568"
SPEC_PATH = str(Path(__file__).parent / "posttrain" / "specs" / "openai_model_spec.jsonl")
MODEL_STEP = llama_3_3_70b_instruct
DESCRIPTION = (
    "Standalone rejected-only opposite-mode generation on existing prompts with staged us-central1 Llama 3.3 70B"
)


llama_3_3_70b_vllm = VLLMConfig(
    model=output_path_of(MODEL_STEP),
    tensor_parallel_size=4,
    max_model_len=4096,
    gpu_memory_utilization=0.9,
    tpu_type="v5p-8",
    disk="5g",
    ram="256g",
)

response_step = ExecutorStep(
    name="align/debug_generate_rejected_opposite_llama_3_3_70b_existing_prompts/responses",
    description="Generate rejected-only opposite-mode responses on a known-good prompts artifact",
    fn=remote(
        generate_responses,
        resources=llama_3_3_70b_vllm.resources,
        env_vars={"MARIN_VLLM_MODE": "native"},
        pip_dependency_groups=["vllm", "tpu"],
    ),
    config=ResponseGenConfig(
        prompts_path=PROMPTS_PATH,
        output_path=this_output_path(),
        model_config=llama_3_3_70b_vllm,
        role=ResponseRole.REJECTED,
        rejected_prompt_strategy=RejectedPromptStrategy.OPPOSITE,
        n=1,
        temperature=0.7,
        max_tokens=512,
        behavior_statements_path=SPEC_PATH,
    ),
)


if __name__ == "__main__":
    executor_main(
        steps=[response_step],
        description=DESCRIPTION,
    )
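
"Fail-closed" here means the opposite strategy should refuse to run rather than silently degrade to unguided generation when the behavior statements it needs are missing, which is why this script passes behavior_statements_path=SPEC_PATH explicitly. A minimal sketch of such a guard, assuming a check shaped roughly like this (the function name and error message are illustrative, not the actual generate_responses code):

# Illustrative fail-closed precondition check; names and message are
# assumptions, not the marin.alignment.generate_responses implementation.
from marin.alignment.generate_responses import RejectedPromptStrategy


def check_opposite_preconditions(
    strategy: RejectedPromptStrategy,
    behavior_statements_path: str | None,
) -> None:
    # Error out instead of falling back to unguided generation.
    if strategy == RejectedPromptStrategy.OPPOSITE and behavior_statements_path is None:
        raise ValueError(
            "rejected_prompt_strategy=OPPOSITE requires behavior_statements_path; "
            "refusing to fall back to unguided generation"
        )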

experiments/generate_responses_llama_3_3_70b_existing_prompts.py

Lines changed: 8 additions & 2 deletions
@@ -23,7 +23,12 @@
 from __future__ import annotations

 from experiments.models import llama_3_3_70b_instruct
-from marin.alignment.generate_responses import ResponseGenConfig, generate_responses
+from marin.alignment.generate_responses import (
+    RejectedPromptStrategy,
+    ResponseGenConfig,
+    ResponseRole,
+    generate_responses,
+)
 from marin.alignment.inference_config import VLLMConfig
 from marin.execution.executor import ExecutorStep, executor_main, output_path_of, this_output_path
 from marin.execution.remote import remote
@@ -54,10 +59,11 @@
         prompts_path=PROMPTS_PATH,
         output_path=this_output_path(),
         model_config=llama_3_3_70b_vllm,
+        role=ResponseRole.REJECTED,
+        rejected_prompt_strategy=RejectedPromptStrategy.UNGUIDED,
         n=1,
         temperature=0.7,
         max_tokens=512,
-        behavior_statements_path=None,
     ),
 )

experiments/generate_responses_llama_3_3_70b_existing_prompts_refactored.py

Lines changed: 8 additions & 2 deletions
@@ -22,7 +22,12 @@
 from __future__ import annotations

 from experiments.models import llama_3_3_70b_instruct
-from marin.alignment.generate_responses import ResponseGenConfig, generate_responses
+from marin.alignment.generate_responses import (
+    RejectedPromptStrategy,
+    ResponseGenConfig,
+    ResponseRole,
+    generate_responses,
+)
 from marin.alignment.inference_config import VLLMConfig
 from marin.execution.executor import ExecutorStep, executor_main, output_path_of, this_output_path
 from marin.execution.remote import remote
@@ -60,10 +65,11 @@
         prompts_path=PROMPTS_PATH,
         output_path=this_output_path(),
         model_config=llama_3_3_70b_vllm,
+        role=ResponseRole.REJECTED,
+        rejected_prompt_strategy=RejectedPromptStrategy.UNGUIDED,
         n=1,
         temperature=0.7,
         max_tokens=512,
-        behavior_statements_path=None,
     ),
 )

experiments/generate_responses_llama_3_3_70b_existing_prompts_vllm_serve.py

Lines changed: 14 additions & 3 deletions
@@ -38,7 +38,12 @@

 from experiments.models import llama_3_3_70b_instruct
 from marin.alignment.generate_prompts import load_sharded_jsonl_gz, write_sharded_jsonl_gz
-from marin.alignment.generate_responses import _build_messages, _make_response_record
+from marin.alignment.generate_responses import (
+    RejectedPromptStrategy,
+    ResponseRole,
+    _build_rejected_messages,
+    _make_response_record,
+)
 from marin.alignment.inference_config import VLLMConfig
 from marin.evaluation.evaluators.evaluator import ModelConfig
 from marin.execution.executor import ExecutorStep, executor_main, output_path_of, this_output_path
@@ -91,7 +96,7 @@ def _request_one(
     max_tokens: int,
     n: int,
 ) -> dict[str, Any]:
-    messages = _build_messages(prompt, behavior_statements=None)
+    messages = _build_rejected_messages(prompt, RejectedPromptStrategy.UNGUIDED, None)
     response = requests.post(
         f"{server_url}/chat/completions",
         json={
@@ -108,7 +113,13 @@
     responses = [
         {"content": choice["message"]["content"] or "", "index": idx} for idx, choice in enumerate(payload["choices"])
     ]
-    return _make_response_record(prompt, model_path, responses)
+    return _make_response_record(
+        prompt,
+        model_path,
+        responses,
+        role=ResponseRole.REJECTED,
+        rejected_prompt_strategy=RejectedPromptStrategy.UNGUIDED,
+    )


 def generate_responses_via_vllm_serve(config: VllmServeResponseGenConfig) -> None:
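
This retrofit routes the vllm-serve HTTP path through the same rejected-message builder as the native path and stamps each response record with its role and prompt strategy, so both paths stay consistent. A quick local smoke check of the builder; the expected output shape (a single user message and no system prompt for UNGUIDED) is inferred from the strategy name rather than verified against the implementation:

# Sketch: sanity-check the builder used above. The expected message shape
# is an assumption inferred from the diff, not confirmed behavior.
from marin.alignment.generate_responses import RejectedPromptStrategy, _build_rejected_messages

messages = _build_rejected_messages("What is 2+2?", RejectedPromptStrategy.UNGUIDED, None)
print(messages)  # expected: something like [{"role": "user", "content": "What is 2+2?"}]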
