Skip to content

Commit a20d0d2

Browse files
authored
[cleanup] remove metric checker and long tests (#1603)
1 parent d129c85 commit a20d0d2

File tree

12 files changed

+6
-773
lines changed

12 files changed

+6
-773
lines changed

.github/workflows/pr-test.yml

Lines changed: 1 addition & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ jobs:
9595
strategy:
9696
fail-fast: false
9797
matrix:
98-
info: [{"num_gpus": 4, "test_file": "test_qwen3_4B_fsdp_true_on_policy.py --colocated"}, {"num_gpus": 8, "test_file": "test_qwen3_vl_4B_fsdp.py"}, {"num_gpus": 2, "test_file": "test_qwen3_0.6B_fsdp_distributed.py"}, {"num_gpus": 4, "test_file": "test_qwen3_0.6B_megatron_fsdp_align.py"}]
98+
info: [{"num_gpus": 4, "test_file": "test_qwen3_4B_fsdp_true_on_policy.py --colocated"}, {"num_gpus": 8, "test_file": "test_qwen3_vl_4B_fsdp.py"}, {"num_gpus": 4, "test_file": "test_qwen3_0.6B_megatron_fsdp_align.py"}]
9999
defaults:
100100
run:
101101
working-directory: ${{ github.workspace }}
@@ -260,53 +260,6 @@ jobs:
260260
shell: bash
261261
run: python tests/ci/gpu_lock_exec.py --count ${{ matrix.info.num_gpus }} -- python tests/${{ matrix.info.test_file }}
262262

263-
e2e-test-long:
264-
if: (github.event_name == 'workflow_dispatch') || (github.event.pull_request && contains(github.event.pull_request.labels.*.name, 'run-ci-long'))
265-
runs-on: self-hosted
266-
container:
267-
image: slimerl/slime:latest
268-
options: >
269-
--gpus all
270-
--ipc=host
271-
--shm-size=16g
272-
--ulimit memlock=-1
273-
--ulimit stack=67108864
274-
--memory=0
275-
--memory-swap=0
276-
-e http_proxy=$http_proxy
277-
-e https_proxy=$https_proxy
278-
-e HTTP_PROXY=$HTTP_PROXY
279-
-e HTTPS_PROXY=$HTTPS_PROXY
280-
-v /mnt/nvme0n1/slime_ci:/data/slime_ci
281-
-v /mnt/nvme0n1/slime_ci/models:/root/models
282-
-v /mnt/nvme0n1/slime_ci/datasets:/root/datasets
283-
strategy:
284-
fail-fast: false
285-
matrix:
286-
info: [{"num_gpus": 2, "test_file": "test_qwen2.5_0.5B_gsm8k.py"}, {"num_gpus": 2, "test_file": "test_qwen2.5_0.5B_gsm8k_async.py"}, {"num_gpus": 2, "test_file": "test_qwen3_0.6B_fsdp_colocated_2xGPU.py"}, {"num_gpus": 2, "test_file": "test_qwen3_0.6B_fsdp_distributed.py"}]
287-
defaults:
288-
run:
289-
working-directory: ${{ github.workspace }}
290-
env:
291-
GITHUB_COMMIT_NAME: ${{ github.sha }}_${{ github.event.pull_request.number || 'non-pr' }}
292-
WANDB_API_KEY: ${{ secrets.WANDB_API_KEY }}
293-
SLIME_TEST_ENABLE_INFINITE_RUN: ${{ (github.event_name == 'workflow_dispatch' && github.event.inputs.infinite_run) || 'false' }}
294-
SLIME_TEST_USE_DEEPEP: ${{ matrix.info.use_deepep || '0' }}
295-
SLIME_TEST_USE_FP8_ROLLOUT: ${{ matrix.info.use_fp8_rollout || '0' }}
296-
SLIME_TEST_ENABLE_EVAL: ${{ matrix.info.enable_eval || '1' }}
297-
298-
steps:
299-
- name: Checkout repository
300-
uses: actions/checkout@v4
301-
302-
- name: Install
303-
shell: bash
304-
run: cd $GITHUB_WORKSPACE && pip install -e . --no-deps --break-system-packages
305-
306-
- name: Execute
307-
shell: bash
308-
run: python tests/ci/gpu_lock_exec.py --count ${{ matrix.info.num_gpus }} -- python tests/${{ matrix.info.test_file }}
309-
310263
e2e-test-image:
311264
if: (github.event_name == 'workflow_dispatch') || (github.event.pull_request && contains(github.event.pull_request.labels.*.name, 'run-ci-image'))
312265
runs-on: self-hosted

.github/workflows/pr-test.yml.j2

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
'tests': [
1212
{'test_file': 'test_qwen3_4B_fsdp_true_on_policy.py --colocated', 'num_gpus': 4},
1313
{'test_file': 'test_qwen3_vl_4B_fsdp.py', 'num_gpus': 8},
14-
{'test_file': 'test_qwen3_0.6B_fsdp_distributed.py', 'num_gpus': 2},
1514
{'test_file': 'test_qwen3_0.6B_megatron_fsdp_align.py', 'num_gpus': 4},
1615
],
1716
},
@@ -43,25 +42,15 @@
4342
{'test_file': 'test_qwen3_4B_ckpt.py --async-save', 'num_gpus': 8},
4443
],
4544
},
46-
'e2e-test-long': {
47-
'label': 'run-ci-long',
48-
'tests': [
49-
{'test_file': 'test_qwen2.5_0.5B_gsm8k.py', 'num_gpus': 2},
50-
{'test_file': 'test_qwen2.5_0.5B_gsm8k_async.py', 'num_gpus': 2},
51-
{'test_file': 'test_qwen3_0.6B_fsdp_colocated_2xGPU.py', 'num_gpus': 2},
52-
{'test_file': 'test_qwen3_0.6B_fsdp_distributed.py', 'num_gpus': 2},
53-
],
54-
},
45+
5546
'e2e-test-image': {
5647
'label': 'run-ci-image',
5748
'image': 'slimerl/slime-test:latest',
5849
'tests': [
5950
{'test_file': 'test_qwen2.5_0.5B_gsm8k_async_short.py', 'num_gpus': 4},
6051
{'test_file': 'test_qwen2.5_0.5B_gsm8k_short.py', 'num_gpus': 4},
61-
{'test_file': 'test_qwen3_0.6B_fsdp_colocated_2xGPU.py', 'num_gpus': 2},
6252
{'test_file': 'test_qwen3_4B_fsdp_true_on_policy.py', 'num_gpus': 2},
6353
{'test_file': 'test_qwen3_vl_4B_fsdp.py', 'num_gpus': 8},
64-
{'test_file': 'test_qwen3_0.6B_fsdp_distributed.py', 'num_gpus': 2},
6554
{'test_file': 'test_quick_start_glm4_9B.py', 'num_gpus': 8},
6655
{'test_file': 'test_qwen3_30B_A3B.py', 'num_gpus': 8},
6756
{'test_file': 'test_qwen3_4B_ppo.py', 'num_gpus': 8},

examples/true_on_policy/run_simple.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -84,12 +84,7 @@ def execute():
8484
"--update-weight-buffer-size 536870912 " # 512MB
8585
)
8686

87-
ci_args = (
88-
"--ci-test "
89-
"--ci-disable-kl-checker "
90-
"--ci-metric-checker-key eval/gsm8k "
91-
"--ci-metric-checker-threshold 0.71 " # loose threshold at 60 step
92-
)
87+
ci_args = "--ci-test " "--ci-disable-kl-checker "
9388

9489
misc_args = "--actor-num-nodes 1 " f"--actor-num-gpus-per-node {NUM_GPUS} " "--colocate " "--train-backend fsdp "
9590

examples/true_on_policy_vlm/run_simple.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -82,12 +82,7 @@ def execute():
8282
"--attn-implementation flash_attention_3 "
8383
)
8484

85-
ci_args = (
86-
"--ci-test "
87-
"--ci-disable-kl-checker "
88-
"--ci-metric-checker-key eval/geo3k "
89-
"--ci-metric-checker-threshold 0.5 " # loose threshold at 60 step
90-
)
85+
ci_args = "--ci-test " "--ci-disable-kl-checker "
9186

9287
misc_args = "--actor-num-nodes 1 " f"--actor-num-gpus-per-node {NUM_GPUS} " "--colocate "
9388

slime/ray/rollout.py

Lines changed: 2 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,7 @@
1919
from slime.utils.health_monitor import RolloutHealthMonitor
2020
from slime.utils.http_utils import _wrap_ipv6, find_available_port, get_host_info, init_http_client
2121
from slime.utils.logging_utils import configure_logger, init_tracking
22-
from slime.utils.metric_utils import (
23-
MetricChecker,
24-
compute_pass_rate,
25-
compute_rollout_step,
26-
compute_statistics,
27-
dict_add_prefix,
28-
)
22+
from slime.utils.metric_utils import compute_pass_rate, compute_rollout_step, compute_statistics, dict_add_prefix
2923
from slime.utils.misc import Box, group_by, load_function
3024
from slime.utils.seqlen_balancing import get_seqlen_balanced_partitions
3125
from slime.utils.types import Sample
@@ -80,7 +74,6 @@ def __init__(self, args, pg):
8074
self.rollout_engine_lock = Lock.options(num_cpus=1, num_gpus=0).remote()
8175
self.rollout_id = -1
8276

83-
self._metric_checker = MetricChecker.maybe_create(args)
8477
self._health_monitor = None
8578
if not self.args.debug_train_only and self.args.use_fault_tolerance:
8679
self._health_monitor = RolloutHealthMonitor(self, args)
@@ -109,8 +102,6 @@ def _try_ci_fault_injection(self):
109102
logger.warning(f"CI Fault Injection failed: {e}")
110103

111104
def dispose(self):
112-
if self._metric_checker is not None:
113-
self._metric_checker.dispose()
114105
if self._health_monitor is not None:
115106
self._health_monitor.stop()
116107

@@ -153,9 +144,7 @@ def eval(self, rollout_id):
153144
result = call_rollout_fn(self.eval_generate_rollout, self.args, rollout_id, self.data_source, evaluation=True)
154145
data = result.data
155146
self._save_debug_rollout_data(data, rollout_id=rollout_id, evaluation=True)
156-
metrics = _log_eval_rollout_data(rollout_id, self.args, data, result.metrics)
157-
if self._metric_checker is not None:
158-
self._metric_checker.on_eval(metrics)
147+
_log_eval_rollout_data(rollout_id, self.args, data, result.metrics)
159148

160149
def save(self, rollout_id):
161150
self.data_source.save(rollout_id)

slime/utils/arguments.py

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1379,16 +1379,6 @@ def add_ci_arguments(parser):
13791379
"--ci-disable-kl-checker",
13801380
action="store_true",
13811381
)
1382-
parser.add_argument(
1383-
"--ci-metric-checker-key",
1384-
type=str,
1385-
default=None,
1386-
)
1387-
parser.add_argument(
1388-
"--ci-metric-checker-threshold",
1389-
type=float,
1390-
default=None,
1391-
)
13921382
parser.add_argument(
13931383
"--ci-save-grad-norm",
13941384
type=str,

slime/utils/metric_utils.py

Lines changed: 0 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -121,28 +121,3 @@ def compute_rollout_step(args, rollout_id):
121121
if args.wandb_always_use_train_step:
122122
return rollout_id * args.rollout_batch_size * args.n_samples_per_prompt // args.global_batch_size
123123
return rollout_id
124-
125-
126-
class MetricChecker:
127-
@staticmethod
128-
def maybe_create(args):
129-
if args.ci_test and (args.ci_metric_checker_key is not None):
130-
return MetricChecker(args)
131-
return None
132-
133-
def __init__(self, args):
134-
self.args = args
135-
self._exists_check_success = False
136-
137-
def on_eval(self, metrics: dict[str, float]):
138-
actual_value = metrics.get(self.args.ci_metric_checker_key)
139-
assert actual_value is not None, f"{metrics=} {self.args.ci_metric_checker_key=}"
140-
141-
check_success = actual_value >= self.args.ci_metric_checker_threshold
142-
logger.info(f"[MetricChecker] {check_success=} {actual_value=} {self.args.ci_metric_checker_threshold=}")
143-
144-
self._exists_check_success |= check_success
145-
146-
def dispose(self):
147-
assert self._exists_check_success, "[MetricChecker] accuracy check failed"
148-
logger.info("[MetricChecker] pass dispose check")

0 commit comments

Comments
 (0)