Skip to content

Commit 70964fc

Browse files
committed
fix repobench
1 parent aa7b451 commit 70964fc

File tree

5 files changed

+6
-7
lines changed

5 files changed

+6
-7
lines changed

eval/chat_benchmarks/HumanEval/eval_instruct.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ def generate_responses(self, model: LM) -> Dict[str, Any]:
112112
self.logger.info("Generating responses for Human Eval...")
113113
outputs = self.compute(model, all_instances)
114114

115-
is_main_process = lm.accelerator.process_index == 0 if hasattr(lm, 'accelerator') else lm.world_size <= 1
115+
is_main_process = model.accelerator.process_index == 0 if hasattr(model, 'accelerator') else model.world_size <= 1
116116
if not is_main_process:
117117
continue
118118

eval/chat_benchmarks/RepoBench/eval_instruct.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@ def _generate_responses_legacy(self, model: LM) -> Dict[str, Any]:
137137
temp_dir_obj = tempfile.TemporaryDirectory()
138138
temp_dir = temp_dir_obj.name
139139

140+
is_main_process = model.accelerator.process_index == 0 if hasattr(model, 'accelerator') else model.world_size <= 1
140141
for lang in self.languages:
141142
for subset in self.subsets:
142143
dataset = load_data(split="test", task="completion", language=lang, length="2k", setting=subset)
@@ -163,8 +164,8 @@ def _generate_responses_legacy(self, model: LM) -> Dict[str, Any]:
163164
)
164165

165166
outputs = self.compute(model, all_instances, do_slice=False)
166-
167-
if model.accelerator.process_index != 0:
167+
168+
if not is_main_process:
168169
continue
169170

170171
generated_examples = []

eval/chat_benchmarks/alpaca_eval/src/alpaca_eval/leaderboards/data_AlpacaEval_2/weighted_alpaca_eval_gpt4_turbo_leaderboard.csv

+1-1
Original file line numberDiff line numberDiff line change
@@ -208,4 +208,4 @@ oasst-sft-pythia-12b,1.790114083180124,0.3985580883049341,13,790,2,805,1.7391304
208208
guanaco-13b,3.469596859739131,0.5518606725700214,22,780,3,805,2.919254658385093,verified,1774,3.003787329611614,
209209
guanaco-7b,2.880002266173913,0.5202924149314048,21,783,1,805,2.670807453416149,verified,1364,2.871116813131697,
210210
Qwen1.5-1.8B-Chat,3.70555681579365,0.5811750995496215,27,774,3,804,3.544776119402985,verified,2673,2.588498849185137,
211-
baichuan-13b-chat,1.9921455615279504,0.4176985079331233,14,790,1,805,1.8012422360248446,community,1727,2.062170253598568,
211+
baichuan-13b-chat,1.9921455615279504,0.4176985079331233,14,790,1,805,1.8012422360248446,community,1727,2.062170253598568,

eval/chat_benchmarks/alpaca_eval/src/alpaca_eval/metrics/weights/weighted_alpaca_eval_gpt4_turbo/length_controlled_v1/baseline_gpt4_1106_preview.csv

-2
Original file line numberDiff line numberDiff line change
@@ -186,5 +186,3 @@ Mistral-7B-Instruct-v0.3,-1.5007159011881868,0.9845683091847074,-1.7652759895328
186186
Shopee-SlimMoA-v1,-0.6930943742294789,0.5778443790027642,1.4506276222723822
187187
blendaxai-gm-l6-vo31,-1.4827230167114802,0.8256378421072179,1.5942312525409852
188188
REBEL-Llama-3-8B-Instruct-Armo,-1.0427168605260002,0.6464073051877255,0.0395191056877229
189-
model_hf_model_args_pretrained=mlfoundations-dev__gemma-simpo-reproduction,-1.1818376919023723,0.6835318362039150,1.1479555832649320
190-
model_hf_model_args_pretrained=mlfoundations-dev__gemma-oh-preferences,-1.8345282763259563,0.7434213717748921,-9.8937244442602008

eval/eval.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,7 @@ def evaluate(
247247
cpu_count = os.cpu_count()
248248

249249
max_workers = min(len(valid_tasks), cpu_count * 2)
250-
if lm.world_size <= 1 or lm.accelerator.process_index == 0:
250+
if (hasattr(lm, 'accelerator') and lm.accelerator.process_index == 0) or lm.world_size <= 1:
251251
with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
252252
evaluate_results = list(
253253
executor.map(

0 commit comments

Comments
 (0)