Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions lm_eval/tasks/americasnlp_mt/_americasnlp_mt_common_yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Shared settings for the americasnlp-mt_* translation tasks. Each per-pair
# task file pulls this in through `include:` and adds its own dataset_name,
# task name and prompt templates.
dataset_path: mariagrandury/americasnlp-mt
output_type: generate_until
# NOTE(review): the training, validation, test and few-shot splits all point
# at `train`, so the few-shot examples are drawn from the same split that is
# evaluated. Confirm the dataset really exposes no other split.
training_split: train
validation_split: train
test_split: train
num_fewshot: 2
fewshot_split: train
# Empty delimiter: nothing is inserted between the prompt and the target.
target_delimiter: ''
# `until` lists the stop sequence(s) for generation: a single newline.
generation_kwargs:
until:
- "\n"
# Three metrics are reported; TER is the only one marked lower-is-better.
metric_list:
- metric: bleu
aggregation: bleu
higher_is_better: true
- metric: ter
aggregation: ter
higher_is_better: false
- metric: chrf
aggregation: chrf
higher_is_better: true
metadata:
version: 1.0
# NOTE(review): trust_remote_code is enabled for dataset loading — confirm it
# is actually required for this dataset.
dataset_kwargs:
trust_remote_code: true
8 changes: 8 additions & 0 deletions lm_eval/tasks/americasnlp_mt/americasnlp-mt_esp-aym.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# File generated by `create_yamls_americasnlp_mt.py`
include: _americasnlp_mt_common_yaml
dataset_name: esp-aym
task: americasnlp-mt_esp-aym
doc_to_text: 'Frase en Español: {{text_from}}

Frase en Aymara:'
doc_to_target: '{{text_to}}'
8 changes: 8 additions & 0 deletions lm_eval/tasks/americasnlp_mt/americasnlp-mt_esp-grn.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# File generated by `create_yamls_americasnlp_mt.py`
include: _americasnlp_mt_common_yaml
dataset_name: esp-grn
task: americasnlp-mt_esp-grn
doc_to_text: 'Frase en Español: {{text_from}}

Frase en Guaraní:'
doc_to_target: '{{text_to}}'
8 changes: 8 additions & 0 deletions lm_eval/tasks/americasnlp_mt/americasnlp-mt_esp-nhn.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# File generated by `create_yamls_americasnlp_mt.py`
include: _americasnlp_mt_common_yaml
dataset_name: esp-nhn
task: americasnlp-mt_esp-nhn
doc_to_text: 'Frase en Español: {{text_from}}

Frase en Náhuatl:'
doc_to_target: '{{text_to}}'
8 changes: 8 additions & 0 deletions lm_eval/tasks/americasnlp_mt/americasnlp-mt_esp-que.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# File generated by `create_yamls_americasnlp_mt.py`
include: _americasnlp_mt_common_yaml
dataset_name: esp-que
task: americasnlp-mt_esp-que
doc_to_text: 'Frase en Español: {{text_from}}

Frase en Quechua:'
doc_to_target: '{{text_to}}'
12 changes: 12 additions & 0 deletions lm_eval/tasks/americasnlp_mt/americasnlp_mt.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Group definition bundling the four americasnlp-mt_esp-* translation tasks.
group: americasnlp_mt
task:
- americasnlp-mt_esp-aym
- americasnlp-mt_esp-grn
- americasnlp-mt_esp-nhn
- americasnlp-mt_esp-que
# Group score: unweighted mean of the subtasks' BLEU.
# NOTE(review): the shared task config in this directory also lists `ter` and
# `chrf`; confirm that leaving them out of the group aggregate is intentional.
aggregate_metric_list:
- metric: bleu
aggregation: mean
weight_by_size: false
metadata:
version: 1.0
106,117 changes: 106,117 additions & 0 deletions lm_eval/tasks/include_es/Spanish.json

Large diffs are not rendered by default.

13 changes: 13 additions & 0 deletions lm_eval/tasks/include_es/_include_es_yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Shared multiple-choice configuration pulled in through `include:` by every
# include_es_<country> task file.
dataset_path: mariagrandury/include_es
test_split: test
output_type: multiple_choice
# Prompt: the stripped question, the four lettered options, then "Answer:".
doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:"
doc_to_choice: ["A", "B", "C", "D"]
# NOTE(review): the `answer` field is used directly as the target; presumably
# it is an index into doc_to_choice — verify against the dataset schema.
doc_to_target: answer
# NOTE(review): `weight_by_size` is set inside this task-level metric entry;
# elsewhere in this change it only appears in group aggregate_metric_list
# entries. Confirm it has any effect here.
metric_list:
- metric: acc
aggregation: mean
higher_is_better: true
weight_by_size: true
metadata:
version: 1.0
70 changes: 70 additions & 0 deletions lm_eval/tasks/include_es/generate_include_es_tasks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import json
import os
import re
import unicodedata

# Resolve every path from this script's own location instead of a
# machine-specific checkout path, so the generator runs from any clone.
# This script lives in <root>/lm_eval/tasks/include_es/, hence the task
# directory is the script's directory and the repository root is three
# levels above it. `root` is kept for backward compatibility.
tasks_dir = os.path.dirname(os.path.abspath(__file__))
root = os.path.abspath(os.path.join(tasks_dir, os.pardir, os.pardir, os.pardir))
json_path = os.path.join(tasks_dir, "Spanish.json")
include_file = os.path.join(tasks_dir, "include_es.yaml")


def slug_dash(name: str) -> str:
    """Return a lower-case, ASCII-only, dash-separated slug for *name*.

    The text is NFKD-decomposed and every non-ASCII code point is dropped
    (which removes accents), then lower-cased and stripped. Each run of
    characters outside ``[a-z0-9]`` becomes a single ``-``, and leading or
    trailing dashes are trimmed. If nothing is left, ``"default"`` is
    returned.
    """
    decomposed = unicodedata.normalize("NFKD", name)
    ascii_only = decomposed.encode("ascii", "ignore").decode("ascii")
    lowered = ascii_only.lower().strip()
    slug = re.sub(r"[^a-z0-9]+", "-", lowered).strip("-")
    if not slug:
        return "default"
    return slug


def slug_underscore(name: str) -> str:
    """Return a lower-case, ASCII-only, underscore-separated slug for *name*.

    The text is NFKD-decomposed and every non-ASCII code point is dropped
    (which removes accents), then lower-cased and stripped. Each run of
    characters outside ``[a-z0-9]`` becomes a single ``_``, and leading or
    trailing underscores are trimmed. If nothing is left, ``"default"`` is
    returned.
    """
    decomposed = unicodedata.normalize("NFKD", name)
    ascii_only = decomposed.encode("ascii", "ignore").decode("ascii")
    lowered = ascii_only.lower().strip()
    slug = re.sub(r"[^a-z0-9]+", "_", lowered).strip("_")
    if not slug:
        return "default"
    return slug


# Load all records from the JSON file at json_path.
with open(json_path, "r", encoding="utf-8") as src:
    data = json.load(src)

# Distinct country labels in sorted order; records that have no "country"
# key are collected under "unknown".
countries = sorted(set(rec.get("country", "unknown") for rec in data))

# Create per-country YAML files: one small task file per country that
# includes the shared config and sets the dataset config and task name.
for country in countries:
    task_id = "include_es_" + slug_underscore(country)
    yaml_text = "".join(
        [
            "include: _include_es_yaml\n",
            f"dataset_name: {slug_dash(country)}\n",
            f"task: {task_id}\n",
        ]
    )
    out_path = os.path.join(tasks_dir, task_id + ".yaml")
    with open(out_path, "w", encoding="utf-8") as out:
        out.write(yaml_text)

# Update include_es.yaml task list (append missing)
with open(include_file, "r", encoding="utf-8") as group_file:
    lines = group_file.read().splitlines()

# Locate the line that opens the task list.
start = None
for idx, text in enumerate(lines):
    if text.strip() == "task:":
        start = idx
        break
if start is None:
    raise SystemExit("No 'task:' section found in include_es.yaml")

# `end` is the index just past the last consecutive list entry after
# "task:"; the first line without the entry prefix ends the list.
end = start + 1
for text in lines[start + 1 :]:
    if not text.startswith(" - "):
        break
    end += 1

# Task names already present in the list.
existing = set()
for text in lines[start + 1 : end]:
    entry = text.strip()
    if entry.startswith("- "):
        existing.add(entry.split(" ", 1)[1])

# Entries for countries whose task is not listed yet, in country order.
new_tasks = [
    f" - {task_id}"
    for task_id in (f"include_es_{slug_underscore(c)}" for c in countries)
    if task_id not in existing
]

# Rewrite the file only when something has to be appended; the new entries
# go right after the existing ones.
if new_tasks:
    lines[end:end] = new_tasks
    with open(include_file, "w", encoding="utf-8") as group_file:
        group_file.write("\n".join(lines) + "\n")
26 changes: 26 additions & 0 deletions lm_eval/tasks/include_es/include_es.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Group definition bundling the per-country include_es_* tasks.
# The task list below is also extended by generate_include_es_tasks.py, which
# appends any country task that is not listed yet.
group: include_es
task:
- include_es_argentina
- include_es_chile
- include_es_colombia
- include_es_costa_rica
- include_es_cuba
- include_es_ecuador
- include_es_el_salvador
- include_es_espana
- include_es_mexico
# Group scores are size-weighted means over the subtasks.
# NOTE(review): the shared task config (_include_es_yaml) lists only `acc` in
# its metric_list; confirm the subtasks actually produce acc_norm, perplexity
# and f1 — otherwise these entries are dead configuration.
aggregate_metric_list:
- metric: acc
aggregation: mean
weight_by_size: true
- metric: acc_norm
aggregation: mean
weight_by_size: true
- metric: perplexity
aggregation: mean
weight_by_size: true
- metric: f1
aggregation: mean
weight_by_size: true
metadata:
version: 1.0
3 changes: 3 additions & 0 deletions lm_eval/tasks/include_es/include_es_argentina.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
include: _include_es_yaml
dataset_name: argentina
task: include_es_argentina
3 changes: 3 additions & 0 deletions lm_eval/tasks/include_es/include_es_chile.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
include: _include_es_yaml
dataset_name: chile
task: include_es_chile
3 changes: 3 additions & 0 deletions lm_eval/tasks/include_es/include_es_colombia.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
include: _include_es_yaml
dataset_name: colombia
task: include_es_colombia
3 changes: 3 additions & 0 deletions lm_eval/tasks/include_es/include_es_costa_rica.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
include: _include_es_yaml
dataset_name: costa-rica
task: include_es_costa_rica
3 changes: 3 additions & 0 deletions lm_eval/tasks/include_es/include_es_cuba.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
include: _include_es_yaml
dataset_name: cuba
task: include_es_cuba
3 changes: 3 additions & 0 deletions lm_eval/tasks/include_es/include_es_ecuador.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
include: _include_es_yaml
dataset_name: ecuador
task: include_es_ecuador
3 changes: 3 additions & 0 deletions lm_eval/tasks/include_es/include_es_el_salvador.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
include: _include_es_yaml
dataset_name: el-salvador
task: include_es_el_salvador
3 changes: 3 additions & 0 deletions lm_eval/tasks/include_es/include_es_espana.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
include: _include_es_yaml
dataset_name: espana
task: include_es_espana
3 changes: 3 additions & 0 deletions lm_eval/tasks/include_es/include_es_mexico.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
include: _include_es_yaml
dataset_name: mexico
task: include_es_mexico
Loading