|
1 |
| -from datasets import get_dataset_config_names |
| 1 | +import unitxt |
2 | 2 | from unitxt import add_to_catalog
|
3 | 3 | from unitxt.blocks import (
|
4 | 4 | LoadHF,
|
5 | 5 | Rename,
|
6 | 6 | Set,
|
7 | 7 | TaskCard,
|
8 | 8 | )
|
9 |
| -from unitxt.settings_utils import get_settings |
10 | 9 | from unitxt.test_utils.card import test_card
|
11 | 10 |
|
12 |
| -settings = get_settings() |
13 |
| - |
14 |
| -dataset_name = "head_qa" |
15 |
| - |
16 |
| -categories = [ |
17 |
| - "biology", |
18 |
| - "chemistry", |
19 |
| - "medicine", |
20 |
| - "nursery", |
21 |
| - "pharmacology", |
22 |
| - "psychology", |
23 |
| -] |
24 |
| -for subset in get_dataset_config_names( |
25 |
| - dataset_name, trust_remote_code=settings.allow_unverified_code |
26 |
| -): |
27 |
| - card = TaskCard( |
28 |
| - loader=LoadHF(path=f"{dataset_name}", name=subset), |
29 |
| - preprocess_steps=[ |
30 |
| - Rename(field_to_field={"qtext": "text", "category": "label"}), |
31 |
| - Set( |
32 |
| - fields={ |
33 |
| - "classes": categories, |
34 |
| - "text_type": "question", |
35 |
| - } |
| 11 | +with unitxt.settings.context(allow_unverified_code=True): |
| 12 | + for subset in ["es", "en"]: |
| 13 | + card = TaskCard( |
| 14 | + loader=LoadHF(path="dvilares/head_qa", name=subset), |
| 15 | + preprocess_steps=[ |
| 16 | + Rename(field_to_field={"qtext": "text", "category": "label"}), |
| 17 | + Set( |
| 18 | + fields={ |
| 19 | + "classes": [ |
| 20 | + "biology", |
| 21 | + "chemistry", |
| 22 | + "medicine", |
| 23 | + "nursery", |
| 24 | + "pharmacology", |
| 25 | + "psychology", |
| 26 | + ], |
| 27 | + "text_type": "question", |
| 28 | + } |
| 29 | + ), |
| 30 | + ], |
| 31 | + task="tasks.classification.multi_class.topic_classification", |
| 32 | + templates="templates.classification.multi_class.all", |
| 33 | + __description__=( |
| 34 | + "HEAD-QA is a multi-choice HEAlthcare Dataset. The questions come from exams to access a specialized position in the Spanish healthcare system, and are challenging even for highly specialized humans. They are designed by the Ministerio de Sanidad, Consumo y Bienestar Social. The dataset contains questions about the following topics: medicine, nursing, psychology, chemistry, pharmacology and biology… See the full description on the dataset page: https://huggingface.co/datasets/head_qa" |
36 | 35 | ),
|
37 |
| - ], |
38 |
| - task="tasks.classification.multi_class.topic_classification", |
39 |
| - templates="templates.classification.multi_class.all", |
40 |
| - __description__=( |
41 |
| - "HEAD-QA is a multi-choice HEAlthcare Dataset. The questions come from exams to access a specialized position in the Spanish healthcare system, and are challenging even for highly specialized humans. They are designed by the Ministerio de Sanidad, Consumo y Bienestar Social. The dataset contains questions about the following topics: medicine, nursing, psychology, chemistry, pharmacology and biology… See the full description on the dataset page: https://huggingface.co/datasets/head_qa" |
42 |
| - ), |
43 |
| - __tags__={ |
44 |
| - "annotations_creators": "no-annotation", |
45 |
| - "language": ["en", "es"], |
46 |
| - "language_creators": "expert-generated", |
47 |
| - "license": "mit", |
48 |
| - "multilinguality": "monolingual", |
49 |
| - "region": "us", |
50 |
| - "size_categories": "1K<n<10K", |
51 |
| - "source_datasets": "original", |
52 |
| - "task_categories": "question-answering", |
53 |
| - "task_ids": "multiple-choice-qa", |
54 |
| - }, |
55 |
| - ) |
56 |
| - test_card(card, debug=False) |
57 |
| - add_to_catalog(card, f"cards.{dataset_name}.{subset}", overwrite=True) |
| 36 | + __tags__={ |
| 37 | + "annotations_creators": "no-annotation", |
| 38 | + "language": ["en", "es"], |
| 39 | + "language_creators": "expert-generated", |
| 40 | + "license": "mit", |
| 41 | + "multilinguality": "monolingual", |
| 42 | + "region": "us", |
| 43 | + "size_categories": "1K<n<10K", |
| 44 | + "source_datasets": "original", |
| 45 | + "task_categories": "question-answering", |
| 46 | + "task_ids": "multiple-choice-qa", |
| 47 | + }, |
| 48 | + ) |
| 49 | + test_card(card, debug=False) |
| 50 | + add_to_catalog(card, f"cards.head_qa.{subset}", overwrite=True) |
0 commit comments