Skip to content

Commit 7152be4

Browse files
authored
Revisit huggingface cache policy (#1564)
* Revisit huggingface cache policy Signed-off-by: elronbandel <[email protected]>
* Enable streaming for LoadFromHFSpace and clean up commented code Signed-off-by: elronbandel <[email protected]>
* Disable Hugging Face datasets cache in CatalogPreparationTestCase Signed-off-by: elronbandel <[email protected]>
* Enable streaming for wiki_bio loader in TaskCard and update JSON configuration Signed-off-by: elronbandel <[email protected]>
* Add conditional test card execution for 'doqa_travel' subset in chat_rag_bench Signed-off-by: elronbandel <[email protected]>
* Enhance memory and performance logging in catalog preparation tests Signed-off-by: elronbandel <[email protected]>
* Return parallel execution to 1 and adjust modulo for deterministic test runs Signed-off-by: elronbandel <[email protected]>
* Try 1 Signed-off-by: elronbandel <[email protected]>
* try 1 fixed Signed-off-by: elronbandel <[email protected]>
* trial 2 Signed-off-by: elronbandel <[email protected]>
* Stop testing social iqa until problem resolved Signed-off-by: elronbandel <[email protected]>
* Update social iqa card to use specific revision and enable testing Signed-off-by: elronbandel <[email protected]>
* Refactor translation card testing logic and remove unused dataset loading Signed-off-by: elronbandel <[email protected]>
* Update head_qa card loader path and streamline dataset configuration Signed-off-by: elronbandel <[email protected]>
* Enable streaming for websrc card loader in configuration Signed-off-by: elronbandel <[email protected]>
* Add revision reference to Winogrande card loaders Signed-off-by: elronbandel <[email protected]>
* Add revision reference to PIQA card loader Signed-off-by: elronbandel <[email protected]>
* Update Signed-off-by: elronbandel <[email protected]>
* Another trial Signed-off-by: elronbandel <[email protected]>
* Refactor dataset loading to support dynamic streaming and improve configuration settings Signed-off-by: elronbandel <[email protected]>
* Add streaming support to turl_col_type configuration Signed-off-by: elronbandel <[email protected]>
* Remove unused skip files from test preparation Signed-off-by: elronbandel <[email protected]>
* Refactor LoadHF class to improve dataset filtering and add streaming support Signed-off-by: elronbandel <[email protected]>
* Update load_dataset function documentation to clarify caching behavior and usage Signed-off-by: elronbandel <[email protected]>
* Update dataset loading to support caching and streaming options Signed-off-by: elronbandel <[email protected]>
* Import UnitxtDataset in load_dataset function for improved dataset handling Signed-off-by: elronbandel <[email protected]>
* Remove unused load function import from __init__.py Signed-off-by: elronbandel <[email protected]>
* Remove streaming option from SEED-Bench loader configuration Signed-off-by: elronbandel <[email protected]>
* Refactor dataset loading to utilize caching and improve dataset handling Signed-off-by: elronbandel <[email protected]>
* Add missing imports for dataset module functionality Signed-off-by: elronbandel <[email protected]>
* Increase loader cache size to improve performance and update test settings for caching behavior Signed-off-by: elronbandel <[email protected]>
---------
Signed-off-by: elronbandel <[email protected]>
1 parent 912dc2a commit 7152be4

39 files changed

+210
-218
lines changed

examples/evaluate_benchmark_with_custom_provider.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44
data = load_dataset(
55
"benchmarks.glue[max_samples_per_subset=5, format=formats.chat_api, system_prompt=system_prompts.general.be_concise]",
66
split="test",
7-
disable_cache=False,
7+
use_cache=True,
88
)
99

1010
model = CrossProviderInferenceEngine(

examples/evaluate_bluebench.py

+3-3
Original file line number | Diff line number | Diff line change
@@ -4,14 +4,14 @@
44
)
55

66
with settings.context(
7-
disable_hf_datasets_cache=False,
87
allow_unverified_code=True,
98
mock_inference_mode=True,
109
):
1110
test_dataset = load_dataset(
12-
"benchmarks.bluebench[loader_limit=30,max_samples_per_subset=30]", split="test"
11+
"benchmarks.bluebench[loader_limit=30,max_samples_per_subset=30]",
12+
split="test",
13+
use_cache=True,
1314
)
14-
1515
# Infer
1616
model = CrossProviderInferenceEngine(
1717
model="llama-3-8b-instruct",

examples/evaluate_same_datasets_and_models_with_multiple_providers.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -55,7 +55,7 @@
5555
demos_pool_size=10,
5656
loader_limit=1000,
5757
max_test_instances=10,
58-
disable_cache=False,
58+
use_cache=True,
5959
split="test",
6060
)
6161

performance/bluebench_profiler.py

+6-15
Original file line number | Diff line number | Diff line change
@@ -7,20 +7,22 @@
77
from io import StringIO
88
from typing import Any, Dict, List, Union
99

10-
from unitxt.api import evaluate, load_recipe
10+
from unitxt.api import _source_to_dataset, evaluate, load_recipe
1111
from unitxt.benchmark import Benchmark
1212
from unitxt.inference import (
1313
CrossProviderInferenceEngine,
1414
InferenceEngine,
1515
TextGenerationInferenceOutput,
1616
)
1717
from unitxt.logging_utils import get_logger
18-
from unitxt.schema import UNITXT_DATASET_SCHEMA, loads_instance
1918
from unitxt.settings_utils import get_settings
2019

2120
logger = get_logger()
2221
settings = get_settings()
22+
2323
settings.allow_unverified_code = True
24+
settings.disable_hf_datasets_cache = False
25+
settings.mock_inference_mode = True
2426

2527

2628
class BlueBenchProfiler:
@@ -65,19 +67,8 @@ def profiler_instantiate_benchmark_recipe(
6567
def profiler_generate_benchmark_dataset(
6668
self, benchmark_recipe: Benchmark, split: str, **kwargs
6769
) -> List[Dict[str, Any]]:
68-
with settings.context(
69-
disable_hf_datasets_cache=False,
70-
allow_unverified_code=True,
71-
mock_inference_mode=True,
72-
):
73-
stream = benchmark_recipe()[split]
74-
75-
dataset = stream.to_dataset(
76-
features=UNITXT_DATASET_SCHEMA, disable_cache=False
77-
).with_transform(loads_instance)
78-
79-
# to charge here for the time of generating all instances
80-
return list(dataset)
70+
dataset = _source_to_dataset(benchmark_recipe, split=split)
71+
return list(dataset)
8172

8273
def profiler_instantiate_model(self) -> InferenceEngine:
8374
return CrossProviderInferenceEngine(

prepare/cards/CFPB_product.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,7 @@
4040
}
4141
for subset, url in subset_and_urls.items():
4242
card = TaskCard(
43-
loader=LoadCSV(files={"train": url}),
43+
loader=LoadCSV(files={"train": url}, streaming=False),
4444
preprocess_steps=[
4545
SplitRandomMix(
4646
{

prepare/cards/chat_rag_bench.py

+6-5
Original file line number | Diff line number | Diff line change
@@ -57,11 +57,12 @@
5757
"metrics.rouge",
5858
]
5959

60-
test_card(
61-
card_for_test,
62-
strict=True,
63-
demos_taken_from="test",
64-
)
60+
if subset == "doqa_travel":
61+
test_card(
62+
card_for_test,
63+
strict=True,
64+
demos_taken_from="test",
65+
)
6566
add_to_catalog(
6667
card,
6768
f"cards.rag.response_generation.chat_rag_bench.{'train.' if split == 'train' else ''}user_assistant_format.{subset}",

prepare/cards/head_qa.py

+40-47
Original file line number | Diff line number | Diff line change
@@ -1,57 +1,50 @@
1-
from datasets import get_dataset_config_names
1+
import unitxt
22
from unitxt import add_to_catalog
33
from unitxt.blocks import (
44
LoadHF,
55
Rename,
66
Set,
77
TaskCard,
88
)
9-
from unitxt.settings_utils import get_settings
109
from unitxt.test_utils.card import test_card
1110

12-
settings = get_settings()
13-
14-
dataset_name = "head_qa"
15-
16-
categories = [
17-
"biology",
18-
"chemistry",
19-
"medicine",
20-
"nursery",
21-
"pharmacology",
22-
"psychology",
23-
]
24-
for subset in get_dataset_config_names(
25-
dataset_name, trust_remote_code=settings.allow_unverified_code
26-
):
27-
card = TaskCard(
28-
loader=LoadHF(path=f"{dataset_name}", name=subset),
29-
preprocess_steps=[
30-
Rename(field_to_field={"qtext": "text", "category": "label"}),
31-
Set(
32-
fields={
33-
"classes": categories,
34-
"text_type": "question",
35-
}
11+
with unitxt.settings.context(allow_unverified_code=True):
12+
for subset in ["es", "en"]:
13+
card = TaskCard(
14+
loader=LoadHF(path="dvilares/head_qa", name=subset),
15+
preprocess_steps=[
16+
Rename(field_to_field={"qtext": "text", "category": "label"}),
17+
Set(
18+
fields={
19+
"classes": [
20+
"biology",
21+
"chemistry",
22+
"medicine",
23+
"nursery",
24+
"pharmacology",
25+
"psychology",
26+
],
27+
"text_type": "question",
28+
}
29+
),
30+
],
31+
task="tasks.classification.multi_class.topic_classification",
32+
templates="templates.classification.multi_class.all",
33+
__description__=(
34+
"HEAD-QA is a multi-choice HEAlthcare Dataset. The questions come from exams to access a specialized position in the Spanish healthcare system, and are challenging even for highly specialized humans. They are designed by the Ministerio de Sanidad, Consumo y Bienestar Social. The dataset contains questions about the following topics: medicine, nursing, psychology, chemistry, pharmacology and biology… See the full description on the dataset page: https://huggingface.co/datasets/head_qa"
3635
),
37-
],
38-
task="tasks.classification.multi_class.topic_classification",
39-
templates="templates.classification.multi_class.all",
40-
__description__=(
41-
"HEAD-QA is a multi-choice HEAlthcare Dataset. The questions come from exams to access a specialized position in the Spanish healthcare system, and are challenging even for highly specialized humans. They are designed by the Ministerio de Sanidad, Consumo y Bienestar Social. The dataset contains questions about the following topics: medicine, nursing, psychology, chemistry, pharmacology and biology… See the full description on the dataset page: https://huggingface.co/datasets/head_qa"
42-
),
43-
__tags__={
44-
"annotations_creators": "no-annotation",
45-
"language": ["en", "es"],
46-
"language_creators": "expert-generated",
47-
"license": "mit",
48-
"multilinguality": "monolingual",
49-
"region": "us",
50-
"size_categories": "1K<n<10K",
51-
"source_datasets": "original",
52-
"task_categories": "question-answering",
53-
"task_ids": "multiple-choice-qa",
54-
},
55-
)
56-
test_card(card, debug=False)
57-
add_to_catalog(card, f"cards.{dataset_name}.{subset}", overwrite=True)
36+
__tags__={
37+
"annotations_creators": "no-annotation",
38+
"language": ["en", "es"],
39+
"language_creators": "expert-generated",
40+
"license": "mit",
41+
"multilinguality": "monolingual",
42+
"region": "us",
43+
"size_categories": "1K<n<10K",
44+
"source_datasets": "original",
45+
"task_categories": "question-answering",
46+
"task_ids": "multiple-choice-qa",
47+
},
48+
)
49+
test_card(card, debug=False)
50+
add_to_catalog(card, f"cards.head_qa.{subset}", overwrite=True)

prepare/cards/piqa.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44
from unitxt.test_utils.card import test_card
55

66
card = TaskCard(
7-
loader=LoadHF(path="piqa"),
7+
loader=LoadHF(path="piqa", revision="refs/pr/9"),
88
preprocess_steps=[
99
ListFieldValues(fields=["sol1", "sol2"], to_field="choices"),
1010
Rename(

prepare/cards/social_iqa.py

+3-1
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,9 @@
1414
with unitxt.settings.context(allow_unverified_code=True):
1515
card = TaskCard(
1616
loader=LoadHF(
17-
path="allenai/social_i_qa", data_classification_policy=["public"]
17+
path="allenai/social_i_qa",
18+
data_classification_policy=["public"],
19+
revision="refs/pr/3",
1820
),
1921
preprocess_steps=[
2022
Deduplicate(by=["context", "question", "answerA", "answerB", "answerC"]),

prepare/cards/translation/flores101.py

+2-11
Original file line number | Diff line number | Diff line change
@@ -150,17 +150,8 @@
150150
task="tasks.translation.directed",
151151
templates="templates.translation.directed.all",
152152
)
153-
154-
test_card(card, demos_taken_from="test")
153+
if pair == pairs[0]:
154+
test_card(card, demos_taken_from="test")
155155
add_to_catalog(
156156
card, f"cards.mt.flores_101.{pair['src']}_{pair['tgt']}", overwrite=True
157157
)
158-
159-
if __name__ == "__main__":
160-
from unitxt import load_dataset
161-
162-
ds = load_dataset(
163-
"card=cards.mt.flores_101.eng_deu,template_card_index=0",
164-
)
165-
166-
ds["test"][0]

prepare/cards/turl_col_type.py

+1
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414
loader=LoadHF(
1515
path="ibm/turl_table_col_type",
1616
data_classification_policy=["public"],
17+
streaming=True,
1718
),
1819
task=Task(
1920
input_fields={

prepare/cards/websrc.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@
1515
)
1616

1717
card = TaskCard(
18-
loader=LoadHF(path="rootsautomation/websrc"),
18+
loader=LoadHF(path="rootsautomation/websrc", streaming=True),
1919
preprocess_steps=[
2020
RenameSplits(mapper={"train": "train", "dev": "test"}),
2121
"splitters.small_no_dev",

prepare/cards/wiki_bio.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@
1111
from unitxt.test_utils.card import test_card
1212

1313
card = TaskCard(
14-
loader=LoadHF(path="wiki_bio"),
14+
loader=LoadHF(path="wiki_bio", streaming=True),
1515
preprocess_steps=[
1616
SplitRandomMix({"train": "train", "validation": "val", "test": "test"}),
1717
ListToKeyValPairs(

prepare/cards/winogrande.py

+3-1
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,9 @@
55

66
for subtask in ["debiased", "l", "m", "s", "xl", "xs"]:
77
card = TaskCard(
8-
loader=LoadHF(path="winogrande", name=f"winogrande_{subtask}"),
8+
loader=LoadHF(
9+
path="winogrande", name=f"winogrande_{subtask}", revision="refs/pr/6"
10+
),
911
preprocess_steps=[
1012
"splitters.small_no_test",
1113
ListFieldValues(fields=["option1", "option2"], to_field="choices"),

pyproject.toml

+1
Original file line number | Diff line number | Diff line change
@@ -184,6 +184,7 @@ keep-runtime-typing = true
184184
".vscode/*" = ["TID251"]
185185
"tests/*" = ["TID251"]
186186
"utils/*" = ["TID251"]
187+
"src/unitxt/api.py" = ["B904"]
187188
"src/unitxt/__init__.py" = ["F811", "F401"]
188189
"src/unitxt/metric.py" = ["F811", "F401"]
189190
"src/unitxt/dataset.py" = ["F811", "F401"]

src/unitxt/__init__.py

-1
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,6 @@
55
create_dataset,
66
evaluate,
77
infer,
8-
load,
98
load_dataset,
109
post_process,
1110
produce,

0 commit comments

Comments
 (0)