Skip to content

Commit 78fc482

Browse files
committed
Turn off streaming by default for Hugging Face datasets and set disable_hf_datasets_cache to False
Signed-off-by: elronbandel <[email protected]>
1 parent fddf5e3 commit 78fc482

File tree

3 files changed

+11
-10
lines changed

3 files changed

+11
-10
lines changed

src/unitxt/loaders.py

Lines changed: 7 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -210,7 +210,7 @@ class LoadHF(Loader):
210210
Union[str, Sequence[str], Mapping[str, Union[str, Sequence[str]]]]
211211
] = None
212212
revision: Optional[str] = None
213-
streaming: bool = True
213+
streaming: bool = False
214214
filtering_lambda: Optional[str] = None
215215
num_proc: Optional[int] = None
216216
requirements_list: List[str] = OptionalField(default_factory=list)
@@ -308,11 +308,12 @@ def _maybe_set_classification_policy(self):
308308
)
309309

310310
def load_iterables(self) -> IterableDatasetDict:
311-
try:
312-
dataset = self.stream_dataset()
313-
except (
314-
NotImplementedError
315-
): # streaming is not supported for zipped files so we load without streaming
311+
if self.streaming:
312+
try:
313+
dataset = self.stream_dataset()
314+
except NotImplementedError: # streaming is not supported for zipped files so we load without streaming
315+
dataset = self.load_dataset()
316+
else:
316317
dataset = self.load_dataset()
317318

318319
if self.filtering_lambda is not None:

src/unitxt/settings_utils.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -149,7 +149,7 @@ def __getattr__(self, key):
149149
settings.skip_artifacts_prepare_and_verify = (bool, False)
150150
settings.data_classification_policy = None
151151
settings.mock_inference_mode = (bool, False)
152-
settings.disable_hf_datasets_cache = (bool, True)
152+
settings.disable_hf_datasets_cache = (bool, False)
153153
settings.loader_cache_size = (int, 1)
154154
settings.task_data_as_text = (bool, True)
155155
settings.default_provider = "watsonx"
@@ -193,5 +193,5 @@ def get_settings() -> Settings:
193193
return Settings()
194194

195195

196-
def get_constants():
196+
def get_constants() -> Constants:
197197
return Constants()

utils/.secrets.baseline

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -151,7 +151,7 @@
151151
"filename": "src/unitxt/loaders.py",
152152
"hashed_secret": "840268f77a57d5553add023cfa8a4d1535f49742",
153153
"is_verified": false,
154-
"line_number": 531,
154+
"line_number": 532,
155155
"is_secret": false
156156
}
157157
],
@@ -184,5 +184,5 @@
184184
}
185185
]
186186
},
187-
"generated_at": "2025-01-26T10:03:47Z"
187+
"generated_at": "2025-01-28T08:40:45Z"
188188
}

0 commit comments

Comments
 (0)