Skip to content

Commit c863ee7

Browse files
committed
Enable streaming for LoadFromHFSpace and clean up commented code
Signed-off-by: elronbandel <[email protected]>
1 parent db2b74b commit c863ee7

File tree

1 file changed

+1
-39
lines changed

1 file changed

+1
-39
lines changed

src/unitxt/loaders.py

+1-39
Original file line numberDiff line numberDiff line change
@@ -823,6 +823,7 @@ class LoadFromHFSpace(LoadHF):
823823
use_token: Optional[bool] = None
824824
token_env: Optional[str] = None
825825
requirements_list: List[str] = ["huggingface_hub"]
826+
streaming = True
826827

827828
def _get_token(self) -> Optional[Union[bool, str]]:
828829
if self.token_env:
@@ -953,45 +954,6 @@ def load_data(self):
953954
self._map_wildcard_path_to_full_paths()
954955
self.path = self._download_data()
955956
return super().load_data()
956-
957-
# url: str
958-
959-
# _requirements_list: List[str] = ["opendatasets"]
960-
# data_classification_policy = ["public"]
961-
962-
# def verify(self):
963-
# super().verify()
964-
# if not os.path.isfile("kaggle.json"):
965-
# raise MissingKaggleCredentialsError(
966-
# "Please obtain kaggle credentials https://christianjmills.com/posts/kaggle-obtain-api-key-tutorial/ and save them to local ./kaggle.json file"
967-
# )
968-
969-
# if self.streaming:
970-
# raise NotImplementedError("LoadFromKaggle cannot load with streaming.")
971-
972-
# def prepare(self):
973-
# super().prepare()
974-
# from opendatasets import download
975-
976-
# self.downloader = download
977-
978-
# def load_iterables(self):
979-
# with TemporaryDirectory() as temp_directory:
980-
# self.downloader(self.url, temp_directory)
981-
# return hf_load_dataset(temp_directory, streaming=False)
982-
983-
# class LoadFromAPI(Loader):
984-
# """Loads data from from API"""
985-
986-
# urls: Dict[str, str]
987-
# chunksize: int = 100000
988-
# loader_limit: Optional[int] = None
989-
# streaming: bool = False
990-
991-
# def _maybe_set_classification_policy(self):
992-
# self.set_default_data_classification(["proprietary"], "when loading from API")
993-
994-
# def load_iterables(self):
995957
self.api_key = os.getenv("SQL_API_KEY", None)
996958
if not self.api_key:
997959
raise ValueError(

0 commit comments

Comments
 (0)