@@ -823,6 +823,7 @@ class LoadFromHFSpace(LoadHF):
823
823
use_token : Optional [bool ] = None
824
824
token_env : Optional [str ] = None
825
825
requirements_list : List [str ] = ["huggingface_hub" ]
826
+ streaming = True
826
827
827
828
def _get_token (self ) -> Optional [Union [bool , str ]]:
828
829
if self .token_env :
@@ -953,45 +954,6 @@ def load_data(self):
953
954
self ._map_wildcard_path_to_full_paths ()
954
955
self .path = self ._download_data ()
955
956
return super ().load_data ()
956
-
957
- # url: str
958
-
959
- # _requirements_list: List[str] = ["opendatasets"]
960
- # data_classification_policy = ["public"]
961
-
962
- # def verify(self):
963
- # super().verify()
964
- # if not os.path.isfile("kaggle.json"):
965
- # raise MissingKaggleCredentialsError(
966
- # "Please obtain kaggle credentials https://christianjmills.com/posts/kaggle-obtain-api-key-tutorial/ and save them to local ./kaggle.json file"
967
- # )
968
-
969
- # if self.streaming:
970
- # raise NotImplementedError("LoadFromKaggle cannot load with streaming.")
971
-
972
- # def prepare(self):
973
- # super().prepare()
974
- # from opendatasets import download
975
-
976
- # self.downloader = download
977
-
978
- # def load_iterables(self):
979
- # with TemporaryDirectory() as temp_directory:
980
- # self.downloader(self.url, temp_directory)
981
- # return hf_load_dataset(temp_directory, streaming=False)
982
-
983
- # class LoadFromAPI(Loader):
984
- # """Loads data from from API"""
985
-
986
- # urls: Dict[str, str]
987
- # chunksize: int = 100000
988
- # loader_limit: Optional[int] = None
989
- # streaming: bool = False
990
-
991
- # def _maybe_set_classification_policy(self):
992
- # self.set_default_data_classification(["proprietary"], "when loading from API")
993
-
994
- # def load_iterables(self):
995
957
self .api_key = os .getenv ("SQL_API_KEY" , None )
996
958
if not self .api_key :
997
959
raise ValueError (
0 commit comments