1313HF_DOWNLOAD_PATH = Path (__file__ ).resolve ().parents [4 ]
1414HF_DOWNLOAD_PATH = HF_DOWNLOAD_PATH / "hf_download"
1515
16+
1617@dataclass
1718class HuggingFaceAgent :
1819
@@ -38,13 +39,17 @@ def upload(self, objects, metadata, repo_id, private=False, **kwargs):
3839 metadata_file = "metadata.yaml"
3940
4041 try :
41- df = pd .DataFrame (data = [(obj [0 ], obj [2 ]['_embeddings' ], obj [2 ]['document' ]) for obj in objects ])
42+ df = pd .DataFrame (
43+ data = [(obj [0 ], obj [2 ]['_embeddings' ], obj [2 ]['document' ]) for obj in objects ])
4244 except Exception as e :
43- raise ValueError (f"Creation of Dataframe not successful: { e } " ) from e
45+ raise ValueError (
46+ f"Creation of Dataframe not successful: { e } " ) from e
4447
4548 with ExitStack () as stack :
46- tmp_parquet = stack .enter_context (tempfile .NamedTemporaryFile (suffix = ".parquet" , delete = True ))
47- tmp_yaml = stack .enter_context (tempfile .NamedTemporaryFile (suffix = ".yaml" , delete = True ))
49+ tmp_parquet = stack .enter_context (
50+ tempfile .NamedTemporaryFile (suffix = ".parquet" , delete = True ))
51+ tmp_yaml = stack .enter_context (
52+ tempfile .NamedTemporaryFile (suffix = ".yaml" , delete = True ))
4853
4954 embedding_path = tmp_parquet .name
5055 metadata_path = tmp_yaml .name
@@ -56,8 +61,8 @@ def upload(self, objects, metadata, repo_id, private=False, **kwargs):
5661 self ._create_repo (repo_id , private = private )
5762
5863 self ._upload_files (repo_id , {
59- embedding_path : repo_id + "/" + embedding_file ,
60- metadata_path : repo_id + "/" + metadata_file
64+ embedding_path : repo_id + "/" + embedding_file ,
65+ metadata_path : repo_id + "/" + metadata_file
6166 })
6267
6368 def upload_duckdb (self , objects , metadata , repo_id , private = False , ** kwargs ):
@@ -74,13 +79,17 @@ def upload_duckdb(self, objects, metadata, repo_id, private=False, **kwargs):
7479 embedding_file = "embeddings.parquet"
7580 metadata_file = "metadata.yaml"
7681 try :
77- df = pd .DataFrame (data = [(obj [0 ], obj [2 ]['_embeddings' ], obj [2 ]['documents' ]) for obj in objects ])
82+ df = pd .DataFrame (
83+ data = [(obj [0 ], obj [2 ]['_embeddings' ], obj [2 ]['documents' ]) for obj in objects ])
7884 except Exception as e :
79- raise ValueError (f"Creation of Dataframe not successful: { e } " ) from e
85+ raise ValueError (
86+ f"Creation of Dataframe not successful: { e } " ) from e
8087
8188 with ExitStack () as stack :
82- tmp_parquet = stack .enter_context (tempfile .NamedTemporaryFile (suffix = ".parquet" , delete = True ))
83- tmp_yaml = stack .enter_context (tempfile .NamedTemporaryFile (suffix = ".yaml" , delete = True ))
89+ tmp_parquet = stack .enter_context (
90+ tempfile .NamedTemporaryFile (suffix = ".parquet" , delete = True ))
91+ tmp_yaml = stack .enter_context (
92+ tempfile .NamedTemporaryFile (suffix = ".yaml" , delete = True ))
8493
8594 embedding_path = tmp_parquet .name
8695 metadata_path = tmp_yaml .name
@@ -92,8 +101,8 @@ def upload_duckdb(self, objects, metadata, repo_id, private=False, **kwargs):
92101 self ._create_repo (repo_id , private = private )
93102
94103 self ._upload_files (repo_id , {
95- embedding_path : repo_id + "/" + embedding_file ,
96- metadata_path : repo_id + "/" + metadata_file
104+ embedding_path : repo_id + "/" + embedding_file ,
105+ metadata_path : repo_id + "/" + metadata_file
97106 })
98107
99108 def _create_repo (self , repo_id : str , private : bool = False ):
@@ -104,13 +113,15 @@ def _create_repo(self, repo_id: str, private: bool = False):
104113 :param private: Whether the repository is private.
105114 """
106115 try :
107- create_repo (repo_id = repo_id , token = self .token , repo_type = "dataset" , private = private )
108- logger .info (f"Repository { repo_id } created successfully on Hugging Face." )
116+ create_repo (repo_id = repo_id , token = self .token ,
117+ repo_type = "dataset" , private = private )
118+ logger .info (
119+ f"Repository { repo_id } created successfully on Hugging Face." )
109120 except Exception as e :
110- logger .error (f"Failed to create repository { repo_id } on Hugging Face: { e } " )
121+ logger .error (
122+ f"Failed to create repository { repo_id } on Hugging Face: { e } " )
111123 raise
112124
113-
114125 def _upload_files (self , repo_id : str , files : Dict [str , str ]):
115126 """
116127 Upload files to a Hugging Face repository.
@@ -126,9 +137,11 @@ def _upload_files(self, repo_id: str, files: Dict[str, str]):
126137 repo_id = repo_id ,
127138 repo_type = "dataset" ,
128139 )
129- logger .info (f"Uploaded { local_path } to { repo_path } in { repo_id } " )
140+ logger .info (
141+ f"Uploaded { local_path } to { repo_path } in { repo_id } " )
130142 except Exception as e :
131- logger .error (f"Failed to upload files to { repo_id } on Hugging Face: { e } " )
143+ logger .error (
144+ f"Failed to upload files to { repo_id } on Hugging Face: { e } " )
132145 raise
133146
134147 def cached_download (
@@ -145,7 +158,3 @@ def cached_download(
145158 )
146159
147160 return download_path
148-
149-
150-
151-
0 commit comments