1
1
from __future__ import annotations
2
- from typing import Any , Dict , Optional , Tuple
2
+ from typing import Optional
3
3
4
4
import pyarrow
5
5
from deltacat .constants import DELTACAT_CATALOG_PROPERTY_ROOT
6
+
6
7
from deltacat .utils .filesystem import resolve_path_and_filesystem
7
8
8
9
"""
@@ -142,8 +143,6 @@ def __init__(
142
143
)
143
144
self ._root = resolved_root
144
145
self ._filesystem = resolved_filesystem
145
-
146
- self ._dataset_cache : Dict [Tuple [str , str ], Any ] = {}
147
146
148
147
@property
149
148
def root (self ) -> str :
@@ -152,104 +151,3 @@ def root(self) -> str:
152
151
@property
153
152
def filesystem (self ) -> Optional [pyarrow .fs .FileSystem ]:
154
153
return self ._filesystem
155
-
156
- def cache_dataset (self , dataset , namespace : str ) -> None :
157
- """
158
- Cache a dataset for faster retrieval.
159
-
160
- Args:
161
- dataset: The dataset to cache
162
- namespace: The namespace the dataset belongs to
163
- """
164
- self ._dataset_cache [(namespace , dataset .dataset_name )] = dataset
165
-
166
- def get_cached_dataset (self , namespace : str , table_name : str ) -> Optional [Any ]:
167
- """
168
- Retrieve a cached dataset if available.
169
-
170
- Args:
171
- namespace: The namespace
172
- table_name: The table name
173
-
174
- Returns:
175
- The cached dataset or None if not found
176
- """
177
- return self ._dataset_cache .get ((namespace , table_name ))
178
-
179
- def remove_dataset_from_cache (self , namespace : str , table_name : str ) -> None :
180
- """
181
- Remove a specific dataset from the cache.
182
-
183
- Args:
184
- namespace: The namespace
185
- table_name: The table name
186
- """
187
- if (namespace , table_name ) in self ._dataset_cache :
188
- del self ._dataset_cache [(namespace , table_name )]
189
-
190
- def remove_namespace_from_cache (self , namespace : str ) -> None :
191
- """
192
- Remove all datasets for a specific namespace from the cache.
193
-
194
- Args:
195
- namespace: The namespace to remove
196
- """
197
- keys_to_remove = []
198
- for (ns , table ), _ in self ._dataset_cache .items ():
199
- if ns == namespace :
200
- keys_to_remove .append ((ns , table ))
201
-
202
- for key in keys_to_remove :
203
- del self ._dataset_cache [key ]
204
-
205
- def rename_dataset_in_cache (self , namespace : str , old_table_name : str , new_table_name : str ) -> None :
206
- """
207
- Rename a dataset in the cache.
208
-
209
- Args:
210
- namespace: The namespace
211
- old_table_name: The current table name
212
- new_table_name: The new table name
213
- """
214
- if (namespace , old_table_name ) in self ._dataset_cache :
215
- dataset = self ._dataset_cache [(namespace , old_table_name )]
216
- # Update the dataset's name property if available
217
- if hasattr (dataset , 'dataset_name' ):
218
- dataset .dataset_name = new_table_name
219
- # Store with new key
220
- self ._dataset_cache [(namespace , new_table_name )] = dataset
221
- # Remove old entry
222
- del self ._dataset_cache [(namespace , old_table_name )]
223
-
224
- def rename_namespace_in_cache (self , old_namespace : str , new_namespace : str ) -> None :
225
- """
226
- Rename a namespace in the cache, updating all associated datasets.
227
-
228
- Args:
229
- old_namespace: The current namespace name
230
- new_namespace: The new namespace name
231
- """
232
- datasets_to_rename = []
233
-
234
- # Find all datasets in the cache that belong to this namespace
235
- for (ns , table_name ), dataset in self ._dataset_cache .items ():
236
- if ns == old_namespace :
237
- datasets_to_rename .append ((table_name , dataset ))
238
-
239
- # Remove old entries and add with new namespace
240
- for table_name , dataset in datasets_to_rename :
241
- del self ._dataset_cache [(old_namespace , table_name )]
242
- self ._dataset_cache [(new_namespace , table_name )] = dataset
243
-
244
- def clear_dataset_cache (self ) -> None :
245
- """Clear the entire dataset cache"""
246
- self ._dataset_cache .clear ()
247
-
248
- def get_dataset_cache_size (self ) -> int :
249
- """
250
- Get the number of datasets in the cache.
251
-
252
- Returns:
253
- int: The number of cached datasets
254
- """
255
- return len (self ._dataset_cache )
0 commit comments