77import re
88import warnings
99
10- import dapla
1110import pandas as pd
1211
1312from ssb_konjunk import timestamp
@@ -40,7 +39,6 @@ def _structure_ssb_filepath(
4039 undermappe : str | None = None ,
4140 version_number : int | None = None ,
4241 filetype : str = "parquet" ,
43- fs : dapla .gcs .GCSFileSystem | None = None ,
4442) -> str :
4543 """Structure the name of the file to SSB-format and the path.
4644
@@ -54,19 +52,14 @@ def _structure_ssb_filepath(
5452 undermappe: Optional string for if you want folders between 'datatilstand' and file.
5553 version_number: Optional int for reading specific file.
5654 filetype: String with default 'parquet', specifies file type.
57- fs: the filesystem, pass with gsc Filesystem if Dapla. Default: None.
5855
5956 Returns:
6057 str: the full path to the file.
6158
6259 Raises:
6360 ValueError: Raise if version number is not None or int.
6461 """
65- # Handle that path starts with / in prodsonen.
66- if fs is None :
67- bucket = _remove_edge_slashes (bucket , only_last = True )
68- else :
69- bucket = _remove_edge_slashes (bucket )
62+ bucket = _remove_edge_slashes (bucket )
7063 kortnavn = _remove_edge_slashes (kortnavn )
7164 datatilstand = _remove_edge_slashes (datatilstand )
7265 file_name = _remove_edge_slashes (file_name )
@@ -96,17 +89,12 @@ def _structure_ssb_filepath(
9689 return file_path
9790
9891
99- def _get_files (
100- folder_path : str , filetype : str , fs : dapla .gcs .GCSFileSystem | None
101- ) -> list [str ]:
92+ def _get_files (folder_path : str , filetype : str ) -> list [str ]:
10293 """Function to list files in a folder based on base name and timestamp."""
10394 filenames = []
10495
10596 match_string = f"{ folder_path } *"
106- if fs :
107- filenames = fs .glob (match_string )
108- else :
109- filenames = glob .glob (match_string )
97+ filenames = glob .glob (match_string )
11098
11199 # Only include files with the relevant file extension
112100 filenames = [i for i in filenames if i .endswith (filetype )]
@@ -238,28 +226,16 @@ def _save_df(
238226 df : pd .DataFrame ,
239227 file_path : str ,
240228 filetype : str ,
241- fs : dapla .gcs .GCSFileSystem | None ,
242229 seperator : str ,
243230 encoding : str ,
244231) -> None :
245232 """Do the actual saving, either as csv or parquet."""
246233 # Save as parquet
247234 if filetype == "parquet" :
248-
249- if fs :
250- with fs .open (file_path , "wb" ) as f :
251- df .to_parquet (f , index = False )
252- f .close ()
253- else :
254- df .to_parquet (file_path , index = False )
235+ df .to_parquet (file_path , index = False )
255236 # Save as csv
256237 elif filetype == "csv" :
257- if fs :
258- with fs .open (file_path , "wb" ) as f :
259- df .to_csv (f , sep = seperator , index = False , encoding = encoding )
260- f .close ()
261- else :
262- df .to_csv (file_path , sep = seperator , index = False , encoding = encoding )
238+ df .to_csv (file_path , sep = seperator , index = False , encoding = encoding )
263239 # Save as jsonl
264240 elif filetype == "jsonl" :
265241 df .to_json (file_path , orient = "records" , lines = True )
@@ -286,7 +262,6 @@ def write_ssb_file(
286262 undermappe : str | None = None ,
287263 stable_version : bool = True ,
288264 filetype : str = "parquet" ,
289- fs : dapla .gcs .GCSFileSystem | None = None ,
290265 seperator : str = ";" ,
291266 encoding : str = "latin1" ,
292267) -> None :
@@ -303,7 +278,6 @@ def write_ssb_file(
303278 undermappe: Optional folder under 'datatilstand'.
304279 stable_version: Bool for whether you should have checks in place in case of overwrite.
305280 filetype: the filetype to save as. Default: 'parquet'.
306- fs: the filesystem, pass with gsc Filesystem if Dapla. Default: None.
307281 seperator: the separator to use if filetype is csv. Default: ';'.
308282 encoding: Encoding for file, base is latin1.
309283
@@ -327,10 +301,9 @@ def write_ssb_file(
327301 datatilstand = datatilstand ,
328302 file_name = file_name ,
329303 undermappe = undermappe ,
330- fs = fs ,
331304 )
332305 # Get list with the filenames, if several, ordered by the highest version number at last.
333- files = _get_files (file_path , filetype , fs = fs )
306+ files = _get_files (file_path , filetype )
334307 # Find version number/decide whether to overwrite or make new version.
335308 version_number = _find_version_number (files , stable_version )
336309
@@ -339,7 +312,7 @@ def write_ssb_file(
339312 file_path = file_path [:- 1 ]
340313 file_path = f"{ file_path } _v{ version_number } .{ filetype } "
341314
342- _save_df (df , file_path , filetype , fs , seperator , encoding )
315+ _save_df (df , file_path , filetype , seperator , encoding )
343316
344317
345318def read_ssb_file (
@@ -353,7 +326,6 @@ def read_ssb_file(
353326 filetype : str = "parquet" ,
354327 columns : list [str ] | None = None ,
355328 version_number : int | None = None ,
356- fs : dapla .gcs .GCSFileSystem | None = None ,
357329 seperator : str = ";" ,
358330 encoding : str = "latin1" ,
359331) -> pd .DataFrame | None :
@@ -374,7 +346,6 @@ def read_ssb_file(
374346 version_number: possibility to get another version than the newest (i.e. highest version number). Default: None.
375347 filetype: the filetype to save as. Default: 'parquet'.
376348 columns: Columns to read from the file. If None (default), all columns are read.
377- fs: the filesystem, pass with gsc Filesystem if Dapla. Default: None.
378349 seperator: the separator to use if filetype is csv. Default: ';'.
379350 encoding: Encoding for file, base is latin1.
380351
@@ -395,12 +366,11 @@ def read_ssb_file(
395366 undermappe = undermappe ,
396367 version_number = version_number ,
397368 filetype = filetype ,
398- fs = fs ,
399369 )
400370
401371 if not version_number :
402372 # If version number not specified then list out versions.
403- files = _get_files (file_path , filetype , fs = fs )
373+ files = _get_files (file_path , filetype )
404374 # If list is empty, no matching files of any version were found.
405375 if not files :
406376 raise FileNotFoundError (
@@ -411,17 +381,9 @@ def read_ssb_file(
411381
412382 # Different functions used for reading depending on the filetype.
413383 if filetype == "csv" :
414- if fs :
415- # Samme som tidligere kan brukes til å lese alle filformater.
416- with fs .open (file_path , "r" ) as f :
417- df = pd .read_csv (f , sep = seperator , encoding = encoding , usecols = columns )
418- f .close ()
419- else :
420- df = pd .read_csv (
421- file_path , sep = seperator , encoding = encoding , usecols = columns
422- )
384+ df = pd .read_csv (file_path , sep = seperator , encoding = encoding , usecols = columns )
423385 elif filetype == "parquet" :
424- df = pd .read_parquet (file_path , columns = columns , filesystem = fs )
386+ df = pd .read_parquet (file_path , columns = columns )
425387 elif filetype == "jsonl" :
426388 if columns is not None :
427389 warnings .warn (
0 commit comments