55
66import glob
77import re
8+ import warnings
89
910import dapla
1011import pandas as pd
@@ -350,6 +351,7 @@ def read_ssb_file(
350351 datatilstand : str = "" ,
351352 undermappe : str | None = None ,
352353 filetype : str = "parquet" ,
354+ columns : list [str ] | None = None ,
353355 version_number : int | None = None ,
354356 fs : dapla .gcs .GCSFileSystem | None = None ,
355357 seperator : str = ";" ,
@@ -371,6 +373,7 @@ def read_ssb_file(
371373 undermappe: Optional folder under 'datatilstand'.
372374 version_number: possibility to get a version other than the newest (i.e. highest version number). Default: None.
373375 filetype: the filetype to read. Default: 'parquet'.
376+ columns: Columns to read from the file. If None (default), all columns are read.
374377 fs: the filesystem; pass a GCSFileSystem if on Dapla. Default: None.
375378 seperator: the separator to use if filetype is csv. Default: ';'.
376379 encoding: Encoding for file, base is latin1.
@@ -411,15 +414,27 @@ def read_ssb_file(
411414 if fs :
412415 # Samme som tidligere kan brukes til å lese alle filformater.
413416 with fs .open (file_path , "r" ) as f :
414- df = pd .read_csv (f , sep = seperator , encoding = encoding )
417+ df = pd .read_csv (f , sep = seperator , encoding = encoding , usecols = columns )
415418 f .close ()
416419 else :
417- df = pd .read_csv (file_path , sep = seperator , encoding = encoding )
420+ df = pd .read_csv (
421+ file_path , sep = seperator , encoding = encoding , usecols = columns
422+ )
418423 elif filetype == "parquet" :
419- df = pd .read_parquet (file_path , filesystem = fs )
424+ df = pd .read_parquet (file_path , columns = columns , filesystem = fs )
420425 elif filetype == "jsonl" :
421- df = pd .read_json (file_path , lines = True )
426+ if columns is not None :
427+ warnings .warn (
428+ f"Columns argumentet blir ignorert for { filetype } filer, hele filen vil bli lastet inn." ,
429+ stacklevel = 2 ,
430+ )
431+ df = pd .read_json (file_path , lines = False )
422432 elif filetype == "json" :
433+ if columns is not None :
434+ warnings .warn (
435+ f"Columns argumentet blir ignorert for { filetype } filer, hele filen vil bli lastet inn." ,
436+ stacklevel = 2 ,
437+ )
423438 df = pd .read_json (file_path , lines = False )
424439 # Returns pandas df.
425440 return df
0 commit comments