Skip to content

Commit 5834cf7

Browse files
authored
Merge pull request #103 from statisticsnorway/hvr_savings_kolonne
Hvr savings kolonne
2 parents 1565aae + 64be4da commit 5834cf7

File tree

1 file changed

+19
-4
lines changed

1 file changed

+19
-4
lines changed

src/ssb_konjunk/saving.py

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
import glob
77
import re
8+
import warnings
89

910
import dapla
1011
import pandas as pd
@@ -350,6 +351,7 @@ def read_ssb_file(
350351
datatilstand: str = "",
351352
undermappe: str | None = None,
352353
filetype: str = "parquet",
354+
columns: list[str] | None = None,
353355
version_number: int | None = None,
354356
fs: dapla.gcs.GCSFileSystem | None = None,
355357
seperator: str = ";",
@@ -371,6 +373,7 @@ def read_ssb_file(
371373
undermappe: Optional folder under 'datatilstand'.
372374
version_number: version to read instead of the newest (i.e. highest version number). Default: None.
373375
filetype: the filetype to read. Default: 'parquet'.
376+
columns: Columns to read from the file. If None (default), all columns are read.
374377
fs: the filesystem, pass a GCS filesystem if on Dapla. Default: None.
375378
seperator: the separator to use if filetype is csv. Default: ';'.
376379
encoding: Encoding for file, base is latin1.
@@ -411,15 +414,27 @@ def read_ssb_file(
411414
if fs:
412415
# Samme som tidligere kan brukes til å lese alle filformater.
413416
with fs.open(file_path, "r") as f:
414-
df = pd.read_csv(f, sep=seperator, encoding=encoding)
417+
df = pd.read_csv(f, sep=seperator, encoding=encoding, usecols=columns)
415418
f.close()
416419
else:
417-
df = pd.read_csv(file_path, sep=seperator, encoding=encoding)
420+
df = pd.read_csv(
421+
file_path, sep=seperator, encoding=encoding, usecols=columns
422+
)
418423
elif filetype == "parquet":
419-
df = pd.read_parquet(file_path, filesystem=fs)
424+
df = pd.read_parquet(file_path, columns=columns, filesystem=fs)
420425
elif filetype == "jsonl":
421-
df = pd.read_json(file_path, lines=True)
426+
if columns is not None:
427+
warnings.warn(
428+
f"Columns argumentet blir ignorert for {filetype} filer, hele filen vil bli lastet inn.",
429+
stacklevel=2,
430+
)
431+
df = pd.read_json(file_path, lines=False)
422432
elif filetype == "json":
433+
if columns is not None:
434+
warnings.warn(
435+
f"Columns argumentet blir ignorert for {filetype} filer, hele filen vil bli lastet inn.",
436+
stacklevel=2,
437+
)
423438
df = pd.read_json(file_path, lines=False)
424439
# Returns pandas df.
425440
return df

0 commit comments

Comments
 (0)