Skip to content

Commit 722cef9

Browse files
authored
Remove dapla toolbelt (#106)
* Removed dapla-toolbelt * Removed dapla-toolbelt in saving.py * Removed dapla-toolbelt * Remove dapla-toolbelt * pre-commit * Docstring * Breaking changes 2.0.0 * Added typehinting function in fame.py
1 parent 0a2d70c commit 722cef9

File tree

6 files changed

+68
-2154
lines changed

6 files changed

+68
-2154
lines changed

poetry.lock

Lines changed: 48 additions & 2078 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "ssb-konjunk"
3-
version = "1.0.1"
3+
version = "2.0.0"
44
description = "SSB Konjunk 422"
55
authors = ["Johanne Saxegaard <jox@ssb.no>"]
66
license = "MIT"
@@ -18,7 +18,6 @@ python = ">=3.10, <4.0"
1818
click = ">=8.0.1"
1919
pandas = ">=2.2.0"
2020
pendulum = ">=3.0.0"
21-
dapla-toolbelt = ">=3.0.0"
2221
pandas-stubs = ">=2.2.2.240807"
2322

2423
[tool.poetry.group.dev.dependencies]
@@ -35,7 +34,6 @@ pytest = ">=6.2.5"
3534
sphinx = ">=6.2.1"
3635
sphinx-autobuild = ">=2021.3.14"
3736
sphinx-autodoc-typehints = ">=1.24.0"
38-
sphinx-click = ">=3.0.2"
3937
typeguard = ">=2.13.3"
4038
xdoctest = { extras = ["colors"], version = ">=0.15.10" }
4139
myst-parser = { version = ">=0.16.1" }

src/ssb_konjunk/fame.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,9 @@
66
"""
77

88
# Importing external packages
9-
import pandas as pd
10-
from dapla import FileClient
9+
from __future__ import annotations
1110

12-
# Getting filesystem
13-
fs = FileClient.get_gcs_file_system()
11+
import pandas as pd
1412

1513

1614
def change_date_format_fame(series: pd.Series[str]) -> pd.Series[str]:
@@ -34,17 +32,17 @@ def write_out_fame_format_txt(
3432
names: pd.Series[str],
3533
dates: pd.Series[str],
3634
values: pd.Series[float],
37-
gcp_path: str,
35+
path: str,
3836
) -> None:
3937
"""Function to write out txt file in fame format.
4038
4139
Args:
4240
names: Pandas series containing name or type for value.
4341
dates: Pandas series containing date for values.
4442
values: Pandas series containing values.
45-
gcp_path: String to google cloud.
43+
path: String to output file.
4644
"""
47-
with fs.open(gcp_path, "w") as f:
45+
with open(path, "w") as f:
4846
# Write data rows
4947
for name, date, value in zip(names, dates, values, strict=False):
5048
# Apply format specification

src/ssb_konjunk/saving.py

Lines changed: 10 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
import re
88
import warnings
99

10-
import dapla
1110
import pandas as pd
1211

1312
from ssb_konjunk import timestamp
@@ -40,7 +39,6 @@ def _structure_ssb_filepath(
4039
undermappe: str | None = None,
4140
version_number: int | None = None,
4241
filetype: str = "parquet",
43-
fs: dapla.gcs.GCSFileSystem | None = None,
4442
) -> str:
4543
"""Structure the name of the file to SSB-format and the path.
4644
@@ -54,19 +52,14 @@ def _structure_ssb_filepath(
5452
undermappe: Optional string if you want folders between 'datatilstand' and file.
5553
version_number: Optional int for reading specific file.
5654
filetype: String with default 'parquet', specifies file type.
57-
fs: the filesystem, pass with gsc Filesystem if Dapla. Default: None.
5855
5956
Returns:
6057
str: the full path to the file.
6158
6259
Raises:
6360
ValueError: Raise if version number is not None or int.
6461
"""
65-
# Handle that path starts with / in prodsonen.
66-
if fs is None:
67-
bucket = _remove_edge_slashes(bucket, only_last=True)
68-
else:
69-
bucket = _remove_edge_slashes(bucket)
62+
bucket = _remove_edge_slashes(bucket)
7063
kortnavn = _remove_edge_slashes(kortnavn)
7164
datatilstand = _remove_edge_slashes(datatilstand)
7265
file_name = _remove_edge_slashes(file_name)
@@ -96,17 +89,12 @@ def _structure_ssb_filepath(
9689
return file_path
9790

9891

99-
def _get_files(
100-
folder_path: str, filetype: str, fs: dapla.gcs.GCSFileSystem | None
101-
) -> list[str]:
92+
def _get_files(folder_path: str, filetype: str) -> list[str]:
10293
"""Function to list files in a folder based on base name and timestamp."""
10394
filenames = []
10495

10596
match_string = f"{folder_path}*"
106-
if fs:
107-
filenames = fs.glob(match_string)
108-
else:
109-
filenames = glob.glob(match_string)
97+
filenames = glob.glob(match_string)
11098

11199
# Only include files with the relevant file extension
112100
filenames = [i for i in filenames if i.endswith(filetype)]
@@ -238,28 +226,16 @@ def _save_df(
238226
df: pd.DataFrame,
239227
file_path: str,
240228
filetype: str,
241-
fs: dapla.gcs.GCSFileSystem | None,
242229
seperator: str,
243230
encoding: str,
244231
) -> None:
245232
"""Do the actual saving, either as csv or parquet."""
246233
# Save as parquet
247234
if filetype == "parquet":
248-
249-
if fs:
250-
with fs.open(file_path, "wb") as f:
251-
df.to_parquet(f, index=False)
252-
f.close()
253-
else:
254-
df.to_parquet(file_path, index=False)
235+
df.to_parquet(file_path, index=False)
255236
# Save as csv
256237
elif filetype == "csv":
257-
if fs:
258-
with fs.open(file_path, "wb") as f:
259-
df.to_csv(f, sep=seperator, index=False, encoding=encoding)
260-
f.close()
261-
else:
262-
df.to_csv(file_path, sep=seperator, index=False, encoding=encoding)
238+
df.to_csv(file_path, sep=seperator, index=False, encoding=encoding)
263239
# Save as jsonl
264240
elif filetype == "jsonl":
265241
df.to_json(file_path, orient="records", lines=True)
@@ -286,7 +262,6 @@ def write_ssb_file(
286262
undermappe: str | None = None,
287263
stable_version: bool = True,
288264
filetype: str = "parquet",
289-
fs: dapla.gcs.GCSFileSystem | None = None,
290265
seperator: str = ";",
291266
encoding: str = "latin1",
292267
) -> None:
@@ -303,7 +278,6 @@ def write_ssb_file(
303278
undermappe: Optional folder under 'datatilstand'.
304279
stable_version: Bool for whether you should have checks in place in case of overwrite.
305280
filetype: the filetype to save as. Default: 'parquet'.
306-
fs: the filesystem, pass with gsc Filesystem if Dapla. Default: None.
307281
seperator: the separator to use if filetype is csv. Default: ';'.
308282
encoding: Encoding for file, base is latin1.
309283
@@ -327,10 +301,9 @@ def write_ssb_file(
327301
datatilstand=datatilstand,
328302
file_name=file_name,
329303
undermappe=undermappe,
330-
fs=fs,
331304
)
332305
# Get list with the filenames, if several, ordered by the highest version number at last.
333-
files = _get_files(file_path, filetype, fs=fs)
306+
files = _get_files(file_path, filetype)
334307
# Find version number/decide whether to overwrite or make new version.
335308
version_number = _find_version_number(files, stable_version)
336309

@@ -339,7 +312,7 @@ def write_ssb_file(
339312
file_path = file_path[:-1]
340313
file_path = f"{file_path}_v{version_number}.{filetype}"
341314

342-
_save_df(df, file_path, filetype, fs, seperator, encoding)
315+
_save_df(df, file_path, filetype, seperator, encoding)
343316

344317

345318
def read_ssb_file(
@@ -353,7 +326,6 @@ def read_ssb_file(
353326
filetype: str = "parquet",
354327
columns: list[str] | None = None,
355328
version_number: int | None = None,
356-
fs: dapla.gcs.GCSFileSystem | None = None,
357329
seperator: str = ";",
358330
encoding: str = "latin1",
359331
) -> pd.DataFrame | None:
@@ -374,7 +346,6 @@ def read_ssb_file(
374346
version_number: possibility to get another version than the newest (i.e. highest version number). Default: np.nan.
375347
filetype: the filetype to save as. Default: 'parquet'.
376348
columns: Columns to read from the file. If None (default), all columns are read.
377-
fs: the filesystem, pass with gsc Filesystem if Dapla. Default: None.
378349
seperator: the separator to use if filetype is csv. Default: ';'.
379350
encoding: Encoding for file, base is latin1.
380351
@@ -395,12 +366,11 @@ def read_ssb_file(
395366
undermappe=undermappe,
396367
version_number=version_number,
397368
filetype=filetype,
398-
fs=fs,
399369
)
400370

401371
if not version_number:
402372
# If version number not specified then list out versions.
403-
files = _get_files(file_path, filetype, fs=fs)
373+
files = _get_files(file_path, filetype)
404374
# If list is empty, no matching files of any version were found.
405375
if not files:
406376
raise FileNotFoundError(
@@ -411,17 +381,9 @@ def read_ssb_file(
411381

412382
# Different functions used for reading depending on the filetype.
413383
if filetype == "csv":
414-
if fs:
415-
# Samme som tidligere kan brukes til å lese alle filformater.
416-
with fs.open(file_path, "r") as f:
417-
df = pd.read_csv(f, sep=seperator, encoding=encoding, usecols=columns)
418-
f.close()
419-
else:
420-
df = pd.read_csv(
421-
file_path, sep=seperator, encoding=encoding, usecols=columns
422-
)
384+
df = pd.read_csv(file_path, sep=seperator, encoding=encoding, usecols=columns)
423385
elif filetype == "parquet":
424-
df = pd.read_parquet(file_path, columns=columns, filesystem=fs)
386+
df = pd.read_parquet(file_path, columns=columns)
425387
elif filetype == "jsonl":
426388
if columns is not None:
427389
warnings.warn(

src/ssb_konjunk/xml_handling.py

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,27 +2,19 @@
22

33
import xml.etree.ElementTree as ET
44

5-
import dapla
65

7-
8-
def read_xml(xml_file: str, fs: dapla.gcs.GCSFileSystem | None = None) -> ET.Element:
6+
def read_xml(xml_file: str) -> ET.Element:
97
"""Funtion to get xml root from disk.
108
119
Args:
1210
xml_file: Strin value for xml filepath.
13-
fs: filesystem
1411
1512
Returns:
1613
ET.Element: Root of xml file.
1714
"""
18-
if fs:
19-
with fs.open(xml_file, mode="r") as file:
20-
single_xml = file.read()
21-
file.close()
22-
else:
23-
with open(xml_file) as file:
24-
single_xml = file.read()
25-
file.close()
15+
with open(xml_file) as file:
16+
single_xml = file.read()
17+
file.close()
2618

2719
return ET.fromstring(single_xml)
2820

tests/test_saving.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -120,9 +120,6 @@ def test_find_version_number() -> None:
120120

121121
assert _find_version_number(files, stable_version=False) == "0"
122122

123-
# Need to pass input, first n and then y. For running: pytest -s
124-
# assert _find_version_number(files, stable_version=True) == '3'
125-
126123

127124
def test_verify_base_filename() -> None:
128125
"""Test function _verify_base_filename."""
@@ -152,9 +149,6 @@ def test_verify_datatilstand() -> None:
152149
assert _verify_datatilstand("utdata") == "utdata"
153150
assert _verify_datatilstand("klargjorte-data") == "klargjorte-data"
154151

155-
# Need to pass input, inndata. For running: pytest -s
156-
# assert _verify_datatilstand('overnatting') == 'inndata'
157-
158152

159153
def test_verify_list_filtypes() -> None:
160154
"""Test of function _structure_ssb_filepath without version number."""

0 commit comments

Comments
 (0)