Skip to content

Commit 11673c5

Browse files
committed
Update CLI to support both .zip and .eml.xml files
1 parent 5df7a39 commit 11673c5

2 files changed

Lines changed: 42 additions & 27 deletions

File tree

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,10 @@ build-backend = "hatchling.build"
1818
package = true
1919

2020
[tool.hatch.build.targets.sdist.force-include]
21-
"data/zip_to_neighbourhood_2023.parquet" = "data/zip_to_neighbourhood_2023.parquet"
21+
"data/zip_to_neighbourhood_2024.parquet" = "data/zip_to_neighbourhood_2024.parquet"
2222

2323
[tool.hatch.build.targets.wheel.force-include]
24-
"data/zip_to_neighbourhood_2023.parquet" = "data/zip_to_neighbourhood_2023.parquet"
24+
"data/zip_to_neighbourhood_2024.parquet" = "data/zip_to_neighbourhood_2024.parquet"
2525

2626
[project.scripts]
2727
hcp = "hcp.cli:start"

src/hcp/cli.py

Lines changed: 40 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -41,36 +41,51 @@ def start():
4141
if not neighbourhood_file.exists():
4242
raise RuntimeError("Could not find specified or bundled neighbourhood file!")
4343

44-
with ZipFile(args.data_source, "r") as outer_zipfile:
45-
# Find and extract the .eml.xml and .odt file
46-
odt_zipinfo = next(
47-
f for f in outer_zipfile.filelist if f.filename.endswith(".odt")
48-
)
49-
inner_zipinfo = next(
50-
f for f in outer_zipfile.filelist if f.filename.endswith(".zip")
51-
)
52-
with ZipFile(outer_zipfile.open(inner_zipinfo), "r") as inner_zipfile:
53-
eml_zipinfo = next(
54-
f for f in inner_zipfile.filelist if f.filename.endswith(".eml.xml")
44+
file_suffix = Path(args.data_source).suffix
45+
# If we were supplied a zip file we unpack it and use the supplied odt
46+
if file_suffix == ".zip":
47+
with ZipFile(args.data_source, "r") as outer_zipfile:
48+
# Find and extract the .eml.xml and .odt file
49+
odt_zipinfo = next(
50+
f for f in outer_zipfile.filelist if f.filename.endswith(".odt")
51+
)
52+
inner_zipinfo = next(
53+
f for f in outer_zipfile.filelist if f.filename.endswith(".zip")
5554
)
56-
inner_zipfile.extract(eml_zipinfo, extract_path)
57-
outer_zipfile.extract(odt_zipinfo, extract_path)
55+
with ZipFile(outer_zipfile.open(inner_zipinfo), "r") as inner_zipfile:
56+
eml_zipinfo = next(
57+
f for f in inner_zipfile.filelist if f.filename.endswith(".eml.xml")
58+
)
59+
inner_zipfile.extract(eml_zipinfo, extract_path)
60+
outer_zipfile.extract(odt_zipinfo, extract_path)
5861

59-
# Run HCP
62+
# Run HCP
63+
create_csv_files(
64+
path_to_xml=str(extract_path / eml_zipinfo.filename),
65+
path_to_odt=str(extract_path / odt_zipinfo.filename),
66+
path_to_neighbourhood_data=str(neighbourhood_file),
67+
dest_a="a.csv",
68+
dest_b="b.csv",
69+
dest_c="c.csv",
70+
)
71+
72+
# Clean up after ourselves
73+
remove(extract_path / eml_zipinfo.filename)
74+
remove(extract_path / odt_zipinfo.filename)
75+
try:
76+
rmdir(extract_path)
77+
except OSError as error:
78+
print(error)
79+
print(f"Not deleting {extract_path}, could not clean up")
80+
# If we are given an eml file we have nothing to unpack and don't use the odt
81+
elif file_suffix == ".xml":
6082
create_csv_files(
61-
path_to_xml=str(extract_path / eml_zipinfo.filename),
62-
path_to_odt=str(extract_path / odt_zipinfo.filename),
83+
path_to_xml=args.data_source,
84+
path_to_odt=None,
6385
path_to_neighbourhood_data=str(neighbourhood_file),
6486
dest_a="a.csv",
6587
dest_b="b.csv",
6688
dest_c="c.csv",
6789
)
68-
69-
# Clean up after ourselves
70-
remove(extract_path / eml_zipinfo.filename)
71-
remove(extract_path / odt_zipinfo.filename)
72-
try:
73-
rmdir(extract_path)
74-
except OSError as error:
75-
print(error)
76-
print(f"Not deleting {extract_path}, could not clean up")
90+
else:
91+
raise RuntimeError("Please specify either a .zip file or .eml.xml file!")

0 commit comments

Comments
 (0)