Skip to content

Commit efd8891

Browse files
committed
stricter style, zip output, upgrade
1 parent a583189 commit efd8891

5 files changed

Lines changed: 348 additions & 86 deletions

File tree

odk-central-sync/README.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,3 +26,13 @@ uv run download-results --project-id=14
2626
- `--project-id` (required) — ODK Central project ID
2727
- `--output-dir` — Directory for output files (default: `results-output`)
2828
- `--config` — Path to pyODK config file (default: `.pyodk_config.toml`)
29+
- `--bundle` — Create a timestamped upload bundle in the output directory (default: `true`). Pass `--bundle=false` to skip.
30+
31+
## Output
32+
33+
Each run writes the following into `--output-dir`:
34+
35+
- `candidate_results.csv` — combined candidate results across all centers
36+
- `<center_id>/results.zip` — raw ODK Central export per center (cached; reused on reruns)
37+
- `media/` — extracted images from all centers, filenames prefixed by `center_id`
38+
- `results_export_p<project_id>_<timestamp>.zip` — upload bundle containing `candidate_results.csv` and `media/`. This is the file to upload to the results system for integration with other results. Disable with `--bundle=false`.

odk-central-sync/pyproject.toml

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,4 +24,29 @@ packages = ["src"]
2424
[dependency-groups]
2525
dev = [
2626
"pytest>=9.0.2",
27+
"ruff>=0.8",
2728
]
29+
30+
[tool.ruff]
31+
line-length = 100
32+
target-version = "py313"
33+
34+
[tool.ruff.lint]
35+
select = [
36+
"E", # pycodestyle errors
37+
"W", # pycodestyle warnings
38+
"F", # pyflakes
39+
"I", # isort
40+
"B", # flake8-bugbear
41+
"UP", # pyupgrade
42+
"SIM", # flake8-simplify
43+
"TID", # tidy imports
44+
]
45+
46+
[tool.ruff.lint.isort]
47+
force-single-line = true
48+
force-sort-within-sections = true
49+
lines-after-imports = 2
50+
51+
[tool.ruff.lint.flake8-tidy-imports]
52+
ban-relative-imports = "parents"

odk-central-sync/src/download_results_forms.py

Lines changed: 68 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,22 @@
1-
import zipfile
1+
from datetime import datetime
2+
import logging
23
from pathlib import Path
4+
import zipfile
35

46
import click
57
import pandas as pd
68
from pyodk.client import Client
79
from pyodk.errors import PyODKError
8-
from requests.exceptions import ConnectionError, Timeout
10+
from requests.exceptions import ConnectionError
11+
from requests.exceptions import Timeout
912
from rich.console import Console
1013
from rich.progress import Progress
11-
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type, before_sleep_log
14+
from tenacity import before_sleep_log
15+
from tenacity import retry
16+
from tenacity import retry_if_exception_type
17+
from tenacity import stop_after_attempt
18+
from tenacity import wait_exponential
1219

13-
import logging
1420

1521
log = logging.getLogger(__name__)
1622

@@ -24,7 +30,12 @@
2430
" 1) Create a pyODK config file (default: .pyodk_config.toml).\n"
2531
" https://getodk.github.io/pyodk/#configure\n\n"
2632
"Example:\n"
27-
" uv run download-results --project-id=5 11034 11035 11036"
33+
" uv run download-results --project-id=5 11034 11035 11036\n\n"
34+
"Output:\n"
35+
" - candidate_results.csv: combined results across all centers\n"
36+
" - media/: extracted images, filenames prefixed by center_id\n"
37+
" - results_export_p<project_id>_<timestamp>.zip: upload bundle\n"
38+
" (CSV + media/). Disable with --bundle=false."
2839
)
2940

3041

@@ -153,6 +164,32 @@ def export_center_candidate_results(
153164
return pd.concat(candidate_results, ignore_index=True)
154165

155166

167+
def create_upload_bundle(
    output_dir: Path,
    csv_path: Path,
    media_dir: Path,
    project_id: int,
) -> Path:
    """
    Package the candidate results CSV and media files into a timestamped ZIP
    for upload to the results system.

    The bundle is written to output_dir as results_export_p<project_id>_<timestamp>.zip
    with no compression (ZIP_STORED) since media is already compressed.

    The archive is first written under a temporary ``.partial`` name and only
    renamed to its final name once it is complete, so a failure part-way through
    never leaves behind a truncated file that looks like a valid upload bundle.

    Args:
        output_dir: Existing directory the bundle is created in.
        csv_path: CSV file stored at the root of the archive under its own file name.
        media_dir: Directory whose immediate regular files are stored under ``media/``.
            Subdirectories are skipped; a missing path (or a path that is not a
            directory) simply yields a bundle without media.
        project_id: Project ID embedded in the bundle file name.

    Returns:
        Path of the finished bundle inside output_dir.

    Raises:
        OSError: If the CSV cannot be read or the archive cannot be written
            (e.g. FileNotFoundError for a missing CSV). No bundle file is left behind.
    """
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    bundle_path = output_dir / f"results_export_p{project_id}_{timestamp}.zip"
    # The suffix keeps the in-progress file from matching "results_export_p*.zip".
    partial_path = bundle_path.with_name(f"{bundle_path.name}.partial")

    try:
        with zipfile.ZipFile(partial_path, "w", zipfile.ZIP_STORED) as zf:
            zf.write(csv_path, arcname=csv_path.name)
            # is_dir() rather than exists(): a stray file named like the media
            # directory must not make iterdir() raise.
            if media_dir.is_dir():
                # Sorted so the archive member order is deterministic.
                for media_file in sorted(media_dir.iterdir()):
                    if media_file.is_file():
                        zf.write(media_file, arcname=f"media/{media_file.name}")
        # Publish under the final name only after the archive was closed cleanly.
        partial_path.replace(bundle_path)
    except BaseException:
        # Remove the incomplete archive (also on Ctrl-C), then propagate the error.
        partial_path.unlink(missing_ok=True)
        raise

    return bundle_path
191+
192+
156193
@click.command(help=CLI_HELP, epilog=CLI_EPILOG)
157194
@click.option("--project-id", type=int, required=True, help="ODK Central project ID")
158195
@click.argument("center-ids", type=click.INT, nargs=-1)
@@ -170,7 +207,23 @@ def export_center_candidate_results(
170207
show_default=True,
171208
help="Path to pyODK config file. See https://getodk.github.io/pyodk/#configure",
172209
)
173-
def main(project_id: int, center_ids: tuple[int, ...], output_dir: Path, config: Path):
210+
@click.option(
211+
"--bundle",
212+
type=bool,
213+
default=True,
214+
show_default=True,
215+
help=(
216+
"Create a timestamped ZIP bundle (candidate_results.csv + media/) in the "
217+
"output directory for upload to the results system. Pass --bundle=false to disable."
218+
),
219+
)
220+
def main(
221+
project_id: int,
222+
center_ids: tuple[int, ...],
223+
output_dir: Path,
224+
config: Path,
225+
bundle: bool,
226+
):
174227
output_dir.mkdir(parents=True, exist_ok=True)
175228
output_csv = output_dir / "candidate_results.csv"
176229

@@ -197,6 +250,15 @@ def main(project_id: int, center_ids: tuple[int, ...], output_dir: Path, config:
197250
results_df.to_csv(output_csv, index=False)
198251
console.log(f"Saved {len(results_df)} rows to {output_csv}")
199252

253+
if bundle:
254+
bundle_path = create_upload_bundle(
255+
output_dir=output_dir,
256+
csv_path=output_csv,
257+
media_dir=output_dir / "media",
258+
project_id=project_id,
259+
)
260+
console.log(f"Created upload bundle: {bundle_path}")
261+
200262

201263
if __name__ == "__main__":
202264
main()

odk-central-sync/tests/test_download_results_forms.py

Lines changed: 144 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,17 @@
11
import io
2+
import re
3+
from unittest.mock import MagicMock
4+
from unittest.mock import patch
25
import zipfile
3-
from unittest.mock import MagicMock, patch
46

5-
import pytest
67
from click.testing import CliRunner
7-
88
from pyodk.errors import PyODKError
9+
import pytest
910

10-
from src.download_results_forms import (
11-
export_center_candidate_results,
12-
export_form_submissions,
13-
main,
14-
)
11+
from src.download_results_forms import create_upload_bundle
12+
from src.download_results_forms import export_center_candidate_results
13+
from src.download_results_forms import export_form_submissions
14+
from src.download_results_forms import main
1515

1616

1717
def make_test_zip(center_id, include_media=True):
@@ -220,6 +220,89 @@ def test_skips_existing_valid_zip(self, output_dir):
220220
assert len(df) == 2
221221

222222

223+
class TestCreateUploadBundle:
    """Unit tests for create_upload_bundle, run against a throwaway output directory."""

    @staticmethod
    def _prepare_output(tmp_path):
        """Create the output directory holding a tiny candidate_results.csv; return both paths."""
        out = tmp_path / "output"
        out.mkdir()
        csv_file = out / "candidate_results.csv"
        csv_file.write_text("a,b\n1,2\n")
        return out, csv_file

    def test_bundle_contains_csv_and_media(self, tmp_path):
        """The CSV and every media file are archived, all stored uncompressed."""
        out, csv_file = self._prepare_output(tmp_path)
        media = out / "media"
        media.mkdir()
        for filename, payload in (("100_sig.jpg", b"img1"), ("100_page.jpg", b"img2")):
            (media / filename).write_bytes(payload)

        bundle = create_upload_bundle(
            output_dir=out,
            csv_path=csv_file,
            media_dir=media,
            project_id=14,
        )

        assert bundle.exists()
        assert bundle.parent == out
        with zipfile.ZipFile(bundle) as archive:
            members = set(archive.namelist())
            assert "candidate_results.csv" in members
            assert "media/100_sig.jpg" in members
            assert "media/100_page.jpg" in members
            # ZIP_STORED = no compression
            assert all(
                entry.compress_type == zipfile.ZIP_STORED for entry in archive.infolist()
            )

    def test_bundle_filename_has_project_id_and_timestamp(self, tmp_path):
        """The file name carries the project id and a YYYYMMDD_HHMMSS timestamp."""
        out, csv_file = self._prepare_output(tmp_path)

        bundle = create_upload_bundle(
            output_dir=out,
            csv_path=csv_file,
            media_dir=out / "media",
            project_id=42,
        )

        expected_name = re.compile(r"results_export_p42_\d{8}_\d{6}\.zip")
        assert expected_name.fullmatch(bundle.name)

    def test_bundle_handles_missing_media_dir(self, tmp_path):
        """With no media directory at all, the bundle contains only the CSV."""
        out, csv_file = self._prepare_output(tmp_path)

        bundle = create_upload_bundle(
            output_dir=out,
            csv_path=csv_file,
            media_dir=out / "media",
            project_id=1,
        )

        assert bundle.exists()
        with zipfile.ZipFile(bundle) as archive:
            members = set(archive.namelist())
        assert members == {"candidate_results.csv"}

    def test_bundle_handles_empty_media_dir(self, tmp_path):
        """An existing but empty media directory adds nothing to the bundle."""
        out, csv_file = self._prepare_output(tmp_path)
        media = out / "media"
        media.mkdir()

        bundle = create_upload_bundle(
            output_dir=out,
            csv_path=csv_file,
            media_dir=media,
            project_id=1,
        )

        with zipfile.ZipFile(bundle) as archive:
            members = set(archive.namelist())
        assert members == {"candidate_results.csv"}
304+
305+
223306
class TestCLI:
224307
def test_with_center_ids(self, output_dir):
225308
zip_bytes = make_test_zip(100)
@@ -272,6 +355,59 @@ def test_auto_discovers_centers(self, output_dir):
272355
mock_client.forms.list.assert_called_once_with(project_id=1)
273356
assert (output_dir / "candidate_results.csv").exists()
274357

358+
def test_creates_bundle_by_default(self, output_dir):
    """Without --bundle on the command line, exactly one upload bundle is produced."""
    # Fake HTTP response carrying the export ZIP for center 100.
    response = MagicMock(content=make_test_zip(100))
    response.raise_for_status = MagicMock()

    # Fake pyODK client that also works as a context manager returning itself.
    client = MagicMock()
    client.get.return_value = response
    client.__enter__.return_value = client
    client.__exit__.return_value = False

    cli_args = ["--project-id=14", "--output-dir", str(output_dir), "100"]
    with patch("src.download_results_forms.Client", return_value=client):
        outcome = CliRunner().invoke(main, cli_args)

    assert outcome.exit_code == 0
    produced = list(output_dir.glob("results_export_p14_*.zip"))
    assert len(produced) == 1
380+
381+
def test_bundle_false_skips_bundle(self, output_dir):
    """With --bundle=false the CSV is still written but no upload bundle appears."""
    # Fake HTTP response carrying the export ZIP for center 100.
    response = MagicMock(content=make_test_zip(100))
    response.raise_for_status = MagicMock()

    # Fake pyODK client that also works as a context manager returning itself.
    client = MagicMock()
    client.get.return_value = response
    client.__enter__.return_value = client
    client.__exit__.return_value = False

    cli_args = [
        "--project-id=14",
        "--output-dir",
        str(output_dir),
        "--bundle=false",
        "100",
    ]
    with patch("src.download_results_forms.Client", return_value=client):
        outcome = CliRunner().invoke(main, cli_args)

    assert outcome.exit_code == 0
    assert (output_dir / "candidate_results.csv").exists()
    produced = list(output_dir.glob("results_export_p*.zip"))
    assert len(produced) == 0
410+
275411
def test_pyodk_error_exits_gracefully(self, output_dir):
276412
mock_client = MagicMock()
277413
mock_client.__enter__ = MagicMock(return_value=mock_client)

0 commit comments

Comments
 (0)