Skip to content

Commit 750a398

Browse files
committed
get the recon data too
1 parent a52976c commit 750a398

3 files changed

Lines changed: 187 additions & 13 deletions

File tree

odk-central-sync/README.md

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,3 +36,28 @@ Each run writes the following into `--output-dir`:
3636
- `<center_id>/results.zip` — raw ODK Central export per center (cached; reused on reruns)
3737
- `media/` — extracted images from all centers, filenames prefixed by `center_id`
3838
- `results_export_p<project_id>_<timestamp>.zip` — upload bundle containing `candidate_results.csv` and `media/`. This is the file to upload to the results system for integration with other results. Disable with `--bundle=false`.
39+
40+
### `candidate_results.csv` columns
41+
42+
One row per candidate per submission. Columns:
43+
44+
- Candidate fields from the ODK `candidate_results` repeat:
45+
`pos`, `candidate_id`, `candidate_order`, `candidate_name`,
46+
`candidate_result_round1`, `candidate_result_round2` (flattened from the
47+
ODK `candidate_result_r2-candidate_result_round2` path),
48+
`candidate_result_r2-result_note`, `PARENT_KEY`, `KEY`.
49+
- Provenance: `xml_form_id`, `center_id`.
50+
- Submission fields joined on `PARENT_KEY` = `meta-instanceID`:
51+
`station_number`, `staff_user_name`, `ballot_number`, `race_type`.
52+
- **`barcode`** — the PVP scanned barcode (renamed from the ODK
53+
`intro-barcode` field). String type to preserve leading zeros.
54+
- **Reconciliation fields** — both rounds (r1 and r2) captured by the PVP
55+
device, five fields each:
56+
`reconciliation_r1-number_ballots_received_r1`,
57+
`reconciliation_r1-number_voter_cards_r1`,
58+
`reconciliation_r1-number_valid_ballots_r1`,
59+
`reconciliation_r1-number_invalid_ballots_r1`,
60+
`reconciliation_r1-number_ballots_inside_box_r1`,
61+
and the equivalent `reconciliation_r2-*` columns.
62+
- Image filenames (prefixed with `center_id`): `clerk_signature`,
63+
`forms_picture_1st_page`, `forms_picture_2nd_page`.

odk-central-sync/src/download_results_forms.py

Lines changed: 45 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,21 @@ def export_center_candidate_results(
8383
"forms_picture_1st_page",
8484
"forms_picture_2nd_page",
8585
]
86+
# PVP reconciliation fields. PVP devices collect two passes (r1, r2) of the
87+
# reconciliation data per submission. Downstream tally-ho uses r2 but we
88+
# export both so the raw data is preserved for review.
89+
RECON_COLUMNS = [
90+
"reconciliation_r1-number_ballots_received_r1",
91+
"reconciliation_r1-number_voter_cards_r1",
92+
"reconciliation_r1-number_valid_ballots_r1",
93+
"reconciliation_r1-number_invalid_ballots_r1",
94+
"reconciliation_r1-number_ballots_inside_box_r1",
95+
"reconciliation_r2-number_ballots_received_r2",
96+
"reconciliation_r2-number_voter_cards_r2",
97+
"reconciliation_r2-number_valid_ballots_r2",
98+
"reconciliation_r2-number_invalid_ballots_r2",
99+
"reconciliation_r2-number_ballots_inside_box_r2",
100+
]
86101

87102
candidate_results: list[pd.DataFrame] = []
88103
media_dir = output_dir / "media"
@@ -114,6 +129,15 @@ def export_center_candidate_results(
114129
candidates_path = Path(f"{xml_form_id}-candidate_results.csv")
115130
console.log(f"Extracting {candidates_path}")
116131
df = pd.read_csv(archive.open(str(candidates_path)))
132+
# PVP stores the second round of candidate votes under a
133+
# hyphenated group path; flatten it to candidate_result_round2
134+
# to match the round1 column naming.
135+
df = df.rename(
136+
columns={
137+
"candidate_result_r2-candidate_result_round2":
138+
"candidate_result_round2",
139+
}
140+
)
117141
df["xml_form_id"] = xml_form_id
118142
df["center_id"] = center_id
119143

@@ -142,21 +166,32 @@ def export_center_candidate_results(
142166
if pd.notna(v) else v
143167
)
144168

169+
required_cols = [
170+
"meta-instanceID",
171+
"station_number",
172+
"staff_user_name",
173+
"ballot_number",
174+
"race_type",
175+
]
176+
optional_cols = (
177+
["intro-barcode"] + RECON_COLUMNS + IMAGE_COLUMNS
178+
)
179+
selected_cols = required_cols + [
180+
c for c in optional_cols if c in submissions_df.columns
181+
]
145182
df = df.merge(
146-
submissions_df[
147-
[
148-
"meta-instanceID",
149-
"station_number",
150-
"staff_user_name",
151-
"ballot_number",
152-
"race_type",
153-
]
154-
+ [c for c in IMAGE_COLUMNS if c in submissions_df.columns]
155-
],
183+
submissions_df[selected_cols],
156184
left_on="PARENT_KEY",
157185
right_on="meta-instanceID",
158186
how="left",
159187
).drop(columns=["meta-instanceID"])
188+
# Rename the PVP barcode column to a plain "barcode" for
189+
# downstream consumers (tally-ho upload matches on barcode).
190+
# Coerce to string so numeric-looking barcodes keep leading
191+
# zeros and match ResultForm.barcode (CharField).
192+
if "intro-barcode" in df.columns:
193+
df = df.rename(columns={"intro-barcode": "barcode"})
194+
df["barcode"] = df["barcode"].astype("string")
160195
candidate_results.append(df)
161196

162197
progress.advance(task)

odk-central-sync/tests/test_download_results_forms.py

Lines changed: 117 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,32 @@ def make_test_zip(center_id, include_media=True):
1919
form_id = f"results_{center_id}"
2020

2121
candidates_csv = (
22-
"pos,candidate_id,candidate_name,candidate_result_round1,PARENT_KEY,KEY\n"
23-
f"1,101,Alice,25,uuid:abc-{center_id},uuid:abc-{center_id}/candidate_results[1]\n"
24-
f"2,102,Bob,30,uuid:abc-{center_id},uuid:abc-{center_id}/candidate_results[2]\n"
22+
"pos,candidate_id,candidate_name,"
23+
"candidate_result_round1,candidate_result_r2-candidate_result_round2,"
24+
"PARENT_KEY,KEY\n"
25+
f"1,101,Alice,25,26,uuid:abc-{center_id},"
26+
f"uuid:abc-{center_id}/candidate_results[1]\n"
27+
f"2,102,Bob,30,31,uuid:abc-{center_id},"
28+
f"uuid:abc-{center_id}/candidate_results[2]\n"
2529
)
2630
submissions_csv = (
2731
"meta-instanceID,station_number,staff_user_name,ballot_number,race_type,"
32+
"intro-barcode,"
33+
"reconciliation_r1-number_ballots_received_r1,"
34+
"reconciliation_r1-number_voter_cards_r1,"
35+
"reconciliation_r1-number_valid_ballots_r1,"
36+
"reconciliation_r1-number_invalid_ballots_r1,"
37+
"reconciliation_r1-number_ballots_inside_box_r1,"
38+
"reconciliation_r2-number_ballots_received_r2,"
39+
"reconciliation_r2-number_voter_cards_r2,"
40+
"reconciliation_r2-number_valid_ballots_r2,"
41+
"reconciliation_r2-number_invalid_ballots_r2,"
42+
"reconciliation_r2-number_ballots_inside_box_r2,"
2843
"clerk_signature,forms_picture_1st_page,forms_picture_2nd_page\n"
2944
f"uuid:abc-{center_id},3,tester,1313,Individual,"
45+
f"{center_id}003001,"
46+
"200,150,140,10,150,"
47+
"204,149,139,10,149,"
3048
f"sig_{center_id}.jpg,page1_{center_id}.jpg,\n"
3149
)
3250

@@ -197,6 +215,102 @@ def test_image_columns_have_prefixed_filenames(self, output_dir):
197215
assert df["clerk_signature"].iloc[0] == f"{center_id}_sig_{center_id}.jpg"
198216
assert df["forms_picture_1st_page"].iloc[0] == f"{center_id}_page1_{center_id}.jpg"
199217

218+
def test_extracts_barcode(self, output_dir):
219+
center_id = 100
220+
zip_bytes = make_test_zip(center_id)
221+
222+
mock_response = MagicMock()
223+
mock_response.content = zip_bytes
224+
mock_response.raise_for_status = MagicMock()
225+
226+
mock_client = MagicMock()
227+
mock_client.get.return_value = mock_response
228+
229+
df = export_center_candidate_results(
230+
client=mock_client,
231+
project_id=1,
232+
center_ids=[center_id],
233+
output_dir=output_dir,
234+
)
235+
236+
assert "barcode" in df.columns
237+
assert list(df["barcode"]) == [f"{center_id}003001"] * 2
238+
# Original intro-barcode should have been renamed
239+
assert "intro-barcode" not in df.columns
240+
241+
def test_extracts_round2_candidate_votes(self, output_dir):
242+
center_id = 100
243+
zip_bytes = make_test_zip(center_id)
244+
245+
mock_response = MagicMock()
246+
mock_response.content = zip_bytes
247+
mock_response.raise_for_status = MagicMock()
248+
249+
mock_client = MagicMock()
250+
mock_client.get.return_value = mock_response
251+
252+
df = export_center_candidate_results(
253+
client=mock_client,
254+
project_id=1,
255+
center_ids=[center_id],
256+
output_dir=output_dir,
257+
)
258+
259+
assert "candidate_result_round2" in df.columns
260+
assert list(df["candidate_result_round2"]) == [26, 31]
261+
# Original hyphenated column should have been renamed away
262+
assert (
263+
"candidate_result_r2-candidate_result_round2" not in df.columns
264+
)
265+
# Round 1 should still be present unchanged
266+
assert "candidate_result_round1" in df.columns
267+
assert list(df["candidate_result_round1"]) == [25, 30]
268+
269+
def test_extracts_reconciliation_fields(self, output_dir):
270+
center_id = 100
271+
zip_bytes = make_test_zip(center_id)
272+
273+
mock_response = MagicMock()
274+
mock_response.content = zip_bytes
275+
mock_response.raise_for_status = MagicMock()
276+
277+
mock_client = MagicMock()
278+
mock_client.get.return_value = mock_response
279+
280+
df = export_center_candidate_results(
281+
client=mock_client,
282+
project_id=1,
283+
center_ids=[center_id],
284+
output_dir=output_dir,
285+
)
286+
287+
r1_cols = [
288+
"reconciliation_r1-number_ballots_received_r1",
289+
"reconciliation_r1-number_voter_cards_r1",
290+
"reconciliation_r1-number_valid_ballots_r1",
291+
"reconciliation_r1-number_invalid_ballots_r1",
292+
"reconciliation_r1-number_ballots_inside_box_r1",
293+
]
294+
r2_cols = [
295+
"reconciliation_r2-number_ballots_received_r2",
296+
"reconciliation_r2-number_voter_cards_r2",
297+
"reconciliation_r2-number_valid_ballots_r2",
298+
"reconciliation_r2-number_invalid_ballots_r2",
299+
"reconciliation_r2-number_ballots_inside_box_r2",
300+
]
301+
for col in r1_cols + r2_cols:
302+
assert col in df.columns, f"missing reconciliation column: {col}"
303+
304+
# Spot-check the values round-trip correctly
305+
assert (
306+
df["reconciliation_r1-number_ballots_received_r1"].iloc[0] == 200
307+
)
308+
assert df["reconciliation_r1-number_valid_ballots_r1"].iloc[0] == 140
309+
assert (
310+
df["reconciliation_r2-number_ballots_received_r2"].iloc[0] == 204
311+
)
312+
assert df["reconciliation_r2-number_valid_ballots_r2"].iloc[0] == 139
313+
200314
def test_skips_existing_valid_zip(self, output_dir):
201315
center_id = 100
202316
zip_bytes = make_test_zip(center_id)

0 commit comments

Comments
 (0)