Skip to content

Commit 1a9726e

Browse files
authored
Merge pull request #32 from cortega26/codex/fix-publish-sub-routine-skipping-issue
Fix publish workflow skip reason visibility
2 parents 9d67f17 + 1c6c7ae commit 1a9726e

File tree

4 files changed

+255
-89
lines changed

4 files changed

+255
-89
lines changed

.github/workflows/scrape.yml

Lines changed: 46 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -117,11 +117,40 @@ jobs:
117117
id: decision
118118
run: |
119119
python - <<'PY'
120-
import json, pathlib, os
121-
payload = json.loads(pathlib.Path('artifacts/run_summary.json').read_text())
122-
value = 'true' if payload.get('publish') else 'false'
123-
with open(os.environ['GITHUB_OUTPUT'], 'a', encoding='utf-8') as handle:
124-
handle.write(f"publish={value}\n")
120+
"""Expose publish decision metadata for downstream jobs."""
121+
122+
from __future__ import annotations
123+
124+
import json
125+
import os
126+
from pathlib import Path
127+
128+
summary_path = Path("artifacts/run_summary.json")
129+
if not summary_path.exists():
130+
raise SystemExit("Run summary not found; cannot determine publish decision")
131+
132+
payload = json.loads(summary_path.read_text(encoding="utf-8"))
133+
publish_flag = bool(payload.get("publish"))
134+
decision = payload.get("decision", {}) or {}
135+
status = str(decision.get("status", "unknown"))
136+
reason = str(payload.get("publish_reason") or decision.get("reason") or "unknown")
137+
138+
outputs_path = Path(os.environ["GITHUB_OUTPUT"])
139+
with outputs_path.open("a", encoding="utf-8") as handle:
140+
handle.write(f"publish={'true' if publish_flag else 'false'}\n")
141+
handle.write(f"publish_reason={reason}\n")
142+
handle.write(f"decision_status={status}\n")
143+
144+
summary_output = os.environ.get("GITHUB_STEP_SUMMARY")
145+
if summary_output:
146+
with Path(summary_output).open("a", encoding="utf-8") as handle:
147+
handle.write("### Publish decision\n")
148+
handle.write(f"- Publish: {'yes' if publish_flag else 'no'}\n")
149+
handle.write(f"- Status: {status}\n")
150+
handle.write(f"- Reason: {reason}\n")
151+
152+
if not publish_flag:
153+
print(f"::notice::Publish job will be skipped because {reason} (status={status}).")
125154
PY
126155
127156
- name: Save pipeline state
@@ -167,29 +196,40 @@ jobs:
167196

168197
publish:
169198
needs: ingest
170-
if: needs.ingest.outputs.publish == 'true'
199+
if: needs.ingest.result == 'success'
171200
runs-on: ubuntu-latest
172201
steps:
202+
- name: Publish decision summary
203+
if: ${{ needs.ingest.outputs.publish != 'true' }}
204+
run: |
205+
echo "Publish step skipped: ${{ needs.ingest.outputs.publish_reason || 'no reason provided' }} (status=${{ needs.ingest.outputs.decision_status || 'unknown' }})"
206+
exit 0
207+
173208
- uses: actions/checkout@v4
209+
if: ${{ needs.ingest.outputs.publish == 'true' }}
174210

175211
- name: Set up Python
176212
uses: actions/setup-python@v5
213+
if: ${{ needs.ingest.outputs.publish == 'true' }}
177214
with:
178215
python-version: '3.11'
179216
cache: 'pip'
180217
cache-dependency-path: 'requirements.txt'
181218

182219
- name: Install dependencies
220+
if: ${{ needs.ingest.outputs.publish == 'true' }}
183221
run: |
184222
python -m pip install --upgrade pip
185223
pip install -r requirements.txt
186224
187225
- name: Download artifacts
226+
if: ${{ needs.ingest.outputs.publish == 'true' }}
188227
uses: actions/download-artifact@v4
189228
with:
190229
name: alt-sources-artifacts
191230

192231
- name: Publish to Google Sheets
232+
if: ${{ needs.ingest.outputs.publish == 'true' }}
193233
env:
194234
GOOGLE_SERVICE_ACCOUNT_JSON: ${{ secrets.GOOGLE_SHEETS_CREDENTIALS }}
195235
GOOGLE_CREDENTIALS: ${{ secrets.GOOGLE_CREDENTIALS }}

docs/API.md

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@ True
1414

1515
- Sources: `"pozos"` (primary + fallback) or `"openloto"` (fallback only).
1616
- `source_overrides`: case‑insensitive mapping of `{ "openloto": url, "resultadoslotochile": url }`.
17-
- Returns a run summary with `publish` boolean and artifact paths.
17+
- Returns a run summary with `publish` boolean, `publish_reason` string, and artifact paths.
1818

1919
Example:
2020

@@ -38,6 +38,7 @@ summary = run_pipeline(
3838
include_pozos=True,
3939
)
4040
print(summary["publish"]) # True/False
41+
print(summary["publish_reason"]) # e.g. "updated_or_new_amounts"
4142
```
4243

4344
## Publishing

polla_app/pipeline.py

Lines changed: 93 additions & 82 deletions
Original file line number | Diff line number | Diff line change
@@ -298,20 +298,21 @@ def _compute_unchanged(
298298
return False
299299

300300

301-
def _build_report_payload(
302-
*,
303-
run_id: str,
304-
generated_at: str,
305-
requested_sources: Sequence[str],
306-
timeout: int,
307-
retries: int,
308-
fail_fast: bool,
309-
sorteo: Any,
310-
fecha: Any,
311-
merged_pozos: Mapping[str, Any],
312-
record_source: Any,
313-
decision_status: str,
314-
) -> dict[str, Any]:
301+
def _build_report_payload(
302+
*,
303+
run_id: str,
304+
generated_at: str,
305+
requested_sources: Sequence[str],
306+
timeout: int,
307+
retries: int,
308+
fail_fast: bool,
309+
sorteo: Any,
310+
fecha: Any,
311+
merged_pozos: Mapping[str, Any],
312+
record_source: Any,
313+
decision_status: str,
314+
decision_reason: str,
315+
) -> dict[str, Any]:
315316
return {
316317
"run": {
317318
"id": run_id,
@@ -321,41 +322,44 @@ def _build_report_payload(
321322
"retries": retries,
322323
"fail_fast": fail_fast,
323324
},
324-
"last_draw": {"sorteo": sorteo, "fecha": fecha},
325-
"decision": {
326-
"status": decision_status,
327-
"total_categories": len(merged_pozos),
328-
"mismatched_categories": 0,
329-
},
330-
"prizes_changed": decision_status != "skip",
331-
"mismatches": [],
332-
"sources": {"pozos": {"url": record_source, "premios": 0}},
333-
"failures": [],
325+
"last_draw": {"sorteo": sorteo, "fecha": fecha},
326+
"decision": {
327+
"status": decision_status,
328+
"total_categories": len(merged_pozos),
329+
"mismatched_categories": 0,
330+
"reason": decision_reason,
331+
},
332+
"prizes_changed": decision_status != "skip",
333+
"mismatches": [],
334+
"sources": {"pozos": {"url": record_source, "premios": 0}},
335+
"failures": [],
334336
}
335337

336338

337-
def _build_summary_payload(
338-
*,
339-
run_id: str,
340-
generated_at: str,
341-
decision: Mapping[str, Any],
342-
normalized_path: Path,
343-
comparison_report_path: Path,
344-
raw_dir: Path,
345-
state_path: Path,
346-
publish_flag: bool,
347-
) -> dict[str, Any]:
348-
return {
349-
"run_id": run_id,
350-
"generated_at": generated_at,
351-
"decision": dict(decision),
352-
"prizes_changed": bool(decision.get("status") != "skip"),
353-
"normalized_path": str(normalized_path),
354-
"comparison_report": str(comparison_report_path),
355-
"raw_dir": str(raw_dir),
356-
"state_path": str(state_path),
357-
"publish": publish_flag,
358-
}
339+
def _build_summary_payload(
340+
*,
341+
run_id: str,
342+
generated_at: str,
343+
decision: Mapping[str, Any],
344+
normalized_path: Path,
345+
comparison_report_path: Path,
346+
raw_dir: Path,
347+
state_path: Path,
348+
publish_flag: bool,
349+
publish_reason: str,
350+
) -> dict[str, Any]:
351+
return {
352+
"run_id": run_id,
353+
"generated_at": generated_at,
354+
"decision": dict(decision),
355+
"prizes_changed": bool(decision.get("status") != "skip"),
356+
"normalized_path": str(normalized_path),
357+
"comparison_report": str(comparison_report_path),
358+
"raw_dir": str(raw_dir),
359+
"state_path": str(state_path),
360+
"publish": publish_flag,
361+
"publish_reason": publish_reason,
362+
}
359363

360364

361365
def _handle_pozos_only(
@@ -401,39 +405,48 @@ def _handle_pozos_only(
401405
_write_jsonl(normalized_path, [record])
402406
_write_jsonl(state_path, [record])
403407

404-
decision_status = "skip" if unchanged else "publish"
405-
publish_flag = not unchanged
406-
if force_publish and unchanged:
407-
decision_status = "publish_forced"
408-
publish_flag = True
408+
if unchanged:
409+
decision_status = "skip"
410+
publish_flag = False
411+
publish_reason = "sorteo_fecha_and_amounts_unchanged"
412+
else:
413+
decision_status = "publish"
414+
publish_flag = True
415+
publish_reason = "updated_or_new_amounts"
416+
if force_publish and unchanged:
417+
decision_status = "publish_forced"
418+
publish_flag = True
419+
publish_reason = "force_publish_requested"
409420

410421
generated_at = datetime.now(timezone.utc).isoformat()
411-
report_payload = _build_report_payload(
412-
run_id=run_id,
413-
generated_at=generated_at,
414-
requested_sources=requested_sources,
415-
timeout=timeout,
416-
retries=retries,
417-
fail_fast=fail_fast,
418-
sorteo=sorteo,
419-
fecha=fecha,
420-
merged_pozos=merged_pozos,
421-
record_source=record["fuente"],
422-
decision_status=decision_status,
423-
)
422+
report_payload = _build_report_payload(
423+
run_id=run_id,
424+
generated_at=generated_at,
425+
requested_sources=requested_sources,
426+
timeout=timeout,
427+
retries=retries,
428+
fail_fast=fail_fast,
429+
sorteo=sorteo,
430+
fecha=fecha,
431+
merged_pozos=merged_pozos,
432+
record_source=record["fuente"],
433+
decision_status=decision_status,
434+
decision_reason=publish_reason,
435+
)
424436
report_payload["api_version"] = API_VERSION
425437
_write_json(comparison_report_path, report_payload)
426438

427-
summary_payload = _build_summary_payload(
428-
run_id=run_id,
429-
generated_at=generated_at,
430-
decision=report_payload["decision"],
431-
normalized_path=normalized_path,
432-
comparison_report_path=comparison_report_path,
433-
raw_dir=raw_dir,
434-
state_path=state_path,
435-
publish_flag=publish_flag,
436-
)
439+
summary_payload = _build_summary_payload(
440+
run_id=run_id,
441+
generated_at=generated_at,
442+
decision=report_payload["decision"],
443+
normalized_path=normalized_path,
444+
comparison_report_path=comparison_report_path,
445+
raw_dir=raw_dir,
446+
state_path=state_path,
447+
publish_flag=publish_flag,
448+
publish_reason=publish_reason,
449+
)
437450
summary_payload["api_version"] = API_VERSION
438451

439452
log_event(
@@ -449,14 +462,12 @@ def _handle_pozos_only(
449462
{
450463
"event": "pipeline_complete",
451464
"run_id": run_id,
452-
"decision": decision_status,
453-
"mismatch_ratio": 0.0,
454-
"prizes_changed": not unchanged,
455-
"reason": (
456-
"sorteo_fecha_and_amounts_unchanged" if unchanged else "updated_or_new_amounts"
457-
),
458-
}
459-
)
465+
"decision": decision_status,
466+
"mismatch_ratio": 0.0,
467+
"prizes_changed": not unchanged,
468+
"reason": publish_reason,
469+
}
470+
)
460471
metric(
461472
"pipeline_run",
462473
log_event,

0 commit comments

Comments
 (0)