-
Notifications
You must be signed in to change notification settings - Fork 0
outbox event publishing failure, queue management in erroneous cases #21
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: dev
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Large diffs are not rendered by default.
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -36,3 +36,8 @@ class AEMPackEventConfig(BaseSettings): | |||||
|
|
||||||
| class OriginalAEMPack(AEMPack): | ||||||
| """Model for the incoming AEMPack payload.""" | ||||||
|
|
||||||
| version: int = Field( | ||||||
| default=..., | ||||||
| description="Version assigned by the publishing service (RS), incremented on each republish.", | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This should not point to the concrete service and republishing actually doesn't increase the version, only a genuinely new version of the AEMPack does.
Suggested change
or something like that. |
||||||
| ) | ||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -22,8 +22,9 @@ | |
| from pydantic_settings import BaseSettings | ||
|
|
||
| from ets.core import models | ||
| from ets.core.models import AEMPack | ||
| from ets.core.models import AEMPack, AEMPackFailedEvent | ||
| from ets.ports.outbound.dao import ( | ||
| FailedEventDao, | ||
| ModelDao, | ||
| RouteDao, | ||
| WorkflowDao, | ||
|
|
@@ -59,6 +60,14 @@ class AEMPackDaoConfig(BaseSettings): | |
| description="Topic for events informing about derived AEMPacks.", | ||
| examples=["derived-aempacks"], | ||
| ) | ||
| aem_pack_processing_event_topic: str = Field( | ||
| default=..., | ||
| description=( | ||
| "Topic for AEMPack processing-lifecycle (status) events, e.g. processing" | ||
| " failures, and later successes." | ||
| ), | ||
| examples=["aempack-processing-events"], | ||
| ) | ||
|
Comment on lines
+63
to
+70
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As mentioned on the yaml file: |
||
|
|
||
|
|
||
| async def get_aem_pack_dao( | ||
|
|
@@ -74,3 +83,17 @@ async def get_aem_pack_dao( | |
| autopublish=True, | ||
| indexes=[MongoDbIndex(fields={"pid": 1, "model_name": 1})], | ||
| ) | ||
|
|
||
|
|
||
| async def get_failed_event_dao( | ||
| *, dao_publisher_factory: DaoPublisherFactoryProtocol, topic: str | ||
| ) -> FailedEventDao: | ||
| """Construct an outbox DAO for AEMPack processing-failure events.""" | ||
| return await dao_publisher_factory.get_dao( | ||
| name="aem_pack_failed_events", | ||
| id_field="id", | ||
| dto_model=AEMPackFailedEvent, | ||
| dto_to_event=lambda event: event.model_dump(mode="json"), | ||
| event_topic=topic, | ||
| autopublish=True, | ||
| ) | ||
|
Comment on lines
+88
to
+99
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This should not be coupled to only the failure event, there should be events fired when an AEMPack has been successfully put into the queue and when one has successfully been processed. The collection name and model should be more more generic, something in the direction of |
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -22,12 +22,15 @@ | |
| from pydantic import UUID4 | ||
| from pymongo import ReturnDocument | ||
| from pymongo.asynchronous.collection import AsyncCollection | ||
| from pymongo.errors import DuplicateKeyError | ||
|
|
||
| from ets.constants import ( | ||
| FAILED_AT_FIELD, | ||
| NEEDS_REPROCESSING_FIELD, | ||
| PROCESSED_AT_FIELD, | ||
| PROCESSOR_FIELD, | ||
| TOMBSTONE_FIELD, | ||
| VERSION_FIELD, | ||
| ) | ||
| from ets.core.models import AEMPack, IncomingAEMPack | ||
| from ets.ports.outbound.incoming_aem_pack_queue import IncomingAEMPackQueuePort | ||
|
|
@@ -52,44 +55,64 @@ def __init__( | |
| self._worker_id = worker_id | ||
|
|
||
| async def queue(self, aem_pack: AEMPack) -> None: | ||
| """Upsert an AEMPack into the queue.""" | ||
| """Upsert an AEMPack into the queue if its version is newer than the stored one. | ||
|
|
||
| The incoming version is compared against any document already stored for the | ||
| same id. The document is only (over)written when the incoming version is | ||
| strictly higher; equal or lower versions are rejected and logged (the likely | ||
| cause is a republish on the RS side). Accepting a newer version also resets | ||
| ``failed_at``, so a previously failed pack is reprocessed under the new version. | ||
| """ | ||
| doc = aem_pack.model_dump(mode="json") | ||
| doc.pop("id") | ||
| doc["correlation_id"] = str(get_correlation_id()) | ||
|
|
||
| await self._collection.find_one_and_update( | ||
| filter={"_id": aem_pack.id}, | ||
| update=[ | ||
| { | ||
| "$set": { | ||
| **doc, | ||
| # Preserve the current processor so the in-flight instance can still | ||
| # complete and mark the doc as done; it will be requeued via needs_reprocessing. | ||
| PROCESSOR_FIELD: { | ||
| "$cond": { | ||
| "if": f"${PROCESSOR_FIELD}", | ||
| "then": f"${PROCESSOR_FIELD}", | ||
| "else": None, | ||
| } | ||
| }, | ||
| NEEDS_REPROCESSING_FIELD: { | ||
| "$or": [ | ||
| {"$ne": [f"${PROCESSOR_FIELD}", None]}, | ||
| {"$ne": [f"${PROCESSED_AT_FIELD}", None]}, | ||
| ] | ||
| }, | ||
| PROCESSED_AT_FIELD: { | ||
| "$cond": { | ||
| "if": f"${PROCESSED_AT_FIELD}", | ||
| "then": f"${PROCESSED_AT_FIELD}", | ||
| "else": None, | ||
| } | ||
| }, | ||
| incoming_version = doc[VERSION_FIELD] | ||
|
|
||
| try: | ||
| await self._collection.find_one_and_update( | ||
| # No match when the stored version is >= incoming: upsert then tries to | ||
| # insert a duplicate _id, which surfaces as DuplicateKeyError (rejection). | ||
| filter={"_id": aem_pack.id, VERSION_FIELD: {"$lt": incoming_version}}, | ||
| update=[ | ||
| { | ||
| "$set": { | ||
| **doc, | ||
| # Preserve the current processor so the in-flight instance can still | ||
| # complete and mark the doc as done; it will be requeued via needs_reprocessing. | ||
| PROCESSOR_FIELD: { | ||
| "$cond": { | ||
| "if": f"${PROCESSOR_FIELD}", | ||
| "then": f"${PROCESSOR_FIELD}", | ||
| "else": None, | ||
| } | ||
| }, | ||
| NEEDS_REPROCESSING_FIELD: { | ||
| "$or": [ | ||
| {"$ne": [f"${PROCESSOR_FIELD}", None]}, | ||
| {"$ne": [f"${PROCESSED_AT_FIELD}", None]}, | ||
| ] | ||
| }, | ||
| PROCESSED_AT_FIELD: { | ||
| "$cond": { | ||
| "if": f"${PROCESSED_AT_FIELD}", | ||
| "then": f"${PROCESSED_AT_FIELD}", | ||
| "else": None, | ||
| } | ||
| }, | ||
| # A newer version clears any prior failure so the pack is | ||
| # reprocessed instead of staying parked as failed. | ||
| FAILED_AT_FIELD: None, | ||
|
Comment on lines
+95
to
+104
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There's a bit of an inconsistency here with how marking as failed is treated: The easiest way to solve this would probably be to extend the if clause in the conditional set for the If that doesn't work out, another way would be to include the |
||
| } | ||
| } | ||
| } | ||
| ], | ||
| upsert=True, | ||
| ) | ||
| ], | ||
| upsert=True, | ||
| ) | ||
| except DuplicateKeyError: | ||
| log.info( | ||
| "AEMPack %s version %s is not newer than the stored version; rejecting.", | ||
| aem_pack.id, | ||
| incoming_version, | ||
| ) | ||
|
|
||
| async def claim_next(self) -> IncomingAEMPack | None: | ||
| """Claim the next available AEMPack for processing.""" | ||
|
|
@@ -185,8 +208,28 @@ async def free(self, aem_pack_id: UUID4) -> None: | |
| ) | ||
|
|
||
| async def mark_all_for_reprocessing(self) -> None: | ||
| """Flag all processed AEMPacks for reprocessing.""" | ||
| """Flag all processed AEMPacks for reprocessing. | ||
|
|
||
| Failed AEMPacks are included and their ``failed_at`` is cleared: a config | ||
| change may be exactly what fixes the transformation that previously failed, | ||
| so they are retried fresh under the new config. | ||
| """ | ||
| await self._collection.update_many( | ||
| {PROCESSED_AT_FIELD: {"$ne": None}, TOMBSTONE_FIELD: {"$ne": True}}, | ||
| {"$set": {NEEDS_REPROCESSING_FIELD: True}}, | ||
| {"$set": {NEEDS_REPROCESSING_FIELD: True, FAILED_AT_FIELD: None}}, | ||
| ) | ||
|
|
||
| async def mark_as_failed(self, aem_pack_id: UUID4) -> None: | ||
| """Mark an AEMPack as failed when data derivation raises an exception. | ||
| It is marked as processed for the sake of state management to ensure | ||
| that it is not picked up again for processing. | ||
| """ | ||
| await self._collection.update_one( | ||
| {"_id": aem_pack_id}, | ||
| { | ||
| "$set": { | ||
| FAILED_AT_FIELD: now_utc_ms_prec(), | ||
| } | ||
| }, | ||
| ) | ||
| await self.mark_processed(aem_pack_id) | ||
|
Comment on lines
+227
to
+235
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. To guard against possible race conditions and make it one call instead of two, inline |
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The event in the names is redundant, so a more compact
is better or, slightly less generic,
aem_pack_processing_status_topic