File tree Expand file tree Collapse file tree 2 files changed +51
-17
lines changed
Expand file tree Collapse file tree 2 files changed +51
-17
lines changed Original file line number Diff line number Diff line change @@ -365,23 +365,26 @@ def _query_requeue_dead_jobs(self):
365365 ELSE exc_info
366366 END)
367367 WHERE
368- id in (
369- SELECT
370- queue_job_id
371- FROM
372- queue_job_lock
373- WHERE
374- queue_job_id in (
375- SELECT
376- id
377- FROM
378- queue_job
379- WHERE
380- state IN ('enqueued','started')
381- AND date_enqueued <
382- (now() AT TIME ZONE 'utc' - INTERVAL '10 sec')
383- )
384- FOR UPDATE SKIP LOCKED
368+ state IN ('enqueued','started')
369+ AND date_enqueued < (now() AT TIME ZONE 'utc' - INTERVAL '10 sec')
370+ AND (
371+ id in (
372+ SELECT
373+ queue_job_id
374+ FROM
375+ queue_job_lock
376+ WHERE
377+ queue_job_lock.queue_job_id = queue_job.id
378+ FOR UPDATE SKIP LOCKED
379+ )
380+ OR NOT EXISTS (
381+ SELECT
382+ 1
383+ FROM
384+ queue_job_lock
385+ WHERE
386+ queue_job_lock.queue_job_id = queue_job.id
387+ )
385388 )
386389 RETURNING uuid
387390 """
@@ -404,6 +407,12 @@ def requeue_dead_jobs(self):
404407 However, when the Odoo server crashes or is otherwise force-stopped,
405408 running jobs are interrupted while the runner has no chance to know
406409 they have been aborted.
410+
411+ This also handles orphaned jobs (enqueued but never started, no lock).
412+ This edge case occurs when the runner marks a job as 'enqueued'
413+ but the HTTP request to start the job never reaches the Odoo server
414+ (e.g., due to server shutdown/crash between setting enqueued and
415+ the controller receiving the request).
407416 """
408417
409418 with closing (self .conn .cursor ()) as cr :
Original file line number Diff line number Diff line change @@ -131,3 +131,28 @@ def test_requeue_dead_jobs(self):
131131 # because we committed the cursor, the savepoint of the test method is
132132 # gone, and this would break TransactionCase cleanups
133133 self .cr .execute ("SAVEPOINT test_%d" % self ._savepoint_id )
134+
def test_requeue_orphaned_jobs(self):
    """Check that the requeue query returns an enqueued-but-never-started job.

    A dummy job is marked as enqueued with an enqueue date one minute in
    the past and is never set to started, which simulates a shutdown that
    happens before the job begins running. The query built by
    ``Database._query_requeue_dead_jobs`` must return that job's uuid.
    """
    uuid = "test_enqueued_job"
    queue_job = self.create_dummy_job(uuid)
    job_obj = Job.load(self.env, queue_job.uuid)

    # Only enqueue the job; do not set it to started, to simulate a system
    # shutdown that happens before the job starts.
    job_obj.set_enqueued()
    job_obj.date_enqueued = datetime.now() - timedelta(minutes=1)
    job_obj.store()

    # The job must now be picked up by the requeue query (which includes
    # orphaned jobs).
    query = Database(self.env.cr.dbname)._query_requeue_dead_jobs()
    self.env.cr.execute(query)
    uuids_requeued = [row[0] for row in self.env.cr.fetchall()]
    # assertIn on a real list: passing a generator expression to assertTrue
    # is always truthy and would make this check pass unconditionally.
    self.assertIn(queue_job.uuid, uuids_requeued)

    # clean up
    queue_job.unlink()
    self.env.cr.commit()  # pylint: disable=E8102

    # because we committed the cursor, the savepoint of the test method is
    # gone, and this would break TransactionCase cleanups
    self.cr.execute("SAVEPOINT test_%d" % self._savepoint_id)
You can’t perform that action at this time.
0 commit comments