Skip to content

Commit 58632be

Browse files
committed
Merge PR #863 into 17.0
Signed-off-by sbidoul
2 parents 3dbb0c5 + c7324db commit 58632be

File tree

2 files changed

+51
-17
lines changed

2 files changed

+51
-17
lines changed

queue_job/jobrunner/runner.py

Lines changed: 26 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -365,23 +365,26 @@ def _query_requeue_dead_jobs(self):
365365
ELSE exc_info
366366
END)
367367
WHERE
368-
id in (
369-
SELECT
370-
queue_job_id
371-
FROM
372-
queue_job_lock
373-
WHERE
374-
queue_job_id in (
375-
SELECT
376-
id
377-
FROM
378-
queue_job
379-
WHERE
380-
state IN ('enqueued','started')
381-
AND date_enqueued <
382-
(now() AT TIME ZONE 'utc' - INTERVAL '10 sec')
383-
)
384-
FOR UPDATE SKIP LOCKED
368+
state IN ('enqueued','started')
369+
AND date_enqueued < (now() AT TIME ZONE 'utc' - INTERVAL '10 sec')
370+
AND (
371+
id in (
372+
SELECT
373+
queue_job_id
374+
FROM
375+
queue_job_lock
376+
WHERE
377+
queue_job_lock.queue_job_id = queue_job.id
378+
FOR UPDATE SKIP LOCKED
379+
)
380+
OR NOT EXISTS (
381+
SELECT
382+
1
383+
FROM
384+
queue_job_lock
385+
WHERE
386+
queue_job_lock.queue_job_id = queue_job.id
387+
)
385388
)
386389
RETURNING uuid
387390
"""
@@ -404,6 +407,12 @@ def requeue_dead_jobs(self):
404407
However, when the Odoo server crashes or is otherwise force-stopped,
405408
running jobs are interrupted while the runner has no chance to know
406409
they have been aborted.
410+
411+
This also handles orphaned jobs (enqueued but never started, no lock).
412+
This edge case occurs when the runner marks a job as 'enqueued'
413+
but the HTTP request to start the job never reaches the Odoo server
414+
(e.g., due to server shutdown/crash between setting enqueued and
415+
the controller receiving the request).
407416
"""
408417

409418
with closing(self.conn.cursor()) as cr:

queue_job/tests/test_requeue_dead_job.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,3 +131,28 @@ def test_requeue_dead_jobs(self):
131131
# because we committed the cursor, the savepoint of the test method is
132132
# gone, and this would break TransactionCase cleanups
133133
self.cr.execute("SAVEPOINT test_%d" % self._savepoint_id)
134+
135+
def test_requeue_orphaned_jobs(self):
    """Check that the requeue query returns a job that was only enqueued.

    The job is marked as enqueued one minute in the past and is never
    set to started, which simulates a shutdown happening between the
    runner enqueuing the job and the job actually starting.
    """
    uuid = "test_enqueued_job"
    queue_job = self.create_dummy_job(uuid)
    job_obj = Job.load(self.env, queue_job.uuid)

    # Only enqueue the job; do not set it to started, to simulate a
    # system shutdown that happens before the job starts.
    job_obj.set_enqueued()
    # Backdate the enqueue time so it is older than the 10 second
    # threshold used by the requeue query.
    job_obj.date_enqueued = datetime.now() - timedelta(minutes=1)
    job_obj.store()

    # NOTE(review): if store() leaves ORM writes pending, they must be
    # flushed before running the raw SQL below -- confirm.
    query = Database(self.env.cr.dbname)._query_requeue_dead_jobs()
    self.env.cr.execute(query)
    requeued_uuids = {row[0] for row in self.env.cr.fetchall()}

    # The previous form, assertTrue(<generator expression>), always
    # passed because a generator object is truthy regardless of what it
    # would yield. Compare against the returned uuids explicitly.
    self.assertIn(queue_job.uuid, requeued_uuids)

    # clean up
    queue_job.unlink()
    self.env.cr.commit()  # pylint: disable=E8102

    # because we committed the cursor, the savepoint of the test method is
    # gone, and this would break TransactionCase cleanups
    self.cr.execute("SAVEPOINT test_%d" % self._savepoint_id)

0 commit comments

Comments
 (0)