Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 24 additions & 17 deletions queue_job/jobrunner/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -357,23 +357,24 @@ def _query_requeue_dead_jobs(self):
ELSE exc_info
END)
WHERE
id in (
SELECT
queue_job_id
FROM
queue_job_lock
WHERE
queue_job_id in (
SELECT
id
FROM
queue_job
WHERE
state IN ('enqueued','started')
AND date_enqueued <
(now() AT TIME ZONE 'utc' - INTERVAL '10 sec')
)
FOR UPDATE SKIP LOCKED
state IN ('enqueued','started')
AND date_enqueued < (now() AT TIME ZONE 'utc' - INTERVAL '10 sec')
AND (
id in (
SELECT
queue_job_id
FROM
queue_job_lock
FOR UPDATE SKIP LOCKED
)
OR NOT EXISTS (
SELECT
1
FROM
queue_job_lock
WHERE
queue_job_lock.queue_job_id = queue_job.id
)
)
RETURNING uuid
"""
Expand All @@ -396,6 +397,12 @@ def requeue_dead_jobs(self):
However, when the Odoo server crashes or is otherwise force-stopped,
running jobs are interrupted while the runner has no chance to know
they have been aborted.

This also handles orphaned jobs (enqueued but never started, no lock).
This edge case occurs when the runner marks a job as 'enqueued'
but the HTTP request to start the job never reaches the Odoo server
(e.g., due to server shutdown/crash between setting enqueued and
the controller receiving the request).
"""

with closing(self.conn.cursor()) as cr:
Expand Down
16 changes: 16 additions & 0 deletions test_queue_job/tests/test_requeue_dead_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,3 +99,19 @@ def test_requeue_dead_jobs(self):

uuids_requeued = self.env.cr.fetchall()
self.assertTrue(queue_job.uuid in j[0] for j in uuids_requeued)

def test_requeue_orphaned_jobs(self):
queue_job = self._get_demo_job("test_enqueued_job")
job_obj = Job.load(self.env, queue_job.uuid)

# Only enqueued job, don't set it to started to simulate the scenario
# that system shutdown before job is starting
job_obj.set_enqueued()
job_obj.date_enqueued = datetime.now() - timedelta(minutes=1)
job_obj.store()

# job is now picked up by the requeue query (which includes orphaned jobs)
query = Database(self.env.cr.dbname)._query_requeue_dead_jobs()
self.env.cr.execute(query)
uuids_requeued = self.env.cr.fetchall()
self.assertTrue(queue_job.uuid in j[0] for j in uuids_requeued)