Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OCR Handler #1

Draft
wants to merge 3 commits into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,7 @@ Changelog
1.0a1 (unreleased)
------------------

- Publish Redis messages on file change
[kreafox]
- Initial release.
[MrTango]
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
"z3c.jbot",
"plone.api>=1.8.4",
"plone.app.dexterity",
"redis",
],
extras_require={
"test": [
Expand Down
5 changes: 5 additions & 0 deletions src/collective/ocrmypdf/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
import os

# Redis connection settings for the OCR queue.  Each value is read from the
# process environment and falls back to the default given here when unset.
REDIS_HOST = os.environ.get("REDIS_HOST", "redis")
# The port is converted to an int whether it comes from the environment
# (always a string) or from the default.
REDIS_PORT = int(os.environ.get("REDIS_PORT", "6379"))
# Name of the pub/sub channel that messages are published on.
REDIS_CHANNEL = os.environ.get("REDIS_CHANNEL", "ocr_queue")
89 changes: 89 additions & 0 deletions src/collective/ocrmypdf/datamanager.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
import logging

import transaction

logger = logging.getLogger("collective.ocrmypdf")


class CallbacksDataManager(object):
    """Transaction-aware data manager that runs callbacks after a commit.

    Callbacks are collected with :meth:`add` while the transaction is open
    and are executed in :meth:`tpc_finish`, i.e. only once every joined
    resource manager has voted and the transaction is definitely committed.
    If the transaction is aborted at any point the queue is discarded and
    nothing runs.

    Attributes:
        sp: Counter incremented for every savepoint handed out.
        callbacks: List of zero-argument callables waiting to run.
        txn: Transaction currently in two-phase commit, or ``None``.
    """

    def __init__(self):
        self.sp = 0
        self.callbacks = []
        self.txn = None

    def tpc_begin(self, txn):
        """Remember the transaction that entered two-phase commit."""
        self.txn = txn

    def tpc_finish(self, txn):
        """Run the queued callbacks now that the commit is final.

        This phase must not fail, so an exception raised by a callback is
        logged and the remaining callbacks still run.
        """
        # Detach the queue first so state is clean even if a callback
        # re-enters this data manager.
        callbacks, self.callbacks = self.callbacks, []
        self.txn = None

        for callback in callbacks:
            try:
                callback()
            except Exception:
                logger.exception("Error executing callback.")

    def tpc_vote(self, txn):
        """Vote yes unconditionally; queued callbacks cannot veto."""
        pass

    def tpc_abort(self, txn):
        """Discard the queue when two-phase commit is aborted.

        Raises:
            TypeError: If ``txn`` is not the transaction given to
                :meth:`tpc_begin`.
        """
        self._checkTransaction(txn)

        self.txn = None
        self.callbacks = []

    def abort(self, txn):
        """Discard the queue when the transaction is aborted."""
        self.callbacks = []

    def commit(self, txn):
        """First commit phase: validate the transaction only.

        The callbacks have external side effects that cannot be rolled
        back, so they are deliberately NOT run here; another resource
        manager may still fail in ``tpc_vote``.  They run in
        :meth:`tpc_finish` instead.

        Raises:
            TypeError: If ``txn`` is not the transaction given to
                :meth:`tpc_begin`.
        """
        self._checkTransaction(txn)

    def savepoint(self):
        """Return a savepoint capturing the current callback queue."""
        self.sp += 1

        return Savepoint(self)

    def sortKey(self):
        """Return the key the transaction uses to order data managers."""
        return self.__class__.__name__

    def add(self, callback):
        """Append a zero-argument callable to the queue."""
        logger.info("Add callback to queue %s", callback)
        self.callbacks.append(callback)

    def _checkTransaction(self, txn):
        """Raise ``TypeError`` if ``txn`` differs from the tracked one."""
        if txn is not self.txn and self.txn is not None:
            raise TypeError("Transaction missmatch", txn, self.txn)


class Savepoint(object):
    """Snapshot of a data manager's callback queue that can be restored.

    On creation it copies the manager's savepoint counter, callback list
    and current transaction; :meth:`rollback` writes the counter and the
    callback list back to the manager.
    """

    def __init__(self, dm):
        self.dm = dm
        self.sp = dm.sp
        # Copy the list so later additions to the manager's queue do not
        # leak into this snapshot.
        self.callbacks = list(dm.callbacks)
        self.txn = dm.txn

    def rollback(self):
        """Restore the manager's queue to the state captured here.

        Raises:
            TypeError: If the manager is tracking a different transaction
                than at snapshot time, or if its savepoint counter is lower
                than the one recorded here.
        """
        manager = self.dm

        if manager.txn is not self.txn:
            raise TypeError("Attempt to rollback stale rollback")

        if self.sp > manager.sp:
            raise TypeError(
                "Attempt to roll back to invalid save point", self.sp, manager.sp
            )

        manager.sp = self.sp
        # Hand the manager its own copy so this savepoint stays reusable.
        manager.callbacks = list(self.callbacks)


def queue_callback(callback):
    """Attach ``callback`` to the current transaction.

    A fresh ``CallbacksDataManager`` is created, joined to the transaction
    returned by ``transaction.get()`` and given the callback to hold.

    Args:
        callback: Zero-argument callable to hand to the data manager.
    """
    manager = CallbacksDataManager()
    current_txn = transaction.get()
    current_txn.join(manager)
    manager.add(callback)
10 changes: 7 additions & 3 deletions src/collective/ocrmypdf/subscribers/configure.zcml
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,16 @@
xmlns="http://namespaces.zope.org/zope"
i18n_domain="collective.ocrmypdf">

<!-- -*- extra stuff goes here -*- -->
<subscriber for="plone.app.contenttypes.interfaces.IFile
zope.lifecycleevent.interfaces.IObjectAddedEvent"
handler=".file_modified_ocr.handler"
/>


<subscriber for="plone.app.contenttypes.interfaces.IFile
zope.lifecycleevent.interfaces.IObjectModifiedEvent"
handler=".file_modified_ocr.handler"
/>


</configure>
28 changes: 26 additions & 2 deletions src/collective/ocrmypdf/subscribers/file_modified_ocr.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,30 @@
# -*- coding: utf-8 -*-
import json
import redis
from zope.globalrequest import getRequest
from collective.ocrmypdf.datamanager import queue_callback
from collective.ocrmypdf.config import REDIS_HOST, REDIS_PORT, REDIS_CHANNEL

# Module-level Redis client built from the configured host and port, using
# database 0; the callback queued by handler() publishes through it.
redis_client = redis.StrictRedis(host=REDIS_HOST, port=REDIS_PORT, db=0)


def handler(obj, event):
    """Queue a Redis message carrying the UID of a PDF file object.

    Objects whose content type is not ``application/pdf`` are ignored.  For
    PDFs a JSON message ``{"uid": <UID>}`` is prepared and its publication
    on ``REDIS_CHANNEL`` is handed to ``queue_callback``.

    Args:
        obj: The content object the event was fired for.
        event: The lifecycle event; not inspected.
    """
    if obj.content_type() != "application/pdf":
        return

    # Check the 'ocr' flag to prevent re-triggering the handler when the
    # processed file is uploaded by the consumer.  There may be no active
    # request (getRequest() returns None), in which case no flag is set.
    request = getRequest()
    form = getattr(request, "form", None) or {}
    if form.get("ocr") == "1":
        return

    # Serialise now so the callback does not touch the object later.
    data = json.dumps({"uid": obj.UID()})

    def callback():
        redis_client.publish(REDIS_CHANNEL, data)

    queue_callback(callback)