Commit fb83a838 authored by Roberto Rosario

Add support for indexing on OCR content changes.

Signed-off-by: Roberto Rosario <roberto.rosario.gonzalez@gmail.com>
parent 34c2fd61
Pipeline #31814883 skipped with stage
......@@ -11,6 +11,7 @@
in the trash.
* Load the DropZone CSS from package and remove the
hard code CSS from appearance/base.css.
* Add support for indexing on OCR content changes.
3.1.3 (2018-09-27)
==================
......
......@@ -25,7 +25,8 @@ from navigation import SourceColumn
from .events import event_ocr_document_version_submit
from .handlers import (
handler_initialize_new_ocr_settings, handler_ocr_document_version,
handler_index_document, handler_initialize_new_ocr_settings,
handler_ocr_document_version,
)
from .links import (
link_document_page_ocr_content, link_document_ocr_content,
......@@ -39,6 +40,7 @@ from .permissions import (
permission_ocr_content_view
)
from .queues import * # NOQA
from .signals import post_document_version_ocr
from .utils import get_document_ocr_content
logger = logging.getLogger(__name__)
......@@ -191,6 +193,11 @@ class OCRApp(MayanAppConfig):
)
)
post_document_version_ocr.connect(
dispatch_uid='ocr_handler_index_document',
receiver=handler_index_document,
sender=DocumentVersion
)
post_save.connect(
dispatch_uid='ocr_handler_initialize_new_ocr_settings',
receiver=handler_initialize_new_ocr_settings,
......
......@@ -4,11 +4,19 @@ import logging
from django.apps import apps
from document_indexing.tasks import task_index_document
from .settings import setting_auto_ocr
logger = logging.getLogger(__name__)
def handler_index_document(sender, **kwargs):
    """
    Signal receiver that queues a reindex of a document.

    Reads the ``instance`` keyword argument, follows its ``document``
    relation and submits ``task_index_document`` asynchronously with that
    document's primary key. Raises ``KeyError`` if ``instance`` was not
    supplied by the caller.
    """
    signaled_instance = kwargs['instance']
    task_index_document.apply_async(
        kwargs={'document_id': signaled_instance.document.pk}
    )
def handler_initialize_new_ocr_settings(sender, instance, **kwargs):
DocumentTypeSettings = apps.get_model(
app_label='ocr', model_name='DocumentTypeSettings'
......
......@@ -9,7 +9,7 @@ def move_content_from_documents_to_ocr_app(apps, schema_editor):
DocumentPageContent = apps.get_model('ocr', 'DocumentPageContent')
for document_page in DocumentPage.objects.using(schema_editor.connection.alias).all():
document_page_content = DocumentPageContent.objects.using(schema_editor.connection.alias).create(
DocumentPageContent.objects.using(schema_editor.connection.alias).create(
document_page=document_page,
content=document_page.content_old or ''
)
......
from __future__ import unicode_literals
# Index node template expression used by the OCR indexing tests: it renders
# "mayan" when that word is found in the space-joined, lowercased OCR content
# of the document's latest version, and renders nothing otherwise.
TEST_OCR_INDEX_NODE_TEMPLATE = (
    '{% if "mayan" in document.latest_version.ocr_content|join:" "|lower %}'
    'mayan{% endif %}'
)
from __future__ import unicode_literals
from django.test import override_settings
from common.tests import BaseTestCase
from documents.tests import DocumentTestMixin
from document_indexing.models import Index, IndexInstanceNode
from document_indexing.tests.literals import TEST_INDEX_LABEL
from .literals import TEST_OCR_INDEX_NODE_TEMPLATE
@override_settings(OCR_AUTO_OCR=False)
class OCRIndexingTestCase(DocumentTestMixin, BaseTestCase):
    """
    Verify that submitting a document for OCR places it in the index node
    produced by an OCR-content-based node template.
    """
    # NOTE(review): presumably stops the mixin from uploading a document
    # during setup, so the upload below happens after the index exists --
    # confirm against DocumentTestMixin.
    auto_upload_document = False

    def test_ocr_indexing(self):
        """
        Create an index whose node template evaluates the OCR content,
        upload a document, submit it for OCR and check that the document is
        linked to the resulting 'mayan' index instance node.
        """
        index = Index.objects.create(label=TEST_INDEX_LABEL)
        index.document_types.add(self.document_type)

        root = index.template_root
        index.node_templates.create(
            parent=root, expression=TEST_OCR_INDEX_NODE_TEMPLATE,
            link_documents=True
        )

        self.document = self.upload_document()
        self.document.submit_for_ocr()

        # assertIn reports both the member and the container on failure,
        # whereas assertTrue(x in y) only reports "False is not true".
        self.assertIn(
            self.document,
            IndexInstanceNode.objects.get(value='mayan').documents.all()
        )
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment