trustgraph-ocr 0.23.22__tar.gz → 0.23.23__tar.gz

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,9 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: trustgraph-ocr
3
- Version: 0.23.22
3
+ Version: 0.23.23
4
4
  Summary: TrustGraph provides a means to run a pipeline of flexible AI processing components in a flexible means to achieve a processing pipeline.
5
5
  Home-page: https://github.com/trustgraph-ai/trustgraph
6
- Download-URL: https://github.com/trustgraph-ai/trustgraph/archive/refs/tags/v0.23.22.tar.gz
6
+ Download-URL: https://github.com/trustgraph-ai/trustgraph/archive/refs/tags/v0.23.23.tar.gz
7
7
  Author: trustgraph.ai
8
8
  Author-email: security@trustgraph.ai
9
9
  Classifier: Programming Language :: Python :: 3
@@ -10,39 +10,42 @@ import pytesseract
10
10
  from pdf2image import convert_from_bytes
11
11
 
12
12
  from ... schema import Document, TextDocument, Metadata
13
- from ... schema import document_ingest_queue, text_ingest_queue
14
- from ... log_level import LogLevel
15
- from ... base import ConsumerProducer
13
+ from ... base import FlowProcessor, ConsumerSpec, ProducerSpec
16
14
 
17
- module = "ocr"
15
+ default_ident = "pdf-decoder"
18
16
 
19
- default_input_queue = document_ingest_queue
20
- default_output_queue = text_ingest_queue
21
- default_subscriber = module
22
-
23
- class Processor(ConsumerProducer):
17
+ class Processor(FlowProcessor):
24
18
 
25
19
  def __init__(self, **params):
26
20
 
27
- input_queue = params.get("input_queue", default_input_queue)
28
- output_queue = params.get("output_queue", default_output_queue)
29
- subscriber = params.get("subscriber", default_subscriber)
21
+ id = params.get("id", default_ident)
30
22
 
31
23
  super(Processor, self).__init__(
32
24
  **params | {
33
- "input_queue": input_queue,
34
- "output_queue": output_queue,
35
- "subscriber": subscriber,
36
- "input_schema": Document,
37
- "output_schema": TextDocument,
25
+ "id": id,
38
26
  }
39
27
  )
40
28
 
29
+ self.register_specification(
30
+ ConsumerSpec(
31
+ name = "input",
32
+ schema = Document,
33
+ handler = self.on_message,
34
+ )
35
+ )
36
+
37
+ self.register_specification(
38
+ ProducerSpec(
39
+ name = "output",
40
+ schema = TextDocument,
41
+ )
42
+ )
43
+
41
44
  print("PDF OCR inited")
42
45
 
43
- async def handle(self, msg):
46
+ async def on_message(self, msg, consumer, flow):
44
47
 
45
- print("PDF message received")
48
+ print("PDF message received", flush=True)
46
49
 
47
50
  v = msg.value()
48
51
 
@@ -65,19 +68,15 @@ class Processor(ConsumerProducer):
65
68
  text=text.encode("utf-8"),
66
69
  )
67
70
 
68
- await self.send(r)
71
+ await flow("output").send(r)
69
72
 
70
73
  print("Done.", flush=True)
71
74
 
72
75
  @staticmethod
73
76
  def add_args(parser):
74
-
75
- ConsumerProducer.add_args(
76
- parser, default_input_queue, default_subscriber,
77
- default_output_queue,
78
- )
77
+ FlowProcessor.add_args(parser)
79
78
 
80
79
  def run():
81
80
 
82
- Processor.launch(module, __doc__)
81
+ Processor.launch(default_ident, __doc__)
83
82
 
@@ -0,0 +1 @@
1
+ __version__ = "0.23.23"
@@ -1,9 +1,9 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: trustgraph-ocr
3
- Version: 0.23.22
3
+ Version: 0.23.23
4
4
  Summary: TrustGraph provides a means to run a pipeline of flexible AI processing components in a flexible means to achieve a processing pipeline.
5
5
  Home-page: https://github.com/trustgraph-ai/trustgraph
6
- Download-URL: https://github.com/trustgraph-ai/trustgraph/archive/refs/tags/v0.23.22.tar.gz
6
+ Download-URL: https://github.com/trustgraph-ai/trustgraph/archive/refs/tags/v0.23.23.tar.gz
7
7
  Author: trustgraph.ai
8
8
  Author-email: security@trustgraph.ai
9
9
  Classifier: Programming Language :: Python :: 3
@@ -1 +0,0 @@
1
- __version__ = "0.23.22"