trustgraph-ocr 0.23.21__tar.gz → 0.23.23__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {trustgraph-ocr-0.23.21 → trustgraph-ocr-0.23.23}/PKG-INFO +2 -2
- {trustgraph-ocr-0.23.21 → trustgraph-ocr-0.23.23}/trustgraph/decoding/ocr/pdf_decoder.py +25 -26
- trustgraph-ocr-0.23.23/trustgraph/ocr_version.py +1 -0
- {trustgraph-ocr-0.23.21 → trustgraph-ocr-0.23.23}/trustgraph_ocr.egg-info/PKG-INFO +2 -2
- trustgraph-ocr-0.23.21/trustgraph/ocr_version.py +0 -1
- {trustgraph-ocr-0.23.21 → trustgraph-ocr-0.23.23}/README.md +0 -0
- {trustgraph-ocr-0.23.21 → trustgraph-ocr-0.23.23}/scripts/pdf-ocr +0 -0
- {trustgraph-ocr-0.23.21 → trustgraph-ocr-0.23.23}/setup.cfg +0 -0
- {trustgraph-ocr-0.23.21 → trustgraph-ocr-0.23.23}/setup.py +0 -0
- {trustgraph-ocr-0.23.21 → trustgraph-ocr-0.23.23}/trustgraph/decoding/ocr/__init__.py +0 -0
- {trustgraph-ocr-0.23.21 → trustgraph-ocr-0.23.23}/trustgraph/decoding/ocr/__main__.py +0 -0
- {trustgraph-ocr-0.23.21 → trustgraph-ocr-0.23.23}/trustgraph_ocr.egg-info/SOURCES.txt +0 -0
- {trustgraph-ocr-0.23.21 → trustgraph-ocr-0.23.23}/trustgraph_ocr.egg-info/dependency_links.txt +0 -0
- {trustgraph-ocr-0.23.21 → trustgraph-ocr-0.23.23}/trustgraph_ocr.egg-info/requires.txt +0 -0
- {trustgraph-ocr-0.23.21 → trustgraph-ocr-0.23.23}/trustgraph_ocr.egg-info/top_level.txt +0 -0
@@ -1,9 +1,9 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: trustgraph-ocr
|
3
|
-
Version: 0.23.21
|
3
|
+
Version: 0.23.23
|
4
4
|
Summary: TrustGraph provides a means to run a pipeline of flexible AI processing components in a flexible means to achieve a processing pipeline.
|
5
5
|
Home-page: https://github.com/trustgraph-ai/trustgraph
|
6
|
-
Download-URL: https://github.com/trustgraph-ai/trustgraph/archive/refs/tags/v0.23.21.tar.gz
|
6
|
+
Download-URL: https://github.com/trustgraph-ai/trustgraph/archive/refs/tags/v0.23.23.tar.gz
|
7
7
|
Author: trustgraph.ai
|
8
8
|
Author-email: security@trustgraph.ai
|
9
9
|
Classifier: Programming Language :: Python :: 3
|
@@ -10,39 +10,42 @@ import pytesseract
|
|
10
10
|
from pdf2image import convert_from_bytes
|
11
11
|
|
12
12
|
from ... schema import Document, TextDocument, Metadata
|
13
|
-
from ...
|
14
|
-
from ... log_level import LogLevel
|
15
|
-
from ... base import ConsumerProducer
|
13
|
+
from ... base import FlowProcessor, ConsumerSpec, ProducerSpec
|
16
14
|
|
17
|
-
|
15
|
+
default_ident = "pdf-decoder"
|
18
16
|
|
19
|
-
|
20
|
-
default_output_queue = text_ingest_queue
|
21
|
-
default_subscriber = module
|
22
|
-
|
23
|
-
class Processor(ConsumerProducer):
|
17
|
+
class Processor(FlowProcessor):
|
24
18
|
|
25
19
|
def __init__(self, **params):
|
26
20
|
|
27
|
-
|
28
|
-
output_queue = params.get("output_queue", default_output_queue)
|
29
|
-
subscriber = params.get("subscriber", default_subscriber)
|
21
|
+
id = params.get("id", default_ident)
|
30
22
|
|
31
23
|
super(Processor, self).__init__(
|
32
24
|
**params | {
|
33
|
-
"
|
34
|
-
"output_queue": output_queue,
|
35
|
-
"subscriber": subscriber,
|
36
|
-
"input_schema": Document,
|
37
|
-
"output_schema": TextDocument,
|
25
|
+
"id": id,
|
38
26
|
}
|
39
27
|
)
|
40
28
|
|
29
|
+
self.register_specification(
|
30
|
+
ConsumerSpec(
|
31
|
+
name = "input",
|
32
|
+
schema = Document,
|
33
|
+
handler = self.on_message,
|
34
|
+
)
|
35
|
+
)
|
36
|
+
|
37
|
+
self.register_specification(
|
38
|
+
ProducerSpec(
|
39
|
+
name = "output",
|
40
|
+
schema = TextDocument,
|
41
|
+
)
|
42
|
+
)
|
43
|
+
|
41
44
|
print("PDF OCR inited")
|
42
45
|
|
43
|
-
async def
|
46
|
+
async def on_message(self, msg, consumer, flow):
|
44
47
|
|
45
|
-
print("PDF message received")
|
48
|
+
print("PDF message received", flush=True)
|
46
49
|
|
47
50
|
v = msg.value()
|
48
51
|
|
@@ -65,19 +68,15 @@ class Processor(ConsumerProducer):
|
|
65
68
|
text=text.encode("utf-8"),
|
66
69
|
)
|
67
70
|
|
68
|
-
await
|
71
|
+
await flow("output").send(r)
|
69
72
|
|
70
73
|
print("Done.", flush=True)
|
71
74
|
|
72
75
|
@staticmethod
|
73
76
|
def add_args(parser):
|
74
|
-
|
75
|
-
ConsumerProducer.add_args(
|
76
|
-
parser, default_input_queue, default_subscriber,
|
77
|
-
default_output_queue,
|
78
|
-
)
|
77
|
+
FlowProcessor.add_args(parser)
|
79
78
|
|
80
79
|
def run():
|
81
80
|
|
82
|
-
Processor.launch(
|
81
|
+
Processor.launch(default_ident, __doc__)
|
83
82
|
|
@@ -0,0 +1 @@
|
|
1
|
+
__version__ = "0.23.23"
|
@@ -1,9 +1,9 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: trustgraph-ocr
|
3
|
-
Version: 0.23.21
|
3
|
+
Version: 0.23.23
|
4
4
|
Summary: TrustGraph provides a means to run a pipeline of flexible AI processing components in a flexible means to achieve a processing pipeline.
|
5
5
|
Home-page: https://github.com/trustgraph-ai/trustgraph
|
6
|
-
Download-URL: https://github.com/trustgraph-ai/trustgraph/archive/refs/tags/v0.23.21.tar.gz
|
6
|
+
Download-URL: https://github.com/trustgraph-ai/trustgraph/archive/refs/tags/v0.23.23.tar.gz
|
7
7
|
Author: trustgraph.ai
|
8
8
|
Author-email: security@trustgraph.ai
|
9
9
|
Classifier: Programming Language :: Python :: 3
|
@@ -1 +0,0 @@
|
|
1
|
-
__version__ = "0.23.21"
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{trustgraph-ocr-0.23.21 → trustgraph-ocr-0.23.23}/trustgraph_ocr.egg-info/dependency_links.txt
RENAMED
File without changes
|
File without changes
|
File without changes
|