yomitoku 0.8.1__py3-none-any.whl → 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- yomitoku/cli/main.py +4 -3
- yomitoku/cli/mcp.py +165 -0
- yomitoku/export/export_csv.py +2 -2
- yomitoku/export/export_html.py +10 -5
- yomitoku/export/export_json.py +2 -2
- yomitoku/export/export_markdown.py +2 -2
- yomitoku/utils/misc.py +12 -0
- {yomitoku-0.8.1.dist-info → yomitoku-0.9.0.dist-info}/METADATA +3 -1
- {yomitoku-0.8.1.dist-info → yomitoku-0.9.0.dist-info}/RECORD +11 -10
- {yomitoku-0.8.1.dist-info → yomitoku-0.9.0.dist-info}/entry_points.txt +1 -0
- {yomitoku-0.8.1.dist-info → yomitoku-0.9.0.dist-info}/WHEEL +0 -0
yomitoku/cli/main.py
CHANGED
@@ -3,7 +3,6 @@ import os
|
|
3
3
|
import time
|
4
4
|
from pathlib import Path
|
5
5
|
|
6
|
-
import cv2
|
7
6
|
import torch
|
8
7
|
|
9
8
|
from ..constants import SUPPORT_OUTPUT_FORMAT
|
@@ -14,6 +13,8 @@ from ..utils.logger import set_logger
|
|
14
13
|
from ..export import save_csv, save_html, save_json, save_markdown
|
15
14
|
from ..export import convert_json, convert_csv, convert_html, convert_markdown
|
16
15
|
|
16
|
+
from ..utils.misc import save_image
|
17
|
+
|
17
18
|
logger = set_logger(__name__, "INFO")
|
18
19
|
|
19
20
|
|
@@ -94,7 +95,7 @@ def process_single_file(args, analyzer, path, format):
|
|
94
95
|
args.outdir, f"{dirname}_{filename}_p{page+1}_ocr.jpg"
|
95
96
|
)
|
96
97
|
|
97
|
-
|
98
|
+
save_image(ocr, out_path)
|
98
99
|
logger.info(f"Output file: {out_path}")
|
99
100
|
|
100
101
|
if layout is not None:
|
@@ -102,7 +103,7 @@ def process_single_file(args, analyzer, path, format):
|
|
102
103
|
args.outdir, f"{dirname}_{filename}_p{page+1}_layout.jpg"
|
103
104
|
)
|
104
105
|
|
105
|
-
|
106
|
+
save_image(layout, out_path)
|
106
107
|
logger.info(f"Output file: {out_path}")
|
107
108
|
|
108
109
|
out_path = os.path.join(args.outdir, f"{dirname}_{filename}_p{page+1}.{format}")
|
yomitoku/cli/mcp.py
ADDED
@@ -0,0 +1,165 @@
|
|
1
|
+
import json
|
2
|
+
import io
|
3
|
+
import csv
|
4
|
+
import os
|
5
|
+
from pathlib import Path
|
6
|
+
|
7
|
+
from mcp.server.fastmcp import Context, FastMCP
|
8
|
+
|
9
|
+
from yomitoku import DocumentAnalyzer
|
10
|
+
from yomitoku.data.functions import load_image, load_pdf
|
11
|
+
from yomitoku.export import convert_json, convert_markdown, convert_csv, convert_html
|
12
|
+
|
13
|
+
try:
|
14
|
+
RESOURCE_DIR = os.environ["RESOURCE_DIR"]
|
15
|
+
except KeyError:
|
16
|
+
raise ValueError("Environment variable 'RESOURCE_DIR' is not set.")
|
17
|
+
|
18
|
+
|
19
|
+
analyzer = None
|
20
|
+
|
21
|
+
|
22
|
+
async def load_analyzer(ctx: Context) -> DocumentAnalyzer:
    """
    Return the module-wide DocumentAnalyzer, constructing it on first use.

    The instance is stored in the module-level ``analyzer`` variable so that
    later calls reuse it instead of building a new one.

    Args:
        ctx (Context): MCP context used to emit an info message when the
            analyzer is actually constructed.

    Returns:
        DocumentAnalyzer: The shared analyzer instance.
    """
    global analyzer

    # Fast path: an earlier call already built the analyzer.
    if analyzer is not None:
        return analyzer

    await ctx.info("Load document analyzer")
    analyzer = DocumentAnalyzer(visualize=False, device="cuda")
    return analyzer
|
37
|
+
|
38
|
+
|
39
|
+
mcp = FastMCP("yomitoku")
|
40
|
+
|
41
|
+
|
42
|
+
# Output formats understood by process_ocr, in the order they are documented.
_SUPPORTED_OUTPUT_FORMATS = ("json", "markdown", "html", "csv")


def _resolve_resource_path(filename: str) -> str:
    """Return the absolute path of ``filename`` inside RESOURCE_DIR.

    The file name is supplied by the MCP client, so it is treated as
    untrusted: names that would land outside the resource directory
    (``../`` components or absolute paths) are rejected.

    Raises:
        ValueError: If the resulting path is not located under RESOURCE_DIR.
    """
    base_dir = os.path.abspath(RESOURCE_DIR)
    candidate = os.path.abspath(os.path.join(base_dir, filename))
    try:
        inside = os.path.commonpath([base_dir, candidate]) == base_dir
    except ValueError:
        # commonpath raises when the paths cannot be compared (e.g. different
        # drives on Windows); such a path is certainly outside the directory.
        inside = False
    if not inside:
        raise ValueError(
            f"Invalid filename: {filename}."
            " The file must be located inside the resource directory."
        )
    return candidate


@mcp.tool()
async def process_ocr(ctx: Context, filename: str, output_format: str) -> str:
    """
    Perform OCR on the specified file in the resource directory and convert
    the results to the desired format.

    Args:
        ctx (Context): The context in which the OCR processing is executed.
        filename (str): The name of the file to process in the resource directory.
        output_format (str): The desired format for the output. The available options are:
            - json: Outputs the text as structured data along with positional information.
            - markdown: Outputs texts and tables in Markdown format.
            - html: Outputs texts and tables in HTML format.
            - csv: Outputs texts and tables in CSV format.

    Returns:
        str: The OCR results converted to the specified format.

    Raises:
        ValueError: If ``output_format`` is not supported, or if ``filename``
            points outside the resource directory.
    """
    # Validate the cheap inputs first so that a bad request fails before the
    # analyzer is loaded and every page is processed.
    if output_format not in _SUPPORTED_OUTPUT_FORMATS:
        raise ValueError(
            f"Unsupported output format: {output_format}."
            " Supported formats are json, markdown, html or csv."
        )
    file_path = _resolve_resource_path(filename)

    analyzer = await load_analyzer(ctx)

    await ctx.info("Start ocr processing")

    if Path(file_path).suffix[1:].lower() == "pdf":
        imgs = load_pdf(file_path)
    else:
        imgs = load_image(file_path)

    results = []
    for page, img in enumerate(imgs):
        analyzer.img = img
        result, _, _ = await analyzer.run(img)
        results.append(result)
        await ctx.report_progress(page + 1, len(imgs))

    if output_format == "json":
        return json.dumps(
            [
                convert_json(
                    result,
                    out_path=None,
                    ignore_line_break=True,
                    img=img,
                    export_figure=False,
                    figure_dir=None,
                ).model_dump()
                for img, result in zip(imgs, results)
            ],
            ensure_ascii=False,
            sort_keys=True,
            separators=(",", ": "),
        )

    if output_format == "markdown":
        # convert_markdown returns a tuple; only its first item (the text) is used.
        return "\n".join(
            convert_markdown(
                result,
                out_path=None,
                ignore_line_break=True,
                img=img,
                export_figure=False,
            )[0]
            for img, result in zip(imgs, results)
        )

    if output_format == "html":
        # convert_html returns (formatted_html, elements); keep the markup only.
        return "\n".join(
            convert_html(
                result,
                out_path=None,
                ignore_line_break=True,
                img=img,
                export_figure=False,
                export_figure_letter="",
            )[0]
            for img, result in zip(imgs, results)
        )

    # Remaining supported format: csv.
    output = io.StringIO()
    writer = csv.writer(output, quoting=csv.QUOTE_MINIMAL)
    for img, result in zip(imgs, results):
        elements = convert_csv(
            result,
            out_path=None,
            ignore_line_break=True,
            img=img,
            export_figure=False,
        )
        for element in elements:
            if element["type"] == "table":
                writer.writerows(element["element"])
            else:
                writer.writerow([element["element"]])
            # Blank row separates consecutive elements.
            writer.writerow([""])
    return output.getvalue()
|
144
|
+
|
145
|
+
|
146
|
+
@mcp.resource("file://list")
async def get_file_list() -> list[str]:
    """
    List the entries of the resource directory.

    Returns:
        list[str]: The names returned by ``os.listdir`` for RESOURCE_DIR.
    """
    entries = os.listdir(RESOURCE_DIR)
    return entries
|
155
|
+
|
156
|
+
|
157
|
+
def run_mcp_server():
    """
    Start the yomitoku MCP server using the stdio transport.
    """
    transport = "stdio"
    mcp.run(transport=transport)
|
162
|
+
|
163
|
+
|
164
|
+
if __name__ == "__main__":
|
165
|
+
run_mcp_server()
|
yomitoku/export/export_csv.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
import csv
|
2
2
|
import os
|
3
3
|
|
4
|
-
import
|
4
|
+
from ..utils.misc import save_image
|
5
5
|
|
6
6
|
|
7
7
|
def table_to_csv(table, ignore_line_break):
|
@@ -54,7 +54,7 @@ def save_figure(
|
|
54
54
|
filename = os.path.splitext(os.path.basename(out_path))[0]
|
55
55
|
figure_name = f"{filename}_figure_{i}.png"
|
56
56
|
figure_path = os.path.join(save_dir, figure_name)
|
57
|
-
|
57
|
+
save_image(figure_img, figure_path)
|
58
58
|
|
59
59
|
|
60
60
|
def convert_csv(
|
yomitoku/export/export_html.py
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
import os
|
2
2
|
import re
|
3
3
|
from html import escape
|
4
|
-
|
5
|
-
import cv2
|
6
4
|
from lxml import etree, html
|
7
5
|
|
6
|
+
from ..utils.misc import save_image
|
7
|
+
|
8
8
|
|
9
9
|
def convert_text_to_html(text):
|
10
10
|
"""
|
@@ -122,7 +122,7 @@ def figure_to_html(
|
|
122
122
|
filename = os.path.splitext(os.path.basename(out_path))[0]
|
123
123
|
figure_name = f"{filename}_figure_{i}.png"
|
124
124
|
figure_path = os.path.join(save_dir, figure_name)
|
125
|
-
|
125
|
+
save_image(figure_img, figure_path)
|
126
126
|
|
127
127
|
elements.append(
|
128
128
|
{
|
@@ -180,8 +180,13 @@ def convert_html(
|
|
180
180
|
elements = sorted(elements, key=lambda x: x["order"])
|
181
181
|
|
182
182
|
html_string = "".join([element["html"] for element in elements])
|
183
|
-
|
184
|
-
|
183
|
+
if not len(html_string) == 0:
|
184
|
+
parsed_html = html.fromstring(html_string)
|
185
|
+
formatted_html = etree.tostring(
|
186
|
+
parsed_html, pretty_print=True, encoding="unicode"
|
187
|
+
)
|
188
|
+
else:
|
189
|
+
formatted_html = ""
|
185
190
|
|
186
191
|
return formatted_html, elements
|
187
192
|
|
yomitoku/export/export_json.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
import json
|
2
2
|
import os
|
3
3
|
|
4
|
-
import
|
4
|
+
from ..utils.misc import save_image
|
5
5
|
|
6
6
|
|
7
7
|
def paragraph_to_json(paragraph, ignore_line_break):
|
@@ -33,7 +33,7 @@ def save_figure(
|
|
33
33
|
filename = os.path.splitext(os.path.basename(out_path))[0]
|
34
34
|
figure_name = f"{filename}_figure_{i}.png"
|
35
35
|
figure_path = os.path.join(save_dir, figure_name)
|
36
|
-
|
36
|
+
save_image(figure_img, figure_path)
|
37
37
|
|
38
38
|
|
39
39
|
def convert_json(inputs, out_path, ignore_line_break, img, export_figure, figure_dir):
|
@@ -1,7 +1,7 @@
|
|
1
1
|
import os
|
2
2
|
import re
|
3
3
|
|
4
|
-
import
|
4
|
+
from ..utils.misc import save_image
|
5
5
|
|
6
6
|
|
7
7
|
def escape_markdown_special_chars(text):
|
@@ -89,7 +89,7 @@ def figure_to_md(
|
|
89
89
|
filename = os.path.splitext(os.path.basename(out_path))[0]
|
90
90
|
figure_name = f"{filename}_figure_{i}.png"
|
91
91
|
figure_path = os.path.join(save_dir, figure_name)
|
92
|
-
|
92
|
+
save_image(figure_img, figure_path)
|
93
93
|
|
94
94
|
elements.append(
|
95
95
|
{
|
yomitoku/utils/misc.py
CHANGED
@@ -1,3 +1,6 @@
|
|
1
|
+
import cv2
|
2
|
+
|
3
|
+
|
1
4
|
def load_charset(charset_path):
|
2
5
|
with open(charset_path, "r", encoding="utf-8") as f:
|
3
6
|
charset = f.read()
|
@@ -9,6 +12,15 @@ def filter_by_flag(elements, flags):
|
|
9
12
|
return [element for element, flag in zip(elements, flags) if flag]
|
10
13
|
|
11
14
|
|
15
|
+
def save_image(img, path):
    """
    Encode ``img`` with OpenCV and write the encoded bytes to ``path``.

    The image format follows the extension of ``path`` (for example ``.png``
    or ``.jpg``), so the file content matches its name. When the path has no
    extension, or OpenCV has no encoder for it, the image is encoded as JPEG.

    Args:
        img: Image accepted by ``cv2.imencode``.
        path: Destination file path; the file is opened in binary write mode.

    Raises:
        ValueError: If OpenCV reports that the image could not be encoded.
    """
    # Local import: only ``cv2`` is visibly imported at the top of this module.
    import os

    ext = os.path.splitext(path)[1].lower() or ".jpg"
    try:
        success, buffer = cv2.imencode(ext, img)
    except cv2.error:
        if ext == ".jpg":
            raise
        # No encoder for this extension: fall back to JPEG so such paths
        # still produce a file.
        success, buffer = cv2.imencode(".jpg", img)

    if not success:
        raise ValueError("Failed to encode image")

    # The bytes are written through Python's own file API rather than an
    # OpenCV writer.
    with open(path, "wb") as f:
        f.write(buffer.tobytes())
|
22
|
+
|
23
|
+
|
12
24
|
def calc_overlap_ratio(rect_a, rect_b):
|
13
25
|
intersection = calc_intersection(rect_a, rect_b)
|
14
26
|
if intersection is None:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: yomitoku
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.9.0
|
4
4
|
Summary: Yomitoku is an AI-powered document image analysis package designed specifically for the Japanese language.
|
5
5
|
Author-email: Kotaro Kinoshita <kotaro.kinoshita@mlism.com>
|
6
6
|
License: CC BY-NC-SA 4.0
|
@@ -19,6 +19,8 @@ Requires-Dist: shapely>=2.0.6
|
|
19
19
|
Requires-Dist: timm>=1.0.11
|
20
20
|
Requires-Dist: torch>=2.5.0
|
21
21
|
Requires-Dist: torchvision>=0.20.0
|
22
|
+
Provides-Extra: mcp
|
23
|
+
Requires-Dist: mcp[cli]>=1.6.0; extra == 'mcp'
|
22
24
|
Description-Content-Type: text/markdown
|
23
25
|
|
24
26
|
日本語版 | [English](README_EN.md)
|
@@ -10,7 +10,8 @@ yomitoku/table_structure_recognizer.py,sha256=tHjex6deT_FjRK5ePz9bUXA_QIhgv_vYtK
|
|
10
10
|
yomitoku/text_detector.py,sha256=6IwEJJKp_F8YH0Oki0QV-Mqi--P2LGbNKo-_kxBB_eo,4383
|
11
11
|
yomitoku/text_recognizer.py,sha256=eaxozNu-Ms6iv8efbKZzn8pJNW1Wo4f86bGhzSMtv3s,5992
|
12
12
|
yomitoku/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
13
|
-
yomitoku/cli/main.py,sha256=
|
13
|
+
yomitoku/cli/main.py,sha256=9X8QWwsTAv82uNn5Ns9T_laGJPKHDyeEwenaQxnAmn4,12062
|
14
|
+
yomitoku/cli/mcp.py,sha256=5h704SsUGNAqVnoO_5S-HY2-bApy_Rf8ajDxl1pkT2k,4888
|
14
15
|
yomitoku/configs/__init__.py,sha256=x5-ccjGiP6xxRtDPT7f1Enl7SsE0hSk0G8f7eF9V85I,886
|
15
16
|
yomitoku/configs/cfg_layout_parser_rtdtrv2.py,sha256=8PRxB2Ar9UF7-DLtbgSokhrzdXb0veWI6Wc-X8qigRw,2329
|
16
17
|
yomitoku/configs/cfg_layout_parser_rtdtrv2_v2.py,sha256=nMrL3uvoVmyzZ909Bz2zmfp9b6AEBLKhIprOvQ5yiQE,2324
|
@@ -24,10 +25,10 @@ yomitoku/data/__init__.py,sha256=KAofFc9rk9ZdTKBjemu9RM8Vj9XnKbWC2MPZ2RWtOdE,82
|
|
24
25
|
yomitoku/data/dataset.py,sha256=-I4f-FDtgsPnJ2MnXB7FtwihMW3koDaSI1OEoqKneIg,1014
|
25
26
|
yomitoku/data/functions.py,sha256=HIrffs0zCJOq8IvQiI_z-b4MwTb-H2wmZjEE_5VpxFs,8040
|
26
27
|
yomitoku/export/__init__.py,sha256=gmlikMHRXfzfJ_8q4fyDlnpGms-x1oggQOwJEWHMgBU,508
|
27
|
-
yomitoku/export/export_csv.py,sha256=
|
28
|
-
yomitoku/export/export_html.py,sha256=
|
29
|
-
yomitoku/export/export_json.py,sha256=
|
30
|
-
yomitoku/export/export_markdown.py,sha256=
|
28
|
+
yomitoku/export/export_csv.py,sha256=VY8mntUCPDbDco_dyvq5O0_Q4wga9_GTyjHCS-y4UiQ,3399
|
29
|
+
yomitoku/export/export_html.py,sha256=LQDyZgbzmI0qJ0-FEK-54r9816H3L9hD10ChMcw0KyA,5620
|
30
|
+
yomitoku/export/export_json.py,sha256=iNG37tdIuYG2x3NiiZemKaB6-X45WrhVPZhbX7RUzRI,2410
|
31
|
+
yomitoku/export/export_markdown.py,sha256=KrdxDmKzVP_LbTKuDNGGsT31QOPKVsNNlb6wtLEW-1Q,4705
|
31
32
|
yomitoku/models/__init__.py,sha256=Enxq9sjJWusZuxecTori8IQa8NEYKaiiptDluHX1avg,144
|
32
33
|
yomitoku/models/dbnet_plus.py,sha256=jeWJZm0ihbxoJeAXBFK7uVIwoosx2IUNk7Ut5wRH0vA,7998
|
33
34
|
yomitoku/models/parseq.py,sha256=psCPjP3eKjOFAUZJPQQhbD0nWEV5FeOZ0tTK27Rvvbw,8748
|
@@ -49,9 +50,9 @@ yomitoku/resource/charset.txt,sha256=sU91kSi-9Wk4733bCXy4j_UDmvcsj96sHOq1ppUJlOY
|
|
49
50
|
yomitoku/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
50
51
|
yomitoku/utils/graph.py,sha256=LKNB8ZhSQwOZMfeAimPMF5UCVVr2ZaUWoGDkz8z-uGU,456
|
51
52
|
yomitoku/utils/logger.py,sha256=uOmtQDr0A0JD7wyFshedL08BiNrQorHnpktRXba8bjU,424
|
52
|
-
yomitoku/utils/misc.py,sha256=
|
53
|
+
yomitoku/utils/misc.py,sha256=cIUrvSJwfWwTui7ueYistf9XPapPR3XgqD2wQjWit40,2901
|
53
54
|
yomitoku/utils/visualizer.py,sha256=DjDwHiAu1iFRKh96H3Egq4vuI2s_-9dLCDeykhKi8jo,5251
|
54
|
-
yomitoku-0.
|
55
|
-
yomitoku-0.
|
56
|
-
yomitoku-0.
|
57
|
-
yomitoku-0.
|
55
|
+
yomitoku-0.9.0.dist-info/METADATA,sha256=vUbrNm2w-7OIqEEXNzFQBDm8y57mTuh1UeJYHBGRo9U,8622
|
56
|
+
yomitoku-0.9.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
57
|
+
yomitoku-0.9.0.dist-info/entry_points.txt,sha256=N3PzzSo-fdgri5liPpZ3ItMmRH6oVX14pIU_5pUJiAs,99
|
58
|
+
yomitoku-0.9.0.dist-info/RECORD,,
|
File without changes
|