yomitoku 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
yomitoku/cli/main.py CHANGED
@@ -3,7 +3,6 @@ import os
3
3
  import time
4
4
  from pathlib import Path
5
5
 
6
- import cv2
7
6
  import torch
8
7
 
9
8
  from ..constants import SUPPORT_OUTPUT_FORMAT
@@ -14,6 +13,8 @@ from ..utils.logger import set_logger
14
13
  from ..export import save_csv, save_html, save_json, save_markdown
15
14
  from ..export import convert_json, convert_csv, convert_html, convert_markdown
16
15
 
16
+ from ..utils.misc import save_image
17
+
17
18
  logger = set_logger(__name__, "INFO")
18
19
 
19
20
 
@@ -94,7 +95,7 @@ def process_single_file(args, analyzer, path, format):
94
95
  args.outdir, f"{dirname}_{filename}_p{page+1}_ocr.jpg"
95
96
  )
96
97
 
97
- cv2.imwrite(out_path, ocr)
98
+ save_image(ocr, out_path)
98
99
  logger.info(f"Output file: {out_path}")
99
100
 
100
101
  if layout is not None:
@@ -102,7 +103,7 @@ def process_single_file(args, analyzer, path, format):
102
103
  args.outdir, f"{dirname}_{filename}_p{page+1}_layout.jpg"
103
104
  )
104
105
 
105
- cv2.imwrite(out_path, layout)
106
+ save_image(layout, out_path)
106
107
  logger.info(f"Output file: {out_path}")
107
108
 
108
109
  out_path = os.path.join(args.outdir, f"{dirname}_{filename}_p{page+1}.{format}")
yomitoku/cli/mcp.py ADDED
@@ -0,0 +1,165 @@
1
+ import json
2
+ import io
3
+ import csv
4
+ import os
5
+ from pathlib import Path
6
+
7
+ from mcp.server.fastmcp import Context, FastMCP
8
+
9
+ from yomitoku import DocumentAnalyzer
10
+ from yomitoku.data.functions import load_image, load_pdf
11
+ from yomitoku.export import convert_json, convert_markdown, convert_csv, convert_html
12
+
13
# The server only serves files from one directory, taken from the
# environment; fail at import time when it is not configured.
try:
    RESOURCE_DIR = os.environ["RESOURCE_DIR"]
except KeyError:
    # "from None" hides the internal KeyError: the message below already
    # names the missing variable, so the chained traceback adds only noise.
    raise ValueError("Environment variable 'RESOURCE_DIR' is not set.") from None
17
+
18
+
19
+ analyzer = None
20
+
21
+
22
async def load_analyzer(ctx: Context) -> DocumentAnalyzer:
    """
    Return the module-wide DocumentAnalyzer, creating it on first use.

    Args:
        ctx (Context): Request context; used to emit an info message when
            the analyzer has to be constructed.

    Returns:
        DocumentAnalyzer: The instance stored in the module-level
            ``analyzer`` variable.
    """
    global analyzer

    # Already built by an earlier call: reuse it.
    if analyzer is not None:
        return analyzer

    await ctx.info("Load document analyzer")
    analyzer = DocumentAnalyzer(visualize=False, device="cuda")
    return analyzer
37
+
38
+
39
+ mcp = FastMCP("yomitoku")
40
+
41
+
42
@mcp.tool()
async def process_ocr(ctx: Context, filename: str, output_format: str) -> str:
    """
    Perform OCR on the specified file in the resource directory and convert
    the results to the desired format.

    Args:
        ctx (Context): The context in which the OCR processing is executed.
        filename (str): The name of the file to process in the resource directory.
        output_format (str): The desired format for the output. The available options are:
            - json: Outputs the text as structured data along with positional information.
            - markdown: Outputs texts and tables in Markdown format.
            - html: Outputs texts and tables in HTML format.
            - csv: Outputs texts and tables in CSV format.

    Returns:
        str: The OCR results converted to the specified format.

    Raises:
        ValueError: If ``output_format`` is not one of the supported formats,
            or if ``filename`` points outside the resource directory.
    """
    # Validate the request before loading the model and running OCR, so a
    # bad format does not cost a full analysis pass.
    if output_format not in ("json", "markdown", "html", "csv"):
        raise ValueError(
            f"Unsupported output format: {output_format}."
            " Supported formats are json, markdown, html or csv."
        )

    # ``filename`` is supplied by the MCP client. Normalise the joined path
    # and refuse anything ("../", absolute paths) that leaves RESOURCE_DIR.
    base_dir = os.path.abspath(RESOURCE_DIR)
    file_path = os.path.abspath(os.path.join(base_dir, filename))
    try:
        inside_base = os.path.commonpath([base_dir, file_path]) == base_dir
    except ValueError:
        # commonpath raises when the paths cannot share a prefix
        # (for example, different drives on Windows).
        inside_base = False
    if not inside_base:
        raise ValueError(f"File is outside the resource directory: {filename}")

    analyzer = await load_analyzer(ctx)

    await ctx.info("Start ocr processing")

    if Path(file_path).suffix[1:].lower() in ["pdf"]:
        imgs = load_pdf(file_path)
    else:
        imgs = load_image(file_path)

    results = []
    for page, img in enumerate(imgs):
        analyzer.img = img
        result, _, _ = await analyzer.run(img)
        results.append(result)
        await ctx.report_progress(page + 1, len(imgs))

    if output_format == "json":
        return json.dumps(
            [
                convert_json(
                    result,
                    out_path=None,
                    ignore_line_break=True,
                    img=img,
                    export_figure=False,
                    figure_dir=None,
                ).model_dump()
                for img, result in zip(imgs, results)
            ],
            ensure_ascii=False,
            sort_keys=True,
            separators=(",", ": "),
        )

    if output_format == "markdown":
        return "\n".join(
            [
                convert_markdown(
                    result,
                    out_path=None,
                    ignore_line_break=True,
                    img=img,
                    export_figure=False,
                )[0]
                for img, result in zip(imgs, results)
            ]
        )

    if output_format == "html":
        return "\n".join(
            [
                convert_html(
                    result,
                    out_path=None,
                    ignore_line_break=True,
                    img=img,
                    export_figure=False,
                    export_figure_letter="",
                )[0]
                for img, result in zip(imgs, results)
            ]
        )

    # Only "csv" remains: the format was validated at the top.
    output = io.StringIO()
    writer = csv.writer(output, quoting=csv.QUOTE_MINIMAL)
    for img, result in zip(imgs, results):
        elements = convert_csv(
            result,
            out_path=None,
            ignore_line_break=True,
            img=img,
            export_figure=False,
        )
        for element in elements:
            if element["type"] == "table":
                writer.writerows(element["element"])
            else:
                writer.writerow([element["element"]])
            # Blank row after each element.
            writer.writerow([""])
    return output.getvalue()
144
+
145
+
146
@mcp.resource("file://list")
async def get_file_list() -> list[str]:
    """
    List the entries of the resource directory.

    Returns:
        list[str]: The names ``os.listdir`` reports for the resource
            directory.
    """
    entries = os.listdir(RESOURCE_DIR)
    return entries
155
+
156
+
157
def run_mcp_server():
    """
    Start the MCP server on the stdio transport.
    """
    transport = "stdio"
    mcp.run(transport=transport)
162
+
163
+
164
+ if __name__ == "__main__":
165
+ run_mcp_server()
@@ -27,7 +27,7 @@ class Data:
27
27
  @dataclass
28
28
  class PostProcess:
29
29
  min_size: int = 2
30
- thresh: float = 0.2
30
+ thresh: float = 0.15
31
31
  box_thresh: float = 0.5
32
32
  max_candidates: int = 1500
33
33
  unclip_ratio: float = 7.0
@@ -27,10 +27,10 @@ class Data:
27
27
  @dataclass
28
28
  class PostProcess:
29
29
  min_size: int = 2
30
- thresh: float = 0.4
30
+ thresh: float = 0.2
31
31
  box_thresh: float = 0.5
32
32
  max_candidates: int = 1500
33
- unclip_ratio: float = 6.0
33
+ unclip_ratio: float = 5.0
34
34
 
35
35
 
36
36
  @dataclass
@@ -1,7 +1,7 @@
1
1
  import csv
2
2
  import os
3
3
 
4
- import cv2
4
+ from ..utils.misc import save_image
5
5
 
6
6
 
7
7
  def table_to_csv(table, ignore_line_break):
@@ -54,7 +54,7 @@ def save_figure(
54
54
  filename = os.path.splitext(os.path.basename(out_path))[0]
55
55
  figure_name = f"{filename}_figure_{i}.png"
56
56
  figure_path = os.path.join(save_dir, figure_name)
57
- cv2.imwrite(figure_path, figure_img)
57
+ save_image(figure_img, figure_path)
58
58
 
59
59
 
60
60
  def convert_csv(
@@ -1,10 +1,10 @@
1
1
  import os
2
2
  import re
3
3
  from html import escape
4
-
5
- import cv2
6
4
  from lxml import etree, html
7
5
 
6
+ from ..utils.misc import save_image
7
+
8
8
 
9
9
  def convert_text_to_html(text):
10
10
  """
@@ -122,7 +122,7 @@ def figure_to_html(
122
122
  filename = os.path.splitext(os.path.basename(out_path))[0]
123
123
  figure_name = f"{filename}_figure_{i}.png"
124
124
  figure_path = os.path.join(save_dir, figure_name)
125
- cv2.imwrite(figure_path, figure_img)
125
+ save_image(figure_img, figure_path)
126
126
 
127
127
  elements.append(
128
128
  {
@@ -180,8 +180,13 @@ def convert_html(
180
180
  elements = sorted(elements, key=lambda x: x["order"])
181
181
 
182
182
  html_string = "".join([element["html"] for element in elements])
183
- parsed_html = html.fromstring(html_string)
184
- formatted_html = etree.tostring(parsed_html, pretty_print=True, encoding="unicode")
183
+ if not len(html_string) == 0:
184
+ parsed_html = html.fromstring(html_string)
185
+ formatted_html = etree.tostring(
186
+ parsed_html, pretty_print=True, encoding="unicode"
187
+ )
188
+ else:
189
+ formatted_html = ""
185
190
 
186
191
  return formatted_html, elements
187
192
 
@@ -1,7 +1,7 @@
1
1
  import json
2
2
  import os
3
3
 
4
- import cv2
4
+ from ..utils.misc import save_image
5
5
 
6
6
 
7
7
  def paragraph_to_json(paragraph, ignore_line_break):
@@ -33,7 +33,7 @@ def save_figure(
33
33
  filename = os.path.splitext(os.path.basename(out_path))[0]
34
34
  figure_name = f"{filename}_figure_{i}.png"
35
35
  figure_path = os.path.join(save_dir, figure_name)
36
- cv2.imwrite(figure_path, figure_img)
36
+ save_image(figure_img, figure_path)
37
37
 
38
38
 
39
39
  def convert_json(inputs, out_path, ignore_line_break, img, export_figure, figure_dir):
@@ -1,7 +1,7 @@
1
1
  import os
2
2
  import re
3
3
 
4
- import cv2
4
+ from ..utils.misc import save_image
5
5
 
6
6
 
7
7
  def escape_markdown_special_chars(text):
@@ -89,7 +89,7 @@ def figure_to_md(
89
89
  filename = os.path.splitext(os.path.basename(out_path))[0]
90
90
  figure_name = f"{filename}_figure_{i}.png"
91
91
  figure_path = os.path.join(save_dir, figure_name)
92
- cv2.imwrite(figure_path, figure_img)
92
+ save_image(figure_img, figure_path)
93
93
 
94
94
  elements.append(
95
95
  {
yomitoku/utils/misc.py CHANGED
@@ -1,3 +1,6 @@
1
+ import cv2
2
+
3
+
1
4
  def load_charset(charset_path):
2
5
  with open(charset_path, "r", encoding="utf-8") as f:
3
6
  charset = f.read()
@@ -9,6 +12,15 @@ def filter_by_flag(elements, flags):
9
12
  return [element for element, flag in zip(elements, flags) if flag]
10
13
 
11
14
 
15
def save_image(img, path):
    """
    Encode an image in memory and write the encoded bytes to ``path``.

    The encoder is chosen from the extension of ``path`` so the file
    contents match its name (a ``.png`` path gets PNG data). Paths without
    an extension are encoded as JPEG.

    Args:
        img: Image to encode; passed unchanged to ``cv2.imencode``.
        path: Destination file path.

    Raises:
        ValueError: If ``cv2.imencode`` reports that encoding failed.
    """
    # Local import keeps this function self-contained.
    import os

    # NOTE(review): an extension OpenCV has no encoder for is presumably
    # reported by cv2.imencode itself; confirm against the OpenCV docs.
    ext = os.path.splitext(path)[1] or ".jpg"

    success, buffer = cv2.imencode(ext, img)
    if not success:
        raise ValueError("Failed to encode image")

    # The bytes are written with Python's own file API, not by OpenCV.
    with open(path, "wb") as f:
        f.write(buffer.tobytes())
22
+
23
+
12
24
  def calc_overlap_ratio(rect_a, rect_b):
13
25
  intersection = calc_intersection(rect_a, rect_b)
14
26
  if intersection is None:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: yomitoku
3
- Version: 0.8.0
3
+ Version: 0.9.0
4
4
  Summary: Yomitoku is an AI-powered document image analysis package designed specifically for the Japanese language.
5
5
  Author-email: Kotaro Kinoshita <kotaro.kinoshita@mlism.com>
6
6
  License: CC BY-NC-SA 4.0
@@ -19,6 +19,8 @@ Requires-Dist: shapely>=2.0.6
19
19
  Requires-Dist: timm>=1.0.11
20
20
  Requires-Dist: torch>=2.5.0
21
21
  Requires-Dist: torchvision>=0.20.0
22
+ Provides-Extra: mcp
23
+ Requires-Dist: mcp[cli]>=1.6.0; extra == 'mcp'
22
24
  Description-Content-Type: text/markdown
23
25
 
24
26
  日本語版 | [English](README_EN.md)
@@ -10,13 +10,14 @@ yomitoku/table_structure_recognizer.py,sha256=tHjex6deT_FjRK5ePz9bUXA_QIhgv_vYtK
10
10
  yomitoku/text_detector.py,sha256=6IwEJJKp_F8YH0Oki0QV-Mqi--P2LGbNKo-_kxBB_eo,4383
11
11
  yomitoku/text_recognizer.py,sha256=eaxozNu-Ms6iv8efbKZzn8pJNW1Wo4f86bGhzSMtv3s,5992
12
12
  yomitoku/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
- yomitoku/cli/main.py,sha256=jQCSwHw4oOwLQjARvaIO1yoSjz-2Rdb9c3DNShLS5OE,12038
13
+ yomitoku/cli/main.py,sha256=9X8QWwsTAv82uNn5Ns9T_laGJPKHDyeEwenaQxnAmn4,12062
14
+ yomitoku/cli/mcp.py,sha256=5h704SsUGNAqVnoO_5S-HY2-bApy_Rf8ajDxl1pkT2k,4888
14
15
  yomitoku/configs/__init__.py,sha256=x5-ccjGiP6xxRtDPT7f1Enl7SsE0hSk0G8f7eF9V85I,886
15
16
  yomitoku/configs/cfg_layout_parser_rtdtrv2.py,sha256=8PRxB2Ar9UF7-DLtbgSokhrzdXb0veWI6Wc-X8qigRw,2329
16
17
  yomitoku/configs/cfg_layout_parser_rtdtrv2_v2.py,sha256=nMrL3uvoVmyzZ909Bz2zmfp9b6AEBLKhIprOvQ5yiQE,2324
17
18
  yomitoku/configs/cfg_table_structure_recognizer_rtdtrv2.py,sha256=o70GMHD8k-zeBeJtuhPS8x7vVB-ffucnJXeSyn-0AXo,2116
18
- yomitoku/configs/cfg_text_detector_dbnet.py,sha256=U9k48PON7haoOaytiELhbZRpv9RMiUm6nnfHmdxIa9Q,1153
19
- yomitoku/configs/cfg_text_detector_dbnet_v2.py,sha256=PzdV6-f75ba-KBEBcPxyo9STWQ6m5-1Rl3MFBLl2TSc,1148
19
+ yomitoku/configs/cfg_text_detector_dbnet.py,sha256=8FVyUr9JiZPEFpNIjt0i5Zlw_fo0DMUNqDCQKOiZHDY,1154
20
+ yomitoku/configs/cfg_text_detector_dbnet_v2.py,sha256=YMhH0EPlRFgq9Fj10rML1wOlVw2LgA9sGrABYHT3gLM,1148
20
21
  yomitoku/configs/cfg_text_recognizer_parseq.py,sha256=hpFs3nKqh4XdU3BZMTultegtLEGahEsCaZdjfKC_MO8,1247
21
22
  yomitoku/configs/cfg_text_recognizer_parseq_small.py,sha256=uCm_VC_G79IbZpOiK8fgYzAJ4b98H5pf328wyQomtfo,1259
22
23
  yomitoku/configs/cfg_text_recognizer_parseq_v2.py,sha256=GfHzbByOKjH21PRTxT8x_fU4r4Mda6F750Z8pjNeb8g,1249
@@ -24,10 +25,10 @@ yomitoku/data/__init__.py,sha256=KAofFc9rk9ZdTKBjemu9RM8Vj9XnKbWC2MPZ2RWtOdE,82
24
25
  yomitoku/data/dataset.py,sha256=-I4f-FDtgsPnJ2MnXB7FtwihMW3koDaSI1OEoqKneIg,1014
25
26
  yomitoku/data/functions.py,sha256=HIrffs0zCJOq8IvQiI_z-b4MwTb-H2wmZjEE_5VpxFs,8040
26
27
  yomitoku/export/__init__.py,sha256=gmlikMHRXfzfJ_8q4fyDlnpGms-x1oggQOwJEWHMgBU,508
27
- yomitoku/export/export_csv.py,sha256=4DT5Nf4FdeGP0olIzv1ypBlswkZSdMB4MeQOgYWe8uk,3375
28
- yomitoku/export/export_html.py,sha256=syzAapHcUHcUlabmZcQdWiNy2NrRs7LPzA_x39pFtfQ,5494
29
- yomitoku/export/export_json.py,sha256=6cSXSsyEVJ5Rw2nKSUOcW8_XlGmSLWlWQWCBNmRKsps,2386
30
- yomitoku/export/export_markdown.py,sha256=7Jib-YXOw70H46kvNc6z0_3LFwX9iwp1eXxsGeylF0I,4681
28
+ yomitoku/export/export_csv.py,sha256=VY8mntUCPDbDco_dyvq5O0_Q4wga9_GTyjHCS-y4UiQ,3399
29
+ yomitoku/export/export_html.py,sha256=LQDyZgbzmI0qJ0-FEK-54r9816H3L9hD10ChMcw0KyA,5620
30
+ yomitoku/export/export_json.py,sha256=iNG37tdIuYG2x3NiiZemKaB6-X45WrhVPZhbX7RUzRI,2410
31
+ yomitoku/export/export_markdown.py,sha256=KrdxDmKzVP_LbTKuDNGGsT31QOPKVsNNlb6wtLEW-1Q,4705
31
32
  yomitoku/models/__init__.py,sha256=Enxq9sjJWusZuxecTori8IQa8NEYKaiiptDluHX1avg,144
32
33
  yomitoku/models/dbnet_plus.py,sha256=jeWJZm0ihbxoJeAXBFK7uVIwoosx2IUNk7Ut5wRH0vA,7998
33
34
  yomitoku/models/parseq.py,sha256=psCPjP3eKjOFAUZJPQQhbD0nWEV5FeOZ0tTK27Rvvbw,8748
@@ -49,9 +50,9 @@ yomitoku/resource/charset.txt,sha256=sU91kSi-9Wk4733bCXy4j_UDmvcsj96sHOq1ppUJlOY
49
50
  yomitoku/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
50
51
  yomitoku/utils/graph.py,sha256=LKNB8ZhSQwOZMfeAimPMF5UCVVr2ZaUWoGDkz8z-uGU,456
51
52
  yomitoku/utils/logger.py,sha256=uOmtQDr0A0JD7wyFshedL08BiNrQorHnpktRXba8bjU,424
52
- yomitoku/utils/misc.py,sha256=FbwPLeIYYBvNf9wQh2RoEonTM5BF7_IwaEqmRsYHKA8,2673
53
+ yomitoku/utils/misc.py,sha256=cIUrvSJwfWwTui7ueYistf9XPapPR3XgqD2wQjWit40,2901
53
54
  yomitoku/utils/visualizer.py,sha256=DjDwHiAu1iFRKh96H3Egq4vuI2s_-9dLCDeykhKi8jo,5251
54
- yomitoku-0.8.0.dist-info/METADATA,sha256=CH5KOT64Q8AMOaKkUbbd9rI1Zcd_dBk_OXd2GguC4f0,8555
55
- yomitoku-0.8.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
56
- yomitoku-0.8.0.dist-info/entry_points.txt,sha256=nFV3S11zgBNW0Qq_D0XQNg2R4lNXU_9XUFr6rdJoyF8,52
57
- yomitoku-0.8.0.dist-info/RECORD,,
55
+ yomitoku-0.9.0.dist-info/METADATA,sha256=vUbrNm2w-7OIqEEXNzFQBDm8y57mTuh1UeJYHBGRo9U,8622
56
+ yomitoku-0.9.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
57
+ yomitoku-0.9.0.dist-info/entry_points.txt,sha256=N3PzzSo-fdgri5liPpZ3ItMmRH6oVX14pIU_5pUJiAs,99
58
+ yomitoku-0.9.0.dist-info/RECORD,,
@@ -1,2 +1,3 @@
1
1
  [console_scripts]
2
2
  yomitoku = yomitoku.cli.main:main
3
+ yomitoku_mcp = yomitoku.cli.mcp:run_mcp_server