yomitoku 0.9.1__py3-none-any.whl → 0.9.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
yomitoku/cli/main.py CHANGED
@@ -9,6 +9,7 @@ from ..constants import SUPPORT_OUTPUT_FORMAT
9
9
  from ..data.functions import load_image, load_pdf
10
10
  from ..document_analyzer import DocumentAnalyzer
11
11
  from ..utils.logger import set_logger
12
+ from ..utils.searchable_pdf import create_searchable_pdf
12
13
 
13
14
  from ..export import save_csv, save_html, save_json, save_markdown
14
15
  from ..export import convert_json, convert_csv, convert_html, convert_markdown
@@ -80,11 +81,13 @@ def process_single_file(args, analyzer, path, format):
80
81
  else:
81
82
  imgs = load_image(path)
82
83
 
84
+ format_results = []
83
85
  results = []
84
86
  for page, img in enumerate(imgs):
85
87
  result, ocr, layout = analyzer(img)
86
88
  dirname = path.parent.name
87
89
  filename = path.stem
90
+ results.append(result)
88
91
 
89
92
  # cv2.imwrite(
90
93
  # os.path.join(args.outdir, f"{dirname}_{filename}_p{page+1}.jpg"), img
@@ -130,7 +133,7 @@ def process_single_file(args, analyzer, path, format):
130
133
  figure_dir=args.figure_dir,
131
134
  )
132
135
 
133
- results.append(
136
+ format_results.append(
134
137
  {
135
138
  "format": format,
136
139
  "data": json.model_dump(),
@@ -157,7 +160,7 @@ def process_single_file(args, analyzer, path, format):
157
160
  figure_dir=args.figure_dir,
158
161
  )
159
162
 
160
- results.append(
163
+ format_results.append(
161
164
  {
162
165
  "format": format,
163
166
  "data": csv,
@@ -188,7 +191,7 @@ def process_single_file(args, analyzer, path, format):
188
191
  encoding=args.encoding,
189
192
  )
190
193
 
191
- results.append(
194
+ format_results.append(
192
195
  {
193
196
  "format": format,
194
197
  "data": html,
@@ -219,14 +222,14 @@ def process_single_file(args, analyzer, path, format):
219
222
  encoding=args.encoding,
220
223
  )
221
224
 
222
- results.append(
225
+ format_results.append(
223
226
  {
224
227
  "format": format,
225
228
  "data": md,
226
229
  }
227
230
  )
228
231
 
229
- out = merge_all_pages(results)
232
+ out = merge_all_pages(format_results)
230
233
  if args.combine:
231
234
  out_path = os.path.join(args.outdir, f"{dirname}_{filename}.{format}")
232
235
  save_merged_file(
@@ -235,6 +238,15 @@ def process_single_file(args, analyzer, path, format):
235
238
  out,
236
239
  )
237
240
 
241
+ if args.searchable_pdf:
242
+ pdf_path = os.path.join(args.outdir, f"{filename}.pdf")
243
+ create_searchable_pdf(
244
+ imgs,
245
+ results,
246
+ output_path=pdf_path,
247
+ )
248
+ logger.info(f"Output SearchablePDF: {pdf_path}")
249
+
238
250
 
239
251
  def main():
240
252
  parser = argparse.ArgumentParser()
@@ -349,6 +361,11 @@ def main():
349
361
  type=str,
350
362
  choices=["auto", "left2right", "top2bottom", "right2left"],
351
363
  )
364
+ parser.add_argument(
365
+ "--searchable_pdf",
366
+ action="store_true",
367
+ help="if set, create searchable PDF",
368
+ )
352
369
 
353
370
  args = parser.parse_args()
354
371
 
@@ -1,14 +1,20 @@
1
- import json
2
- import io
3
1
  import csv
2
+ import io
3
+ import json
4
4
  import os
5
+ from argparse import ArgumentParser
5
6
  from pathlib import Path
6
7
 
7
8
  from mcp.server.fastmcp import Context, FastMCP
8
9
 
9
10
  from yomitoku import DocumentAnalyzer
10
11
  from yomitoku.data.functions import load_image, load_pdf
11
- from yomitoku.export import convert_json, convert_markdown, convert_csv, convert_html
12
+ from yomitoku.export import (
13
+ convert_csv,
14
+ convert_html,
15
+ convert_json,
16
+ convert_markdown,
17
+ )
12
18
 
13
19
  try:
14
20
  RESOURCE_DIR = os.environ["RESOURCE_DIR"]
@@ -154,12 +160,37 @@ async def get_file_list() -> list[str]:
154
160
  return os.listdir(RESOURCE_DIR)
155
161
 
156
162
 
157
- def run_mcp_server():
163
def run_mcp_server(transport="stdio", mount_path=None):
    """
    Run the MCP server.

    Args:
        transport: Transport to serve on, either "stdio" (default) or "sse".
        mount_path: Forwarded to ``mcp.run`` when ``transport`` is "sse";
            not used for "stdio".

    Raises:
        ValueError: If ``transport`` is neither "stdio" nor "sse".
    """
    supported = ("stdio", "sse")
    # Fail loudly: previously an unknown transport fell through every branch
    # and the function returned without ever starting the server.
    if transport not in supported:
        raise ValueError(
            f"Unsupported transport {transport!r}; expected one of {supported}"
        )

    if transport == "stdio":
        mcp.run()
    else:
        mcp.run(transport=transport, mount_path=mount_path)
172
+
173
+
174
def main():
    """
    Command-line entry point: read the transport options from the command
    line and start the MCP server with them.
    """
    cli = ArgumentParser(description="Run the MCP server.")

    # (flags, keyword options) for every supported command-line switch.
    option_specs = [
        (
            ("--transport", "-t"),
            {
                "type": str,
                "default": "stdio",
                "choices": ["stdio", "sse"],
                "help": "Transport method for the MCP server.",
            },
        ),
        (
            ("--mount_path", "-m"),
            {
                "type": str,
                "default": None,
                "help": "Mount path for the MCP server (only used with SSE transport).",
            },
        ),
    ]
    for flags, options in option_specs:
        cli.add_argument(*flags, **options)

    parsed = cli.parse_args()
    run_mcp_server(transport=parsed.transport, mount_path=parsed.mount_path)
162
193
 
163
194
 
164
195
  if __name__ == "__main__":
165
- run_mcp_server()
196
+ main()
@@ -0,0 +1,116 @@
1
+ import os
2
+
3
+ from PIL import Image
4
+ from io import BytesIO
5
+
6
+ from reportlab.pdfgen import canvas
7
+ from reportlab.pdfbase.ttfonts import TTFont
8
+ from reportlab.pdfbase import pdfmetrics
9
+ from reportlab.pdfbase.pdfmetrics import stringWidth
10
+
11
+ import numpy as np
12
+ import jaconv
13
+
14
+ from ..constants import ROOT_DIR
15
+
16
+ FONT_PATH = ROOT_DIR + "/resource/MPLUS1p-Medium.ttf"
17
+ pdfmetrics.registerFont(TTFont("MPLUS1p-Medium", FONT_PATH))
18
+
19
+
20
+ def _poly2rect(points):
21
+ """
22
+ Convert a polygon defined by its corner points to a rectangle.
23
+ The points should be in the format [[x1, y1], [x2, y2], [x3, y3], [x4, y4]].
24
+ """
25
+ points = np.array(points, dtype=int)
26
+ x_min = points[:, 0].min()
27
+ x_max = points[:, 0].max()
28
+ y_min = points[:, 1].min()
29
+ y_max = points[:, 1].max()
30
+
31
+ return [x_min, y_min, x_max, y_max]
32
+
33
+
34
def _calc_font_size(content, bbox_height, bbox_width):
    """
    Pick the font size whose rendered text width best matches ``bbox_width``.

    Candidate sizes are ``bbox_height`` scaled by factors from 0.5 up to (but
    not including) 1.0 in steps of 0.01; the width of ``content`` is measured
    in the "MPLUS1p-Medium" font. The first candidate with the smallest
    absolute width difference is returned.
    """
    chosen_size = None
    smallest_gap = np.inf

    for scale in np.arange(0.5, 1.0, 0.01):
        candidate = bbox_height * scale
        rendered_width = stringWidth(content, "MPLUS1p-Medium", candidate)
        gap = abs(rendered_width - bbox_width)
        # Strict comparison keeps the earliest candidate on ties.
        if gap < smallest_gap:
            smallest_gap, chosen_size = gap, candidate

    return chosen_size
48
+
49
+
50
def to_full_width(text):
    """
    Convert ``text`` to full-width characters.

    ``jaconv.h2z`` is applied first (kana, ASCII and digits enabled), then a
    few characters are mapped explicitly to their full-width counterparts.
    """
    widened = jaconv.h2z(text, kana=True, ascii=True, digit=True)

    extra_table = str.maketrans(
        {
            "\u00a5": "\uffe5",  # yen sign -> full-width yen sign
            "\u00b7": "\u30fb",  # middle dot -> katakana middle dot
            " ": "\u3000",  # half-width space -> full-width space
        }
    )

    return widened.translate(extra_table)
63
+
64
+
65
def create_searchable_pdf(images, ocr_results, output_path):
    """
    Write a searchable PDF: one page per image, with the recognised text
    drawn over the image in a fully transparent fill.

    Args:
        images: Page images as arrays indexed [row, column, channel]; the
            channel axis is reversed before use (BGR to RGB).
        ocr_results: Per-page OCR results, paired with ``images`` by position.
            Each result exposes ``words``; every word provides ``content``,
            ``points`` (polygon corners) and ``direction``.
        output_path: Path of the PDF file to write.
    """
    # Imported here so the page image can be embedded straight from memory
    # instead of going through a temporary PNG in the working directory.
    from reportlab.lib.utils import ImageReader

    packet = BytesIO()
    c = canvas.Canvas(packet)

    # The "MPLUS1p-Medium" font is registered once at module import, so it is
    # not registered again for every page.
    for image, ocr_result in zip(images, ocr_results):
        page = Image.fromarray(image[:, :, ::-1])  # Convert BGR to RGB
        w, h = page.size

        c.setPageSize((w, h))
        c.drawImage(ImageReader(page), 0, 0, width=w, height=h)

        # Transparent fill for the text layer. Set after the image is drawn so
        # that the image itself stays opaque, and once per page because the
        # value is the same for every word. (Use an opaque colour such as
        # (0, 0, 0) here to debug text placement.)
        c.setFillColorRGB(1, 1, 1, alpha=0)

        for word in ocr_result.words:
            text = word.content
            if not text:
                # Nothing to embed for this word.
                continue

            direction = word.direction
            x1, y1, x2, y2 = _poly2rect(word.points)
            bbox_height = y2 - y1
            bbox_width = x2 - x1

            if direction == "vertical":
                text = to_full_width(text)

            # The font size is bounded by the extent across the writing
            # direction and fitted to the extent along it.
            if direction == "horizontal":
                font_size = _calc_font_size(text, bbox_height, bbox_width)
            else:
                font_size = _calc_font_size(text, bbox_width, bbox_height)

            c.setFont("MPLUS1p-Medium", font_size)

            # Image rows grow downwards while PDF y grows upwards, hence the
            # ``h - y`` conversions below.
            if direction == "vertical":
                base_y = h - y2 + (bbox_height - font_size)
                for j, ch in enumerate(text):
                    # Draw each character rotated by -90 degrees, stepping one
                    # font size further down the page per character.
                    c.saveState()
                    c.translate(x1 + font_size * 0.5, base_y - (j - 1) * font_size)
                    c.rotate(-90)
                    c.drawString(0, 0, ch)
                    c.restoreState()
            else:
                # Centre the line vertically inside its bounding box.
                base_y = h - y2 + (bbox_height - font_size) * 0.5
                c.drawString(x1, base_y, text)
        c.showPage()
    c.save()

    # The PDF is assembled in memory first, so the output file is only
    # created once the whole document has been rendered.
    with open(output_path, "wb") as f:
        f.write(packet.getvalue())
@@ -1,12 +1,13 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: yomitoku
3
- Version: 0.9.1
3
+ Version: 0.9.2
4
4
  Summary: Yomitoku is an AI-powered document image analysis package designed specifically for the Japanese language.
5
5
  Author-email: Kotaro Kinoshita <kotaro.kinoshita@mlism.com>
6
6
  License: CC BY-NC-SA 4.0
7
7
  Keywords: Deep Learning,Japanese,OCR
8
8
  Requires-Python: <3.13,>=3.10
9
9
  Requires-Dist: huggingface-hub>=0.26.1
10
+ Requires-Dist: jaconv>=0.4.0
10
11
  Requires-Dist: lxml>=5.3.0
11
12
  Requires-Dist: omegaconf>=2.3.0
12
13
  Requires-Dist: onnx>=1.17.0
@@ -15,6 +16,7 @@ Requires-Dist: opencv-python>=4.10.0.84
15
16
  Requires-Dist: pyclipper>=1.3.0.post6
16
17
  Requires-Dist: pydantic>=2.9.2
17
18
  Requires-Dist: pypdfium2>=4.30.0
19
+ Requires-Dist: reportlab>=4.4.1
18
20
  Requires-Dist: shapely>=2.0.6
19
21
  Requires-Dist: timm>=1.0.11
20
22
  Requires-Dist: torch>=2.5.0
@@ -41,7 +43,7 @@ YomiToku は日本語に特化した AI 文章画像解析エンジン(Document
41
43
  - 🤖 日本語データセットで学習した 4 種類(文字位置の検知、文字列認識、レイアウト解析、表の構造認識)の AI モデルを搭載しています。4 種類のモデルはすべて独自に学習されたモデルで日本語文書に対して、高精度に推論可能です。
42
44
  - 🇯🇵 各モデルは日本語の文書画像に特化して学習されており、7000 文字を超える日本語文字の認識をサポート、手書き文字、縦書きなど日本語特有のレイアウト構造の文書画像の解析も可能です。(日本語以外にも英語の文書に対しても対応しています)。
43
45
  - 📈 レイアウト解析、表の構造解析, 読み順推定機能により、文書画像のレイアウトの意味的構造を壊さずに情報を抽出することが可能です。
44
- - 📄 多様な出力形式をサポートしています。html やマークダウン、json、csv のいずれかのフォーマットに変換可能です。また、文書内に含まれる図表、画像の抽出の出力も可能です。
46
+ - 📄 多様な出力形式をサポートしています。html やマークダウン、json、csv のいずれかのフォーマットに変換可能です。また、文書内に含まれる図表、画像の抽出の出力も可能です。文書画像をサーチャブルPDFに変換する処理もサポートしています。
45
47
  - ⚡ GPU 環境で高速に動作し、効率的に文書の文字起こし解析が可能です。また、VRAM も 8GB 以内で動作し、ハイエンドな GPU を用意する必要はありません。
46
48
 
47
49
  ## 🖼️ デモ
@@ -96,6 +98,7 @@ yomitoku ${path_data} -f md -o results -v --figure --lite
96
98
  - `--encoding` エクスポートする出力ファイルの文字エンコーディングを指定します。サポートされていない文字コードが含まれる場合は、その文字を無視します。(utf-8, utf-8-sig, shift-jis, enc-jp, cp932)
97
99
  - `--combine` PDFを入力に与えたときに、複数ページが含まれる場合に、それらの予測結果を一つのファイルに統合してエクスポートします。
98
100
  - `--ignore_meta` 文章のheader, footerなどの文字情報を出力ファイルに含めません。
101
+ - `--searchable_pdf` 読み取った文字情報をPDFに埋め込み全文検索可能なPDFを出力します。
99
102
 
100
103
  その他のオプションに関しては、ヘルプを参照
101
104
 
@@ -10,8 +10,8 @@ yomitoku/table_structure_recognizer.py,sha256=tHjex6deT_FjRK5ePz9bUXA_QIhgv_vYtK
10
10
  yomitoku/text_detector.py,sha256=6IwEJJKp_F8YH0Oki0QV-Mqi--P2LGbNKo-_kxBB_eo,4383
11
11
  yomitoku/text_recognizer.py,sha256=eaxozNu-Ms6iv8efbKZzn8pJNW1Wo4f86bGhzSMtv3s,5992
12
12
  yomitoku/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
- yomitoku/cli/main.py,sha256=VZG8DZf-k_QytlDZtB91eBNY69MRpbryQg1rkn3fs20,12304
14
- yomitoku/cli/mcp.py,sha256=5h704SsUGNAqVnoO_5S-HY2-bApy_Rf8ajDxl1pkT2k,4888
13
+ yomitoku/cli/main.py,sha256=7AaaFzMf33ER__XPDBNkrJkKwclne7QyVFWeBvpUYBY,12849
14
+ yomitoku/cli/mcp_server.py,sha256=WnWzxd13HaemC3b-5i9B9NVBGc3WGfum2nYhoBolEnk,5641
15
15
  yomitoku/configs/__init__.py,sha256=x5-ccjGiP6xxRtDPT7f1Enl7SsE0hSk0G8f7eF9V85I,886
16
16
  yomitoku/configs/cfg_layout_parser_rtdtrv2.py,sha256=8PRxB2Ar9UF7-DLtbgSokhrzdXb0veWI6Wc-X8qigRw,2329
17
17
  yomitoku/configs/cfg_layout_parser_rtdtrv2_v2.py,sha256=nMrL3uvoVmyzZ909Bz2zmfp9b6AEBLKhIprOvQ5yiQE,2324
@@ -51,8 +51,9 @@ yomitoku/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
51
51
  yomitoku/utils/graph.py,sha256=LKNB8ZhSQwOZMfeAimPMF5UCVVr2ZaUWoGDkz8z-uGU,456
52
52
  yomitoku/utils/logger.py,sha256=uOmtQDr0A0JD7wyFshedL08BiNrQorHnpktRXba8bjU,424
53
53
  yomitoku/utils/misc.py,sha256=r92x45kQR8lC5jO1MZaHBDtcCWBkQXg_WS9H4RXJzSY,4127
54
+ yomitoku/utils/searchable_pdf.py,sha256=40JbcxWrHzYTtzvI9MPYHMrWqLWKiLWo4mWDNRFXwHY,3530
54
55
  yomitoku/utils/visualizer.py,sha256=DjDwHiAu1iFRKh96H3Egq4vuI2s_-9dLCDeykhKi8jo,5251
55
- yomitoku-0.9.1.dist-info/METADATA,sha256=ozEkYekTPuEP1GwnCCQKgJC9DzEQpyActU_DltQGMHc,8700
56
- yomitoku-0.9.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
57
- yomitoku-0.9.1.dist-info/entry_points.txt,sha256=N3PzzSo-fdgri5liPpZ3ItMmRH6oVX14pIU_5pUJiAs,99
58
- yomitoku-0.9.1.dist-info/RECORD,,
56
+ yomitoku-0.9.2.dist-info/METADATA,sha256=vDEaaXAimCBfVwMeWmfyJBqzb7sXtZk4-ia3PXrtk7c,8966
57
+ yomitoku-0.9.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
58
+ yomitoku-0.9.2.dist-info/entry_points.txt,sha256=n3c8bQSj5Be5GHAOv_NZ8cldJFmWeigQxSmteFTmu_k,96
59
+ yomitoku-0.9.2.dist-info/RECORD,,
@@ -1,3 +1,3 @@
1
1
  [console_scripts]
2
2
  yomitoku = yomitoku.cli.main:main
3
- yomitoku_mcp = yomitoku.cli.mcp:run_mcp_server
3
+ yomitoku_mcp = yomitoku.cli.mcp_server:main