yomitoku 0.8.1__py3-none-any.whl → 0.9.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
yomitoku/cli/main.py CHANGED
@@ -3,7 +3,6 @@ import os
3
3
  import time
4
4
  from pathlib import Path
5
5
 
6
- import cv2
7
6
  import torch
8
7
 
9
8
  from ..constants import SUPPORT_OUTPUT_FORMAT
@@ -14,6 +13,8 @@ from ..utils.logger import set_logger
14
13
  from ..export import save_csv, save_html, save_json, save_markdown
15
14
  from ..export import convert_json, convert_csv, convert_html, convert_markdown
16
15
 
16
+ from ..utils.misc import save_image
17
+
17
18
  logger = set_logger(__name__, "INFO")
18
19
 
19
20
 
@@ -91,21 +92,23 @@ def process_single_file(args, analyzer, path, format):
91
92
 
92
93
  if ocr is not None:
93
94
  out_path = os.path.join(
94
- args.outdir, f"{dirname}_{filename}_p{page+1}_ocr.jpg"
95
+ args.outdir, f"{dirname}_{filename}_p{page + 1}_ocr.jpg"
95
96
  )
96
97
 
97
- cv2.imwrite(out_path, ocr)
98
+ save_image(ocr, out_path)
98
99
  logger.info(f"Output file: {out_path}")
99
100
 
100
101
  if layout is not None:
101
102
  out_path = os.path.join(
102
- args.outdir, f"{dirname}_{filename}_p{page+1}_layout.jpg"
103
+ args.outdir, f"{dirname}_{filename}_p{page + 1}_layout.jpg"
103
104
  )
104
105
 
105
- cv2.imwrite(out_path, layout)
106
+ save_image(layout, out_path)
106
107
  logger.info(f"Output file: {out_path}")
107
108
 
108
- out_path = os.path.join(args.outdir, f"{dirname}_{filename}_p{page+1}.{format}")
109
+ out_path = os.path.join(
110
+ args.outdir, f"{dirname}_{filename}_p{page + 1}.{format}"
111
+ )
109
112
 
110
113
  if format == "json":
111
114
  if args.combine:
@@ -340,6 +343,12 @@ def main():
340
343
  action="store_true",
341
344
  help="if set, ignore meta information(header, footer) in the output",
342
345
  )
346
+ parser.add_argument(
347
+ "--reading_order",
348
+ default="auto",
349
+ type=str,
350
+ choices=["auto", "left2right", "top2bottom", "right2left"],
351
+ )
343
352
 
344
353
  args = parser.parse_args()
345
354
 
@@ -393,6 +402,7 @@ def main():
393
402
  visualize=args.vis,
394
403
  device=args.device,
395
404
  ignore_meta=args.ignore_meta,
405
+ reading_order=args.reading_order,
396
406
  )
397
407
 
398
408
  os.makedirs(args.outdir, exist_ok=True)
@@ -407,7 +417,7 @@ def main():
407
417
  logger.info(f"Processing file: {file_path}")
408
418
  process_single_file(args, analyzer, file_path, format)
409
419
  end = time.time()
410
- logger.info(f"Total Processing time: {end-start:.2f} sec")
420
+ logger.info(f"Total Processing time: {end - start:.2f} sec")
411
421
  except Exception:
412
422
  continue
413
423
  else:
@@ -415,7 +425,7 @@ def main():
415
425
  logger.info(f"Processing file: {path}")
416
426
  process_single_file(args, analyzer, path, format)
417
427
  end = time.time()
418
- logger.info(f"Total Processing time: {end-start:.2f} sec")
428
+ logger.info(f"Total Processing time: {end - start:.2f} sec")
419
429
 
420
430
 
421
431
  if __name__ == "__main__":
yomitoku/cli/mcp.py ADDED
@@ -0,0 +1,165 @@
1
+ import json
2
+ import io
3
+ import csv
4
+ import os
5
+ from pathlib import Path
6
+
7
+ from mcp.server.fastmcp import Context, FastMCP
8
+
9
+ from yomitoku import DocumentAnalyzer
10
+ from yomitoku.data.functions import load_image, load_pdf
11
+ from yomitoku.export import convert_json, convert_markdown, convert_csv, convert_html
12
+
13
# Directory whose files the server may list and process. It has to be
# configured explicitly, so importing this module fails fast when it is not.
try:
    RESOURCE_DIR = os.environ["RESOURCE_DIR"]
except KeyError:
    # The KeyError adds nothing to the message below; suppress the chained
    # "During handling of the above exception ..." traceback.
    raise ValueError("Environment variable 'RESOURCE_DIR' is not set.") from None
17
+
18
+
19
+ analyzer = None
20
+
21
+
22
async def load_analyzer(ctx: Context) -> DocumentAnalyzer:
    """
    Return the module-wide DocumentAnalyzer, creating it on first use.

    The instance is cached in the module-level ``analyzer`` variable, so the
    analyzer is constructed at most once per process.

    Args:
        ctx (Context): Context used to report that the analyzer is being loaded.

    Returns:
        DocumentAnalyzer: The cached document analyzer instance.
    """
    global analyzer

    # Fast path: an earlier call already built the analyzer.
    if analyzer is not None:
        return analyzer

    await ctx.info("Load document analyzer")
    analyzer = DocumentAnalyzer(visualize=False, device="cuda")
    return analyzer
37
+
38
+
39
+ mcp = FastMCP("yomitoku")
40
+
41
+
42
@mcp.tool()
async def process_ocr(ctx: Context, filename: str, output_format: str) -> str:
    """
    Perform OCR on the specified file in the resource directory and convert
    the results to the desired format.

    Args:
        ctx (Context): The context in which the OCR processing is executed.
        filename (str): The name of the file to process in the resource directory.
        output_format (str): The desired format for the output. The available options are:
            - json: Outputs the text as structured data along with positional information.
            - markdown: Outputs texts and tables in Markdown format.
            - html: Outputs texts and tables in HTML format.
            - csv: Outputs texts and tables in CSV format.

    Returns:
        str: The OCR results converted to the specified format.

    Raises:
        ValueError: If ``output_format`` is not supported, or if ``filename``
            points to a location outside the resource directory.
    """
    # Reject an unsupported format up front so that a bad request does not
    # pay for model loading and a full OCR pass before failing.
    if output_format not in ("json", "markdown", "html", "csv"):
        raise ValueError(
            f"Unsupported output format: {output_format}."
            " Supported formats are json, markdown, html or csv."
        )

    # ``filename`` is supplied by the MCP client. Normalise the joined path
    # and make sure "../" segments or an absolute path cannot escape the
    # resource directory.
    resource_root = os.path.abspath(RESOURCE_DIR)
    file_path = os.path.abspath(os.path.join(resource_root, filename))
    try:
        inside_root = os.path.commonpath([resource_root, file_path]) == resource_root
    except ValueError:
        # commonpath() raises when the paths cannot share a prefix
        # (e.g. different drives on Windows).
        inside_root = False
    if not inside_root:
        raise ValueError(f"File is outside the resource directory: {filename}")

    doc_analyzer = await load_analyzer(ctx)

    await ctx.info("Start ocr processing")

    if Path(file_path).suffix.lower() == ".pdf":
        imgs = load_pdf(file_path)
    else:
        imgs = load_image(file_path)

    results = []
    for page, img in enumerate(imgs):
        # The analyzer also reads the current page from its ``img`` attribute,
        # so set it before running.
        doc_analyzer.img = img
        result, _, _ = await doc_analyzer.run(img)
        results.append(result)
        await ctx.report_progress(page + 1, len(imgs))

    if output_format == "json":
        return json.dumps(
            [
                convert_json(
                    result,
                    out_path=None,
                    ignore_line_break=True,
                    img=img,
                    export_figure=False,
                    figure_dir=None,
                ).model_dump()
                for img, result in zip(imgs, results)
            ],
            ensure_ascii=False,
            sort_keys=True,
            separators=(",", ": "),
        )

    if output_format == "markdown":
        return "\n".join(
            [
                convert_markdown(
                    result,
                    out_path=None,
                    ignore_line_break=True,
                    img=img,
                    export_figure=False,
                )[0]
                for img, result in zip(imgs, results)
            ]
        )

    if output_format == "html":
        return "\n".join(
            [
                convert_html(
                    result,
                    out_path=None,
                    ignore_line_break=True,
                    img=img,
                    export_figure=False,
                    export_figure_letter="",
                )[0]
                for img, result in zip(imgs, results)
            ]
        )

    # Only "csv" is left: the format was validated at the top of the function.
    output = io.StringIO()
    writer = csv.writer(output, quoting=csv.QUOTE_MINIMAL)
    for img, result in zip(imgs, results):
        elements = convert_csv(
            result,
            out_path=None,
            ignore_line_break=True,
            img=img,
            export_figure=False,
        )
        for element in elements:
            if element["type"] == "table":
                # A table element holds a list of rows.
                writer.writerows(element["element"])
            else:
                writer.writerow([element["element"]])
            # Blank row between consecutive elements.
            writer.writerow([""])
    return output.getvalue()
144
+
145
+
146
@mcp.resource("file://list")
async def get_file_list() -> list[str]:
    """
    Retrieve a list of files in the resource directory.

    Only regular files located directly in the resource directory are
    reported; sub-directories are skipped. The names are sorted because
    ``os.listdir`` returns entries in arbitrary order.

    Returns:
        list[str]: Sorted filenames in the resource directory.
    """
    return sorted(
        name
        for name in os.listdir(RESOURCE_DIR)
        if os.path.isfile(os.path.join(RESOURCE_DIR, name))
    )
155
+
156
+
157
def run_mcp_server():
    """
    Start the "yomitoku" MCP server using the stdio transport.

    This is the target of the ``yomitoku_mcp`` console script.
    """
    mcp.run(transport="stdio")
162
+
163
+
164
+ if __name__ == "__main__":
165
+ run_mcp_server()
yomitoku/data/dataset.py CHANGED
@@ -8,9 +8,11 @@ from .functions import (
8
8
  validate_quads,
9
9
  )
10
10
 
11
+ from concurrent.futures import ThreadPoolExecutor
12
+
11
13
 
12
14
  class ParseqDataset(Dataset):
13
- def __init__(self, cfg, img, quads):
15
+ def __init__(self, cfg, img, quads, num_workers=8):
14
16
  self.img = img[:, :, ::-1]
15
17
  self.quads = quads
16
18
  self.cfg = cfg
@@ -22,19 +24,27 @@ class ParseqDataset(Dataset):
22
24
  ]
23
25
  )
24
26
 
25
- validate_quads(self.img, self.quads)
27
+ with ThreadPoolExecutor(max_workers=num_workers) as executor:
28
+ data = list(executor.map(self.preprocess, self.quads))
26
29
 
27
- def __len__(self):
28
- return len(self.quads)
30
+ self.data = [tensor for tensor in data if tensor is not None]
31
+
32
+ def preprocess(self, quad):
33
+ if validate_quads(self.img, quad) is None:
34
+ return None
35
+
36
+ roi_img = extract_roi_with_perspective(self.img, quad)
29
37
 
30
- def __getitem__(self, index):
31
- polygon = self.quads[index]
32
- roi_img = extract_roi_with_perspective(self.img, polygon)
33
38
  if roi_img is None:
34
- return
39
+ return None
35
40
 
36
41
  roi_img = rotate_text_image(roi_img, thresh_aspect=2)
37
42
  resized = resize_with_padding(roi_img, self.cfg.data.img_size)
38
- tensor = self.transform(resized)
39
43
 
40
- return tensor
44
+ return resized
45
+
46
+ def __len__(self):
47
+ return len(self.data)
48
+
49
+ def __getitem__(self, index):
50
+ return self.transform(self.data[index])
@@ -191,7 +191,7 @@ def array_to_tensor(img: np.ndarray) -> torch.Tensor:
191
191
  return tensor
192
192
 
193
193
 
194
- def validate_quads(img: np.ndarray, quads: list[list[list[int]]]):
194
+ def validate_quads(img: np.ndarray, quad: list[list[list[int]]]):
195
195
  """
196
196
  Validate the vertices of the quadrilateral.
197
197
 
@@ -204,23 +204,23 @@ def validate_quads(img: np.ndarray, quads: list[list[list[int]]]):
204
204
  """
205
205
 
206
206
  h, w = img.shape[:2]
207
- for quad in quads:
208
- if len(quad) != 4:
209
- raise ValueError("The number of vertices must be 4.")
210
-
211
- for point in quad:
212
- if len(point) != 2:
213
- raise ValueError("The number of coordinates must be 2.")
214
-
215
- quad = np.array(quad, dtype=int)
216
- x1 = np.min(quad[:, 0])
217
- x2 = np.max(quad[:, 0])
218
- y1 = np.min(quad[:, 1])
219
- y2 = np.max(quad[:, 1])
220
- h, w = img.shape[:2]
207
+ if len(quad) != 4:
208
+ # raise ValueError("The number of vertices must be 4.")
209
+ return None
210
+
211
+ for point in quad:
212
+ if len(point) != 2:
213
+ return None
214
+
215
+ quad = np.array(quad, dtype=int)
216
+ x1 = np.min(quad[:, 0])
217
+ x2 = np.max(quad[:, 0])
218
+ y1 = np.min(quad[:, 1])
219
+ y2 = np.max(quad[:, 1])
220
+ h, w = img.shape[:2]
221
221
 
222
- if x1 < 0 or x2 > w or y1 < 0 or y2 > h:
223
- raise ValueError(f"The vertices are out of the image. {quad.tolist()}")
222
+ if x1 < 0 or x2 > w or y1 < 0 or y2 > h:
223
+ return None
224
224
 
225
225
  return True
226
226
 
@@ -237,19 +237,18 @@ def extract_roi_with_perspective(img, quad):
237
237
  np.ndarray: extracted image
238
238
  """
239
239
  dst = img.copy()
240
- quad = np.array(quad, dtype=np.float32)
240
+ quad = np.array(quad, dtype=np.int64)
241
+
241
242
  width = np.linalg.norm(quad[0] - quad[1])
242
243
  height = np.linalg.norm(quad[1] - quad[2])
243
244
 
244
245
  width = int(width)
245
246
  height = int(height)
246
-
247
247
  pts1 = np.float32(quad)
248
248
  pts2 = np.float32([[0, 0], [width, 0], [width, height], [0, height]])
249
249
 
250
250
  M = cv2.getPerspectiveTransform(pts1, pts2)
251
251
  dst = cv2.warpPerspective(dst, M, (width, height))
252
-
253
252
  return dst
254
253
 
255
254
 
@@ -86,8 +86,12 @@ def extract_paragraph_within_figure(paragraphs, figures):
86
86
  check_list[i] = True
87
87
 
88
88
  figure["direction"] = judge_page_direction(contained_paragraphs)
89
+ reading_order = (
90
+ "left2right" if figure["direction"] == "horizontal" else "right2left"
91
+ )
92
+
89
93
  figure_paragraphs = prediction_reading_order(
90
- contained_paragraphs, figure["direction"]
94
+ contained_paragraphs, reading_order
91
95
  )
92
96
  figure["paragraphs"] = sorted(figure_paragraphs, key=lambda x: x.order)
93
97
  figure = FigureSchema(**figure)
@@ -126,8 +130,8 @@ def extract_words_within_element(pred_words, element):
126
130
  cnt_vertical = word_direction.count("vertical")
127
131
 
128
132
  element_direction = "horizontal" if cnt_horizontal > cnt_vertical else "vertical"
129
-
130
- prediction_reading_order(contained_words, element_direction)
133
+ order = "left2right" if element_direction == "horizontal" else "right2left"
134
+ prediction_reading_order(contained_words, order)
131
135
  contained_words = sorted(contained_words, key=lambda x: x.order)
132
136
 
133
137
  contained_words = "\n".join([content.contents for content in contained_words])
@@ -328,6 +332,7 @@ class DocumentAnalyzer:
328
332
  device="cuda",
329
333
  visualize=False,
330
334
  ignore_meta=False,
335
+ reading_order="auto",
331
336
  ):
332
337
  default_configs = {
333
338
  "ocr": {
@@ -352,6 +357,8 @@ class DocumentAnalyzer:
352
357
  },
353
358
  }
354
359
 
360
+ self.reading_order = reading_order
361
+
355
362
  if isinstance(configs, dict):
356
363
  recursive_update(default_configs, configs)
357
364
  else:
@@ -452,9 +459,17 @@ class DocumentAnalyzer:
452
459
 
453
460
  elements = page_contents + layout_res.tables + figures
454
461
 
455
- prediction_reading_order(headers, page_direction)
456
- prediction_reading_order(footers, page_direction)
457
- prediction_reading_order(elements, page_direction, self.img)
462
+ prediction_reading_order(headers, "left2right")
463
+ prediction_reading_order(footers, "left2right")
464
+
465
+ if self.reading_order == "auto":
466
+ reading_order = (
467
+ "right2left" if page_direction == "vertical" else "top2bottom"
468
+ )
469
+ else:
470
+ reading_order = self.reading_order
471
+
472
+ prediction_reading_order(elements, reading_order, self.img)
458
473
 
459
474
  for i, element in enumerate(elements):
460
475
  element.order += len(headers)
@@ -1,7 +1,7 @@
1
1
  import csv
2
2
  import os
3
3
 
4
- import cv2
4
+ from ..utils.misc import save_image
5
5
 
6
6
 
7
7
  def table_to_csv(table, ignore_line_break):
@@ -54,7 +54,7 @@ def save_figure(
54
54
  filename = os.path.splitext(os.path.basename(out_path))[0]
55
55
  figure_name = f"{filename}_figure_{i}.png"
56
56
  figure_path = os.path.join(save_dir, figure_name)
57
- cv2.imwrite(figure_path, figure_img)
57
+ save_image(figure_img, figure_path)
58
58
 
59
59
 
60
60
  def convert_csv(
@@ -1,10 +1,10 @@
1
1
  import os
2
2
  import re
3
3
  from html import escape
4
-
5
- import cv2
6
4
  from lxml import etree, html
7
5
 
6
+ from ..utils.misc import save_image
7
+
8
8
 
9
9
  def convert_text_to_html(text):
10
10
  """
@@ -122,7 +122,7 @@ def figure_to_html(
122
122
  filename = os.path.splitext(os.path.basename(out_path))[0]
123
123
  figure_name = f"{filename}_figure_{i}.png"
124
124
  figure_path = os.path.join(save_dir, figure_name)
125
- cv2.imwrite(figure_path, figure_img)
125
+ save_image(figure_img, figure_path)
126
126
 
127
127
  elements.append(
128
128
  {
@@ -180,8 +180,13 @@ def convert_html(
180
180
  elements = sorted(elements, key=lambda x: x["order"])
181
181
 
182
182
  html_string = "".join([element["html"] for element in elements])
183
- parsed_html = html.fromstring(html_string)
184
- formatted_html = etree.tostring(parsed_html, pretty_print=True, encoding="unicode")
183
+ if not len(html_string) == 0:
184
+ parsed_html = html.fromstring(html_string)
185
+ formatted_html = etree.tostring(
186
+ parsed_html, pretty_print=True, encoding="unicode"
187
+ )
188
+ else:
189
+ formatted_html = ""
185
190
 
186
191
  return formatted_html, elements
187
192
 
@@ -1,7 +1,7 @@
1
1
  import json
2
2
  import os
3
3
 
4
- import cv2
4
+ from ..utils.misc import save_image
5
5
 
6
6
 
7
7
  def paragraph_to_json(paragraph, ignore_line_break):
@@ -33,7 +33,7 @@ def save_figure(
33
33
  filename = os.path.splitext(os.path.basename(out_path))[0]
34
34
  figure_name = f"{filename}_figure_{i}.png"
35
35
  figure_path = os.path.join(save_dir, figure_name)
36
- cv2.imwrite(figure_path, figure_img)
36
+ save_image(figure_img, figure_path)
37
37
 
38
38
 
39
39
  def convert_json(inputs, out_path, ignore_line_break, img, export_figure, figure_dir):
@@ -1,7 +1,7 @@
1
1
  import os
2
2
  import re
3
3
 
4
- import cv2
4
+ from ..utils.misc import save_image
5
5
 
6
6
 
7
7
  def escape_markdown_special_chars(text):
@@ -89,7 +89,7 @@ def figure_to_md(
89
89
  filename = os.path.splitext(os.path.basename(out_path))[0]
90
90
  figure_name = f"{filename}_figure_{i}.png"
91
91
  figure_path = os.path.join(save_dir, figure_name)
92
- cv2.imwrite(figure_path, figure_img)
92
+ save_image(figure_img, figure_path)
93
93
 
94
94
  elements.append(
95
95
  {
yomitoku/reading_order.py CHANGED
@@ -17,7 +17,6 @@ def _priority_dfs(nodes, direction):
17
17
 
18
18
  pending_nodes = sorted(nodes, key=lambda x: x.prop["distance"])
19
19
  visited = [False] * len(nodes)
20
-
21
20
  start = pending_nodes.pop(0)
22
21
  stack = [start]
23
22
 
@@ -53,11 +52,11 @@ def _priority_dfs(nodes, direction):
53
52
  children.append(node)
54
53
  stack.remove(node)
55
54
 
56
- if direction == "horizontal":
55
+ if direction in "top2bottom":
57
56
  children = sorted(
58
57
  children, key=lambda x: x.prop["box"][0], reverse=True
59
58
  )
60
- else:
59
+ elif direction in ["right2left", "left2right"]:
61
60
  children = sorted(
62
61
  children, key=lambda x: x.prop["box"][1], reverse=True
63
62
  )
@@ -121,7 +120,7 @@ def _exist_other_node_between_horizontal(node, other_node, nodes):
121
120
  return False
122
121
 
123
122
 
124
- def _create_graph_horizontal(nodes):
123
+ def _create_graph_top2bottom(nodes):
125
124
  for i, node in enumerate(nodes):
126
125
  for j, other_node in enumerate(nodes):
127
126
  if i == j:
@@ -146,7 +145,7 @@ def _create_graph_horizontal(nodes):
146
145
  node.children = sorted(node.children, key=lambda x: x.prop["box"][0])
147
146
 
148
147
 
149
- def _create_graph_vertical(nodes):
148
+ def _create_graph_right2left(nodes):
150
149
  max_x = max([node.prop["box"][2] for node in nodes])
151
150
 
152
151
  for i, node in enumerate(nodes):
@@ -172,15 +171,46 @@ def _create_graph_vertical(nodes):
172
171
  node.children = sorted(node.children, key=lambda x: x.prop["box"][1])
173
172
 
174
173
 
174
def _create_graph_left2right(nodes, x_weight=1, y_weight=5):
    """
    Link nodes for a left-to-right reading order and score each node.

    Two nodes are linked when ``is_intersected_horizontal`` accepts their
    boxes and ``_exist_other_node_between_horizontal`` reports no node between
    them. The node with the smaller ``box[2]`` becomes the parent of the other.
    Each node also gets a ``"distance"`` property, a weighted sum of
    ``box[0]`` and ``box[1]``. (Boxes appear to be ``(x1, y1, x2, y2)`` —
    TODO confirm against the element schema.)

    Args:
        nodes: Graph nodes exposing ``prop``, ``children`` and ``add_link``.
        x_weight: Weight applied to ``box[0]`` in the distance score.
        y_weight: Weight applied to ``box[1]`` in the distance score.
    """
    for idx, current in enumerate(nodes):
        current_box = current.prop["box"]

        for other_idx, candidate in enumerate(nodes):
            if idx == other_idx:
                continue

            candidate_box = candidate.prop["box"]
            if not is_intersected_horizontal(current_box, candidate_box):
                continue

            if _exist_other_node_between_horizontal(current, candidate, nodes):
                continue

            # The box whose right edge (index 2) is further left comes first.
            if candidate_box[2] < current_box[2]:
                parent, child = candidate, current
            else:
                parent, child = current, candidate
            parent.add_link(child)

        current.prop["distance"] = (
            current.prop["box"][0] * x_weight + current.prop["box"][1] * y_weight
        )

    # Order every node's children by the second box coordinate.
    for current in nodes:
        current.children = sorted(
            current.children, key=lambda child: child.prop["box"][1]
        )
199
+
200
+
175
201
  def prediction_reading_order(elements, direction, img=None):
176
202
  if len(elements) < 2:
177
203
  return elements
178
204
 
179
205
  nodes = [Node(i, element.dict()) for i, element in enumerate(elements)]
180
- if direction == "horizontal":
181
- _create_graph_horizontal(nodes)
206
+ if direction == "top2bottom":
207
+ _create_graph_top2bottom(nodes)
208
+ elif direction == "right2left":
209
+ _create_graph_right2left(nodes)
210
+ elif direction == "left2right":
211
+ _create_graph_left2right(nodes)
182
212
  else:
183
- _create_graph_vertical(nodes)
213
+ raise ValueError(f"Invalid direction: {direction}")
184
214
 
185
215
  # For debugging
186
216
  # if img is not None:
yomitoku/utils/misc.py CHANGED
@@ -1,3 +1,6 @@
1
+ import cv2
2
+
3
+
1
4
  def load_charset(charset_path):
2
5
  with open(charset_path, "r", encoding="utf-8") as f:
3
6
  charset = f.read()
@@ -9,6 +12,15 @@ def filter_by_flag(elements, flags):
9
12
  return [element for element, flag in zip(elements, flags) if flag]
10
13
 
11
14
 
15
def save_image(img, path):
    """
    Encode an image in memory and write the encoded bytes to ``path``.

    The output format follows the extension of ``path`` (for example ``.png``
    or ``.jpg``), so the file content matches its name. A path without an
    extension is written as JPEG.

    Args:
        img: Image array accepted by ``cv2.imencode``.
        path: Destination file path.

    Raises:
        ValueError: If the image could not be encoded.
    """
    import os  # local import: only this helper needs it

    # Previously every image was encoded as JPEG, even for "*.png" targets.
    ext = os.path.splitext(path)[1].lower() or ".jpg"

    success, buffer = cv2.imencode(ext, img)
    if not success:
        raise ValueError("Failed to encode image")

    with open(path, "wb") as f:
        f.write(buffer.tobytes())
22
+
23
+
12
24
  def calc_overlap_ratio(rect_a, rect_b):
13
25
  intersection = calc_intersection(rect_a, rect_b)
14
26
  if intersection is None:
@@ -68,7 +80,7 @@ def calc_intersection(rect_a, rect_b):
68
80
  return [ix1, iy1, ix2, iy2]
69
81
 
70
82
 
71
- def is_intersected_horizontal(rect_a, rect_b):
83
+ def is_intersected_horizontal(rect_a, rect_b, threshold=0.5):
72
84
  _, ay1, _, ay2 = map(int, rect_a)
73
85
  _, by1, _, by2 = map(int, rect_b)
74
86
 
@@ -76,9 +88,11 @@ def is_intersected_horizontal(rect_a, rect_b):
76
88
  iy1 = max(ay1, by1)
77
89
  iy2 = min(ay2, by2)
78
90
 
91
+ min_height = min(ay2 - ay1, by2 - by1)
92
+
79
93
  overlap_height = max(0, iy2 - iy1)
80
94
 
81
- if overlap_height == 0:
95
+ if (overlap_height / min_height) < threshold:
82
96
  return False
83
97
 
84
98
  return True
@@ -107,3 +121,48 @@ def quad_to_xyxy(quad):
107
121
  y2 = max([y for _, y in quad])
108
122
 
109
123
  return x1, y1, x2, y2
124
+
125
+
126
def convert_table_array(table):
    """
    Expand a table object into a dense 2-D list of cell contents.

    ``cell.row`` and ``cell.col`` are 1-based. A cell spanning several rows or
    columns has its contents copied into every grid position it covers.
    Positions not covered by any cell stay as empty strings.

    Args:
        table: Object exposing ``n_row``, ``n_col`` and ``cells``; each cell
            exposes ``row``, ``col``, ``row_span``, ``col_span`` and ``contents``.

    Returns:
        list[list]: ``n_row`` rows of ``n_col`` entries each.
    """
    grid = [[""] * table.n_col for _ in range(table.n_row)]

    for cell in table.cells:
        # Convert the 1-based cell position to 0-based grid indices.
        top = cell.row - 1
        left = cell.col - 1
        bottom = top + cell.row_span
        right = left + cell.col_span
        text = cell.contents

        for r in range(top, bottom):
            for c in range(left, right):
                grid[r][c] = text

    return grid
144
+
145
+
146
def convert_table_array_to_dict(table_array, header_row=1):
    """
    Convert a 2-D table array into a list of row dictionaries.

    The first ``header_row`` rows supply the column names: the header cells of
    each column are joined with ``"_"``. With ``header_row=0`` the columns are
    named ``col_0``, ``col_1``, ... instead. Every remaining row becomes one
    dictionary keyed by those column names.

    Args:
        table_array: List of rows, each a list of cell strings.
        header_row: Number of leading rows that make up the header.

    Returns:
        list[dict]: One dictionary per data row; ``[]`` for an empty table.
    """
    # An empty table has no columns to name and no rows to convert.
    if not table_array:
        return []

    n_rows = len(table_array)
    n_cols = len(table_array[0])

    header_cols = []
    for col in range(n_cols):
        parts = [table_array[row][col] for row in range(header_row)]
        header_cols.append("_".join(parts) if parts else f"col_{col}")

    # NOTE: columns with identical header text share one key, so the
    # right-most of them wins in each row dictionary.
    return [
        {header_cols[col]: table_array[row][col] for col in range(n_cols)}
        for row in range(header_row, n_rows)
    ]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: yomitoku
3
- Version: 0.8.1
3
+ Version: 0.9.1
4
4
  Summary: Yomitoku is an AI-powered document image analysis package designed specifically for the Japanese language.
5
5
  Author-email: Kotaro Kinoshita <kotaro.kinoshita@mlism.com>
6
6
  License: CC BY-NC-SA 4.0
@@ -19,6 +19,8 @@ Requires-Dist: shapely>=2.0.6
19
19
  Requires-Dist: timm>=1.0.11
20
20
  Requires-Dist: torch>=2.5.0
21
21
  Requires-Dist: torchvision>=0.20.0
22
+ Provides-Extra: mcp
23
+ Requires-Dist: mcp[cli]>=1.6.0; extra == 'mcp'
22
24
  Description-Content-Type: text/markdown
23
25
 
24
26
  日本語版 | [English](README_EN.md)
@@ -64,6 +66,7 @@ Markdown でエクスポートした結果は関してはリポジトリ内の[s
64
66
 
65
67
  ## 📣 リリース情報
66
68
 
69
+ - 2025 年 4 月 4 日 YomiToku v0.8.0 手書き文字認識のサポート
67
70
  - 2024 年 11 月 26 日 YomiToku v0.5.1 (beta) を公開
68
71
 
69
72
  ## 💡 インストールの方法
@@ -1,16 +1,17 @@
1
1
  yomitoku/__init__.py,sha256=kXOM8RbpwwLABG3p3vPT3dJWBk4JX2MFGrOeBEW0hKM,543
2
2
  yomitoku/base.py,sha256=9U3sfe69O6vuO430JzzKQQNkgPsLM9WdLfOUUhp3Ljs,3878
3
3
  yomitoku/constants.py,sha256=zlW5QRc_u_F3C2RAgBFWyHJZexBnJT5N15GC-9d3iLo,686
4
- yomitoku/document_analyzer.py,sha256=wQMmXACDsDmyaxg2OnG9Og5Nx53WPUkQdUmgYtljACQ,16412
4
+ yomitoku/document_analyzer.py,sha256=xliAelQdfsK64FtVuFvstDBr9uf2TwhqW31g2g91_CY,16888
5
5
  yomitoku/layout_analyzer.py,sha256=VhNf1ZQFoozj6WUGk5ll1p2p1jk5X3j-JPcDbTAoSl4,1856
6
6
  yomitoku/layout_parser.py,sha256=0MgbCsD90srQdsxkGEL0TgKm4rkmGzsQYx0sjKQ03yc,7718
7
7
  yomitoku/ocr.py,sha256=JSTjkupcxHITQm6ERnzU7As0c3KWf8-oxc0AqNoWHXo,2272
8
- yomitoku/reading_order.py,sha256=OfhOS9ttPDoPSuHrIRKyOzG19GGeRufbuSKDqhsohh4,6404
8
+ yomitoku/reading_order.py,sha256=_T09PqT7guk57zWo4HdSazLSQTwM91piyELA_wNHQAQ,7521
9
9
  yomitoku/table_structure_recognizer.py,sha256=tHjex6deT_FjRK5ePz9bUXA_QIhgv_vYtK-ynm4ALxg,9625
10
10
  yomitoku/text_detector.py,sha256=6IwEJJKp_F8YH0Oki0QV-Mqi--P2LGbNKo-_kxBB_eo,4383
11
11
  yomitoku/text_recognizer.py,sha256=eaxozNu-Ms6iv8efbKZzn8pJNW1Wo4f86bGhzSMtv3s,5992
12
12
  yomitoku/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
- yomitoku/cli/main.py,sha256=jQCSwHw4oOwLQjARvaIO1yoSjz-2Rdb9c3DNShLS5OE,12038
13
+ yomitoku/cli/main.py,sha256=VZG8DZf-k_QytlDZtB91eBNY69MRpbryQg1rkn3fs20,12304
14
+ yomitoku/cli/mcp.py,sha256=5h704SsUGNAqVnoO_5S-HY2-bApy_Rf8ajDxl1pkT2k,4888
14
15
  yomitoku/configs/__init__.py,sha256=x5-ccjGiP6xxRtDPT7f1Enl7SsE0hSk0G8f7eF9V85I,886
15
16
  yomitoku/configs/cfg_layout_parser_rtdtrv2.py,sha256=8PRxB2Ar9UF7-DLtbgSokhrzdXb0veWI6Wc-X8qigRw,2329
16
17
  yomitoku/configs/cfg_layout_parser_rtdtrv2_v2.py,sha256=nMrL3uvoVmyzZ909Bz2zmfp9b6AEBLKhIprOvQ5yiQE,2324
@@ -21,13 +22,13 @@ yomitoku/configs/cfg_text_recognizer_parseq.py,sha256=hpFs3nKqh4XdU3BZMTultegtLE
21
22
  yomitoku/configs/cfg_text_recognizer_parseq_small.py,sha256=uCm_VC_G79IbZpOiK8fgYzAJ4b98H5pf328wyQomtfo,1259
22
23
  yomitoku/configs/cfg_text_recognizer_parseq_v2.py,sha256=GfHzbByOKjH21PRTxT8x_fU4r4Mda6F750Z8pjNeb8g,1249
23
24
  yomitoku/data/__init__.py,sha256=KAofFc9rk9ZdTKBjemu9RM8Vj9XnKbWC2MPZ2RWtOdE,82
24
- yomitoku/data/dataset.py,sha256=-I4f-FDtgsPnJ2MnXB7FtwihMW3koDaSI1OEoqKneIg,1014
25
- yomitoku/data/functions.py,sha256=HIrffs0zCJOq8IvQiI_z-b4MwTb-H2wmZjEE_5VpxFs,8040
25
+ yomitoku/data/dataset.py,sha256=lpBcpkMuQzRIyLJ4_mqtuhR9s2ZmzgBgc-XYuE_b2Sc,1326
26
+ yomitoku/data/functions.py,sha256=RExCUxI3-gccIMw-H0ribX2jeGKkrJWhS4fNn_12c3Y,7878
26
27
  yomitoku/export/__init__.py,sha256=gmlikMHRXfzfJ_8q4fyDlnpGms-x1oggQOwJEWHMgBU,508
27
- yomitoku/export/export_csv.py,sha256=4DT5Nf4FdeGP0olIzv1ypBlswkZSdMB4MeQOgYWe8uk,3375
28
- yomitoku/export/export_html.py,sha256=syzAapHcUHcUlabmZcQdWiNy2NrRs7LPzA_x39pFtfQ,5494
29
- yomitoku/export/export_json.py,sha256=6cSXSsyEVJ5Rw2nKSUOcW8_XlGmSLWlWQWCBNmRKsps,2386
30
- yomitoku/export/export_markdown.py,sha256=7Jib-YXOw70H46kvNc6z0_3LFwX9iwp1eXxsGeylF0I,4681
28
+ yomitoku/export/export_csv.py,sha256=VY8mntUCPDbDco_dyvq5O0_Q4wga9_GTyjHCS-y4UiQ,3399
29
+ yomitoku/export/export_html.py,sha256=LQDyZgbzmI0qJ0-FEK-54r9816H3L9hD10ChMcw0KyA,5620
30
+ yomitoku/export/export_json.py,sha256=iNG37tdIuYG2x3NiiZemKaB6-X45WrhVPZhbX7RUzRI,2410
31
+ yomitoku/export/export_markdown.py,sha256=KrdxDmKzVP_LbTKuDNGGsT31QOPKVsNNlb6wtLEW-1Q,4705
31
32
  yomitoku/models/__init__.py,sha256=Enxq9sjJWusZuxecTori8IQa8NEYKaiiptDluHX1avg,144
32
33
  yomitoku/models/dbnet_plus.py,sha256=jeWJZm0ihbxoJeAXBFK7uVIwoosx2IUNk7Ut5wRH0vA,7998
33
34
  yomitoku/models/parseq.py,sha256=psCPjP3eKjOFAUZJPQQhbD0nWEV5FeOZ0tTK27Rvvbw,8748
@@ -49,9 +50,9 @@ yomitoku/resource/charset.txt,sha256=sU91kSi-9Wk4733bCXy4j_UDmvcsj96sHOq1ppUJlOY
49
50
  yomitoku/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
50
51
  yomitoku/utils/graph.py,sha256=LKNB8ZhSQwOZMfeAimPMF5UCVVr2ZaUWoGDkz8z-uGU,456
51
52
  yomitoku/utils/logger.py,sha256=uOmtQDr0A0JD7wyFshedL08BiNrQorHnpktRXba8bjU,424
52
- yomitoku/utils/misc.py,sha256=FbwPLeIYYBvNf9wQh2RoEonTM5BF7_IwaEqmRsYHKA8,2673
53
+ yomitoku/utils/misc.py,sha256=r92x45kQR8lC5jO1MZaHBDtcCWBkQXg_WS9H4RXJzSY,4127
53
54
  yomitoku/utils/visualizer.py,sha256=DjDwHiAu1iFRKh96H3Egq4vuI2s_-9dLCDeykhKi8jo,5251
54
- yomitoku-0.8.1.dist-info/METADATA,sha256=Ds7gHmT1DxOJROrfpGaJGQKZ1qZ8ur78c6oxdld9kG4,8555
55
- yomitoku-0.8.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
56
- yomitoku-0.8.1.dist-info/entry_points.txt,sha256=nFV3S11zgBNW0Qq_D0XQNg2R4lNXU_9XUFr6rdJoyF8,52
57
- yomitoku-0.8.1.dist-info/RECORD,,
55
+ yomitoku-0.9.1.dist-info/METADATA,sha256=ozEkYekTPuEP1GwnCCQKgJC9DzEQpyActU_DltQGMHc,8700
56
+ yomitoku-0.9.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
57
+ yomitoku-0.9.1.dist-info/entry_points.txt,sha256=N3PzzSo-fdgri5liPpZ3ItMmRH6oVX14pIU_5pUJiAs,99
58
+ yomitoku-0.9.1.dist-info/RECORD,,
@@ -1,2 +1,3 @@
1
1
  [console_scripts]
2
2
  yomitoku = yomitoku.cli.main:main
3
+ yomitoku_mcp = yomitoku.cli.mcp:run_mcp_server