yomitoku 0.9.0__py3-none-any.whl → 0.9.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
yomitoku/cli/main.py CHANGED
@@ -9,6 +9,7 @@ from ..constants import SUPPORT_OUTPUT_FORMAT
9
9
  from ..data.functions import load_image, load_pdf
10
10
  from ..document_analyzer import DocumentAnalyzer
11
11
  from ..utils.logger import set_logger
12
+ from ..utils.searchable_pdf import create_searchable_pdf
12
13
 
13
14
  from ..export import save_csv, save_html, save_json, save_markdown
14
15
  from ..export import convert_json, convert_csv, convert_html, convert_markdown
@@ -80,11 +81,13 @@ def process_single_file(args, analyzer, path, format):
80
81
  else:
81
82
  imgs = load_image(path)
82
83
 
84
+ format_results = []
83
85
  results = []
84
86
  for page, img in enumerate(imgs):
85
87
  result, ocr, layout = analyzer(img)
86
88
  dirname = path.parent.name
87
89
  filename = path.stem
90
+ results.append(result)
88
91
 
89
92
  # cv2.imwrite(
90
93
  # os.path.join(args.outdir, f"{dirname}_{filename}_p{page+1}.jpg"), img
@@ -92,7 +95,7 @@ def process_single_file(args, analyzer, path, format):
92
95
 
93
96
  if ocr is not None:
94
97
  out_path = os.path.join(
95
- args.outdir, f"{dirname}_{filename}_p{page+1}_ocr.jpg"
98
+ args.outdir, f"{dirname}_{filename}_p{page + 1}_ocr.jpg"
96
99
  )
97
100
 
98
101
  save_image(ocr, out_path)
@@ -100,13 +103,15 @@ def process_single_file(args, analyzer, path, format):
100
103
 
101
104
  if layout is not None:
102
105
  out_path = os.path.join(
103
- args.outdir, f"{dirname}_{filename}_p{page+1}_layout.jpg"
106
+ args.outdir, f"{dirname}_{filename}_p{page + 1}_layout.jpg"
104
107
  )
105
108
 
106
109
  save_image(layout, out_path)
107
110
  logger.info(f"Output file: {out_path}")
108
111
 
109
- out_path = os.path.join(args.outdir, f"{dirname}_{filename}_p{page+1}.{format}")
112
+ out_path = os.path.join(
113
+ args.outdir, f"{dirname}_{filename}_p{page + 1}.{format}"
114
+ )
110
115
 
111
116
  if format == "json":
112
117
  if args.combine:
@@ -128,7 +133,7 @@ def process_single_file(args, analyzer, path, format):
128
133
  figure_dir=args.figure_dir,
129
134
  )
130
135
 
131
- results.append(
136
+ format_results.append(
132
137
  {
133
138
  "format": format,
134
139
  "data": json.model_dump(),
@@ -155,7 +160,7 @@ def process_single_file(args, analyzer, path, format):
155
160
  figure_dir=args.figure_dir,
156
161
  )
157
162
 
158
- results.append(
163
+ format_results.append(
159
164
  {
160
165
  "format": format,
161
166
  "data": csv,
@@ -186,7 +191,7 @@ def process_single_file(args, analyzer, path, format):
186
191
  encoding=args.encoding,
187
192
  )
188
193
 
189
- results.append(
194
+ format_results.append(
190
195
  {
191
196
  "format": format,
192
197
  "data": html,
@@ -217,14 +222,14 @@ def process_single_file(args, analyzer, path, format):
217
222
  encoding=args.encoding,
218
223
  )
219
224
 
220
- results.append(
225
+ format_results.append(
221
226
  {
222
227
  "format": format,
223
228
  "data": md,
224
229
  }
225
230
  )
226
231
 
227
- out = merge_all_pages(results)
232
+ out = merge_all_pages(format_results)
228
233
  if args.combine:
229
234
  out_path = os.path.join(args.outdir, f"{dirname}_{filename}.{format}")
230
235
  save_merged_file(
@@ -233,6 +238,15 @@ def process_single_file(args, analyzer, path, format):
233
238
  out,
234
239
  )
235
240
 
241
+ if args.searchable_pdf:
242
+ pdf_path = os.path.join(args.outdir, f"{filename}.pdf")
243
+ create_searchable_pdf(
244
+ imgs,
245
+ results,
246
+ output_path=pdf_path,
247
+ )
248
+ logger.info(f"Output SearchablePDF: {pdf_path}")
249
+
236
250
 
237
251
  def main():
238
252
  parser = argparse.ArgumentParser()
@@ -341,6 +355,17 @@ def main():
341
355
  action="store_true",
342
356
  help="if set, ignore meta information(header, footer) in the output",
343
357
  )
358
+ parser.add_argument(
359
+ "--reading_order",
360
+ default="auto",
361
+ type=str,
362
+ choices=["auto", "left2right", "top2bottom", "right2left"],
363
+ )
364
+ parser.add_argument(
365
+ "--searchable_pdf",
366
+ action="store_true",
367
+ help="if set, create searchable PDF",
368
+ )
344
369
 
345
370
  args = parser.parse_args()
346
371
 
@@ -394,6 +419,7 @@ def main():
394
419
  visualize=args.vis,
395
420
  device=args.device,
396
421
  ignore_meta=args.ignore_meta,
422
+ reading_order=args.reading_order,
397
423
  )
398
424
 
399
425
  os.makedirs(args.outdir, exist_ok=True)
@@ -408,7 +434,7 @@ def main():
408
434
  logger.info(f"Processing file: {file_path}")
409
435
  process_single_file(args, analyzer, file_path, format)
410
436
  end = time.time()
411
- logger.info(f"Total Processing time: {end-start:.2f} sec")
437
+ logger.info(f"Total Processing time: {end - start:.2f} sec")
412
438
  except Exception:
413
439
  continue
414
440
  else:
@@ -416,7 +442,7 @@ def main():
416
442
  logger.info(f"Processing file: {path}")
417
443
  process_single_file(args, analyzer, path, format)
418
444
  end = time.time()
419
- logger.info(f"Total Processing time: {end-start:.2f} sec")
445
+ logger.info(f"Total Processing time: {end - start:.2f} sec")
420
446
 
421
447
 
422
448
  if __name__ == "__main__":
@@ -1,14 +1,20 @@
1
- import json
2
- import io
3
1
  import csv
2
+ import io
3
+ import json
4
4
  import os
5
+ from argparse import ArgumentParser
5
6
  from pathlib import Path
6
7
 
7
8
  from mcp.server.fastmcp import Context, FastMCP
8
9
 
9
10
  from yomitoku import DocumentAnalyzer
10
11
  from yomitoku.data.functions import load_image, load_pdf
11
- from yomitoku.export import convert_json, convert_markdown, convert_csv, convert_html
12
+ from yomitoku.export import (
13
+ convert_csv,
14
+ convert_html,
15
+ convert_json,
16
+ convert_markdown,
17
+ )
12
18
 
13
19
  try:
14
20
  RESOURCE_DIR = os.environ["RESOURCE_DIR"]
@@ -154,12 +160,37 @@ async def get_file_list() -> list[str]:
154
160
  return os.listdir(RESOURCE_DIR)
155
161
 
156
162
 
157
- def run_mcp_server():
163
+ def run_mcp_server(transport="stdio", mount_path=None):
158
164
  """
159
165
  Run the MCP server.
160
166
  """
161
- mcp.run(transport="stdio")
167
+
168
+ if transport == "stdio":
169
+ mcp.run()
170
+ elif transport == "sse":
171
+ mcp.run(transport=transport, mount_path=mount_path)
172
+
173
+
174
def main():
    """
    Command-line entry point for the MCP server.

    Reads ``--transport``/``-t`` (``stdio`` or ``sse``, default ``stdio``) and
    ``--mount_path``/``-m`` (default ``None``) from the command line and
    forwards both values to ``run_mcp_server``.
    """
    cli = ArgumentParser(description="Run the MCP server.")

    # Each entry is (flags, keyword settings) for one add_argument call.
    option_specs = (
        (
            ("--transport", "-t"),
            {
                "type": str,
                "default": "stdio",
                "choices": ["stdio", "sse"],
                "help": "Transport method for the MCP server.",
            },
        ),
        (
            ("--mount_path", "-m"),
            {
                "type": str,
                "default": None,
                "help": "Mount path for the MCP server (only used with SSE transport).",
            },
        ),
    )
    for flags, settings in option_specs:
        cli.add_argument(*flags, **settings)

    options = cli.parse_args()
    run_mcp_server(transport=options.transport, mount_path=options.mount_path)
162
193
 
163
194
 
164
195
  if __name__ == "__main__":
165
- run_mcp_server()
196
+ main()
yomitoku/data/dataset.py CHANGED
@@ -8,9 +8,11 @@ from .functions import (
8
8
  validate_quads,
9
9
  )
10
10
 
11
+ from concurrent.futures import ThreadPoolExecutor
12
+
11
13
 
12
14
  class ParseqDataset(Dataset):
13
- def __init__(self, cfg, img, quads):
15
+ def __init__(self, cfg, img, quads, num_workers=8):
14
16
  self.img = img[:, :, ::-1]
15
17
  self.quads = quads
16
18
  self.cfg = cfg
@@ -22,19 +24,27 @@ class ParseqDataset(Dataset):
22
24
  ]
23
25
  )
24
26
 
25
- validate_quads(self.img, self.quads)
27
+ with ThreadPoolExecutor(max_workers=num_workers) as executor:
28
+ data = list(executor.map(self.preprocess, self.quads))
26
29
 
27
- def __len__(self):
28
- return len(self.quads)
30
+ self.data = [tensor for tensor in data if tensor is not None]
31
+
32
+ def preprocess(self, quad):
33
+ if validate_quads(self.img, quad) is None:
34
+ return None
35
+
36
+ roi_img = extract_roi_with_perspective(self.img, quad)
29
37
 
30
- def __getitem__(self, index):
31
- polygon = self.quads[index]
32
- roi_img = extract_roi_with_perspective(self.img, polygon)
33
38
  if roi_img is None:
34
- return
39
+ return None
35
40
 
36
41
  roi_img = rotate_text_image(roi_img, thresh_aspect=2)
37
42
  resized = resize_with_padding(roi_img, self.cfg.data.img_size)
38
- tensor = self.transform(resized)
39
43
 
40
- return tensor
44
+ return resized
45
+
46
+ def __len__(self):
47
+ return len(self.data)
48
+
49
+ def __getitem__(self, index):
50
+ return self.transform(self.data[index])
@@ -191,7 +191,7 @@ def array_to_tensor(img: np.ndarray) -> torch.Tensor:
191
191
  return tensor
192
192
 
193
193
 
194
- def validate_quads(img: np.ndarray, quads: list[list[list[int]]]):
194
+ def validate_quads(img: np.ndarray, quad: list[list[list[int]]]):
195
195
  """
196
196
  Validate the vertices of the quadrilateral.
197
197
 
@@ -204,23 +204,23 @@ def validate_quads(img: np.ndarray, quads: list[list[list[int]]]):
204
204
  """
205
205
 
206
206
  h, w = img.shape[:2]
207
- for quad in quads:
208
- if len(quad) != 4:
209
- raise ValueError("The number of vertices must be 4.")
210
-
211
- for point in quad:
212
- if len(point) != 2:
213
- raise ValueError("The number of coordinates must be 2.")
214
-
215
- quad = np.array(quad, dtype=int)
216
- x1 = np.min(quad[:, 0])
217
- x2 = np.max(quad[:, 0])
218
- y1 = np.min(quad[:, 1])
219
- y2 = np.max(quad[:, 1])
220
- h, w = img.shape[:2]
207
+ if len(quad) != 4:
208
+ # raise ValueError("The number of vertices must be 4.")
209
+ return None
210
+
211
+ for point in quad:
212
+ if len(point) != 2:
213
+ return None
214
+
215
+ quad = np.array(quad, dtype=int)
216
+ x1 = np.min(quad[:, 0])
217
+ x2 = np.max(quad[:, 0])
218
+ y1 = np.min(quad[:, 1])
219
+ y2 = np.max(quad[:, 1])
220
+ h, w = img.shape[:2]
221
221
 
222
- if x1 < 0 or x2 > w or y1 < 0 or y2 > h:
223
- raise ValueError(f"The vertices are out of the image. {quad.tolist()}")
222
+ if x1 < 0 or x2 > w or y1 < 0 or y2 > h:
223
+ return None
224
224
 
225
225
  return True
226
226
 
@@ -237,19 +237,18 @@ def extract_roi_with_perspective(img, quad):
237
237
  np.ndarray: extracted image
238
238
  """
239
239
  dst = img.copy()
240
- quad = np.array(quad, dtype=np.float32)
240
+ quad = np.array(quad, dtype=np.int64)
241
+
241
242
  width = np.linalg.norm(quad[0] - quad[1])
242
243
  height = np.linalg.norm(quad[1] - quad[2])
243
244
 
244
245
  width = int(width)
245
246
  height = int(height)
246
-
247
247
  pts1 = np.float32(quad)
248
248
  pts2 = np.float32([[0, 0], [width, 0], [width, height], [0, height]])
249
249
 
250
250
  M = cv2.getPerspectiveTransform(pts1, pts2)
251
251
  dst = cv2.warpPerspective(dst, M, (width, height))
252
-
253
252
  return dst
254
253
 
255
254
 
@@ -86,8 +86,12 @@ def extract_paragraph_within_figure(paragraphs, figures):
86
86
  check_list[i] = True
87
87
 
88
88
  figure["direction"] = judge_page_direction(contained_paragraphs)
89
+ reading_order = (
90
+ "left2right" if figure["direction"] == "horizontal" else "right2left"
91
+ )
92
+
89
93
  figure_paragraphs = prediction_reading_order(
90
- contained_paragraphs, figure["direction"]
94
+ contained_paragraphs, reading_order
91
95
  )
92
96
  figure["paragraphs"] = sorted(figure_paragraphs, key=lambda x: x.order)
93
97
  figure = FigureSchema(**figure)
@@ -126,8 +130,8 @@ def extract_words_within_element(pred_words, element):
126
130
  cnt_vertical = word_direction.count("vertical")
127
131
 
128
132
  element_direction = "horizontal" if cnt_horizontal > cnt_vertical else "vertical"
129
-
130
- prediction_reading_order(contained_words, element_direction)
133
+ order = "left2right" if element_direction == "horizontal" else "right2left"
134
+ prediction_reading_order(contained_words, order)
131
135
  contained_words = sorted(contained_words, key=lambda x: x.order)
132
136
 
133
137
  contained_words = "\n".join([content.contents for content in contained_words])
@@ -328,6 +332,7 @@ class DocumentAnalyzer:
328
332
  device="cuda",
329
333
  visualize=False,
330
334
  ignore_meta=False,
335
+ reading_order="auto",
331
336
  ):
332
337
  default_configs = {
333
338
  "ocr": {
@@ -352,6 +357,8 @@ class DocumentAnalyzer:
352
357
  },
353
358
  }
354
359
 
360
+ self.reading_order = reading_order
361
+
355
362
  if isinstance(configs, dict):
356
363
  recursive_update(default_configs, configs)
357
364
  else:
@@ -452,9 +459,17 @@ class DocumentAnalyzer:
452
459
 
453
460
  elements = page_contents + layout_res.tables + figures
454
461
 
455
- prediction_reading_order(headers, page_direction)
456
- prediction_reading_order(footers, page_direction)
457
- prediction_reading_order(elements, page_direction, self.img)
462
+ prediction_reading_order(headers, "left2right")
463
+ prediction_reading_order(footers, "left2right")
464
+
465
+ if self.reading_order == "auto":
466
+ reading_order = (
467
+ "right2left" if page_direction == "vertical" else "top2bottom"
468
+ )
469
+ else:
470
+ reading_order = self.reading_order
471
+
472
+ prediction_reading_order(elements, reading_order, self.img)
458
473
 
459
474
  for i, element in enumerate(elements):
460
475
  element.order += len(headers)
yomitoku/reading_order.py CHANGED
@@ -17,7 +17,6 @@ def _priority_dfs(nodes, direction):
17
17
 
18
18
  pending_nodes = sorted(nodes, key=lambda x: x.prop["distance"])
19
19
  visited = [False] * len(nodes)
20
-
21
20
  start = pending_nodes.pop(0)
22
21
  stack = [start]
23
22
 
@@ -53,11 +52,11 @@ def _priority_dfs(nodes, direction):
53
52
  children.append(node)
54
53
  stack.remove(node)
55
54
 
56
- if direction == "horizontal":
55
+ if direction in "top2bottom":
57
56
  children = sorted(
58
57
  children, key=lambda x: x.prop["box"][0], reverse=True
59
58
  )
60
- else:
59
+ elif direction in ["right2left", "left2right"]:
61
60
  children = sorted(
62
61
  children, key=lambda x: x.prop["box"][1], reverse=True
63
62
  )
@@ -121,7 +120,7 @@ def _exist_other_node_between_horizontal(node, other_node, nodes):
121
120
  return False
122
121
 
123
122
 
124
- def _create_graph_horizontal(nodes):
123
+ def _create_graph_top2bottom(nodes):
125
124
  for i, node in enumerate(nodes):
126
125
  for j, other_node in enumerate(nodes):
127
126
  if i == j:
@@ -146,7 +145,7 @@ def _create_graph_horizontal(nodes):
146
145
  node.children = sorted(node.children, key=lambda x: x.prop["box"][0])
147
146
 
148
147
 
149
- def _create_graph_vertical(nodes):
148
+ def _create_graph_right2left(nodes):
150
149
  max_x = max([node.prop["box"][2] for node in nodes])
151
150
 
152
151
  for i, node in enumerate(nodes):
@@ -172,15 +171,46 @@ def _create_graph_vertical(nodes):
172
171
  node.children = sorted(node.children, key=lambda x: x.prop["box"][1])
173
172
 
174
173
 
174
+ def _create_graph_left2right(nodes, x_weight=1, y_weight=5):
175
+ for i, node in enumerate(nodes):
176
+ for j, other_node in enumerate(nodes):
177
+ if i == j:
178
+ continue
179
+
180
+ if is_intersected_horizontal(node.prop["box"], other_node.prop["box"]):
181
+ tx = node.prop["box"][2]
182
+ ox = other_node.prop["box"][2]
183
+
184
+ if _exist_other_node_between_horizontal(node, other_node, nodes):
185
+ continue
186
+
187
+ if ox < tx:
188
+ other_node.add_link(node)
189
+ else:
190
+ node.add_link(other_node)
191
+
192
+ node_distance = (
193
+ node.prop["box"][0] * x_weight + node.prop["box"][1] * y_weight
194
+ )
195
+ node.prop["distance"] = node_distance
196
+
197
+ for node in nodes:
198
+ node.children = sorted(node.children, key=lambda x: x.prop["box"][1])
199
+
200
+
175
201
  def prediction_reading_order(elements, direction, img=None):
176
202
  if len(elements) < 2:
177
203
  return elements
178
204
 
179
205
  nodes = [Node(i, element.dict()) for i, element in enumerate(elements)]
180
- if direction == "horizontal":
181
- _create_graph_horizontal(nodes)
206
+ if direction == "top2bottom":
207
+ _create_graph_top2bottom(nodes)
208
+ elif direction == "right2left":
209
+ _create_graph_right2left(nodes)
210
+ elif direction == "left2right":
211
+ _create_graph_left2right(nodes)
182
212
  else:
183
- _create_graph_vertical(nodes)
213
+ raise ValueError(f"Invalid direction: {direction}")
184
214
 
185
215
  # For debugging
186
216
  # if img is not None:
yomitoku/utils/misc.py CHANGED
@@ -80,7 +80,7 @@ def calc_intersection(rect_a, rect_b):
80
80
  return [ix1, iy1, ix2, iy2]
81
81
 
82
82
 
83
- def is_intersected_horizontal(rect_a, rect_b):
83
+ def is_intersected_horizontal(rect_a, rect_b, threshold=0.5):
84
84
  _, ay1, _, ay2 = map(int, rect_a)
85
85
  _, by1, _, by2 = map(int, rect_b)
86
86
 
@@ -88,9 +88,11 @@ def is_intersected_horizontal(rect_a, rect_b):
88
88
  iy1 = max(ay1, by1)
89
89
  iy2 = min(ay2, by2)
90
90
 
91
+ min_height = min(ay2 - ay1, by2 - by1)
92
+
91
93
  overlap_height = max(0, iy2 - iy1)
92
94
 
93
- if overlap_height == 0:
95
+ if (overlap_height / min_height) < threshold:
94
96
  return False
95
97
 
96
98
  return True
@@ -119,3 +121,48 @@ def quad_to_xyxy(quad):
119
121
  y2 = max([y for _, y in quad])
120
122
 
121
123
  return x1, y1, x2, y2
124
+
125
+
126
def convert_table_array(table):
    """
    Expand a table's cells into a dense ``n_row`` x ``n_col`` grid of contents.

    Every grid position covered by a cell (taking ``row_span`` and
    ``col_span`` into account) receives that cell's ``contents``; uncovered
    positions stay as empty strings. ``cell.row`` and ``cell.col`` are treated
    as 1-based.

    Args:
        table: object exposing ``n_row``, ``n_col`` and ``cells``; each cell
            exposes ``row``, ``col``, ``row_span``, ``col_span`` and
            ``contents``.

    Returns:
        list[list]: the grid, one inner list per row.
    """
    n_rows = table.n_row
    n_cols = table.n_col

    table_array = [[""] * n_cols for _ in range(n_rows)]

    for cell in table.cells:
        row_start = cell.row - 1
        col_start = cell.col - 1

        # Clamp the covered area to the declared grid: a span that runs past
        # the last row/column would otherwise raise IndexError, and a start
        # index below zero would silently wrap around to the end of the grid.
        row_stop = min(row_start + cell.row_span, n_rows)
        col_stop = min(col_start + cell.col_span, n_cols)

        for i in range(max(row_start, 0), row_stop):
            for j in range(max(col_start, 0), col_stop):
                table_array[i][j] = cell.contents

    return table_array
144
+
145
+
146
def convert_table_array_to_dict(table_array, header_row=1):
    """
    Turn a dense table grid into a list of per-row dictionaries.

    The first ``header_row`` rows supply the column names: the header cells of
    each column are joined with ``"_"``. When ``header_row`` yields no header
    cells (e.g. ``0``), columns are named ``col_0``, ``col_1``, ... instead.
    Each remaining row becomes one dict mapping column name to cell value; if
    two columns share a name, the later column's value wins.

    Args:
        table_array: list of rows, each a list of cell values.
        header_row: number of leading rows used as headers.

    Returns:
        list[dict]: one dict per data row; ``[]`` for an empty table.
    """
    # An empty grid has no first row to measure the column count from.
    if not table_array:
        return []

    n_cols = len(table_array[0])
    n_rows = len(table_array)

    if header_row > 0:
        header_cols = [
            "_".join(table_array[r][c] for r in range(header_row))
            for c in range(n_cols)
        ]
    else:
        header_cols = [f"col_{c}" for c in range(n_cols)]

    return [
        {header_cols[c]: table_array[r][c] for c in range(n_cols)}
        for r in range(header_row, n_rows)
    ]
@@ -0,0 +1,116 @@
1
+ import os
2
+
3
+ from PIL import Image
4
+ from io import BytesIO
5
+
6
+ from reportlab.pdfgen import canvas
7
+ from reportlab.pdfbase.ttfonts import TTFont
8
+ from reportlab.pdfbase import pdfmetrics
9
+ from reportlab.pdfbase.pdfmetrics import stringWidth
10
+
11
+ import numpy as np
12
+ import jaconv
13
+
14
+ from ..constants import ROOT_DIR
15
+
16
+ FONT_PATH = ROOT_DIR + "/resource/MPLUS1p-Medium.ttf"
17
+ pdfmetrics.registerFont(TTFont("MPLUS1p-Medium", FONT_PATH))
18
+
19
+
20
+ def _poly2rect(points):
21
+ """
22
+ Convert a polygon defined by its corner points to a rectangle.
23
+ The points should be in the format [[x1, y1], [x2, y2], [x3, y3], [x4, y4]].
24
+ """
25
+ points = np.array(points, dtype=int)
26
+ x_min = points[:, 0].min()
27
+ x_max = points[:, 0].max()
28
+ y_min = points[:, 1].min()
29
+ y_max = points[:, 1].max()
30
+
31
+ return [x_min, y_min, x_max, y_max]
32
+
33
+
34
def _calc_font_size(content, bbox_height, bbox_width):
    """
    Pick the font size at which ``content`` renders closest to ``bbox_width``.

    Candidate sizes are ``bbox_height`` scaled by each factor in
    ``np.arange(0.5, 1.0, 0.01)``; widths are measured with ``stringWidth``
    for the "MPLUS1p-Medium" font. On a tie the smallest candidate is kept.
    """
    chosen_size = None
    smallest_gap = np.inf

    for scale in np.arange(0.5, 1.0, 0.01):
        candidate = bbox_height * scale
        rendered_width = stringWidth(content, "MPLUS1p-Medium", candidate)
        gap = abs(rendered_width - bbox_width)

        # Strict comparison: only a genuinely better fit replaces the current one.
        if gap < smallest_gap:
            chosen_size, smallest_gap = candidate, gap

    return chosen_size
48
+
49
+
50
def to_full_width(text):
    """
    Convert ``text`` to full-width characters.

    ``jaconv.h2z`` is applied with kana, ASCII and digit conversion enabled,
    then a small translation table maps the yen sign, the middle dot and the
    plain space to their full-width forms.
    """
    extra_table = str.maketrans(
        {
            "\u00a5": "\uffe5",  # yen sign -> full-width yen sign
            "\u00b7": "\u30fb",  # middle dot -> katakana middle dot
            " ": "\u3000",  # half-width space -> full-width space
        }
    )

    widened = jaconv.h2z(text, kana=True, ascii=True, digit=True)
    return widened.translate(extra_table)
63
+
64
+
65
def create_searchable_pdf(images, ocr_results, output_path):
    """
    Write a PDF in which each page shows an input image with the recognised
    text drawn over it using a fully transparent fill.

    Args:
        images: page images; each is indexed as ``image[:, :, ::-1]`` to turn
            BGR into RGB before it is handed to PIL.
        ocr_results: one result per image; each exposes ``words`` whose items
            carry ``content``, ``points`` and ``direction``.
        output_path: path of the PDF file to write.

    Images and results are paired with ``zip``, so surplus entries in the
    longer sequence are ignored.
    """
    # Function-scope import: ImageReader is not among the module's imports.
    from reportlab.lib.utils import ImageReader

    packet = BytesIO()
    c = canvas.Canvas(packet)

    # "MPLUS1p-Medium" is registered once when this module is imported, so it
    # is not re-registered (and the TTF re-parsed) for every page.
    for image, ocr_result in zip(images, ocr_results):
        page_image = Image.fromarray(image[:, :, ::-1])  # Convert BGR to RGB
        w, h = page_image.size

        c.setPageSize((w, h))
        # Draw straight from memory. Writing "tmp_<i>.png" into the current
        # directory collides between concurrent runs, leaves the file behind
        # when an exception occurs, and fails in a read-only directory.
        c.drawImage(ImageReader(page_image), 0, 0, width=w, height=h)

        for word in ocr_result.words:
            text = word.content
            direction = word.direction

            x1, y1, x2, y2 = _poly2rect(word.points)
            bbox_height = y2 - y1
            bbox_width = x2 - x1

            if direction == "vertical":
                text = to_full_width(text)

            # The text runs along the long side of the box: for horizontal
            # words the glyph size is bounded by the height, otherwise by
            # the width.
            if direction == "horizontal":
                font_size = _calc_font_size(text, bbox_height, bbox_width)
            else:
                font_size = _calc_font_size(text, bbox_width, bbox_height)

            c.setFont("MPLUS1p-Medium", font_size)
            c.setFillColorRGB(1, 1, 1, alpha=0)  # transparent

            if direction == "vertical":
                # PDF origin is bottom-left, hence the ``h - y`` flips.
                base_y = h - y2 + (bbox_height - font_size)
                for j, ch in enumerate(text):
                    c.saveState()
                    c.translate(x1 + font_size * 0.5, base_y - (j - 1) * font_size)
                    c.rotate(-90)
                    c.drawString(0, 0, ch)
                    c.restoreState()
            else:
                base_y = h - y2 + (bbox_height - font_size) * 0.5
                c.drawString(x1, base_y, text)

        c.showPage()

    c.save()

    with open(output_path, "wb") as f:
        f.write(packet.getvalue())
@@ -1,12 +1,13 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: yomitoku
3
- Version: 0.9.0
3
+ Version: 0.9.2
4
4
  Summary: Yomitoku is an AI-powered document image analysis package designed specifically for the Japanese language.
5
5
  Author-email: Kotaro Kinoshita <kotaro.kinoshita@mlism.com>
6
6
  License: CC BY-NC-SA 4.0
7
7
  Keywords: Deep Learning,Japanese,OCR
8
8
  Requires-Python: <3.13,>=3.10
9
9
  Requires-Dist: huggingface-hub>=0.26.1
10
+ Requires-Dist: jaconv>=0.4.0
10
11
  Requires-Dist: lxml>=5.3.0
11
12
  Requires-Dist: omegaconf>=2.3.0
12
13
  Requires-Dist: onnx>=1.17.0
@@ -15,6 +16,7 @@ Requires-Dist: opencv-python>=4.10.0.84
15
16
  Requires-Dist: pyclipper>=1.3.0.post6
16
17
  Requires-Dist: pydantic>=2.9.2
17
18
  Requires-Dist: pypdfium2>=4.30.0
19
+ Requires-Dist: reportlab>=4.4.1
18
20
  Requires-Dist: shapely>=2.0.6
19
21
  Requires-Dist: timm>=1.0.11
20
22
  Requires-Dist: torch>=2.5.0
@@ -41,7 +43,7 @@ YomiToku は日本語に特化した AI 文章画像解析エンジン(Document
41
43
  - 🤖 日本語データセットで学習した 4 種類(文字位置の検知、文字列認識、レイアウト解析、表の構造認識)の AI モデルを搭載しています。4 種類のモデルはすべて独自に学習されたモデルで日本語文書に対して、高精度に推論可能です。
42
44
  - 🇯🇵 各モデルは日本語の文書画像に特化して学習されており、7000 文字を超える日本語文字の認識をサポート、手書き文字、縦書きなど日本語特有のレイアウト構造の文書画像の解析も可能です。(日本語以外にも英語の文書に対しても対応しています)。
43
45
  - 📈 レイアウト解析、表の構造解析, 読み順推定機能により、文書画像のレイアウトの意味的構造を壊さずに情報を抽出することが可能です。
44
- - 📄 多様な出力形式をサポートしています。html やマークダウン、json、csv のいずれかのフォーマットに変換可能です。また、文書内に含まれる図表、画像の抽出の出力も可能です。
46
+ - 📄 多様な出力形式をサポートしています。html やマークダウン、json、csv のいずれかのフォーマットに変換可能です。また、文書内に含まれる図表、画像の抽出の出力も可能です。文書画像をサーチャブルPDFに変換する処理もサポートしています。
45
47
  - ⚡ GPU 環境で高速に動作し、効率的に文書の文字起こし解析が可能です。また、VRAM も 8GB 以内で動作し、ハイエンドな GPU を用意する必要はありません。
46
48
 
47
49
  ## 🖼️ デモ
@@ -66,6 +68,7 @@ Markdown でエクスポートした結果は関してはリポジトリ内の[s
66
68
 
67
69
  ## 📣 リリース情報
68
70
 
71
+ - 2025 年 4 月 4 日 YomiToku v0.8.0 手書き文字認識のサポート
69
72
  - 2024 年 11 月 26 日 YomiToku v0.5.1 (beta) を公開
70
73
 
71
74
  ## 💡 インストールの方法
@@ -95,6 +98,7 @@ yomitoku ${path_data} -f md -o results -v --figure --lite
95
98
  - `--encoding` エクスポートする出力ファイルの文字エンコーディングを指定します。サポートされていない文字コードが含まれる場合は、その文字を無視します。(utf-8, utf-8-sig, shift-jis, enc-jp, cp932)
96
99
  - `--combine` PDFを入力に与えたときに、複数ページが含まれる場合に、それらの予測結果を一つのファイルに統合してエクスポートします。
97
100
  - `--ignore_meta` 文章のheader, footerなどの文字情報を出力ファイルに含めません。
101
+ - `--searchable_pdf` 読み取った文字情報をPDFに埋め込み全文検索可能なPDFを出力します。
98
102
 
99
103
  その他のオプションに関しては、ヘルプを参照
100
104
 
@@ -1,17 +1,17 @@
1
1
  yomitoku/__init__.py,sha256=kXOM8RbpwwLABG3p3vPT3dJWBk4JX2MFGrOeBEW0hKM,543
2
2
  yomitoku/base.py,sha256=9U3sfe69O6vuO430JzzKQQNkgPsLM9WdLfOUUhp3Ljs,3878
3
3
  yomitoku/constants.py,sha256=zlW5QRc_u_F3C2RAgBFWyHJZexBnJT5N15GC-9d3iLo,686
4
- yomitoku/document_analyzer.py,sha256=wQMmXACDsDmyaxg2OnG9Og5Nx53WPUkQdUmgYtljACQ,16412
4
+ yomitoku/document_analyzer.py,sha256=xliAelQdfsK64FtVuFvstDBr9uf2TwhqW31g2g91_CY,16888
5
5
  yomitoku/layout_analyzer.py,sha256=VhNf1ZQFoozj6WUGk5ll1p2p1jk5X3j-JPcDbTAoSl4,1856
6
6
  yomitoku/layout_parser.py,sha256=0MgbCsD90srQdsxkGEL0TgKm4rkmGzsQYx0sjKQ03yc,7718
7
7
  yomitoku/ocr.py,sha256=JSTjkupcxHITQm6ERnzU7As0c3KWf8-oxc0AqNoWHXo,2272
8
- yomitoku/reading_order.py,sha256=OfhOS9ttPDoPSuHrIRKyOzG19GGeRufbuSKDqhsohh4,6404
8
+ yomitoku/reading_order.py,sha256=_T09PqT7guk57zWo4HdSazLSQTwM91piyELA_wNHQAQ,7521
9
9
  yomitoku/table_structure_recognizer.py,sha256=tHjex6deT_FjRK5ePz9bUXA_QIhgv_vYtK-ynm4ALxg,9625
10
10
  yomitoku/text_detector.py,sha256=6IwEJJKp_F8YH0Oki0QV-Mqi--P2LGbNKo-_kxBB_eo,4383
11
11
  yomitoku/text_recognizer.py,sha256=eaxozNu-Ms6iv8efbKZzn8pJNW1Wo4f86bGhzSMtv3s,5992
12
12
  yomitoku/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
- yomitoku/cli/main.py,sha256=9X8QWwsTAv82uNn5Ns9T_laGJPKHDyeEwenaQxnAmn4,12062
14
- yomitoku/cli/mcp.py,sha256=5h704SsUGNAqVnoO_5S-HY2-bApy_Rf8ajDxl1pkT2k,4888
13
+ yomitoku/cli/main.py,sha256=7AaaFzMf33ER__XPDBNkrJkKwclne7QyVFWeBvpUYBY,12849
14
+ yomitoku/cli/mcp_server.py,sha256=WnWzxd13HaemC3b-5i9B9NVBGc3WGfum2nYhoBolEnk,5641
15
15
  yomitoku/configs/__init__.py,sha256=x5-ccjGiP6xxRtDPT7f1Enl7SsE0hSk0G8f7eF9V85I,886
16
16
  yomitoku/configs/cfg_layout_parser_rtdtrv2.py,sha256=8PRxB2Ar9UF7-DLtbgSokhrzdXb0veWI6Wc-X8qigRw,2329
17
17
  yomitoku/configs/cfg_layout_parser_rtdtrv2_v2.py,sha256=nMrL3uvoVmyzZ909Bz2zmfp9b6AEBLKhIprOvQ5yiQE,2324
@@ -22,8 +22,8 @@ yomitoku/configs/cfg_text_recognizer_parseq.py,sha256=hpFs3nKqh4XdU3BZMTultegtLE
22
22
  yomitoku/configs/cfg_text_recognizer_parseq_small.py,sha256=uCm_VC_G79IbZpOiK8fgYzAJ4b98H5pf328wyQomtfo,1259
23
23
  yomitoku/configs/cfg_text_recognizer_parseq_v2.py,sha256=GfHzbByOKjH21PRTxT8x_fU4r4Mda6F750Z8pjNeb8g,1249
24
24
  yomitoku/data/__init__.py,sha256=KAofFc9rk9ZdTKBjemu9RM8Vj9XnKbWC2MPZ2RWtOdE,82
25
- yomitoku/data/dataset.py,sha256=-I4f-FDtgsPnJ2MnXB7FtwihMW3koDaSI1OEoqKneIg,1014
26
- yomitoku/data/functions.py,sha256=HIrffs0zCJOq8IvQiI_z-b4MwTb-H2wmZjEE_5VpxFs,8040
25
+ yomitoku/data/dataset.py,sha256=lpBcpkMuQzRIyLJ4_mqtuhR9s2ZmzgBgc-XYuE_b2Sc,1326
26
+ yomitoku/data/functions.py,sha256=RExCUxI3-gccIMw-H0ribX2jeGKkrJWhS4fNn_12c3Y,7878
27
27
  yomitoku/export/__init__.py,sha256=gmlikMHRXfzfJ_8q4fyDlnpGms-x1oggQOwJEWHMgBU,508
28
28
  yomitoku/export/export_csv.py,sha256=VY8mntUCPDbDco_dyvq5O0_Q4wga9_GTyjHCS-y4UiQ,3399
29
29
  yomitoku/export/export_html.py,sha256=LQDyZgbzmI0qJ0-FEK-54r9816H3L9hD10ChMcw0KyA,5620
@@ -50,9 +50,10 @@ yomitoku/resource/charset.txt,sha256=sU91kSi-9Wk4733bCXy4j_UDmvcsj96sHOq1ppUJlOY
50
50
  yomitoku/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
51
51
  yomitoku/utils/graph.py,sha256=LKNB8ZhSQwOZMfeAimPMF5UCVVr2ZaUWoGDkz8z-uGU,456
52
52
  yomitoku/utils/logger.py,sha256=uOmtQDr0A0JD7wyFshedL08BiNrQorHnpktRXba8bjU,424
53
- yomitoku/utils/misc.py,sha256=cIUrvSJwfWwTui7ueYistf9XPapPR3XgqD2wQjWit40,2901
53
+ yomitoku/utils/misc.py,sha256=r92x45kQR8lC5jO1MZaHBDtcCWBkQXg_WS9H4RXJzSY,4127
54
+ yomitoku/utils/searchable_pdf.py,sha256=40JbcxWrHzYTtzvI9MPYHMrWqLWKiLWo4mWDNRFXwHY,3530
54
55
  yomitoku/utils/visualizer.py,sha256=DjDwHiAu1iFRKh96H3Egq4vuI2s_-9dLCDeykhKi8jo,5251
55
- yomitoku-0.9.0.dist-info/METADATA,sha256=vUbrNm2w-7OIqEEXNzFQBDm8y57mTuh1UeJYHBGRo9U,8622
56
- yomitoku-0.9.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
57
- yomitoku-0.9.0.dist-info/entry_points.txt,sha256=N3PzzSo-fdgri5liPpZ3ItMmRH6oVX14pIU_5pUJiAs,99
58
- yomitoku-0.9.0.dist-info/RECORD,,
56
+ yomitoku-0.9.2.dist-info/METADATA,sha256=vDEaaXAimCBfVwMeWmfyJBqzb7sXtZk4-ia3PXrtk7c,8966
57
+ yomitoku-0.9.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
58
+ yomitoku-0.9.2.dist-info/entry_points.txt,sha256=n3c8bQSj5Be5GHAOv_NZ8cldJFmWeigQxSmteFTmu_k,96
59
+ yomitoku-0.9.2.dist-info/RECORD,,
@@ -1,3 +1,3 @@
1
1
  [console_scripts]
2
2
  yomitoku = yomitoku.cli.main:main
3
- yomitoku_mcp = yomitoku.cli.mcp:run_mcp_server
3
+ yomitoku_mcp = yomitoku.cli.mcp_server:main