yomitoku 0.8.1__py3-none-any.whl → 0.9.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- yomitoku/cli/main.py +18 -8
- yomitoku/cli/mcp.py +165 -0
- yomitoku/data/dataset.py +20 -10
- yomitoku/data/functions.py +19 -20
- yomitoku/document_analyzer.py +21 -6
- yomitoku/export/export_csv.py +2 -2
- yomitoku/export/export_html.py +10 -5
- yomitoku/export/export_json.py +2 -2
- yomitoku/export/export_markdown.py +2 -2
- yomitoku/reading_order.py +38 -8
- yomitoku/utils/misc.py +61 -2
- {yomitoku-0.8.1.dist-info → yomitoku-0.9.1.dist-info}/METADATA +4 -1
- {yomitoku-0.8.1.dist-info → yomitoku-0.9.1.dist-info}/RECORD +15 -14
- {yomitoku-0.8.1.dist-info → yomitoku-0.9.1.dist-info}/entry_points.txt +1 -0
- {yomitoku-0.8.1.dist-info → yomitoku-0.9.1.dist-info}/WHEEL +0 -0
yomitoku/cli/main.py
CHANGED
```diff
@@ -3,7 +3,6 @@ import os
 import time
 from pathlib import Path
 
-import cv2
 import torch
 
 from ..constants import SUPPORT_OUTPUT_FORMAT
@@ -14,6 +13,8 @@ from ..utils.logger import set_logger
 from ..export import save_csv, save_html, save_json, save_markdown
 from ..export import convert_json, convert_csv, convert_html, convert_markdown
 
+from ..utils.misc import save_image
+
 logger = set_logger(__name__, "INFO")
 
 
@@ -91,21 +92,23 @@ def process_single_file(args, analyzer, path, format):
 
         if ocr is not None:
             out_path = os.path.join(
-                args.outdir, f"{dirname}_{filename}_p{page+1}_ocr.jpg"
+                args.outdir, f"{dirname}_{filename}_p{page + 1}_ocr.jpg"
             )
 
-
+            save_image(ocr, out_path)
             logger.info(f"Output file: {out_path}")
 
         if layout is not None:
             out_path = os.path.join(
-                args.outdir, f"{dirname}_{filename}_p{page+1}_layout.jpg"
+                args.outdir, f"{dirname}_{filename}_p{page + 1}_layout.jpg"
             )
 
-
+            save_image(layout, out_path)
             logger.info(f"Output file: {out_path}")
 
-        out_path = os.path.join(
+        out_path = os.path.join(
+            args.outdir, f"{dirname}_{filename}_p{page + 1}.{format}"
+        )
 
         if format == "json":
             if args.combine:
@@ -340,6 +343,12 @@ def main():
         action="store_true",
         help="if set, ignore meta information(header, footer) in the output",
     )
+    parser.add_argument(
+        "--reading_order",
+        default="auto",
+        type=str,
+        choices=["auto", "left2right", "top2bottom", "right2left"],
+    )
 
     args = parser.parse_args()
 
@@ -393,6 +402,7 @@ def main():
         visualize=args.vis,
         device=args.device,
         ignore_meta=args.ignore_meta,
+        reading_order=args.reading_order,
     )
 
     os.makedirs(args.outdir, exist_ok=True)
@@ -407,7 +417,7 @@ def main():
                 logger.info(f"Processing file: {file_path}")
                 process_single_file(args, analyzer, file_path, format)
                 end = time.time()
-                logger.info(f"Total Processing time: {end-start:.2f} sec")
+                logger.info(f"Total Processing time: {end - start:.2f} sec")
             except Exception:
                 continue
     else:
@@ -415,7 +425,7 @@ def main():
         logger.info(f"Processing file: {path}")
         process_single_file(args, analyzer, path, format)
         end = time.time()
-        logger.info(f"Total Processing time: {end-start:.2f} sec")
+        logger.info(f"Total Processing time: {end - start:.2f} sec")
 
 
 if __name__ == "__main__":
```
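The new `--reading_order` flag is passed straight through to `DocumentAnalyzer`. A minimal sketch of the equivalent library call, based only on the keyword arguments visible in this diff; values other than `reading_order` are illustrative:

```python
# Sketch only: mirrors the DocumentAnalyzer(...) call in main();
# device and the boolean flags here are just example values.
from yomitoku import DocumentAnalyzer

analyzer = DocumentAnalyzer(
    visualize=False,
    device="cpu",                # the CLI passes args.device here
    ignore_meta=False,
    reading_order="right2left",  # auto | left2right | top2bottom | right2left
)
```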
yomitoku/cli/mcp.py
ADDED
```diff
@@ -0,0 +1,165 @@
+import json
+import io
+import csv
+import os
+from pathlib import Path
+
+from mcp.server.fastmcp import Context, FastMCP
+
+from yomitoku import DocumentAnalyzer
+from yomitoku.data.functions import load_image, load_pdf
+from yomitoku.export import convert_json, convert_markdown, convert_csv, convert_html
+
+try:
+    RESOURCE_DIR = os.environ["RESOURCE_DIR"]
+except KeyError:
+    raise ValueError("Environment variable 'RESOURCE_DIR' is not set.")
+
+
+analyzer = None
+
+
+async def load_analyzer(ctx: Context) -> DocumentAnalyzer:
+    """
+    Load the DocumentAnalyzer instance if not already loaded.
+
+    Args:
+        ctx (Context): The context in which the analyzer is being loaded.
+
+    Returns:
+        DocumentAnalyzer: The loaded document analyzer instance.
+    """
+    global analyzer
+    if analyzer is None:
+        await ctx.info("Load document analyzer")
+        analyzer = DocumentAnalyzer(visualize=False, device="cuda")
+    return analyzer
+
+
+mcp = FastMCP("yomitoku")
+
+
+@mcp.tool()
+async def process_ocr(ctx: Context, filename: str, output_format: str) -> str:
+    """
+    Perform OCR on the specified file in the resource direcory and convert
+    the results to the desired format.
+
+    Args:
+        ctx (Context): The context in which the OCR processing is executed.
+        filename (str): The name of the file to process in the resource directory.
+        output_format (str): The desired format for the output. The available options are:
+            - json: Outputs the text as structured data along with positional information.
+            - markdown: Outputs texts and tables in Markdown format.
+            - html: Outputs texts and tables in HTML format.
+            - csv: Outputs texts and tables in CSV format.
+
+    Returns:
+        str: The OCR results converted to the specified format.
+    """
+    analyzer = await load_analyzer(ctx)
+
+    await ctx.info("Start ocr processing")
+
+    file_path = os.path.join(RESOURCE_DIR, filename)
+    if Path(file_path).suffix[1:].lower() in ["pdf"]:
+        imgs = load_pdf(file_path)
+    else:
+        imgs = load_image(file_path)
+
+    results = []
+    for page, img in enumerate(imgs):
+        analyzer.img = img
+        result, _, _ = await analyzer.run(img)
+        results.append(result)
+        await ctx.report_progress(page + 1, len(imgs))
+
+    if output_format == "json":
+        return json.dumps(
+            [
+                convert_json(
+                    result,
+                    out_path=None,
+                    ignore_line_break=True,
+                    img=img,
+                    export_figure=False,
+                    figure_dir=None,
+                ).model_dump()
+                for img, result in zip(imgs, results)
+            ],
+            ensure_ascii=False,
+            sort_keys=True,
+            separators=(",", ": "),
+        )
+    elif output_format == "markdown":
+        return "\n".join(
+            [
+                convert_markdown(
+                    result,
+                    out_path=None,
+                    ignore_line_break=True,
+                    img=img,
+                    export_figure=False,
+                )[0]
+                for img, result in zip(imgs, results)
+            ]
+        )
+    elif output_format == "html":
+        return "\n".join(
+            [
+                convert_html(
+                    result,
+                    out_path=None,
+                    ignore_line_break=True,
+                    img=img,
+                    export_figure=False,
+                    export_figure_letter="",
+                )[0]
+                for img, result in zip(imgs, results)
+            ]
+        )
+    elif output_format == "csv":
+        output = io.StringIO()
+        writer = csv.writer(output, quoting=csv.QUOTE_MINIMAL)
+        for img, result in zip(imgs, results):
+            elements = convert_csv(
+                result,
+                out_path=None,
+                ignore_line_break=True,
+                img=img,
+                export_figure=False,
+            )
+            for element in elements:
+                if element["type"] == "table":
+                    writer.writerows(element["element"])
+                else:
+                    writer.writerow([element["element"]])
+                writer.writerow([""])
+        return output.getvalue()
+    else:
+        raise ValueError(
+            f"Unsupported output format: {output_format}."
+            " Supported formats are json, markdown, html or csv."
+        )
+
+
+@mcp.resource("file://list")
+async def get_file_list() -> list[str]:
+    """
+    Retrieve a list of files in the resource directory.
+
+    Returns:
+        list[str]: A list of filenames in the resource directory.
+    """
+    return os.listdir(RESOURCE_DIR)
+
+
+def run_mcp_server():
+    """
+    Run the MCP server.
+    """
+    mcp.run(transport="stdio")
+
+
+if __name__ == "__main__":
+    run_mcp_server()
```
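The module resolves `RESOURCE_DIR` at import time and exposes `run_mcp_server()`; entry_points.txt also gains one line in this release, presumably registering it as a console script. A minimal launch sketch, assuming the optional `mcp` extra is installed; the directory path is a placeholder:

```python
# Sketch only: RESOURCE_DIR must be set *before* the module is imported,
# because yomitoku/cli/mcp.py reads it at import time and raises otherwise.
import os

os.environ.setdefault("RESOURCE_DIR", "/path/to/documents")  # placeholder path

from yomitoku.cli.mcp import run_mcp_server

run_mcp_server()  # serves the process_ocr tool and the file://list resource over stdio
```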
yomitoku/data/dataset.py
CHANGED
```diff
@@ -8,9 +8,11 @@ from .functions import (
     validate_quads,
 )
 
+from concurrent.futures import ThreadPoolExecutor
+
 
 class ParseqDataset(Dataset):
-    def __init__(self, cfg, img, quads):
+    def __init__(self, cfg, img, quads, num_workers=8):
         self.img = img[:, :, ::-1]
         self.quads = quads
         self.cfg = cfg
@@ -22,19 +24,27 @@ class ParseqDataset(Dataset):
             ]
         )
 
-
+        with ThreadPoolExecutor(max_workers=num_workers) as executor:
+            data = list(executor.map(self.preprocess, self.quads))
 
-
-
+        self.data = [tensor for tensor in data if tensor is not None]
+
+    def preprocess(self, quad):
+        if validate_quads(self.img, quad) is None:
+            return None
+
+        roi_img = extract_roi_with_perspective(self.img, quad)
 
-    def __getitem__(self, index):
-        polygon = self.quads[index]
-        roi_img = extract_roi_with_perspective(self.img, polygon)
         if roi_img is None:
-            return
+            return None
 
         roi_img = rotate_text_image(roi_img, thresh_aspect=2)
         resized = resize_with_padding(roi_img, self.cfg.data.img_size)
-        tensor = self.transform(resized)
 
-        return
+        return resized
+
+    def __len__(self):
+        return len(self.data)
+
+    def __getitem__(self, index):
+        return self.transform(self.data[index])
```
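`ParseqDataset` now crops, rotates, and resizes every quad up front in a thread pool and drops quads that fail validation, so `__len__` reflects only usable crops. A standalone sketch of that map-then-filter pattern, using toy data rather than the yomitoku classes:

```python
# Same shape as the new __init__: run preprocess concurrently, then keep only
# results that are not None (preprocess returns None for invalid items).
from concurrent.futures import ThreadPoolExecutor


def preprocess(item):
    # stand-in for validate -> crop -> rotate -> resize; None marks a bad item
    return item * 2 if item >= 0 else None


items = [3, -1, 5]
with ThreadPoolExecutor(max_workers=8) as executor:
    data = list(executor.map(preprocess, items))

data = [d for d in data if d is not None]
print(data)  # [6, 10]
```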
yomitoku/data/functions.py
CHANGED
```diff
@@ -191,7 +191,7 @@ def array_to_tensor(img: np.ndarray) -> torch.Tensor:
     return tensor
 
 
-def validate_quads(img: np.ndarray, quads: list[list[list[int]]]):
+def validate_quads(img: np.ndarray, quad: list[list[list[int]]]):
     """
     Validate the vertices of the quadrilateral.
 
@@ -204,23 +204,23 @@ def validate_quads(img: np.ndarray, quads: list[list[list[int]]]):
     """
 
     h, w = img.shape[:2]
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    if len(quad) != 4:
+        # raise ValueError("The number of vertices must be 4.")
+        return None
+
+    for point in quad:
+        if len(point) != 2:
+            return None
+
+    quad = np.array(quad, dtype=int)
+    x1 = np.min(quad[:, 0])
+    x2 = np.max(quad[:, 0])
+    y1 = np.min(quad[:, 1])
+    y2 = np.max(quad[:, 1])
+    h, w = img.shape[:2]
 
-
-
+    if x1 < 0 or x2 > w or y1 < 0 or y2 > h:
+        return None
 
     return True
 
@@ -237,19 +237,18 @@ def extract_roi_with_perspective(img, quad):
         np.ndarray: extracted image
     """
     dst = img.copy()
-    quad = np.array(quad, dtype=np.
+    quad = np.array(quad, dtype=np.int64)
+
     width = np.linalg.norm(quad[0] - quad[1])
     height = np.linalg.norm(quad[1] - quad[2])
 
     width = int(width)
     height = int(height)
-
     pts1 = np.float32(quad)
     pts2 = np.float32([[0, 0], [width, 0], [width, height], [0, height]])
 
     M = cv2.getPerspectiveTransform(pts1, pts2)
     dst = cv2.warpPerspective(dst, M, (width, height))
-
     return dst
 
 
```
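`validate_quads` now takes a single quad and reports failures by returning `None` (wrong vertex count, malformed points, or coordinates outside the image) instead of raising, which is what lets the dataset above silently filter bad quads. A behaviour sketch based on the added code; the image size is arbitrary:

```python
# Expected behaviour of the new validate_quads per the added lines above.
import numpy as np

from yomitoku.data.functions import validate_quads

img = np.zeros((100, 200, 3), dtype=np.uint8)  # h=100, w=200

print(validate_quads(img, [[0, 0], [50, 0], [50, 30], [0, 30]]))    # True
print(validate_quads(img, [[0, 0], [250, 0], [250, 30], [0, 30]]))  # None: x beyond width
print(validate_quads(img, [[0, 0], [50, 0], [50, 30]]))             # None: not 4 vertices
```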
yomitoku/document_analyzer.py
CHANGED
```diff
@@ -86,8 +86,12 @@ def extract_paragraph_within_figure(paragraphs, figures):
             check_list[i] = True
 
         figure["direction"] = judge_page_direction(contained_paragraphs)
+        reading_order = (
+            "left2right" if figure["direction"] == "horizontal" else "right2left"
+        )
+
         figure_paragraphs = prediction_reading_order(
-            contained_paragraphs,
+            contained_paragraphs, reading_order
         )
         figure["paragraphs"] = sorted(figure_paragraphs, key=lambda x: x.order)
         figure = FigureSchema(**figure)
@@ -126,8 +130,8 @@ def extract_words_within_element(pred_words, element):
     cnt_vertical = word_direction.count("vertical")
 
     element_direction = "horizontal" if cnt_horizontal > cnt_vertical else "vertical"
-
-    prediction_reading_order(contained_words,
+    order = "left2right" if element_direction == "horizontal" else "right2left"
+    prediction_reading_order(contained_words, order)
     contained_words = sorted(contained_words, key=lambda x: x.order)
 
     contained_words = "\n".join([content.contents for content in contained_words])
@@ -328,6 +332,7 @@ class DocumentAnalyzer:
         device="cuda",
         visualize=False,
         ignore_meta=False,
+        reading_order="auto",
     ):
         default_configs = {
             "ocr": {
@@ -352,6 +357,8 @@ class DocumentAnalyzer:
             },
         }
 
+        self.reading_order = reading_order
+
         if isinstance(configs, dict):
             recursive_update(default_configs, configs)
         else:
@@ -452,9 +459,17 @@
 
         elements = page_contents + layout_res.tables + figures
 
-        prediction_reading_order(headers,
-        prediction_reading_order(footers,
-
+        prediction_reading_order(headers, "left2right")
+        prediction_reading_order(footers, "left2right")
+
+        if self.reading_order == "auto":
+            reading_order = (
+                "right2left" if page_direction == "vertical" else "top2bottom"
+            )
+        else:
+            reading_order = self.reading_order
+
+        prediction_reading_order(elements, reading_order, self.img)
 
         for i, element in enumerate(elements):
             element.order += len(headers)
```
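When `reading_order` is left at its default of `"auto"`, the analyzer picks the direction from the detected page direction, matching how vertically set Japanese text is normally read. The rule, restated as a standalone helper for illustration; `resolve_reading_order` is our name, not part of the API:

```python
# Restates the "auto" branch added above; hypothetical helper for illustration only.
def resolve_reading_order(configured: str, page_direction: str) -> str:
    if configured == "auto":
        return "right2left" if page_direction == "vertical" else "top2bottom"
    return configured


print(resolve_reading_order("auto", "vertical"))        # right2left
print(resolve_reading_order("auto", "horizontal"))      # top2bottom
print(resolve_reading_order("left2right", "vertical"))  # explicit value wins
```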
yomitoku/export/export_csv.py
CHANGED
```diff
@@ -1,7 +1,7 @@
 import csv
 import os
 
-import cv2
+from ..utils.misc import save_image
 
 
 def table_to_csv(table, ignore_line_break):
@@ -54,7 +54,7 @@ def save_figure(
         filename = os.path.splitext(os.path.basename(out_path))[0]
         figure_name = f"{filename}_figure_{i}.png"
         figure_path = os.path.join(save_dir, figure_name)
-
+        save_image(figure_img, figure_path)
 
 
 def convert_csv(
```
yomitoku/export/export_html.py
CHANGED
```diff
@@ -1,10 +1,10 @@
 import os
 import re
 from html import escape
-
-import cv2
 from lxml import etree, html
 
+from ..utils.misc import save_image
+
 
 def convert_text_to_html(text):
     """
@@ -122,7 +122,7 @@ def figure_to_html(
         filename = os.path.splitext(os.path.basename(out_path))[0]
         figure_name = f"{filename}_figure_{i}.png"
         figure_path = os.path.join(save_dir, figure_name)
-
+        save_image(figure_img, figure_path)
 
         elements.append(
             {
@@ -180,8 +180,13 @@ def convert_html(
     elements = sorted(elements, key=lambda x: x["order"])
 
     html_string = "".join([element["html"] for element in elements])
-
-
+    if not len(html_string) == 0:
+        parsed_html = html.fromstring(html_string)
+        formatted_html = etree.tostring(
+            parsed_html, pretty_print=True, encoding="unicode"
+        )
+    else:
+        formatted_html = ""
 
     return formatted_html, elements
 
```
yomitoku/export/export_json.py
CHANGED
```diff
@@ -1,7 +1,7 @@
 import json
 import os
 
-import cv2
+from ..utils.misc import save_image
 
 
 def paragraph_to_json(paragraph, ignore_line_break):
@@ -33,7 +33,7 @@ def save_figure(
         filename = os.path.splitext(os.path.basename(out_path))[0]
         figure_name = f"{filename}_figure_{i}.png"
         figure_path = os.path.join(save_dir, figure_name)
-
+        save_image(figure_img, figure_path)
 
 
 def convert_json(inputs, out_path, ignore_line_break, img, export_figure, figure_dir):
```
yomitoku/export/export_markdown.py
CHANGED
```diff
@@ -1,7 +1,7 @@
 import os
 import re
 
-import cv2
+from ..utils.misc import save_image
 
 
 def escape_markdown_special_chars(text):
@@ -89,7 +89,7 @@ def figure_to_md(
         filename = os.path.splitext(os.path.basename(out_path))[0]
         figure_name = f"{filename}_figure_{i}.png"
         figure_path = os.path.join(save_dir, figure_name)
-
+        save_image(figure_img, figure_path)
 
         elements.append(
             {
```
yomitoku/reading_order.py
CHANGED
```diff
@@ -17,7 +17,6 @@ def _priority_dfs(nodes, direction):
 
     pending_nodes = sorted(nodes, key=lambda x: x.prop["distance"])
     visited = [False] * len(nodes)
-
     start = pending_nodes.pop(0)
     stack = [start]
 
@@ -53,11 +52,11 @@ def _priority_dfs(nodes, direction):
                 children.append(node)
                 stack.remove(node)
 
-        if direction
+        if direction in "top2bottom":
             children = sorted(
                 children, key=lambda x: x.prop["box"][0], reverse=True
             )
-
+        elif direction in ["right2left", "left2right"]:
             children = sorted(
                 children, key=lambda x: x.prop["box"][1], reverse=True
             )
@@ -121,7 +120,7 @@ def _exist_other_node_between_horizontal(node, other_node, nodes):
     return False
 
 
-def _create_graph_horizontal(nodes):
+def _create_graph_top2bottom(nodes):
     for i, node in enumerate(nodes):
         for j, other_node in enumerate(nodes):
             if i == j:
@@ -146,7 +145,7 @@ def _create_graph_horizontal(nodes):
         node.children = sorted(node.children, key=lambda x: x.prop["box"][0])
 
 
-def _create_graph_vertical(nodes):
+def _create_graph_right2left(nodes):
     max_x = max([node.prop["box"][2] for node in nodes])
 
     for i, node in enumerate(nodes):
@@ -172,15 +171,46 @@ def _create_graph_vertical(nodes):
         node.children = sorted(node.children, key=lambda x: x.prop["box"][1])
 
 
+def _create_graph_left2right(nodes, x_weight=1, y_weight=5):
+    for i, node in enumerate(nodes):
+        for j, other_node in enumerate(nodes):
+            if i == j:
+                continue
+
+            if is_intersected_horizontal(node.prop["box"], other_node.prop["box"]):
+                tx = node.prop["box"][2]
+                ox = other_node.prop["box"][2]
+
+                if _exist_other_node_between_horizontal(node, other_node, nodes):
+                    continue
+
+                if ox < tx:
+                    other_node.add_link(node)
+                else:
+                    node.add_link(other_node)
+
+        node_distance = (
+            node.prop["box"][0] * x_weight + node.prop["box"][1] * y_weight
+        )
+        node.prop["distance"] = node_distance
+
+    for node in nodes:
+        node.children = sorted(node.children, key=lambda x: x.prop["box"][1])
+
+
 def prediction_reading_order(elements, direction, img=None):
     if len(elements) < 2:
         return elements
 
     nodes = [Node(i, element.dict()) for i, element in enumerate(elements)]
-    if direction == "
-
+    if direction == "top2bottom":
+        _create_graph_top2bottom(nodes)
+    elif direction == "right2left":
+        _create_graph_right2left(nodes)
+    elif direction == "left2right":
+        _create_graph_left2right(nodes)
     else:
-
+        raise ValueError(f"Invalid direction: {direction}")
 
     # For debugging
     # if img is not None:
```
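The new `_create_graph_left2right` builds the same horizontal-intersection graph as the right-to-left variant but links blocks left to right, and it seeds the priority DFS with a weighted distance in which vertical position counts five times as much as horizontal position, so traversal starts near the top-left of the page. A small numeric illustration of that start-priority heuristic:

```python
# distance = x1 * x_weight + y1 * y_weight with the defaults from the diff
# (x_weight=1, y_weight=5); lower distance = higher start priority in the DFS.
x_weight, y_weight = 1, 5

boxes = {
    "left column, top block": (0, 0),
    "right column, top block": (500, 0),
    "left column, second block": (0, 120),
}

for name, (x1, y1) in boxes.items():
    print(name, x1 * x_weight + y1 * y_weight)
# left column, top block    -> 0   (visited first)
# right column, top block   -> 500
# left column, second block -> 600
```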
yomitoku/utils/misc.py
CHANGED
```diff
@@ -1,3 +1,6 @@
+import cv2
+
+
 def load_charset(charset_path):
     with open(charset_path, "r", encoding="utf-8") as f:
         charset = f.read()
@@ -9,6 +12,15 @@ def filter_by_flag(elements, flags):
     return [element for element, flag in zip(elements, flags) if flag]
 
 
+def save_image(img, path):
+    success, buffer = cv2.imencode(".jpg", img)
+    if not success:
+        raise ValueError("Failed to encode image")
+
+    with open(path, "wb") as f:
+        f.write(buffer.tobytes())
+
+
 def calc_overlap_ratio(rect_a, rect_b):
     intersection = calc_intersection(rect_a, rect_b)
     if intersection is None:
@@ -68,7 +80,7 @@ def calc_intersection(rect_a, rect_b):
     return [ix1, iy1, ix2, iy2]
 
 
-def is_intersected_horizontal(rect_a, rect_b):
+def is_intersected_horizontal(rect_a, rect_b, threshold=0.5):
     _, ay1, _, ay2 = map(int, rect_a)
     _, by1, _, by2 = map(int, rect_b)
 
@@ -76,9 +88,11 @@ def is_intersected_horizontal(rect_a, rect_b):
     iy1 = max(ay1, by1)
     iy2 = min(ay2, by2)
 
+    min_height = min(ay2 - ay1, by2 - by1)
+
     overlap_height = max(0, iy2 - iy1)
 
-    if overlap_height
+    if (overlap_height / min_height) < threshold:
        return False
 
     return True
@@ -107,3 +121,48 @@ def quad_to_xyxy(quad):
     y2 = max([y for _, y in quad])
 
     return x1, y1, x2, y2
+
+
+def convert_table_array(table):
+    n_rows = table.n_row
+    n_cols = table.n_col
+
+    table_array = [["" for _ in range(n_cols)] for _ in range(n_rows)]
+
+    for cell in table.cells:
+        row = cell.row - 1
+        col = cell.col - 1
+        row_span = cell.row_span
+        col_span = cell.col_span
+        contents = cell.contents
+
+        for i in range(row, row + row_span):
+            for j in range(col, col + col_span):
+                table_array[i][j] = contents
+
+    return table_array
+
+
+def convert_table_array_to_dict(table_array, header_row=1):
+    n_cols = len(table_array[0])
+    n_rows = len(table_array)
+
+    header_cols = []
+    for i in range(n_cols):
+        header = []
+        for j in range(header_row):
+            header.append(table_array[j][i])
+
+        if len(header) > 0:
+            header_cols.append("_".join(header))
+        else:
+            header_cols.append(f"col_{i}")
+
+    table_dict = []
+    for i in range(header_row, n_rows):
+        row_dict = {}
+        for j in range(n_cols):
+            row_dict[header_cols[j]] = table_array[i][j]
+        table_dict.append(row_dict)
+
+    return table_dict
```
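`save_image` replaces the direct `cv2.imwrite` usage in the CLI and exporters; encoding with `cv2.imencode` and writing the buffer manually also tends to cope better with non-ASCII output paths, which `cv2.imwrite` can mishandle. The two table helpers turn a recognized table into a plain 2D array and then into per-row dicts keyed by the header cells. A usage sketch with a hand-built stand-in for the table schema they expect (1-based `row`/`col`, plus spans and `contents`):

```python
# Sketch only: SimpleNamespace mimics the table/cell objects that
# convert_table_array reads (n_row, n_col, cells with row, col, spans, contents).
from types import SimpleNamespace

from yomitoku.utils.misc import convert_table_array, convert_table_array_to_dict

table = SimpleNamespace(
    n_row=2,
    n_col=2,
    cells=[
        SimpleNamespace(row=1, col=1, row_span=1, col_span=1, contents="Item"),
        SimpleNamespace(row=1, col=2, row_span=1, col_span=1, contents="Qty"),
        SimpleNamespace(row=2, col=1, row_span=1, col_span=1, contents="Apple"),
        SimpleNamespace(row=2, col=2, row_span=1, col_span=1, contents="3"),
    ],
)

array = convert_table_array(table)
print(array)                               # [['Item', 'Qty'], ['Apple', '3']]
print(convert_table_array_to_dict(array))  # [{'Item': 'Apple', 'Qty': '3'}]
```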
{yomitoku-0.8.1.dist-info → yomitoku-0.9.1.dist-info}/METADATA
CHANGED
```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: yomitoku
-Version: 0.8.1
+Version: 0.9.1
 Summary: Yomitoku is an AI-powered document image analysis package designed specifically for the Japanese language.
 Author-email: Kotaro Kinoshita <kotaro.kinoshita@mlism.com>
 License: CC BY-NC-SA 4.0
@@ -19,6 +19,8 @@ Requires-Dist: shapely>=2.0.6
 Requires-Dist: timm>=1.0.11
 Requires-Dist: torch>=2.5.0
 Requires-Dist: torchvision>=0.20.0
+Provides-Extra: mcp
+Requires-Dist: mcp[cli]>=1.6.0; extra == 'mcp'
 Description-Content-Type: text/markdown
 
 日本語版 | [English](README_EN.md)
@@ -64,6 +66,7 @@ Markdown でエクスポートした結果は関してはリポジトリ内の[s
 
 ## 📣 リリース情報
 
+- 2025 年 4 月 4 日 YomiToku v0.8.0 手書き文字認識のサポート
 - 2024 年 11 月 26 日 YomiToku v0.5.1 (beta) を公開
 
 ## 💡 インストールの方法
```
{yomitoku-0.8.1.dist-info → yomitoku-0.9.1.dist-info}/RECORD
CHANGED
```diff
@@ -1,16 +1,17 @@
 yomitoku/__init__.py,sha256=kXOM8RbpwwLABG3p3vPT3dJWBk4JX2MFGrOeBEW0hKM,543
 yomitoku/base.py,sha256=9U3sfe69O6vuO430JzzKQQNkgPsLM9WdLfOUUhp3Ljs,3878
 yomitoku/constants.py,sha256=zlW5QRc_u_F3C2RAgBFWyHJZexBnJT5N15GC-9d3iLo,686
-yomitoku/document_analyzer.py,sha256=
+yomitoku/document_analyzer.py,sha256=xliAelQdfsK64FtVuFvstDBr9uf2TwhqW31g2g91_CY,16888
 yomitoku/layout_analyzer.py,sha256=VhNf1ZQFoozj6WUGk5ll1p2p1jk5X3j-JPcDbTAoSl4,1856
 yomitoku/layout_parser.py,sha256=0MgbCsD90srQdsxkGEL0TgKm4rkmGzsQYx0sjKQ03yc,7718
 yomitoku/ocr.py,sha256=JSTjkupcxHITQm6ERnzU7As0c3KWf8-oxc0AqNoWHXo,2272
-yomitoku/reading_order.py,sha256=
+yomitoku/reading_order.py,sha256=_T09PqT7guk57zWo4HdSazLSQTwM91piyELA_wNHQAQ,7521
 yomitoku/table_structure_recognizer.py,sha256=tHjex6deT_FjRK5ePz9bUXA_QIhgv_vYtK-ynm4ALxg,9625
 yomitoku/text_detector.py,sha256=6IwEJJKp_F8YH0Oki0QV-Mqi--P2LGbNKo-_kxBB_eo,4383
 yomitoku/text_recognizer.py,sha256=eaxozNu-Ms6iv8efbKZzn8pJNW1Wo4f86bGhzSMtv3s,5992
 yomitoku/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-yomitoku/cli/main.py,sha256=
+yomitoku/cli/main.py,sha256=VZG8DZf-k_QytlDZtB91eBNY69MRpbryQg1rkn3fs20,12304
+yomitoku/cli/mcp.py,sha256=5h704SsUGNAqVnoO_5S-HY2-bApy_Rf8ajDxl1pkT2k,4888
 yomitoku/configs/__init__.py,sha256=x5-ccjGiP6xxRtDPT7f1Enl7SsE0hSk0G8f7eF9V85I,886
 yomitoku/configs/cfg_layout_parser_rtdtrv2.py,sha256=8PRxB2Ar9UF7-DLtbgSokhrzdXb0veWI6Wc-X8qigRw,2329
 yomitoku/configs/cfg_layout_parser_rtdtrv2_v2.py,sha256=nMrL3uvoVmyzZ909Bz2zmfp9b6AEBLKhIprOvQ5yiQE,2324
@@ -21,13 +22,13 @@ yomitoku/configs/cfg_text_recognizer_parseq.py,sha256=hpFs3nKqh4XdU3BZMTultegtLE
 yomitoku/configs/cfg_text_recognizer_parseq_small.py,sha256=uCm_VC_G79IbZpOiK8fgYzAJ4b98H5pf328wyQomtfo,1259
 yomitoku/configs/cfg_text_recognizer_parseq_v2.py,sha256=GfHzbByOKjH21PRTxT8x_fU4r4Mda6F750Z8pjNeb8g,1249
 yomitoku/data/__init__.py,sha256=KAofFc9rk9ZdTKBjemu9RM8Vj9XnKbWC2MPZ2RWtOdE,82
-yomitoku/data/dataset.py,sha256
-yomitoku/data/functions.py,sha256=
+yomitoku/data/dataset.py,sha256=lpBcpkMuQzRIyLJ4_mqtuhR9s2ZmzgBgc-XYuE_b2Sc,1326
+yomitoku/data/functions.py,sha256=RExCUxI3-gccIMw-H0ribX2jeGKkrJWhS4fNn_12c3Y,7878
 yomitoku/export/__init__.py,sha256=gmlikMHRXfzfJ_8q4fyDlnpGms-x1oggQOwJEWHMgBU,508
-yomitoku/export/export_csv.py,sha256=
-yomitoku/export/export_html.py,sha256=
-yomitoku/export/export_json.py,sha256=
-yomitoku/export/export_markdown.py,sha256=
+yomitoku/export/export_csv.py,sha256=VY8mntUCPDbDco_dyvq5O0_Q4wga9_GTyjHCS-y4UiQ,3399
+yomitoku/export/export_html.py,sha256=LQDyZgbzmI0qJ0-FEK-54r9816H3L9hD10ChMcw0KyA,5620
+yomitoku/export/export_json.py,sha256=iNG37tdIuYG2x3NiiZemKaB6-X45WrhVPZhbX7RUzRI,2410
+yomitoku/export/export_markdown.py,sha256=KrdxDmKzVP_LbTKuDNGGsT31QOPKVsNNlb6wtLEW-1Q,4705
 yomitoku/models/__init__.py,sha256=Enxq9sjJWusZuxecTori8IQa8NEYKaiiptDluHX1avg,144
 yomitoku/models/dbnet_plus.py,sha256=jeWJZm0ihbxoJeAXBFK7uVIwoosx2IUNk7Ut5wRH0vA,7998
 yomitoku/models/parseq.py,sha256=psCPjP3eKjOFAUZJPQQhbD0nWEV5FeOZ0tTK27Rvvbw,8748
@@ -49,9 +50,9 @@ yomitoku/resource/charset.txt,sha256=sU91kSi-9Wk4733bCXy4j_UDmvcsj96sHOq1ppUJlOY
 yomitoku/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 yomitoku/utils/graph.py,sha256=LKNB8ZhSQwOZMfeAimPMF5UCVVr2ZaUWoGDkz8z-uGU,456
 yomitoku/utils/logger.py,sha256=uOmtQDr0A0JD7wyFshedL08BiNrQorHnpktRXba8bjU,424
-yomitoku/utils/misc.py,sha256=
+yomitoku/utils/misc.py,sha256=r92x45kQR8lC5jO1MZaHBDtcCWBkQXg_WS9H4RXJzSY,4127
 yomitoku/utils/visualizer.py,sha256=DjDwHiAu1iFRKh96H3Egq4vuI2s_-9dLCDeykhKi8jo,5251
-yomitoku-0.
-yomitoku-0.
-yomitoku-0.
-yomitoku-0.
+yomitoku-0.9.1.dist-info/METADATA,sha256=ozEkYekTPuEP1GwnCCQKgJC9DzEQpyActU_DltQGMHc,8700
+yomitoku-0.9.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+yomitoku-0.9.1.dist-info/entry_points.txt,sha256=N3PzzSo-fdgri5liPpZ3ItMmRH6oVX14pIU_5pUJiAs,99
+yomitoku-0.9.1.dist-info/RECORD,,
```
{yomitoku-0.8.1.dist-info → yomitoku-0.9.1.dist-info}/WHEEL
File without changes