yomitoku 0.6.0__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- yomitoku/cli/main.py +33 -2
- yomitoku/document_analyzer.py +229 -26
- yomitoku/export/export_csv.py +39 -2
- yomitoku/export/export_html.py +2 -1
- yomitoku/export/export_json.py +40 -2
- yomitoku/export/export_markdown.py +2 -1
- yomitoku/layout_analyzer.py +1 -5
- yomitoku/layout_parser.py +5 -1
- yomitoku/ocr.py +24 -27
- yomitoku/table_structure_recognizer.py +24 -7
- yomitoku/text_detector.py +8 -3
- yomitoku/text_recognizer.py +22 -7
- yomitoku/utils/misc.py +20 -13
- yomitoku/utils/visualizer.py +5 -5
- {yomitoku-0.6.0.dist-info → yomitoku-0.7.0.dist-info}/METADATA +11 -4
- {yomitoku-0.6.0.dist-info → yomitoku-0.7.0.dist-info}/RECORD +18 -18
- {yomitoku-0.6.0.dist-info → yomitoku-0.7.0.dist-info}/WHEEL +1 -1
- {yomitoku-0.6.0.dist-info → yomitoku-0.7.0.dist-info}/entry_points.txt +0 -0
yomitoku/cli/main.py
CHANGED
@@ -13,6 +13,18 @@ from ..utils.logger import set_logger
 logger = set_logger(__name__, "INFO")
 
 
+def validate_encoding(encoding):
+    if encoding not in [
+        "utf-8",
+        "utf-8-sig",
+        "shift-jis",
+        "euc-jp",
+        "cp932",
+    ]:
+        raise ValueError(f"Invalid encoding: {encoding}")
+    return True
+
+
 def process_single_file(args, analyzer, path, format):
     if path.suffix[1:].lower() in ["pdf"]:
         imgs = load_pdf(path)
@@ -21,7 +33,6 @@ def process_single_file(args, analyzer, path, format):
 
     for page, img in enumerate(imgs):
        results, ocr, layout = analyzer(img)
-
        dirname = path.parent.name
        filename = path.stem
 
@@ -47,11 +58,19 @@ def process_single_file(args, analyzer, path, format):
         results.to_json(
             out_path,
             ignore_line_break=args.ignore_line_break,
+            encoding=args.encoding,
+            img=img,
+            export_figure=args.figure,
+            figure_dir=args.figure_dir,
         )
     elif format == "csv":
         results.to_csv(
             out_path,
             ignore_line_break=args.ignore_line_break,
+            encoding=args.encoding,
+            img=img,
+            export_figure=args.figure,
+            figure_dir=args.figure_dir,
         )
     elif format == "html":
         results.to_html(
@@ -62,6 +81,7 @@ def process_single_file(args, analyzer, path, format):
             export_figure_letter=args.figure_letter,
             figure_width=args.figure_width,
             figure_dir=args.figure_dir,
+            encoding=args.encoding,
         )
     elif format == "md":
         results.to_markdown(
@@ -72,6 +92,7 @@ def process_single_file(args, analyzer, path, format):
             export_figure_letter=args.figure_letter,
             figure_width=args.figure_width,
             figure_dir=args.figure_dir,
+            encoding=args.encoding,
         )
 
     logger.info(f"Output file: {out_path}")
@@ -168,6 +189,12 @@ def main():
         default="figures",
         help="directory to save figure images",
     )
+    parser.add_argument(
+        "--encoding",
+        type=str,
+        default="utf-8",
+        help="Specifies the character encoding for the output file to be exported. If unsupported characters are included, they will be ignored.",
+    )
 
     args = parser.parse_args()
 
@@ -181,6 +208,8 @@ def main():
             f"Invalid output format: {args.format}. Supported formats are {SUPPORT_OUTPUT_FORMAT}"
         )
 
+    validate_encoding(args.encoding)
+
     if format == "markdown":
         format = "md"
 
@@ -205,7 +234,9 @@ def main():
 
     if args.lite:
         configs["ocr"]["text_recognizer"]["model_name"] = "parseq-small"
-
+
+        if args.device == "cpu":
+            configs["ocr"]["text_detector"]["infer_onnx"] = True
 
         # Note: for everything other than the Text Detector, PyTorch inference is faster than ONNX, so ONNX inference is not used
         # configs["ocr"]["text_recognizer"]["infer_onnx"] = True
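For reference, the new `validate_encoding` helper gates the `--encoding` flag before any file is written. A minimal sketch of its behavior (assumes yomitoku 0.7.0 is installed; importing `yomitoku.cli.main` pulls in the CLI module's dependencies):

```python
from yomitoku.cli.main import validate_encoding

validate_encoding("utf-8")  # True: one of utf-8, utf-8-sig, shift-jis, euc-jp, cp932

try:
    validate_encoding("latin-1")  # not on the whitelist
except ValueError as e:
    print(e)  # Invalid encoding: latin-1
```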
yomitoku/document_analyzer.py
CHANGED
@@ -2,17 +2,26 @@ import asyncio
 from concurrent.futures import ThreadPoolExecutor
 from typing import List, Union
 
+import numpy as np
+
 from pydantic import conlist
 
 from .base import BaseSchema
 from .export import export_csv, export_html, export_markdown
 from .layout_analyzer import LayoutAnalyzer
-from .ocr import
-from .table_structure_recognizer import TableStructureRecognizerSchema
-from .utils.misc import is_contained, quad_to_xyxy
+from .ocr import OCRSchema, WordPrediction, ocr_aggregate
 from .reading_order import prediction_reading_order
-
+from .table_structure_recognizer import TableStructureRecognizerSchema
+from .utils.misc import (
+    is_contained,
+    quad_to_xyxy,
+    calc_overlap_ratio,
+)
 from .utils.visualizer import reading_order_visualizer
+from yomitoku.text_detector import TextDetector
+from yomitoku.text_recognizer import TextRecognizer
+
+from .utils.visualizer import det_visualizer
 
 
 class ParagraphSchema(BaseSchema):
@@ -98,41 +107,57 @@ def extract_words_within_element(pred_words, element):
     word_sum_width = 0
     word_sum_height = 0
     check_list = [False] * len(pred_words)
+
     for i, word in enumerate(pred_words):
         word_box = quad_to_xyxy(word.points)
         if is_contained(element.box, word_box, threshold=0.5):
-            contained_words.append(word)
             word_sum_width += word_box[2] - word_box[0]
             word_sum_height += word_box[3] - word_box[1]
             check_list[i] = True
 
+            word_element = ParagraphSchema(
+                box=word_box,
+                contents=word.content,
+                direction=word.direction,
+                order=0,
+                role=None,
+            )
+            contained_words.append(word_element)
+
     if len(contained_words) == 0:
         return None, None, check_list
 
-
-    # mean_height = word_sum_height / len(contained_words)
-
+    element_direction = "horizontal"
     word_direction = [word.direction for word in contained_words]
     cnt_horizontal = word_direction.count("horizontal")
     cnt_vertical = word_direction.count("vertical")
 
     element_direction = "horizontal" if cnt_horizontal > cnt_vertical else "vertical"
-    if element_direction == "horizontal":
-        contained_words = sorted(
-            contained_words,
-            key=lambda x: (sum([p[1] for p in x.points]) / 4),
-        )
-    else:
-        contained_words = sorted(
-            contained_words,
-            key=lambda x: (sum([p[0] for p in x.points]) / 4),
-            reverse=True,
-        )
 
-    contained_words
+    prediction_reading_order(contained_words, element_direction)
+    contained_words = sorted(contained_words, key=lambda x: x.order)
+
+    contained_words = "\n".join([content.contents for content in contained_words])
 
     return (contained_words, element_direction, check_list)
 
 
+def is_vertical(quad, thresh_aspect=2):
+    quad = np.array(quad)
+    width = np.linalg.norm(quad[0] - quad[1])
+    height = np.linalg.norm(quad[1] - quad[2])
+
+    return height > width * thresh_aspect
+
+
+def is_noise(quad, thresh=15):
+    quad = np.array(quad)
+    width = np.linalg.norm(quad[0] - quad[1])
+    height = np.linalg.norm(quad[1] - quad[2])
+
+    return width < thresh or height < thresh
+
+
 def recursive_update(original, new_data):
     for key, value in new_data.items():
         # If `value` is a dict, update it recursively
@@ -148,8 +173,163 @@ def recursive_update(original, new_data):
     return original
 
 
+def _extract_words_within_table(words, table, check_list):
+    horizontal_words = []
+    vertical_words = []
+
+    for i, (points, score) in enumerate(zip(words.points, words.scores)):
+        word_box = quad_to_xyxy(points)
+        if is_contained(table.box, word_box, threshold=0.5):
+            if is_vertical(points):
+                vertical_words.append({"points": points, "score": score})
+            else:
+                horizontal_words.append({"points": points, "score": score})
+
+            check_list[i] = True
+
+    return (horizontal_words, vertical_words, check_list)
+
+
+def _calc_overlap_words_on_lines(lines, words):
+    overlap_ratios = [[0 for _ in lines] for _ in words]
+
+    for i, word in enumerate(words):
+        word_box = quad_to_xyxy(word["points"])
+        for j, row in enumerate(lines):
+            overlap_ratio, _ = calc_overlap_ratio(
+                row.box,
+                word_box,
+            )
+            overlap_ratios[i][j] = overlap_ratio
+
+    return overlap_ratios
+
+
+def _correct_vertical_word_boxes(overlap_ratios_vertical, table, table_words_vertical):
+    allocated_cols = [cols.index(max(cols)) for cols in overlap_ratios_vertical]
+
+    new_points = []
+    new_scores = []
+    for i, col_index in enumerate(allocated_cols):
+        col_cells = []
+        for cell in table.cells:
+            if cell.col <= (col_index + 1) < (cell.col + cell.col_span):
+                col_cells.append(cell)
+
+        word_point = table_words_vertical[i]["points"]
+        word_score = table_words_vertical[i]["score"]
+
+        for cell in col_cells:
+            word_box = quad_to_xyxy(word_point)
+
+            _, intersection = calc_overlap_ratio(
+                cell.box,
+                word_box,
+            )
+
+            if intersection is not None:
+                _, y1, _, y2 = intersection
+
+                new_point = [
+                    [word_point[0][0], max(word_point[0][1], y1)],
+                    [word_point[1][0], max(word_point[1][1], y1)],
+                    [word_point[2][0], min(word_point[2][1], y2)],
+                    [word_point[3][0], min(word_point[3][1], y2)],
+                ]
+
+                if not is_noise(new_point):
+                    new_points.append(new_point)
+                    new_scores.append(word_score)
+
+    return new_points, new_scores
+
+
+def _correct_horizontal_word_boxes(
+    overlap_ratios_horizontal, table, table_words_horizontal
+):
+    allocated_rows = [rows.index(max(rows)) for rows in overlap_ratios_horizontal]
+
+    new_points = []
+    new_scores = []
+    for i, row_index in enumerate(allocated_rows):
+        row_cells = []
+        for cell in table.cells:
+            if cell.row <= (row_index + 1) < (cell.row + cell.row_span):
+                row_cells.append(cell)
+
+        word_point = table_words_horizontal[i]["points"]
+        word_score = table_words_horizontal[i]["score"]
+
+        for cell in row_cells:
+            word_box = quad_to_xyxy(word_point)
+
+            _, intersection = calc_overlap_ratio(
+                cell.box,
+                word_box,
+            )
+
+            if intersection is not None:
+                x1, _, x2, _ = intersection
+
+                new_point = [
+                    [max(word_point[0][0], x1), word_point[0][1]],
+                    [min(word_point[1][0], x2), word_point[1][1]],
+                    [min(word_point[2][0], x2), word_point[2][1]],
+                    [max(word_point[3][0], x1), word_point[3][1]],
+                ]
+
+                if not is_noise(new_point):
+                    new_points.append(new_point)
+                    new_scores.append(word_score)
+
+    return new_points, new_scores
+
+
+def _split_text_across_cells(results_det, results_layout):
+    check_list = [False] * len(results_det.points)
+    new_points = []
+    new_scores = []
+    for table in results_layout.tables:
+        table_words_horizontal, table_words_vertical, check_list = (
+            _extract_words_within_table(results_det, table, check_list)
+        )
+
+        overlap_ratios_horizontal = _calc_overlap_words_on_lines(
+            table.rows,
+            table_words_horizontal,
+        )
+
+        overlap_ratios_vertical = _calc_overlap_words_on_lines(
+            table.cols,
+            table_words_vertical,
+        )
+
+        new_points_horizontal, new_scores_horizontal = _correct_horizontal_word_boxes(
+            overlap_ratios_horizontal, table, table_words_horizontal
+        )
+
+        new_points_vertical, new_scores_vertical = _correct_vertical_word_boxes(
+            overlap_ratios_vertical, table, table_words_vertical
+        )
+
+        new_points.extend(new_points_horizontal)
+        new_scores.extend(new_scores_horizontal)
+        new_points.extend(new_points_vertical)
+        new_scores.extend(new_scores_vertical)
+
+    for i, flag in enumerate(check_list):
+        if not flag:
+            new_points.append(results_det.points[i])
+            new_scores.append(results_det.scores[i])
+
+    results_det.points = new_points
+    results_det.scores = new_scores
+
+    return results_det
+
+
 class DocumentAnalyzer:
-    def __init__(self, configs=
+    def __init__(self, configs={}, device="cuda", visualize=False):
         default_configs = {
             "ocr": {
                 "text_detector": {
@@ -180,8 +360,16 @@ class DocumentAnalyzer:
             "configs must be a dict. See the https://kotaro-kinoshita.github.io/yomitoku-dev/usage/"
         )
 
-        self.
-
+        self.text_detector = TextDetector(
+            **default_configs["ocr"]["text_detector"],
+        )
+        self.text_recognizer = TextRecognizer(
+            **default_configs["ocr"]["text_recognizer"]
+        )
+
+        self.layout = LayoutAnalyzer(
+            configs=default_configs["layout_analyzer"],
+        )
         self.visualize = visualize
 
     def aggregate(self, ocr_res, layout_res):
@@ -286,16 +474,31 @@ class DocumentAnalyzer:
         with ThreadPoolExecutor(max_workers=2) as executor:
             loop = asyncio.get_running_loop()
             tasks = [
-                loop.run_in_executor(executor, self.ocr, img),
+                # loop.run_in_executor(executor, self.ocr, img),
+                loop.run_in_executor(executor, self.text_detector, img),
                 loop.run_in_executor(executor, self.layout, img),
             ]
 
             results = await asyncio.gather(*tasks)
 
-
+            results_det, _ = results[0]
             results_layout, layout = results[1]
 
-
+            results_det = _split_text_across_cells(results_det, results_layout)
+
+            vis_det = None
+            if self.visualize:
+                vis_det = det_visualizer(
+                    img,
+                    results_det.points,
+                )
+
+            results_rec, ocr = self.text_recognizer(img, results_det.points, vis_det)
+
+            outputs = {"words": ocr_aggregate(results_det, results_rec)}
+            results_ocr = OCRSchema(**outputs)
+            outputs = self.aggregate(results_ocr, results_layout)
+
         results = DocumentAnalyzerSchema(**outputs)
         return results, ocr, layout
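The new `is_vertical` and `is_noise` helpers classify detected quads purely by edge lengths, using the thresholds shown in the diff. A worked example (the quad coordinates are illustrative):

```python
import numpy as np

quad = [[0, 0], [10, 0], [10, 40], [0, 40]]  # 10 px wide, 40 px tall
width = np.linalg.norm(np.array(quad[0]) - np.array(quad[1]))   # 10.0
height = np.linalg.norm(np.array(quad[1]) - np.array(quad[2]))  # 40.0

print(height > width * 2)         # is_vertical: True (40 > 20)
print(width < 15 or height < 15)  # is_noise: True (width 10 < 15)
```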
yomitoku/export/export_csv.py
CHANGED
@@ -1,4 +1,6 @@
 import csv
+import cv2
+import os
 
 
 def table_to_csv(table, ignore_line_break):
@@ -33,7 +35,34 @@ def paragraph_to_csv(paragraph, ignore_line_break):
     return contents
 
 
-def export_csv(inputs, out_path: str, ignore_line_break: bool = False):
+def save_figure(
+    figures,
+    img,
+    out_path,
+    figure_dir="figures",
+):
+    for i, figure in enumerate(figures):
+        x1, y1, x2, y2 = map(int, figure.box)
+        figure_img = img[y1:y2, x1:x2, :]
+        save_dir = os.path.dirname(out_path)
+        save_dir = os.path.join(save_dir, figure_dir)
+        os.makedirs(save_dir, exist_ok=True)
+
+        filename = os.path.splitext(os.path.basename(out_path))[0]
+        figure_name = f"{filename}_figure_{i}.png"
+        figure_path = os.path.join(save_dir, figure_name)
+        cv2.imwrite(figure_path, figure_img)
+
+
+def export_csv(
+    inputs,
+    out_path: str,
+    ignore_line_break: bool = False,
+    encoding: str = "utf-8",
+    img=None,
+    export_figure: bool = True,
+    figure_dir="figures",
+):
     elements = []
     for table in inputs.tables:
         table_csv = table_to_csv(table, ignore_line_break)
@@ -58,9 +87,17 @@ def export_csv(inputs, out_path: str, ignore_line_break: bool = False):
         }
     )
 
+    if export_figure:
+        save_figure(
+            inputs.figures,
+            img,
+            out_path,
+            figure_dir=figure_dir,
+        )
+
     elements = sorted(elements, key=lambda x: x["order"])
 
-    with open(out_path, "w", newline="", encoding="utf-8") as f:
+    with open(out_path, "w", newline="", encoding=encoding, errors="ignore") as f:
         writer = csv.writer(f, quoting=csv.QUOTE_MINIMAL)
         for element in elements:
             if element["type"] == "table":
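The CLI's `results.to_csv(...)` call above maps onto these new parameters. A sketch of a direct call (assumes `results` comes from `DocumentAnalyzer` and `img` is the analyzed image as an ndarray):

```python
results.to_csv(
    "output/result.csv",
    encoding="shift-jis",  # written with errors="ignore", so unmappable characters are dropped
    img=img,
    export_figure=True,    # crops each figure box from img into output/figures/ via save_figure()
    figure_dir="figures",
)
```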
yomitoku/export/export_html.py
CHANGED
@@ -154,6 +154,7 @@ def export_html(
     img=None,
     figure_width=200,
     figure_dir="figures",
+    encoding: str = "utf-8",
 ):
     html_string = ""
     elements = []
@@ -184,5 +185,5 @@ def export_html(
     parsed_html = html.fromstring(html_string)
     formatted_html = etree.tostring(parsed_html, pretty_print=True, encoding="unicode")
 
-    with open(out_path, "w", encoding="utf-8") as f:
+    with open(out_path, "w", encoding=encoding, errors="ignore") as f:
         f.write(formatted_html)
yomitoku/export/export_json.py
CHANGED
@@ -1,5 +1,8 @@
 import json
 
+import cv2
+import os
+
 
 def paragraph_to_json(paragraph, ignore_line_break):
     if ignore_line_break:
@@ -12,7 +15,34 @@ def table_to_json(table, ignore_line_break):
             cell.contents = cell.contents.replace("\n", "")
 
 
-def export_json(inputs, out_path, ignore_line_break=False):
+def save_figure(
+    figures,
+    img,
+    out_path,
+    figure_dir="figures",
+):
+    for i, figure in enumerate(figures):
+        x1, y1, x2, y2 = map(int, figure.box)
+        figure_img = img[y1:y2, x1:x2, :]
+        save_dir = os.path.dirname(out_path)
+        save_dir = os.path.join(save_dir, figure_dir)
+        os.makedirs(save_dir, exist_ok=True)
+
+        filename = os.path.splitext(os.path.basename(out_path))[0]
+        figure_name = f"{filename}_figure_{i}.png"
+        figure_path = os.path.join(save_dir, figure_name)
+        cv2.imwrite(figure_path, figure_img)
+
+
+def export_json(
+    inputs,
+    out_path,
+    ignore_line_break=False,
+    encoding: str = "utf-8",
+    img=None,
+    export_figure=False,
+    figure_dir="figures",
+):
     from yomitoku.document_analyzer import DocumentAnalyzerSchema
 
     if isinstance(inputs, DocumentAnalyzerSchema):
@@ -23,7 +53,15 @@ def export_json(inputs, out_path, ignore_line_break=False):
     for paragraph in inputs.paragraphs:
         paragraph_to_json(paragraph, ignore_line_break)
 
-    with open(out_path, "w", encoding="utf-8") as f:
+    if export_figure:
+        save_figure(
+            inputs.figures,
+            img,
+            out_path,
+            figure_dir=figure_dir,
+        )
+
+    with open(out_path, "w", encoding=encoding, errors="ignore") as f:
         json.dump(
             inputs.model_dump(),
             f,
yomitoku/export/export_markdown.py
CHANGED
@@ -117,6 +117,7 @@ def export_markdown(
     export_figure=True,
     figure_width=200,
     figure_dir="figures",
+    encoding: str = "utf-8",
 ):
     elements = []
     for table in inputs.tables:
@@ -141,5 +142,5 @@ def export_markdown(
     elements = sorted(elements, key=lambda x: x["order"])
     markdown = "\n".join([element["md"] for element in elements])
 
-    with open(out_path, "w", encoding="utf-8") as f:
+    with open(out_path, "w", encoding=encoding, errors="ignore") as f:
         f.write(markdown)
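All four exporters now open their output with the user-supplied encoding and `errors="ignore"`. A quick, self-contained illustration of what that error mode does:

```python
# Unsupported characters are silently dropped instead of raising UnicodeEncodeError.
with open("demo.txt", "w", encoding="shift-jis", errors="ignore") as f:
    f.write("yomitoku ✨ 読み解く")  # "✨" has no Shift-JIS mapping and is skipped

print(open("demo.txt", encoding="shift-jis").read())  # yomitoku  読み解く
```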
yomitoku/layout_analyzer.py
CHANGED
@@ -15,7 +15,7 @@ class LayoutAnalyzerSchema(BaseSchema):
 
 
 class LayoutAnalyzer:
-    def __init__(self, configs=
+    def __init__(self, configs={}, device="cuda", visualize=False):
         layout_parser_kwargs = {
             "device": device,
             "visualize": visualize,
@@ -26,10 +26,6 @@ class LayoutAnalyzer:
         }
 
         if isinstance(configs, dict):
-            assert (
-                "layout_parser" in configs or "table_structure_recognizer" in configs
-            ), "Invalid config key. Please check the config keys."
-
             if "layout_parser" in configs:
                 layout_parser_kwargs.update(configs["layout_parser"])
 
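Dropping the assert (here and in `OCR` below) means a config dict no longer has to name one of the known sub-keys, so default construction now works. A sketch:

```python
from yomitoku.layout_analyzer import LayoutAnalyzer

# 0.6.0 raised AssertionError ("Invalid config key. ...") for configs={};
# 0.7.0 simply falls back to the default kwargs.
analyzer = LayoutAnalyzer(configs={}, device="cpu", visualize=False)
```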
yomitoku/layout_parser.py
CHANGED
@@ -104,7 +104,6 @@ class LayoutParser(BaseModule):
         self.visualize = visualize
 
         self.model.eval()
-        self.model.to(self.device)
 
         self.postprocessor = RTDETRPostProcessor(
             num_classes=self._cfg.RTDETRTransformerv2.num_classes,
@@ -132,6 +131,8 @@ class LayoutParser(BaseModule):
             if not os.path.exists(path_onnx):
                 self.convert_onnx(path_onnx)
 
+            self.model = None
+
             model = onnx.load(path_onnx)
             if torch.cuda.is_available() and device == "cuda":
                 self.sess = onnxruntime.InferenceSession(
@@ -140,6 +141,9 @@ class LayoutParser(BaseModule):
             else:
                 self.sess = onnxruntime.InferenceSession(model.SerializeToString())
 
+        if self.model is not None:
+            self.model.to(self.device)
+
     def convert_onnx(self, path_onnx):
         dynamic_axes = {
             "input": {0: "batch_size"},
yomitoku/ocr.py
CHANGED
@@ -16,16 +16,37 @@ class WordPrediction(BaseSchema):
     )
     content: str
     direction: str
-    det_score: float
     rec_score: float
+    det_score: float
 
 
 class OCRSchema(BaseSchema):
     words: List[WordPrediction]
 
 
+def ocr_aggregate(det_outputs, rec_outputs):
+    words = []
+    for points, det_score, pred, rec_score, direction in zip(
+        det_outputs.points,
+        det_outputs.scores,
+        rec_outputs.contents,
+        rec_outputs.scores,
+        rec_outputs.directions,
+    ):
+        words.append(
+            {
+                "points": points,
+                "content": pred,
+                "direction": direction,
+                "det_score": det_score,
+                "rec_score": rec_score,
+            }
+        )
+    return words
+
+
 class OCR:
-    def __init__(self, configs=
+    def __init__(self, configs={}, device="cuda", visualize=False):
         text_detector_kwargs = {
             "device": device,
             "visualize": visualize,
@@ -36,10 +57,6 @@ class OCR:
         }
 
         if isinstance(configs, dict):
-            assert (
-                "text_detector" in configs or "text_recognizer" in configs
-            ), "Invalid config key. Please check the config keys."
-
             if "text_detector" in configs:
                 text_detector_kwargs.update(configs["text_detector"])
             if "text_recognizer" in configs:
@@ -52,26 +69,6 @@ class OCR:
         self.detector = TextDetector(**text_detector_kwargs)
         self.recognizer = TextRecognizer(**text_recognizer_kwargs)
 
-    def aggregate(self, det_outputs, rec_outputs):
-        words = []
-        for points, det_score, pred, rec_score, direction in zip(
-            det_outputs.points,
-            det_outputs.scores,
-            rec_outputs.contents,
-            rec_outputs.scores,
-            rec_outputs.directions,
-        ):
-            words.append(
-                {
-                    "points": points,
-                    "content": pred,
-                    "direction": direction,
-                    "det_score": det_score,
-                    "rec_score": rec_score,
-                }
-            )
-        return words
-
     def __call__(self, img):
         """_summary_
 
@@ -82,6 +79,6 @@ class OCR:
         det_outputs, vis = self.detector(img)
         rec_outputs, vis = self.recognizer(img, det_outputs.points, vis=vis)
 
-        outputs = {"words": self.aggregate(det_outputs, rec_outputs)}
+        outputs = {"words": ocr_aggregate(det_outputs, rec_outputs)}
        results = OCRSchema(**outputs)
        return results, vis
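`aggregate` is now the module-level `ocr_aggregate`, so detector and recognizer outputs can be merged without constructing an `OCR` instance, which is exactly what `DocumentAnalyzer` does above. A sketch (assumes `det_outputs`/`rec_outputs` come from `TextDetector`/`TextRecognizer`):

```python
from yomitoku.ocr import OCRSchema, ocr_aggregate

words = ocr_aggregate(det_outputs, rec_outputs)  # dicts: points, content, direction, det/rec scores
results = OCRSchema(words=words)
```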
yomitoku/table_structure_recognizer.py
CHANGED
@@ -35,10 +35,17 @@ class TableCellSchema(BaseSchema):
     contents: Union[str, None]
 
 
+class TableLineSchema(BaseSchema):
+    box: conlist(int, min_length=4, max_length=4)
+    score: float
+
+
 class TableStructureRecognizerSchema(BaseSchema):
     box: conlist(int, min_length=4, max_length=4)
     n_row: int
     n_col: int
+    rows: List[TableLineSchema]
+    cols: List[TableLineSchema]
     cells: List[TableCellSchema]
     order: int
 
@@ -133,8 +140,6 @@ class TableStructureRecognizer(BaseModule):
             num_top_queries=self._cfg.RTDETRTransformerv2.num_queries,
         )
 
-        self.save_config("table_structure_recognitizer.yaml")
-
         self.transforms = T.Compose(
             [
                 T.Resize(self._cfg.data.img_size),
@@ -155,6 +160,8 @@ class TableStructureRecognizer(BaseModule):
             if not os.path.exists(path_onnx):
                 self.convert_onnx(path_onnx)
 
+            self.model = None
+
             model = onnx.load(path_onnx)
             if torch.cuda.is_available() and device == "cuda":
                 self.sess = onnxruntime.InferenceSession(
@@ -163,6 +170,9 @@ class TableStructureRecognizer(BaseModule):
             else:
                 self.sess = onnxruntime.InferenceSession(model.SerializeToString())
 
+        if self.model is not None:
+            self.model.to(self.device)
+
     def convert_onnx(self, path_onnx):
         dynamic_axes = {
             "input": {0: "batch_size"},
@@ -232,7 +242,7 @@ class TableStructureRecognizer(BaseModule):
             category_elements
         )
 
-        cells,
+        cells, rows, cols = self.extract_cell_elements(category_elements)
 
         table_x, table_y = data["offset"]
         table_x2 = table_x + data["size"][1]
@@ -241,8 +251,10 @@ class TableStructureRecognizer(BaseModule):
 
         table = {
             "box": table_box,
-            "n_row":
-            "n_col":
+            "n_row": len(rows),
+            "n_col": len(cols),
+            "rows": rows,
+            "cols": cols,
             "cells": cells,
             "order": 0,
         }
@@ -262,7 +274,10 @@ class TableStructureRecognizer(BaseModule):
         cells = extract_cells(row_boxes, col_boxes)
         cells = filter_contained_cells_within_spancell(cells, span_boxes)
 
-
+        rows = sorted(elements["row"], key=lambda x: x["box"][1])
+        cols = sorted(elements["col"], key=lambda x: x["box"][0])
+
+        return cells, rows, cols
 
     def __call__(self, img, table_boxes, vis=None):
         img_tensors = self.preprocess(img, table_boxes)
@@ -282,7 +297,9 @@ class TableStructureRecognizer(BaseModule):
             pred = self.model(data["tensor"])
 
         table = self.postprocess(pred, data)
-
+
+        if table.n_row > 0 and table.n_col > 0:
+            outputs.append(table)
 
         if vis is None and self.visualize:
             vis = img.copy()
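Tables now expose their detected row and column bands alongside the cells, and tables with no detected rows or columns are filtered out of the results. A sketch of reading the new fields (assumes `table` is one `TableStructureRecognizerSchema` result):

```python
for row in table.rows:  # TableLineSchema entries, sorted top-to-bottom by box y
    print(row.box, row.score)
for col in table.cols:  # sorted left-to-right by box x
    print(col.box, col.score)

assert table.n_row == len(table.rows) and table.n_col == len(table.cols)
```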
yomitoku/text_detector.py
CHANGED
@@ -61,8 +61,6 @@ class TextDetector(BaseModule):
         self.visualize = visualize
 
         self.model.eval()
-        self.model.to(self.device)
-
         self.post_processor = DBnetPostProcessor(**self._cfg.post_process)
         self.infer_onnx = infer_onnx
 
@@ -72,6 +70,8 @@ class TextDetector(BaseModule):
             if not os.path.exists(path_onnx):
                 self.convert_onnx(path_onnx)
 
+            self.model = None
+
             model = onnx.load(path_onnx)
             if torch.cuda.is_available() and device == "cuda":
                 self.sess = onnxruntime.InferenceSession(
@@ -80,6 +80,11 @@ class TextDetector(BaseModule):
             else:
                 self.sess = onnxruntime.InferenceSession(model.SerializeToString())
 
+            self.model = None
+
+        if self.model is not None:
+            self.model.to(self.device)
+
     def convert_onnx(self, path_onnx):
         dynamic_axes = {
             "input": {0: "batch_size", 2: "height", 3: "width"},
@@ -138,9 +143,9 @@ class TextDetector(BaseModule):
         vis = None
         if self.visualize:
             vis = det_visualizer(
-                preds,
                 img,
                 quads,
+                preds=preds,
                 vis_heatmap=self._cfg.visualize.heatmap,
                 line_color=tuple(self._cfg.visualize.color[::-1]),
             )
yomitoku/text_recognizer.py
CHANGED
@@ -64,7 +64,6 @@ class TextRecognizer(BaseModule):
 
         self.model.tokenizer = self.tokenizer
         self.model.eval()
-        self.model.to(self.device)
 
         self.visualize = visualize
 
@@ -76,6 +75,8 @@ class TextRecognizer(BaseModule):
             if not os.path.exists(path_onnx):
                 self.convert_onnx(path_onnx)
 
+            self.model = None
+
             model = onnx.load(path_onnx)
             if torch.cuda.is_available() and device == "cuda":
                 self.sess = onnxruntime.InferenceSession(
@@ -84,17 +85,31 @@ class TextRecognizer(BaseModule):
             else:
                 self.sess = onnxruntime.InferenceSession(model.SerializeToString())
 
+        if self.model is not None:
+            self.model.to(self.device)
+
     def preprocess(self, img, polygons):
         dataset = ParseqDataset(self._cfg, img, polygons)
-        dataloader =
-            dataset,
-            batch_size=self._cfg.data.batch_size,
-            shuffle=False,
-            num_workers=self._cfg.data.num_workers,
-        )
+        dataloader = self._make_mini_batch(dataset)
 
         return dataloader
 
+    def _make_mini_batch(self, dataset):
+        mini_batches = []
+        mini_batch = []
+        for data in dataset:
+            data = torch.unsqueeze(data, 0)
+            mini_batch.append(data)
+
+            if len(mini_batch) == self._cfg.data.batch_size:
+                mini_batches.append(torch.cat(mini_batch, 0))
+                mini_batch = []
+        else:
+            if len(mini_batch) > 0:
+                mini_batches.append(torch.cat(mini_batch, 0))
+
+        return mini_batches
+
     def convert_onnx(self, path_onnx):
         img_size = self._cfg.data.img_size
         input = torch.randn(1, 3, *img_size, requires_grad=True)
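`_make_mini_batch` replaces the previous DataLoader-style batching with plain tensor concatenation. A standalone sketch of the same logic and its tail behavior:

```python
import torch

def make_mini_batch(dataset, batch_size):  # mirrors TextRecognizer._make_mini_batch
    mini_batches, mini_batch = [], []
    for data in dataset:
        mini_batch.append(torch.unsqueeze(data, 0))
        if len(mini_batch) == batch_size:
            mini_batches.append(torch.cat(mini_batch, 0))
            mini_batch = []
    if mini_batch:  # the for/else in the diff flushes the remainder the same way
        mini_batches.append(torch.cat(mini_batch, 0))
    return mini_batches

batches = make_mini_batch([torch.zeros(3, 32, 100) for _ in range(37)], 16)
print([b.shape[0] for b in batches])  # [16, 16, 5]
```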
yomitoku/utils/misc.py
CHANGED
@@ -9,6 +9,24 @@ def filter_by_flag(elements, flags):
     return [element for element, flag in zip(elements, flags) if flag]
 
 
+def calc_overlap_ratio(rect_a, rect_b):
+    intersection = calc_intersection(rect_a, rect_b)
+    if intersection is None:
+        return 0, None
+
+    ix1, iy1, ix2, iy2 = intersection
+
+    overlap_width = ix2 - ix1
+    overlap_height = iy2 - iy1
+    bx1, by1, bx2, by2 = rect_b
+
+    b_area = (bx2 - bx1) * (by2 - by1)
+    overlap_area = overlap_width * overlap_height
+
+    overlap_ratio = overlap_area / b_area
+    return overlap_ratio, intersection
+
+
 def is_contained(rect_a, rect_b, threshold=0.8):
     """Given two rectangles A and B, determine whether rectangle B is contained in rectangle A.
     To tolerate slight misalignment, the overlap ratio is computed and True is returned when it exceeds the threshold.
@@ -23,20 +41,9 @@ def is_contained(rect_a, rect_b, threshold=0.8):
         bool: True if rectangle B is contained in rectangle A
     """
 
-    intersection = calc_intersection(rect_a, rect_b)
-    if intersection is None:
-        return False
-
-    ix1, iy1, ix2, iy2 = intersection
-
-    overlap_width = ix2 - ix1
-    overlap_height = iy2 - iy1
-    bx1, by1, bx2, by2 = rect_b
-
-    b_area = (bx2 - bx1) * (by2 - by1)
-    overlap_area = overlap_width * overlap_height
+    overlap_ratio, _ = calc_overlap_ratio(rect_a, rect_b)
 
-    if overlap_area / b_area > threshold:
+    if overlap_ratio > threshold:
         return True
 
     return False
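`calc_overlap_ratio` factors the overlap computation out of `is_contained` and also returns the intersection box, which the new table-cell splitting in document_analyzer.py relies on. A worked example:

```python
from yomitoku.utils.misc import calc_overlap_ratio, is_contained

rect_a = [0, 0, 100, 100]
rect_b = [50, 50, 150, 150]

ratio, intersection = calc_overlap_ratio(rect_a, rect_b)
print(ratio)         # 0.25 -> 2500 px overlap / 10000 px area of rect_b
print(intersection)  # the intersection box spanning (50, 50) to (100, 100)

print(is_contained(rect_a, rect_b))  # False: 0.25 is below the default threshold of 0.8
```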
yomitoku/utils/visualizer.py
CHANGED
@@ -66,14 +66,14 @@ def reading_order_visualizer(
     return out
 
 
-def det_visualizer(preds, img, quads, vis_heatmap=False, line_color=(0, 255, 0)):
-    preds = preds["binary"][0]
-    binary = preds.detach().cpu().numpy()
+def det_visualizer(img, quads, preds=None, vis_heatmap=False, line_color=(0, 255, 0)):
     out = img.copy()
     h, w = out.shape[:2]
-    binary = binary.squeeze(0)
-    binary = (binary * 255).astype(np.uint8)
     if vis_heatmap:
+        preds = preds["binary"][0]
+        binary = preds.detach().cpu().numpy()
+        binary = binary.squeeze(0)
+        binary = (binary * 255).astype(np.uint8)
         binary = cv2.resize(binary, (w, h), interpolation=cv2.INTER_LINEAR)
         heatmap = cv2.applyColorMap(binary, cv2.COLORMAP_JET)
         out = cv2.addWeighted(out, 0.5, heatmap, 0.5, 0)
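`det_visualizer` now takes the image first and accepts `preds` as an optional keyword, deferring all heatmap work to the `vis_heatmap` branch. A sketch of the updated call, mirroring the text_detector.py change above (`img`, `quads`, and `preds` are assumed inputs):

```python
from yomitoku.utils.visualizer import det_visualizer

vis = det_visualizer(img, quads, preds=preds, vis_heatmap=True)
vis = det_visualizer(img, quads)  # preds can stay None when no heatmap is requested,
                                  # since the heatmap branch is its only consumer here
```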
{yomitoku-0.6.0.dist-info → yomitoku-0.7.0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
-Metadata-Version: 2.
+Metadata-Version: 2.4
 Name: yomitoku
-Version: 0.6.0
+Version: 0.7.0
 Summary: Yomitoku is an AI-powered document image analysis package designed specifically for the Japanese language.
 Author-email: Kotaro Kinoshita <kotaro.kinoshita@mlism.com>
 License: CC BY-NC-SA 4.0
@@ -10,7 +10,6 @@ Requires-Dist: huggingface-hub>=0.26.1
 Requires-Dist: lxml>=5.3.0
 Requires-Dist: omegaconf>=2.3.0
 Requires-Dist: onnx>=1.17.0
-Requires-Dist: onnxruntime-gpu>=1.20.1
 Requires-Dist: onnxruntime>=1.20.1
 Requires-Dist: opencv-python>=4.10.0.84
 Requires-Dist: pyclipper>=1.3.0.post6
@@ -20,6 +19,8 @@ Requires-Dist: shapely>=2.0.6
 Requires-Dist: timm>=1.0.11
 Requires-Dist: torch>=2.5.0
 Requires-Dist: torchvision>=0.20.0
+Provides-Extra: gpu
+Requires-Dist: onnxruntime-gpu>=1.20.1; extra == 'gpu'
 Description-Content-Type: text/markdown
 
 Japanese | [English](README_EN.md)
@@ -72,6 +73,11 @@ For results exported to Markdown, see the [s
 pip install yomitoku
 ```
 
+To use a GPU for onnxruntime execution:
+```
+pip install yomitoku[gpu]
+```
+
 - Install the pytorch build that matches your CUDA version. By default, a build for CUDA 12.4 or later is installed.
 - pytorch 2.5 or later is supported, which in turn requires CUDA 11.8 or later. If your environment cannot meet this, use the Dockerfile in the repository.
 
@@ -89,7 +95,8 @@ yomitoku ${path_data} -f md -o results -v --figure --lite
 - `-d`, `--device` Specifies the device on which to run the model. If a GPU is not available, inference runs on the CPU. (default: cuda)
 - `--ignore_line_break` Ignores the line-break positions in the image and concatenates the text within each paragraph. (default: line breaks follow the image.)
 - `--figure_letter` Also exports the text detected inside figures and tables to the output file.
-- `--figure` Exports detected figures and images to the output file.
+- `--figure` Exports detected figures and images to the output file.
+- `--encoding` Specifies the character encoding of the exported output file. Characters not supported by the encoding are ignored. (utf-8, utf-8-sig, shift-jis, euc-jp, cp932)
 
 For the other options, see the help.
 
{yomitoku-0.6.0.dist-info → yomitoku-0.7.0.dist-info}/RECORD
CHANGED
@@ -1,16 +1,16 @@
 yomitoku/__init__.py,sha256=kXOM8RbpwwLABG3p3vPT3dJWBk4JX2MFGrOeBEW0hKM,543
 yomitoku/base.py,sha256=lzR_V8t87aRasmFdFwD-8KAeSahSTI3AZaEn6g8sOv8,3871
 yomitoku/constants.py,sha256=zlW5QRc_u_F3C2RAgBFWyHJZexBnJT5N15GC-9d3iLo,686
-yomitoku/document_analyzer.py,sha256=
-yomitoku/layout_analyzer.py,sha256=
-yomitoku/layout_parser.py,sha256=
-yomitoku/ocr.py,sha256=
+yomitoku/document_analyzer.py,sha256=85j93l-6rvvRZsL0FD7EQG--84ZLPiKoNm2CE1Ss8LM,16271
+yomitoku/layout_analyzer.py,sha256=VhNf1ZQFoozj6WUGk5ll1p2p1jk5X3j-JPcDbTAoSl4,1856
+yomitoku/layout_parser.py,sha256=V_mAkZxke1gwHfnxBFMTOJ8hnz2X_kfZu2lLiMd8cAs,7610
+yomitoku/ocr.py,sha256=JSTjkupcxHITQm6ERnzU7As0c3KWf8-oxc0AqNoWHXo,2272
 yomitoku/reading_order.py,sha256=OfhOS9ttPDoPSuHrIRKyOzG19GGeRufbuSKDqhsohh4,6404
-yomitoku/table_structure_recognizer.py,sha256=
-yomitoku/text_detector.py,sha256=
-yomitoku/text_recognizer.py,sha256=
+yomitoku/table_structure_recognizer.py,sha256=Eam9t7OjW4a-UWk_dl-ylbOcinN_Te_ovuri2naldL0,9482
+yomitoku/text_detector.py,sha256=XgqhtbNcJww2x3BrH8EFz45qC6kqPKCX9hsa-dzRoIA,4274
+yomitoku/text_recognizer.py,sha256=LVMjy-PaGlDQqfJrjKX_7vOQXDyFg6FaCeIQIyWUJX8,5833
 yomitoku/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-yomitoku/cli/main.py,sha256=
+yomitoku/cli/main.py,sha256=N0X4-z_jfFM5_buUpiLHHA68B5oPVVdmvwzXWn7qoUs,7822
 yomitoku/configs/__init__.py,sha256=e1Alss5QJLZSNfD6zLEG6xu5vDQDw-4Jayiqq8bq52s,571
 yomitoku/configs/cfg_layout_parser_rtdtrv2.py,sha256=8PRxB2Ar9UF7-DLtbgSokhrzdXb0veWI6Wc-X8qigRw,2329
 yomitoku/configs/cfg_table_structure_recognizer_rtdtrv2.py,sha256=o70GMHD8k-zeBeJtuhPS8x7vVB-ffucnJXeSyn-0AXo,2116
@@ -21,10 +21,10 @@ yomitoku/data/__init__.py,sha256=KAofFc9rk9ZdTKBjemu9RM8Vj9XnKbWC2MPZ2RWtOdE,82
 yomitoku/data/dataset.py,sha256=-I4f-FDtgsPnJ2MnXB7FtwihMW3koDaSI1OEoqKneIg,1014
 yomitoku/data/functions.py,sha256=eOyxo8S6EoAf1xGSPLWQFb9-t5Rg52NggD9MFIrOSpY,7506
 yomitoku/export/__init__.py,sha256=aANEfuovH2aevFjb2pGrBLFP-4iRzEzD9wcriCR-M7I,229
-yomitoku/export/export_csv.py,sha256=
-yomitoku/export/export_html.py,sha256=
-yomitoku/export/export_json.py,sha256=
-yomitoku/export/export_markdown.py,sha256=
+yomitoku/export/export_csv.py,sha256=MzGS1Y6kiHo7vZV3heKkd_v5gdxJBrpa8Zt9gFMwG88,2869
+yomitoku/export/export_html.py,sha256=ezj96wQNqkBOCUOIPHFJW_BCh1I4Ij_8RDiKUxqaFok,4913
+yomitoku/export/export_json.py,sha256=Kz8MgWM0bd6SNaSiHZjs-IjhsvX19Y0ovlIxGcm1vIw,1910
+yomitoku/export/export_markdown.py,sha256=w9jT-A0__4rw1PaeGtRicuLu1rqeZO-ZLwyJm5F5PXQ,4033
 yomitoku/models/__init__.py,sha256=Enxq9sjJWusZuxecTori8IQa8NEYKaiiptDluHX1avg,144
 yomitoku/models/dbnet_plus.py,sha256=jeWJZm0ihbxoJeAXBFK7uVIwoosx2IUNk7Ut5wRH0vA,7998
 yomitoku/models/parseq.py,sha256=-DQMQuON2jwtb4Ib2V0O19un9w-WG4rXS0SiscydrXU,8593
@@ -46,9 +46,9 @@ yomitoku/resource/charset.txt,sha256=sU91kSi-9Wk4733bCXy4j_UDmvcsj96sHOq1ppUJlOY
 yomitoku/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 yomitoku/utils/graph.py,sha256=LKNB8ZhSQwOZMfeAimPMF5UCVVr2ZaUWoGDkz8z-uGU,456
 yomitoku/utils/logger.py,sha256=uOmtQDr0A0JD7wyFshedL08BiNrQorHnpktRXba8bjU,424
-yomitoku/utils/misc.py,sha256=
-yomitoku/utils/visualizer.py,sha256=
-yomitoku-0.6.0.dist-info/METADATA,sha256=
-yomitoku-0.6.0.dist-info/WHEEL,sha256=
-yomitoku-0.6.0.dist-info/entry_points.txt,sha256=
-yomitoku-0.6.0.dist-info/RECORD,,
+yomitoku/utils/misc.py,sha256=FbwPLeIYYBvNf9wQh2RoEonTM5BF7_IwaEqmRsYHKA8,2673
+yomitoku/utils/visualizer.py,sha256=DjDwHiAu1iFRKh96H3Egq4vuI2s_-9dLCDeykhKi8jo,5251
+yomitoku-0.7.0.dist-info/METADATA,sha256=Yvpxy_oWORSz_db4yzledIhFHbuQbORz0DrMisf59zQ,8488
+yomitoku-0.7.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+yomitoku-0.7.0.dist-info/entry_points.txt,sha256=nFV3S11zgBNW0Qq_D0XQNg2R4lNXU_9XUFr6rdJoyF8,52
+yomitoku-0.7.0.dist-info/RECORD,,
{yomitoku-0.6.0.dist-info → yomitoku-0.7.0.dist-info}/entry_points.txt
File without changes