yomitoku 0.7.0__py3-none-any.whl → 0.7.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- yomitoku/base.py +1 -1
- yomitoku/cli/main.py +117 -8
- yomitoku/document_analyzer.py +26 -21
- yomitoku/export/__init__.py +14 -5
- yomitoku/export/export_csv.py +7 -1
- yomitoku/export/export_html.py +9 -5
- yomitoku/export/export_json.py +8 -2
- yomitoku/export/export_markdown.py +8 -1
- {yomitoku-0.7.0.dist-info → yomitoku-0.7.2.dist-info}/METADATA +5 -9
- {yomitoku-0.7.0.dist-info → yomitoku-0.7.2.dist-info}/RECORD +12 -12
- {yomitoku-0.7.0.dist-info → yomitoku-0.7.2.dist-info}/WHEEL +0 -0
- {yomitoku-0.7.0.dist-info → yomitoku-0.7.2.dist-info}/entry_points.txt +0 -0
yomitoku/base.py
CHANGED
yomitoku/cli/main.py
CHANGED
@@ -1,18 +1,65 @@
 import argparse
 import os
+import time
 from pathlib import Path
 
 import cv2
-import
+import torch
 
 from ..constants import SUPPORT_OUTPUT_FORMAT
 from ..data.functions import load_image, load_pdf
 from ..document_analyzer import DocumentAnalyzer
 from ..utils.logger import set_logger
 
+from ..export import save_csv, save_html, save_json, save_markdown
+
 logger = set_logger(__name__, "INFO")
 
 
+def merge_all_pages(results):
+    out = None
+    for result in results:
+        format = result["format"]
+        data = result["data"]
+
+        if format == "json":
+            if out is None:
+                out = [data]
+            else:
+                out.append(data)
+
+        elif format == "csv":
+            if out is None:
+                out = data
+            else:
+                out.extend(data)
+
+        elif format == "html":
+            if out is None:
+                out = data
+            else:
+                out += "\n" + data
+
+        elif format == "md":
+            if out is None:
+                out = data
+            else:
+                out += "\n" + data
+
+    return out
+
+
+def save_merged_file(out_path, args, out):
+    if args.format == "json":
+        save_json(out_path, args.encoding, out)
+    elif args.format == "csv":
+        save_csv(out_path, args.encoding, out)
+    elif args.format == "html":
+        save_html(out_path, args.encoding, out)
+    elif args.format == "md":
+        save_markdown(out_path, args.encoding, out)
+
+
 def validate_encoding(encoding):
     if encoding not in [
         "utf-8",
@@ -31,8 +78,9 @@ def process_single_file(args, analyzer, path, format):
     else:
         imgs = [load_image(path)]
 
+    results = []
     for page, img in enumerate(imgs):
-
+        result, ocr, layout = analyzer(img)
         dirname = path.parent.name
         filename = path.stem
 
@@ -55,7 +103,7 @@ def process_single_file(args, analyzer, path, format):
         out_path = os.path.join(args.outdir, f"{dirname}_{filename}_p{page+1}.{format}")
 
         if format == "json":
-
+            json = result.to_json(
                 out_path,
                 ignore_line_break=args.ignore_line_break,
                 encoding=args.encoding,
@@ -63,8 +111,19 @@ def process_single_file(args, analyzer, path, format):
                 export_figure=args.figure,
                 figure_dir=args.figure_dir,
             )
+
+            results.append(
+                {
+                    "format": format,
+                    "data": json,
+                }
+            )
+
+            if not args.combine:
+                save_json(out_path, args.encoding, json)
+
         elif format == "csv":
-
+            csv = result.to_csv(
                 out_path,
                 ignore_line_break=args.ignore_line_break,
                 encoding=args.encoding,
@@ -72,8 +131,19 @@ def process_single_file(args, analyzer, path, format):
                 export_figure=args.figure,
                 figure_dir=args.figure_dir,
             )
+
+            results.append(
+                {
+                    "format": format,
+                    "data": csv,
+                }
+            )
+
+            if not args.combine:
+                save_csv(out_path, args.encoding, csv)
+
         elif format == "html":
-
+            html = result.to_html(
                 out_path,
                 ignore_line_break=args.ignore_line_break,
                 img=img,
@@ -83,8 +153,19 @@ def process_single_file(args, analyzer, path, format):
                 figure_dir=args.figure_dir,
                 encoding=args.encoding,
             )
+
+            results.append(
+                {
+                    "format": format,
+                    "data": html,
+                }
+            )
+
+            if not args.combine:
+                save_html(out_path, args.encoding, html)
+
         elif format == "md":
-
+            md = result.to_markdown(
                 out_path,
                 ignore_line_break=args.ignore_line_break,
                 img=img,
@@ -95,7 +176,24 @@ def process_single_file(args, analyzer, path, format):
                 encoding=args.encoding,
             )
 
-
+            results.append(
+                {
+                    "format": format,
+                    "data": md,
+                }
+            )
+
+            if not args.combine:
+                save_markdown(out_path, args.encoding, md)
+
+    out = merge_all_pages(results)
+    if args.combine:
+        out_path = os.path.join(args.outdir, f"{dirname}_{filename}.{format}")
+        save_merged_file(
+            out_path,
+            args,
+            out,
+        )
 
 
 def main():
@@ -195,6 +293,16 @@ def main():
         default="utf-8",
         help="Specifies the character encoding for the output file to be exported. If unsupported characters are included, they will be ignored.",
     )
+    parser.add_argument(
+        "--combine",
+        action="store_true",
+        help="if set, merge all pages in the output",
+    )
+    parser.add_argument(
+        "--ignore_meta",
+        action="store_true",
+        help="if set, ignore meta information(header, footer) in the output",
+    )
 
     args = parser.parse_args()
 
@@ -235,7 +343,7 @@ def main():
     if args.lite:
         configs["ocr"]["text_recognizer"]["model_name"] = "parseq-small"
 
-    if args.device == "cpu":
+    if args.device == "cpu" or not torch.cuda.is_available():
         configs["ocr"]["text_detector"]["infer_onnx"] = True
 
     # Note: Text Detector以外はONNX推論よりもPyTorch推論の方が速いため、ONNX推論は行わない
@@ -247,6 +355,7 @@ def main():
         configs=configs,
         visualize=args.vis,
         device=args.device,
+        ignore_meta=args.ignore_meta,
     )
 
     os.makedirs(args.outdir, exist_ok=True)
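In short, `process_single_file` now collects one record per page (a dict with `format` and `data` keys), writes per-page files only when `--combine` is not passed (e.g. `yomitoku input.pdf -f md -o results --combine --ignore_meta` produces a single merged file), and hands the collected records to `merge_all_pages`. A minimal sketch of the merge behavior for the string-based formats, with illustrative page data that is not taken from the package:

```python
# Per-page records in the shape cli/main.py now collects.
results = [
    {"format": "md", "data": "# Page 1"},
    {"format": "md", "data": "# Page 2"},
]

# Mirrors the "md"/"html" branch of merge_all_pages(): pages are joined with
# newlines (the json branch accumulates a list, the csv branch extends one).
out = None
for result in results:
    data = result["data"]
    out = data if out is None else out + "\n" + data

print(out)  # what save_merged_file() writes once to <dirname>_<filename>.md
```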
yomitoku/document_analyzer.py
CHANGED
@@ -3,25 +3,19 @@ from concurrent.futures import ThreadPoolExecutor
 from typing import List, Union
 
 import numpy as np
-
 from pydantic import conlist
 
+from yomitoku.text_detector import TextDetector
+from yomitoku.text_recognizer import TextRecognizer
+
 from .base import BaseSchema
 from .export import export_csv, export_html, export_markdown
 from .layout_analyzer import LayoutAnalyzer
 from .ocr import OCRSchema, WordPrediction, ocr_aggregate
 from .reading_order import prediction_reading_order
 from .table_structure_recognizer import TableStructureRecognizerSchema
-from .utils.misc import (
-    is_contained,
-    quad_to_xyxy,
-    calc_overlap_ratio,
-)
-from .utils.visualizer import reading_order_visualizer
-from yomitoku.text_detector import TextDetector
-from yomitoku.text_recognizer import TextRecognizer
-
-from .utils.visualizer import det_visualizer
+from .utils.misc import calc_overlap_ratio, is_contained, quad_to_xyxy
+from .utils.visualizer import det_visualizer, reading_order_visualizer
 
 
 class ParagraphSchema(BaseSchema):
@@ -47,13 +41,13 @@ class DocumentAnalyzerSchema(BaseSchema):
     figures: List[FigureSchema]
 
     def to_html(self, out_path: str, **kwargs):
-        export_html(self, out_path, **kwargs)
+        return export_html(self, out_path, **kwargs)
 
     def to_markdown(self, out_path: str, **kwargs):
-        export_markdown(self, out_path, **kwargs)
+        return export_markdown(self, out_path, **kwargs)
 
     def to_csv(self, out_path: str, **kwargs):
-        export_csv(self, out_path, **kwargs)
+        return export_csv(self, out_path, **kwargs)
 
 
 def combine_flags(flag1, flag2):
@@ -127,7 +121,6 @@ def extract_words_within_element(pred_words, element):
     if len(contained_words) == 0:
         return None, None, check_list
 
-    element_direction = "horizontal"
     word_direction = [word.direction for word in contained_words]
     cnt_horizontal = word_direction.count("horizontal")
     cnt_vertical = word_direction.count("vertical")
@@ -329,7 +322,13 @@ def _split_text_across_cells(results_det, results_layout):
 
 
 class DocumentAnalyzer:
-    def __init__(
+    def __init__(
+        self,
+        configs={},
+        device="cuda",
+        visualize=False,
+        ignore_meta=False,
+    ):
         default_configs = {
             "ocr": {
                 "text_detector": {
@@ -372,6 +371,8 @@ class DocumentAnalyzer:
         )
         self.visualize = visualize
 
+        self.ignore_meta = ignore_meta
+
     def aggregate(self, ocr_res, layout_res):
         paragraphs = []
         check_list = [False] * len(ocr_res.words)
@@ -432,11 +433,15 @@ class DocumentAnalyzer:
         page_direction = judge_page_direction(paragraphs)
 
         headers = [
-            paragraph
+            paragraph
+            for paragraph in paragraphs
+            if paragraph.role == "page_header" and not self.ignore_meta
         ]
 
         footers = [
-            paragraph
+            paragraph
+            for paragraph in paragraphs
+            if paragraph.role == "page_footer" and not self.ignore_meta
         ]
 
         page_contents = [
@@ -504,9 +509,9 @@ class DocumentAnalyzer:
 
     def __call__(self, img):
         self.img = img
-
+        results, ocr, layout = asyncio.run(self.run(img))
 
         if self.visualize:
-            layout = reading_order_visualizer(layout,
+            layout = reading_order_visualizer(layout, results)
 
-        return
+        return results, ocr, layout
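Taken together, the `DocumentAnalyzer` changes are additive: the constructor gains an `ignore_meta` flag (page headers and footers are dropped from the aggregated result when it is set), `__call__` now returns the `(results, ocr, layout)` tuple, and `to_html`/`to_markdown`/`to_csv` return the exported data instead of `None`. A hedged usage sketch; the input path and keyword arguments (mirroring those passed in cli/main.py) are illustrative:

```python
import cv2
from yomitoku.document_analyzer import DocumentAnalyzer

# ignore_meta is new in 0.7.2; the other arguments mirror the defaults in the diff.
analyzer = DocumentAnalyzer(configs={}, device="cpu", visualize=False, ignore_meta=True)

img = cv2.imread("sample.jpg")          # illustrative input image
results, ocr, layout = analyzer(img)    # __call__ now returns all three values

# to_markdown() now returns the markdown string; writing it to disk is handled
# separately by save_markdown() from yomitoku.export.
md = results.to_markdown("sample.md", ignore_line_break=True, img=img, encoding="utf-8")
```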
yomitoku/export/__init__.py
CHANGED
@@ -1,6 +1,15 @@
-from .export_csv import export_csv
-from .export_html import export_html
-from .export_json import export_json
-from .export_markdown import export_markdown
+from .export_csv import export_csv, save_csv
+from .export_html import export_html, save_html
+from .export_json import export_json, save_json
+from .export_markdown import export_markdown, save_markdown
 
-__all__ = [
+__all__ = [
+    "export_html",
+    "export_markdown",
+    "export_csv",
+    "export_json",
+    "save_html",
+    "save_markdown",
+    "save_csv",
+    "save_json",
+]
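The export package now pairs every `export_*` builder with a `save_*` writer: `export_*` assembles and returns the data, while `save_*` takes `(out_path, encoding, data)` and writes it. A quick check of the public surface, assuming yomitoku 0.7.2 is installed; the path and content below are illustrative:

```python
from yomitoku import export

print(sorted(export.__all__))
# ['export_csv', 'export_html', 'export_json', 'export_markdown',
#  'save_csv', 'save_html', 'save_json', 'save_markdown']

# Illustrative write with one of the new helpers.
export.save_markdown("merged.md", "utf-8", "# merged output")
```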
yomitoku/export/export_csv.py
CHANGED
@@ -1,7 +1,8 @@
 import csv
-import cv2
 import os
 
+import cv2
+
 
 def table_to_csv(table, ignore_line_break):
     num_rows = table.n_row
@@ -41,6 +42,8 @@ def save_figure(
     out_path,
     figure_dir="figures",
 ):
+    assert img is not None, "img is required for saving figures"
+
     for i, figure in enumerate(figures):
         x1, y1, x2, y2 = map(int, figure.box)
         figure_img = img[y1:y2, x1:x2, :]
@@ -96,7 +99,10 @@ def export_csv(
     )
 
     elements = sorted(elements, key=lambda x: x["order"])
+    return elements
+
 
+def save_csv(out_path, encoding, elements):
     with open(out_path, "w", newline="", encoding=encoding, errors="ignore") as f:
         writer = csv.writer(f, quoting=csv.QUOTE_MINIMAL)
         for element in elements:
yomitoku/export/export_html.py
CHANGED
@@ -1,9 +1,8 @@
-import re
 import os
-import
-
+import re
 from html import escape
 
+import cv2
 from lxml import etree, html
 
 
@@ -110,6 +109,8 @@ def figure_to_html(
     figure_dir="figures",
     width=200,
 ):
+    assert img is not None, "img is required for saving figures"
+
     elements = []
     for i, figure in enumerate(figures):
         x1, y1, x2, y2 = map(int, figure.box)
@@ -180,10 +181,13 @@ def export_html(
     elements = sorted(elements, key=lambda x: x["order"])
 
     html_string = "".join([element["html"] for element in elements])
-    html_string = add_html_tag(html_string)
+    # html_string = add_html_tag(html_string)
 
     parsed_html = html.fromstring(html_string)
     formatted_html = etree.tostring(parsed_html, pretty_print=True, encoding="unicode")
+    return formatted_html
+
 
+def save_html(out_path, encoding, html):
     with open(out_path, "w", encoding=encoding, errors="ignore") as f:
-        f.write(
+        f.write(html)
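The notable change here is that `export_html` now returns the pretty-printed markup and delegates the file write to the new `save_html`; the `add_html_tag` wrapping step is also commented out. The formatting itself is just the lxml round-trip visible in the context lines above; a standalone sketch with an illustrative single-element fragment:

```python
from lxml import etree, html

html_string = "<table><tr><td>sample</td></tr></table>"  # illustrative fragment

# Same two calls export_html() performs before returning its result.
parsed_html = html.fromstring(html_string)
formatted_html = etree.tostring(parsed_html, pretty_print=True, encoding="unicode")

print(formatted_html)  # pretty-printed markup; save_html() simply writes this string
```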
yomitoku/export/export_json.py
CHANGED
@@ -1,7 +1,7 @@
 import json
+import os
 
 import cv2
-import os
 
 
 def paragraph_to_json(paragraph, ignore_line_break):
@@ -21,6 +21,8 @@ def save_figure(
     out_path,
     figure_dir="figures",
 ):
+    assert img is not None, "img is required for saving figures"
+
     for i, figure in enumerate(figures):
         x1, y1, x2, y2 = map(int, figure.box)
         figure_img = img[y1:y2, x1:x2, :]
@@ -61,9 +63,13 @@ def export_json(
         figure_dir=figure_dir,
     )
 
+    return inputs.model_dump()
+
+
+def save_json(out_path, encoding, data):
     with open(out_path, "w", encoding=encoding, errors="ignore") as f:
         json.dump(
-
+            data,
             f,
             ensure_ascii=False,
             indent=4,
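Same pattern for JSON: `export_json` now returns `inputs.model_dump()` (a plain dict of the pydantic schema) and the new `save_json` writes it with `ensure_ascii=False` and `indent=4`, so Japanese text is stored readably rather than as `\uXXXX` escapes. A small sketch with an illustrative payload:

```python
from yomitoku.export import save_json

# Illustrative payload; in practice this is the dict returned by result.to_json(...).
data = {"paragraphs": [{"contents": "日本語のテキスト", "order": 1}]}

# Matches the json.dump call above: ensure_ascii=False, indent=4.
save_json("sample.json", "utf-8", data)
```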
yomitoku/export/export_markdown.py
CHANGED
@@ -1,6 +1,7 @@
+import os
 import re
+
 import cv2
-import os
 
 
 def escape_markdown_special_chars(text):
@@ -75,6 +76,8 @@ def figure_to_md(
     width=200,
     figure_dir="figures",
 ):
+    assert img is not None, "img is required for saving figures"
+
     elements = []
     for i, figure in enumerate(figures):
         x1, y1, x2, y2 = map(int, figure.box)
@@ -142,5 +145,9 @@ def export_markdown(
     elements = sorted(elements, key=lambda x: x["order"])
     markdown = "\n".join([element["md"] for element in elements])
 
+    return markdown
+
+
+def save_markdown(out_path, encoding, markdown):
     with open(out_path, "w", encoding=encoding, errors="ignore") as f:
         f.write(markdown)
{yomitoku-0.7.0.dist-info → yomitoku-0.7.2.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: yomitoku
-Version: 0.7.0
+Version: 0.7.2
 Summary: Yomitoku is an AI-powered document image analysis package designed specifically for the Japanese language.
 Author-email: Kotaro Kinoshita <kotaro.kinoshita@mlism.com>
 License: CC BY-NC-SA 4.0
@@ -19,8 +19,6 @@ Requires-Dist: shapely>=2.0.6
 Requires-Dist: timm>=1.0.11
 Requires-Dist: torch>=2.5.0
 Requires-Dist: torchvision>=0.20.0
-Provides-Extra: gpu
-Requires-Dist: onnxruntime-gpu>=1.20.1; extra == 'gpu'
 Description-Content-Type: text/markdown
 
 日本語版 | [English](README_EN.md)
@@ -32,6 +30,7 @@ Description-Content-Type: text/markdown
 
 
 [](https://kotaro-kinoshita.github.io/yomitoku/)
+[](https://pepy.tech/projects/yomitoku)
 
 ## 🌟 概要
 
@@ -73,18 +72,13 @@ Markdown でエクスポートした結果は関してはリポジトリ内の[s
 pip install yomitoku
 ```
 
-onnxruntimeの実行にGPUを使用する場合
-```
-pip install yomitoku[gpu]
-```
-
 - pytorch はご自身の CUDA のバージョンにあったものをインストールしてください。デフォルトでは CUDA12.4 以上に対応したものがインストールされます。
 - pytorch は 2.5 以上のバージョンに対応しています。その関係で CUDA11.8 以上のバージョンが必要になります。対応できない場合は、リポジトリ内の Dockerfile を利用してください。
 
 ## 🚀 実行方法
 
 ```
-yomitoku ${path_data} -f md -o results -v --figure
+yomitoku ${path_data} -f md -o results -v --figure --lite
 ```
 
 - `${path_data}` 解析対象の画像が含まれたディレクトリか画像ファイルのパスを直接して指定してください。ディレクトリを対象とした場合はディレクトリのサブディレクトリ内の画像も含めて処理を実行します。
@@ -97,6 +91,8 @@ yomitoku ${path_data} -f md -o results -v --figure --lite
 - `--figure_letter` 検出した図表に含まれる文字も出力ファイルにエクスポートします。
 - `--figure` 検出した図、画像を出力ファイルにエクスポートします。
 - `--encoding` エクスポートする出力ファイルの文字エンコーディングを指定します。サポートされていない文字コードが含まれる場合は、その文字を無視します。(utf-8, utf-8-sig, shift-jis, enc-jp, cp932)
+- `--combine` PDFを入力に与えたときに、複数ページが含まれる場合に、それらの予測結果を一つのファイルに統合してエクスポートします。
+- `--ignore_meta` 文章のheater, fotterなどの文字情報を出力ファイルに含めません。
 
 その他のオプションに関しては、ヘルプを参照
 
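Dropping the `gpu` extra (onnxruntime-gpu) lines up with the cli/main.py change above: ONNX inference for the text detector is now enabled automatically whenever CUDA is unavailable, not only when the device is explicitly set to cpu. A minimal sketch of that fallback using the config key from the diff; the `configs` dict here is only an illustrative subset of the CLI's defaults:

```python
import torch

device = "cuda"
configs = {"ocr": {"text_detector": {}}}  # illustrative subset

# Mirrors the updated check in yomitoku/cli/main.py: fall back to ONNX inference
# for the text detector when CPU is requested or CUDA is not available.
if device == "cpu" or not torch.cuda.is_available():
    configs["ocr"]["text_detector"]["infer_onnx"] = True
```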
{yomitoku-0.7.0.dist-info → yomitoku-0.7.2.dist-info}/RECORD
CHANGED
@@ -1,7 +1,7 @@
 yomitoku/__init__.py,sha256=kXOM8RbpwwLABG3p3vPT3dJWBk4JX2MFGrOeBEW0hKM,543
-yomitoku/base.py,sha256=
+yomitoku/base.py,sha256=9U3sfe69O6vuO430JzzKQQNkgPsLM9WdLfOUUhp3Ljs,3878
 yomitoku/constants.py,sha256=zlW5QRc_u_F3C2RAgBFWyHJZexBnJT5N15GC-9d3iLo,686
-yomitoku/document_analyzer.py,sha256=
+yomitoku/document_analyzer.py,sha256=wQMmXACDsDmyaxg2OnG9Og5Nx53WPUkQdUmgYtljACQ,16412
 yomitoku/layout_analyzer.py,sha256=VhNf1ZQFoozj6WUGk5ll1p2p1jk5X3j-JPcDbTAoSl4,1856
 yomitoku/layout_parser.py,sha256=V_mAkZxke1gwHfnxBFMTOJ8hnz2X_kfZu2lLiMd8cAs,7610
 yomitoku/ocr.py,sha256=JSTjkupcxHITQm6ERnzU7As0c3KWf8-oxc0AqNoWHXo,2272
@@ -10,7 +10,7 @@ yomitoku/table_structure_recognizer.py,sha256=Eam9t7OjW4a-UWk_dl-ylbOcinN_Te_ovu
 yomitoku/text_detector.py,sha256=XgqhtbNcJww2x3BrH8EFz45qC6kqPKCX9hsa-dzRoIA,4274
 yomitoku/text_recognizer.py,sha256=LVMjy-PaGlDQqfJrjKX_7vOQXDyFg6FaCeIQIyWUJX8,5833
 yomitoku/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-yomitoku/cli/main.py,sha256=
+yomitoku/cli/main.py,sha256=WvQO9V5HzxxvRAIsGkrDl9OGrmaKsAbDBrg4ApCSy_c,10527
 yomitoku/configs/__init__.py,sha256=e1Alss5QJLZSNfD6zLEG6xu5vDQDw-4Jayiqq8bq52s,571
 yomitoku/configs/cfg_layout_parser_rtdtrv2.py,sha256=8PRxB2Ar9UF7-DLtbgSokhrzdXb0veWI6Wc-X8qigRw,2329
 yomitoku/configs/cfg_table_structure_recognizer_rtdtrv2.py,sha256=o70GMHD8k-zeBeJtuhPS8x7vVB-ffucnJXeSyn-0AXo,2116
@@ -20,11 +20,11 @@ yomitoku/configs/cfg_text_recognizer_parseq_small.py,sha256=uCm_VC_G79IbZpOiK8fg
 yomitoku/data/__init__.py,sha256=KAofFc9rk9ZdTKBjemu9RM8Vj9XnKbWC2MPZ2RWtOdE,82
 yomitoku/data/dataset.py,sha256=-I4f-FDtgsPnJ2MnXB7FtwihMW3koDaSI1OEoqKneIg,1014
 yomitoku/data/functions.py,sha256=eOyxo8S6EoAf1xGSPLWQFb9-t5Rg52NggD9MFIrOSpY,7506
-yomitoku/export/__init__.py,sha256=
-yomitoku/export/export_csv.py,sha256=
-yomitoku/export/export_html.py,sha256=
-yomitoku/export/export_json.py,sha256=
-yomitoku/export/export_markdown.py,sha256=
+yomitoku/export/__init__.py,sha256=fkwOtqH0lh6eZQW5b4EMSjIH1FmWYLKKszahR-jQYSg,366
+yomitoku/export/export_csv.py,sha256=B234jlNeO4n5kQ_lwxxAZe_O2ipTbeDYlWU1zyyaVrw,3001
+yomitoku/export/export_html.py,sha256=pCLoxV10_SzRWmZlDnHuyfPFIuUGB3ZkqSdABVU7DTs,5038
+yomitoku/export/export_json.py,sha256=D6dD04gcPR5lmfHFVX-iGOYapsOVaJ_kH1Qhs6d2O0M,2035
+yomitoku/export/export_markdown.py,sha256=4mk_7P4kta6RguThnQUh7hesvZNLuXFnPlxhA951qVE,4171
 yomitoku/models/__init__.py,sha256=Enxq9sjJWusZuxecTori8IQa8NEYKaiiptDluHX1avg,144
 yomitoku/models/dbnet_plus.py,sha256=jeWJZm0ihbxoJeAXBFK7uVIwoosx2IUNk7Ut5wRH0vA,7998
 yomitoku/models/parseq.py,sha256=-DQMQuON2jwtb4Ib2V0O19un9w-WG4rXS0SiscydrXU,8593
@@ -48,7 +48,7 @@ yomitoku/utils/graph.py,sha256=LKNB8ZhSQwOZMfeAimPMF5UCVVr2ZaUWoGDkz8z-uGU,456
 yomitoku/utils/logger.py,sha256=uOmtQDr0A0JD7wyFshedL08BiNrQorHnpktRXba8bjU,424
 yomitoku/utils/misc.py,sha256=FbwPLeIYYBvNf9wQh2RoEonTM5BF7_IwaEqmRsYHKA8,2673
 yomitoku/utils/visualizer.py,sha256=DjDwHiAu1iFRKh96H3Egq4vuI2s_-9dLCDeykhKi8jo,5251
-yomitoku-0.7.
-yomitoku-0.7.
-yomitoku-0.7.
-yomitoku-0.7.
+yomitoku-0.7.2.dist-info/METADATA,sha256=4nyP_4zWDOxWgt7qfwRbgtlHL9xiv_FLzdw8YtfJNYU,8717
+yomitoku-0.7.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+yomitoku-0.7.2.dist-info/entry_points.txt,sha256=nFV3S11zgBNW0Qq_D0XQNg2R4lNXU_9XUFr6rdJoyF8,52
+yomitoku-0.7.2.dist-info/RECORD,,
{yomitoku-0.7.0.dist-info → yomitoku-0.7.2.dist-info}/WHEEL
File without changes
{yomitoku-0.7.0.dist-info → yomitoku-0.7.2.dist-info}/entry_points.txt
File without changes