yomitoku-0.9.4-py3-none-any.whl → yomitoku-0.10.0-py3-none-any.whl

This diff compares the contents of publicly available package versions as released to their public registry, and is provided for informational purposes only.
yomitoku/cli/main.py CHANGED
@@ -1,5 +1,6 @@
 import argparse
 import os
+import re
 import time
 from pathlib import Path
 
@@ -96,7 +97,7 @@ def process_single_file(args, analyzer, path, format):
     format_results = []
     for page, img in enumerate(imgs):
         result, ocr, layout = analyzer(img)
-        dirname = path.parent.name
+        dirname = _sanitize_path_component(path.parent.name)
         filename = path.stem
 
         # cv2.imwrite(
@@ -471,5 +472,12 @@ def main():
     logger.info(f"Total Processing time: {end - start:.2f} sec")
 
 
+def _sanitize_path_component(component):
+    if not component:
+        return component
+
+    return re.sub(r"^\.+", lambda m: "_" * len(m.group(0)), component)
+
+
 if __name__ == "__main__":
     main()
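
The new `_sanitize_path_component` helper rewrites leading dots in the output directory name derived from `path.parent.name`, so components such as `..` or `.hidden` can no longer point outside the output tree or create hidden directories. A standalone sketch of the behavior (the assertions are illustrative, not part of the package):

    import re

    def _sanitize_path_component(component):
        if not component:
            return component
        # Each leading "." becomes "_", preserving the component's length.
        return re.sub(r"^\.+", lambda m: "_" * len(m.group(0)), component)

    assert _sanitize_path_component("..") == "__"            # parent-dir reference neutralized
    assert _sanitize_path_component(".hidden") == "_hidden"  # no hidden output dirs
    assert _sanitize_path_component("docs") == "docs"        # ordinary names untouched
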
yomitoku/document_analyzer.py CHANGED
@@ -1,53 +1,17 @@
 import asyncio
 from concurrent.futures import ThreadPoolExecutor
-from typing import List, Union
 
 import numpy as np
-from pydantic import conlist
 
 from yomitoku.text_detector import TextDetector
 from yomitoku.text_recognizer import TextRecognizer
 
-from .base import BaseSchema
-from .export import export_csv, export_html, export_markdown
 from .layout_analyzer import LayoutAnalyzer
-from .ocr import OCRSchema, WordPrediction, ocr_aggregate
+from .ocr import OCRSchema, ocr_aggregate
 from .reading_order import prediction_reading_order
-from .table_structure_recognizer import TableStructureRecognizerSchema
 from .utils.misc import calc_overlap_ratio, is_contained, quad_to_xyxy
 from .utils.visualizer import det_visualizer, reading_order_visualizer
-
-
-class ParagraphSchema(BaseSchema):
-    box: conlist(int, min_length=4, max_length=4)
-    contents: Union[str, None]
-    direction: Union[str, None]
-    order: Union[int, None]
-    role: Union[str, None]
-
-
-class FigureSchema(BaseSchema):
-    box: conlist(int, min_length=4, max_length=4)
-    order: Union[int, None]
-    paragraphs: List[ParagraphSchema]
-    order: Union[int, None]
-    direction: Union[str, None]
-
-
-class DocumentAnalyzerSchema(BaseSchema):
-    paragraphs: List[ParagraphSchema]
-    tables: List[TableStructureRecognizerSchema]
-    words: List[WordPrediction]
-    figures: List[FigureSchema]
-
-    def to_html(self, out_path: str, **kwargs):
-        return export_html(self, out_path, **kwargs)
-
-    def to_markdown(self, out_path: str, **kwargs):
-        return export_markdown(self, out_path, **kwargs)
-
-    def to_csv(self, out_path: str, **kwargs):
-        return export_csv(self, out_path, **kwargs)
+from .schemas import ParagraphSchema, FigureSchema, DocumentAnalyzerSchema
 
 
 def combine_flags(flag1, flag2):
@@ -333,6 +297,7 @@ class DocumentAnalyzer:
         visualize=False,
         ignore_meta=False,
         reading_order="auto",
+        split_text_across_cells=False,
     ):
         default_configs = {
             "ocr": {
@@ -363,7 +328,7 @@ class DocumentAnalyzer:
             recursive_update(default_configs, configs)
         else:
             raise ValueError(
-                "configs must be a dict. See the https://kotaro-kinoshita.github.io/yomitoku-dev/usage/"
+                "configs must be a dict. See the https://kotaro-kinoshita.github.io/yomitoku/module/#config"
             )
 
         self.text_detector = TextDetector(
@@ -379,6 +344,7 @@ class DocumentAnalyzer:
         self.visualize = visualize
 
         self.ignore_meta = ignore_meta
+        self.split_text_across_cells = split_text_across_cells
 
     def aggregate(self, ocr_res, layout_res):
         paragraphs = []
@@ -504,7 +470,8 @@ class DocumentAnalyzer:
         results_det, _ = results[0]
         results_layout, layout = results[1]
 
-        results_det = _split_text_across_cells(results_det, results_layout)
+        if self.split_text_across_cells:
+            results_det = _split_text_across_cells(results_det, results_layout)
 
         vis_det = None
         if self.visualize:
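
Splitting detected text regions across table cells is now opt-in: `_split_text_across_cells` runs only when the new `split_text_across_cells` flag is set, instead of on every document. A minimal usage sketch, assuming default configs and a placeholder image path:

    import cv2
    from yomitoku import DocumentAnalyzer

    analyzer = DocumentAnalyzer(
        visualize=False,
        split_text_across_cells=True,  # new in 0.10.0; defaults to False
    )
    img = cv2.imread("sample.jpg")  # hypothetical input image
    result, ocr_vis, layout_vis = analyzer(img)
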
yomitoku/layout_analyzer.py CHANGED
@@ -1,17 +1,7 @@
-from typing import List
+from .layout_parser import LayoutParser
+from .table_structure_recognizer import TableStructureRecognizer
 
-from .base import BaseSchema
-from .layout_parser import Element, LayoutParser
-from .table_structure_recognizer import (
-    TableStructureRecognizer,
-    TableStructureRecognizerSchema,
-)
-
-
-class LayoutAnalyzerSchema(BaseSchema):
-    paragraphs: List[Element]
-    tables: List[TableStructureRecognizerSchema]
-    figures: List[Element]
+from .schemas import LayoutAnalyzerSchema
 
 
 class LayoutAnalyzer:
yomitoku/layout_parser.py CHANGED
@@ -1,5 +1,3 @@
-from typing import List, Union
-
 import cv2
 import os
 import onnx
@@ -7,28 +5,17 @@ import onnxruntime
 import torch
 import torchvision.transforms as T
 from PIL import Image
-from pydantic import conlist
 
 from .constants import ROOT_DIR
 
-from .base import BaseModelCatalog, BaseModule, BaseSchema
+from .base import BaseModelCatalog, BaseModule
 from .configs import LayoutParserRTDETRv2Config, LayoutParserRTDETRv2V2Config
 from .models import RTDETRv2
 from .postprocessor import RTDETRPostProcessor
 from .utils.misc import filter_by_flag, is_contained
 from .utils.visualizer import layout_visualizer
 
-
-class Element(BaseSchema):
-    box: conlist(int, min_length=4, max_length=4)
-    score: float
-    role: Union[str, None]
-
-
-class LayoutParserSchema(BaseSchema):
-    paragraphs: List[Element]
-    tables: List[Element]
-    figures: List[Element]
+from .schemas import LayoutParserSchema
 
 
 class LayoutParserModelCatalog(BaseModelCatalog):
yomitoku/ocr.py CHANGED
@@ -1,27 +1,6 @@
-from typing import List
-
-from pydantic import conlist
-
 from yomitoku.text_detector import TextDetector
 from yomitoku.text_recognizer import TextRecognizer
-
-from .base import BaseSchema
-
-
-class WordPrediction(BaseSchema):
-    points: conlist(
-        conlist(int, min_length=2, max_length=2),
-        min_length=4,
-        max_length=4,
-    )
-    content: str
-    direction: str
-    rec_score: float
-    det_score: float
-
-
-class OCRSchema(BaseSchema):
-    words: List[WordPrediction]
+from .schemas import OCRSchema
 
 
 def ocr_aggregate(det_outputs, rec_outputs):
yomitoku/schemas.py ADDED
@@ -0,0 +1,241 @@
+from typing import List, Union
+from pydantic import conlist, Field
+
+from .base import BaseSchema
+from .export import export_csv, export_html, export_markdown, export_json
+
+
+class Element(BaseSchema):
+    box: conlist(int, min_length=4, max_length=4) = Field(
+        ...,
+        description="Bounding box of the layout element in the format [x1, y1, x2, y2]",
+    )
+    score: float = Field(
+        ...,
+        description="Confidence score of the layout element detection",
+    )
+    role: Union[str, None] = Field(
+        ...,
+        description="Role of the element, e.g., ['section_headings', 'page_header', 'page_footer', 'list_item', 'caption', 'inline_formula', 'display_formula', 'index']",
+    )
+
+
+class ParagraphSchema(BaseSchema):
+    box: conlist(int, min_length=4, max_length=4) = Field(
+        ...,
+        description="Bounding box of the paragraph in the format [x1, y1, x2, y2]",
+    )
+    contents: Union[str, None] = Field(
+        ...,
+        description="Text content of the paragraph",
+    )
+    direction: Union[str, None] = Field(
+        ...,
+        description="Text direction, e.g., ['horizontal' or 'vertical']",
+    )
+    order: Union[int, None] = Field(
+        ...,
+        description="Order of the paragraph in the document",
+    )
+    role: Union[str, None] = Field(
+        ...,
+        description="Role of the paragraph, e.g., ['section_headings', 'page_header', 'page_footer'])",
+    )
+
+
+class TableCellSchema(BaseSchema):
+    col: int = Field(
+        ...,
+        description="Column index of the cell",
+    )
+    row: int = Field(
+        ...,
+        description="Row index of the cell",
+    )
+    col_span: int = Field(
+        ...,
+        description="Number of columns spanned by the cell",
+    )
+    row_span: int = Field(
+        ...,
+        description="Number of rows spanned by the cell",
+    )
+    box: conlist(int, min_length=4, max_length=4) = Field(
+        ...,
+        description="Bounding box of the cell in the format [x1, y1, x2, y2]",
+    )
+    contents: Union[str, None] = Field(
+        ...,
+        description="Text content of the cell",
+    )
+
+
+class TableLineSchema(BaseSchema):
+    box: conlist(int, min_length=4, max_length=4) = Field(
+        ...,
+        description="Bounding box of the table line in the format [x1, y1, x2, y2]",
+    )
+    score: float = Field(
+        ...,
+        description="Confidence score of the table line detection",
+    )
+
+
+class TableStructureRecognizerSchema(BaseSchema):
+    box: conlist(int, min_length=4, max_length=4) = Field(
+        ...,
+        description="Bounding box of the table in the format [x1, y1, x2, y2]",
+    )
+    n_row: int = Field(..., description="Number of rows in the table")
+    n_col: int = Field(..., description="Number of columns in the table")
+    rows: List[TableLineSchema] = Field(
+        ...,
+        description="List of table lines representing rows",
+    )
+    cols: List[TableLineSchema] = Field(
+        ...,
+        description="List of table lines representing columns",
+    )
+    spans: List[TableLineSchema] = Field(
+        ...,
+        description="List of table lines representing spans",
+    )
+    cells: List[TableCellSchema] = Field(
+        ...,
+        description="List of table cells",
+    )
+    order: int = Field(
+        ...,
+        description="Order of the table in the document",
+    )
+
+
+class LayoutAnalyzerSchema(BaseSchema):
+    paragraphs: List[Element] = Field(
+        ...,
+        description="List of detected paragraphs",
+    )
+    tables: List[TableStructureRecognizerSchema] = Field(
+        ...,
+        description="List of detected tables",
+    )
+    figures: List[Element] = Field(
+        ...,
+        description="List of detected figures",
+    )
+
+
+class WordPrediction(BaseSchema):
+    points: conlist(
+        conlist(int, min_length=2, max_length=2),
+        min_length=4,
+        max_length=4,
+    ) = Field(
+        ...,
+        description="Bounding box of the word in the format [[x1, y1], [x2, y2], [x3, y3], [x4, y4]]",
+    )
+    content: str = Field(..., description="Text content of the word")
+    direction: str = Field(
+        ..., description="Text direction, e.g., 'horizontal' or 'vertical'"
+    )
+    rec_score: float = Field(
+        ..., description="Confidence score of the word recognition"
+    )
+    det_score: float = Field(
+        ...,
+        description="Confidence score of the word detection",
+    )
+
+
+class TextDetectorSchema(BaseSchema):
+    points: List[
+        conlist(
+            conlist(int, min_length=2, max_length=2),
+            min_length=4,
+            max_length=4,
+        )
+    ] = Field(
+        ...,
+        description="List of bounding boxes of detected text regions in the format [[x1, y1], [x2, y2], [x3, y3], [x4, y4]]",
+    )
+    scores: List[float] = Field(
+        ...,
+        description="List of confidence scores for each detected text region",
+    )
+
+
+class OCRSchema(BaseSchema):
+    words: List[WordPrediction] = Field(
+        ...,
+        description="List of recognized words with their bounding boxes, content, direction, and scores",
+    )
+
+
+class LayoutParserSchema(BaseSchema):
+    paragraphs: List[Element] = Field(..., description="List of detected paragraphs")
+    tables: List[Element] = Field(..., description="List of detected tables")
+    figures: List[Element] = Field(..., description="List of detected figures")
+
+
+class FigureSchema(BaseSchema):
+    box: conlist(int, min_length=4, max_length=4) = Field(
+        ..., description="Bounding box of the figure in the format [x1, y1, x2, y2]"
+    )
+    order: Union[int, None] = Field(
+        ..., description="Order of the figure in the document"
+    )
+    paragraphs: List[ParagraphSchema] = Field(
+        ..., description="List of paragraphs associated with the figure"
+    )
+    order: Union[int, None] = Field(
+        ..., description="Order of the figure in the document"
+    )
+    direction: Union[str, None] = Field(
+        ..., description="Text direction, e.g., ['horizontal' or 'vertical']"
+    )
+
+
+class DocumentAnalyzerSchema(BaseSchema):
+    paragraphs: List[ParagraphSchema] = Field(
+        ..., description="List of detected paragraphs"
+    )
+    tables: List[TableStructureRecognizerSchema] = Field(
+        ..., description="List of detected tables"
+    )
+    words: List[WordPrediction] = Field(..., description="List of recognized words")
+    figures: List[FigureSchema] = Field(..., description="List of detected figures")
+
+    def to_html(self, out_path: str, **kwargs):
+        return export_html(self, out_path, **kwargs)
+
+    def to_markdown(self, out_path: str, **kwargs):
+        return export_markdown(self, out_path, **kwargs)
+
+    def to_csv(self, out_path: str, **kwargs):
+        return export_csv(self, out_path, **kwargs)
+
+    def to_json(self, out_path: str, **kwargs):
+        return export_json(self, out_path, **kwargs)
+
+
+class TextRecognizerSchema(BaseSchema):
+    contents: List[str] = Field(
+        ...,
+        description="List of recognized text contents",
+    )
+    directions: List[str] = Field(
+        ..., description="List of text directions, e.g., ['horizontal' or 'vertical']"
+    )
+    scores: List[float] = Field(
+        ..., description="List of confidence scores for each recognized text"
+    )
+    points: List[
+        conlist(
+            conlist(int, min_length=2, max_length=2),
+            min_length=4,
+            max_length=4,
+        )
+    ] = Field(
+        ...,
+        description="List of bounding boxes of recognized text in the format [[x1, y1], [x2, y2], [x3, y3], [x4, y4]]",
+    )
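
All result schemas are consolidated into the new `yomitoku/schemas.py`, every field now carries a `Field(..., description=...)`, and `DocumentAnalyzerSchema` gains `to_json` alongside the existing HTML, Markdown, and CSV exporters. A sketch of the 0.10.0 import path, reusing `result` from the analyzer example above:

    # 0.9.x: schemas lived in the modules that used them, e.g.
    #   from yomitoku.document_analyzer import DocumentAnalyzerSchema
    # 0.10.0: one import point for every result schema
    from yomitoku.schemas import DocumentAnalyzerSchema, OCRSchema, Element

    result.to_json("result.json")  # new export method in 0.10.0
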
yomitoku/table_structure_recognizer.py CHANGED
@@ -1,5 +1,3 @@
-from typing import List, Union
-
 import cv2
 import os
 import onnx
@@ -7,17 +5,17 @@ import onnxruntime
 import torch
 import torchvision.transforms as T
 from PIL import Image
-from pydantic import conlist
 
 from .constants import ROOT_DIR
 
-from .base import BaseModelCatalog, BaseModule, BaseSchema
+from .base import BaseModelCatalog, BaseModule
 from .configs import TableStructureRecognizerRTDETRv2Config
 from .layout_parser import filter_contained_rectangles_within_category
 from .models import RTDETRv2
 from .postprocessor import RTDETRPostProcessor
 from .utils.misc import calc_intersection, filter_by_flag, is_contained
 from .utils.visualizer import table_visualizer
+from .schemas import TableStructureRecognizerSchema
 
 
 class TableStructureRecognizerModelCatalog(BaseModelCatalog):
@@ -26,31 +24,6 @@ class TableStructureRecognizerModelCatalog(BaseModelCatalog):
         self.register("rtdetrv2", TableStructureRecognizerRTDETRv2Config, RTDETRv2)
 
 
-class TableCellSchema(BaseSchema):
-    col: int
-    row: int
-    col_span: int
-    row_span: int
-    box: conlist(int, min_length=4, max_length=4)
-    contents: Union[str, None]
-
-
-class TableLineSchema(BaseSchema):
-    box: conlist(int, min_length=4, max_length=4)
-    score: float
-
-
-class TableStructureRecognizerSchema(BaseSchema):
-    box: conlist(int, min_length=4, max_length=4)
-    n_row: int
-    n_col: int
-    rows: List[TableLineSchema]
-    cols: List[TableLineSchema]
-    cells: List[TableCellSchema]
-    spans: List[TableLineSchema]
-    order: int
-
-
 def extract_cells(row_boxes, col_boxes):
     cells = []
     for i, row_box in enumerate(row_boxes):
yomitoku/text_detector.py CHANGED
@@ -1,11 +1,8 @@
-from typing import List
-
 import numpy as np
 import torch
 import os
-from pydantic import conlist
 
-from .base import BaseModelCatalog, BaseModule, BaseSchema
+from .base import BaseModelCatalog, BaseModule
 from .configs import (
     TextDetectorDBNetConfig,
     TextDetectorDBNetV2Config,
@@ -19,6 +16,7 @@ from .models import DBNet
 from .postprocessor import DBnetPostProcessor
 from .utils.visualizer import det_visualizer
 from .constants import ROOT_DIR
+from .schemas import TextDetectorSchema
 
 import onnx
 import onnxruntime
@@ -31,17 +29,6 @@ class TextDetectorModelCatalog(BaseModelCatalog):
         self.register("dbnetv2", TextDetectorDBNetV2Config, DBNet)
 
 
-class TextDetectorSchema(BaseSchema):
-    points: List[
-        conlist(
-            conlist(int, min_length=2, max_length=2),
-            min_length=4,
-            max_length=4,
-        )
-    ]
-    scores: List[float]
-
-
 class TextDetector(BaseModule):
     model_catalog = TextDetectorModelCatalog()
 
yomitoku/text_recognizer.py CHANGED
@@ -1,12 +1,9 @@
-from typing import List
-
 import numpy as np
 import torch
 import os
 import unicodedata
-from pydantic import conlist
 
-from .base import BaseModelCatalog, BaseModule, BaseSchema
+from .base import BaseModelCatalog, BaseModule
 from .configs import (
     TextRecognizerPARSeqConfig,
     TextRecognizerPARSeqSmallConfig,
@@ -19,6 +16,8 @@ from .utils.misc import load_charset
 from .utils.visualizer import rec_visualizer
 
 from .constants import ROOT_DIR
+from .schemas import TextRecognizerSchema
+
 import onnx
 import onnxruntime
 
@@ -31,19 +30,6 @@ class TextRecognizerModelCatalog(BaseModelCatalog):
         self.register("parseq-small", TextRecognizerPARSeqSmallConfig, PARSeq)
 
 
-class TextRecognizerSchema(BaseSchema):
-    contents: List[str]
-    directions: List[str]
-    scores: List[float]
-    points: List[
-        conlist(
-            conlist(int, min_length=2, max_length=2),
-            min_length=4,
-            max_length=4,
-        )
-    ]
-
-
 class TextRecognizer(BaseModule):
     model_catalog = TextRecognizerModelCatalog()
 
@@ -94,10 +80,21 @@ class TextRecognizer(BaseModule):
         self.model.to(self.device)
 
     def preprocess(self, img, polygons):
+        if polygons is None:
+            h, w = img.shape[:2]
+            polygons = [
+                [
+                    [0, 0],
+                    [w, 0],
+                    [w, h],
+                    [0, h],
+                ]
+            ]
+
         dataset = ParseqDataset(self._cfg, img, polygons)
         dataloader = self._make_mini_batch(dataset)
 
-        return dataloader
+        return dataloader, polygons
 
     def _make_mini_batch(self, dataset):
         mini_batches = []
@@ -150,7 +147,7 @@ class TextRecognizer(BaseModule):
 
         return pred, score, directions
 
-    def __call__(self, img, points, vis=None):
+    def __call__(self, img, points=None, vis=None):
         """
         Apply the recognition model to the input image.
 
@@ -160,7 +157,7 @@ class TextRecognizer(BaseModule):
             vis (np.ndarray, optional): rendering image. Defaults to None.
         """
 
-        dataloader = self.preprocess(img, points)
+        dataloader, points = self.preprocess(img, points)
         preds = []
         scores = []
         directions = []
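
`TextRecognizer` can now be called without detection points: `preprocess` substitutes a single polygon covering the whole image and returns the polygons alongside the dataloader, which suits pre-cropped line images. A sketch under those assumptions (`line.jpg` is a placeholder, and the module's usual `(results, vis)` return is assumed):

    import cv2
    from yomitoku.text_recognizer import TextRecognizer

    recognizer = TextRecognizer()
    img = cv2.imread("line.jpg")  # hypothetical pre-cropped text line
    results, vis = recognizer(img)  # points=None: the full image is one region
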
yomitoku/utils/visualizer.py CHANGED
@@ -10,17 +10,28 @@ logger = set_logger(__name__, "INFO")
 def _reading_order_visualizer(img, elements, line_color, tip_size):
     out = img.copy()
     for i, element in enumerate(elements):
-        if i == 0:
-            continue
-
-        prev_element = elements[i - 1]
         cur_x1, cur_y1, cur_x2, cur_y2 = element.box
-        prev_x1, prev_y1, prev_x2, prev_y2 = prev_element.box
 
         cur_center = (
             cur_x1 + (cur_x2 - cur_x1) / 2,
             cur_y1 + (cur_y2 - cur_y1) / 2,
         )
+
+        cv2.putText(
+            out,
+            str(i),
+            (int(cur_center[0]), int(cur_center[1])),
+            cv2.FONT_HERSHEY_SIMPLEX,
+            1,
+            (0, 200, 0),
+            2,
+        )
+
+        if i == 0:
+            continue
+
+        prev_element = elements[i - 1]
+        prev_x1, prev_y1, prev_x2, prev_y2 = prev_element.box
         prev_center = (
             prev_x1 + (prev_x2 - prev_x1) / 2,
             prev_y1 + (prev_y2 - prev_y1) / 2,
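
`_reading_order_visualizer` now stamps each element's index at its center with `cv2.putText` before drawing the arrow from the previous element, so element 0 is labeled even though it has no incoming arrow. A sketch against the signature visible in this hunk, assuming the helper returns the annotated copy it builds in `out` (the `Element` values are made up):

    import numpy as np
    from yomitoku.schemas import Element
    from yomitoku.utils.visualizer import _reading_order_visualizer

    elements = [
        Element(box=[10, 10, 100, 50], score=0.99, role=None),
        Element(box=[10, 80, 100, 120], score=0.98, role=None),
    ]
    canvas = np.full((200, 400, 3), 255, dtype=np.uint8)  # blank white page
    out = _reading_order_visualizer(canvas, elements, (0, 0, 255), 10)
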
yomitoku-0.10.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: yomitoku
-Version: 0.9.4
+Version: 0.10.0
 Summary: Yomitoku is an AI-powered document image analysis package designed specifically for the Japanese language.
 Author-email: Kotaro Kinoshita <kotaro.kinoshita@mlism.com>
 License: CC BY-NC-SA 4.0
@@ -41,7 +41,7 @@ Description-Content-Type: text/markdown
 YomiToku は日本語に特化した AI 文章画像解析エンジン(Document AI)です。画像内の文字の全文 OCR およびレイアウト解析機能を有しており、画像内の文字情報や図表を認識、抽出、変換します。
 
 - 🤖 日本語データセットで学習した 4 種類(文字位置の検知、文字列認識、レイアウト解析、表の構造認識)の AI モデルを搭載しています。4 種類のモデルはすべて独自に学習されたモデルで日本語文書に対して、高精度に推論可能です。
-- 🇯🇵 各モデルは日本語の文書画像に特化して学習されており、7000 文字を超える日本語文字の認識をサーポート、手書き文字、縦書きなど日本語特有のレイアウト構造の文書画像の解析も可能です。(日本語以外にも英語の文書に対しても対応しています)。
+- 🇯🇵 各モデルは日本語の文書画像に特化して学習されており、7000 文字を超える日本語文字の認識をサポート、手書き文字、縦書きなど日本語特有のレイアウト構造の文書画像の解析も可能です。(日本語以外にも英語の文書に対しても対応しています)。
 - 📈 レイアウト解析、表の構造解析, 読み順推定機能により、文書画像のレイアウトの意味的構造を壊さずに情報を抽出することが可能です。
 - 📄 多様な出力形式をサポートしています。html やマークダウン、json、csv のいずれかのフォーマットに変換可能です。また、文書内に含まれる図表、画像の抽出の出力も可能です。文書画像をサーチャブルPDFに変換する処理もサポートしています。
 - ⚡ GPU 環境で高速に動作し、効率的に文書の文字起こし解析が可能です。また、VRAM も 8GB 以内で動作し、ハイエンドな GPU を用意する必要はありません。
yomitoku-0.10.0.dist-info/RECORD CHANGED
@@ -1,16 +1,17 @@
 yomitoku/__init__.py,sha256=kXOM8RbpwwLABG3p3vPT3dJWBk4JX2MFGrOeBEW0hKM,543
 yomitoku/base.py,sha256=9U3sfe69O6vuO430JzzKQQNkgPsLM9WdLfOUUhp3Ljs,3878
 yomitoku/constants.py,sha256=2jya14UflDkMdYWMKc-ZllkWbJW2qh59Cnt2brrgNb4,693
-yomitoku/document_analyzer.py,sha256=xliAelQdfsK64FtVuFvstDBr9uf2TwhqW31g2g91_CY,16888
-yomitoku/layout_analyzer.py,sha256=VhNf1ZQFoozj6WUGk5ll1p2p1jk5X3j-JPcDbTAoSl4,1856
-yomitoku/layout_parser.py,sha256=0MgbCsD90srQdsxkGEL0TgKm4rkmGzsQYx0sjKQ03yc,7718
-yomitoku/ocr.py,sha256=JSTjkupcxHITQm6ERnzU7As0c3KWf8-oxc0AqNoWHXo,2272
+yomitoku/document_analyzer.py,sha256=FyF85m7k-BxzpOKb3sIfBRpxh_4NDPC7EC3x91hxoGo,15959
+yomitoku/layout_analyzer.py,sha256=soLDcX09NlNicRYenOhFLgq8L8ct9xo7N9Hsj1IWKZw,1643
+yomitoku/layout_parser.py,sha256=BSWiL8Xl7c0CY2CXNteLye5e-bLdR1hXKtps94kon9w,7440
+yomitoku/ocr.py,sha256=gKWNciOQIgUcYrNmKhksSK8TSNisK8wY2zG2ZPXh2Fk,1920
 yomitoku/reading_order.py,sha256=_T09PqT7guk57zWo4HdSazLSQTwM91piyELA_wNHQAQ,7521
-yomitoku/table_structure_recognizer.py,sha256=tHjex6deT_FjRK5ePz9bUXA_QIhgv_vYtK-ynm4ALxg,9625
-yomitoku/text_detector.py,sha256=6IwEJJKp_F8YH0Oki0QV-Mqi--P2LGbNKo-_kxBB_eo,4383
-yomitoku/text_recognizer.py,sha256=eaxozNu-Ms6iv8efbKZzn8pJNW1Wo4f86bGhzSMtv3s,5992
+yomitoku/schemas.py,sha256=azI9iVQ88-JPSuRmDVxCcdr2KNICJmLuMl0AQmfof-0,7582
+yomitoku/table_structure_recognizer.py,sha256=UjYdzY-9dIClWP9iz0HCLr1DU2UY7n7Rtr7L9vOJwDU,9043
+yomitoku/text_detector.py,sha256=gXofo7ywFsI3hNMKKfYoOwlYVDerJym2Zg_Eq7NNGv4,4136
+yomitoku/text_recognizer.py,sha256=hS_spLnINVkMFOWm1bBG3WVfI7rK4o7ONt_nTUnpMLM,5969
 yomitoku/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-yomitoku/cli/main.py,sha256=v1UYsnQdnylhLvDURuxLODU3IU-ssVGqOJT9r-TCVns,13623
+yomitoku/cli/main.py,sha256=s7wxBtgxPu7P-ARtjVmQlOosus3srlfS4-RuV0BFpyM,13821
 yomitoku/cli/mcp_server.py,sha256=WnWzxd13HaemC3b-5i9B9NVBGc3WGfum2nYhoBolEnk,5641
 yomitoku/configs/__init__.py,sha256=x5-ccjGiP6xxRtDPT7f1Enl7SsE0hSk0G8f7eF9V85I,886
 yomitoku/configs/cfg_layout_parser_rtdtrv2.py,sha256=8PRxB2Ar9UF7-DLtbgSokhrzdXb0veWI6Wc-X8qigRw,2329
@@ -52,8 +53,8 @@ yomitoku/utils/graph.py,sha256=LKNB8ZhSQwOZMfeAimPMF5UCVVr2ZaUWoGDkz8z-uGU,456
 yomitoku/utils/logger.py,sha256=uOmtQDr0A0JD7wyFshedL08BiNrQorHnpktRXba8bjU,424
 yomitoku/utils/misc.py,sha256=r92x45kQR8lC5jO1MZaHBDtcCWBkQXg_WS9H4RXJzSY,4127
 yomitoku/utils/searchable_pdf.py,sha256=taZ-XtXN4RItePMDv4q0fRVlryusdkexA3TCXzwlXRo,3497
-yomitoku/utils/visualizer.py,sha256=DjDwHiAu1iFRKh96H3Egq4vuI2s_-9dLCDeykhKi8jo,5251
-yomitoku-0.9.4.dist-info/METADATA,sha256=oDIp-lxMIQjIfVtrzQXBcY2PJFHlRwktVGFXndQRJZo,8872
-yomitoku-0.9.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-yomitoku-0.9.4.dist-info/entry_points.txt,sha256=n3c8bQSj5Be5GHAOv_NZ8cldJFmWeigQxSmteFTmu_k,96
-yomitoku-0.9.4.dist-info/RECORD,,
+yomitoku/utils/visualizer.py,sha256=ycC7SGuyXGGnX9KMJecdcEe1PWq30fG-EghB0E0EmWY,5468
+yomitoku-0.10.0.dist-info/METADATA,sha256=Xd2cOvxpBFl-jSyGK61MLEwwC7CDEIEUIUAVk0L58tI,8870
+yomitoku-0.10.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+yomitoku-0.10.0.dist-info/entry_points.txt,sha256=n3c8bQSj5Be5GHAOv_NZ8cldJFmWeigQxSmteFTmu_k,96
+yomitoku-0.10.0.dist-info/RECORD,,