yomitoku 0.7.1__py3-none-any.whl → 0.7.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
yomitoku/base.py CHANGED
@@ -54,7 +54,7 @@ class BaseSchema(BaseModel):
54
54
  validate_assignment = True
55
55
 
56
56
  def to_json(self, out_path: str, **kwargs):
57
- export_json(self, out_path, **kwargs)
57
+ return export_json(self, out_path, **kwargs)
58
58
 
59
59
 
60
60
  class BaseModule:
yomitoku/cli/main.py CHANGED
@@ -1,19 +1,65 @@
1
1
  import argparse
2
2
  import os
3
- import torch
3
+ import time
4
4
  from pathlib import Path
5
5
 
6
6
  import cv2
7
- import time
7
+ import torch
8
8
 
9
9
  from ..constants import SUPPORT_OUTPUT_FORMAT
10
10
  from ..data.functions import load_image, load_pdf
11
11
  from ..document_analyzer import DocumentAnalyzer
12
12
  from ..utils.logger import set_logger
13
13
 
14
+ from ..export import save_csv, save_html, save_json, save_markdown
15
+
14
16
  logger = set_logger(__name__, "INFO")
15
17
 
16
18
 
19
+ def merge_all_pages(results):
20
+ out = None
21
+ for result in results:
22
+ format = result["format"]
23
+ data = result["data"]
24
+
25
+ if format == "json":
26
+ if out is None:
27
+ out = [data]
28
+ else:
29
+ out.append(data)
30
+
31
+ elif format == "csv":
32
+ if out is None:
33
+ out = data
34
+ else:
35
+ out.extend(data)
36
+
37
+ elif format == "html":
38
+ if out is None:
39
+ out = data
40
+ else:
41
+ out += "\n" + data
42
+
43
+ elif format == "md":
44
+ if out is None:
45
+ out = data
46
+ else:
47
+ out += "\n" + data
48
+
49
+ return out
50
+
51
+
52
+ def save_merged_file(out_path, args, out):
53
+ if args.format == "json":
54
+ save_json(out_path, args.encoding, out)
55
+ elif args.format == "csv":
56
+ save_csv(out_path, args.encoding, out)
57
+ elif args.format == "html":
58
+ save_html(out_path, args.encoding, out)
59
+ elif args.format == "md":
60
+ save_markdown(out_path, args.encoding, out)
61
+
62
+
17
63
  def validate_encoding(encoding):
18
64
  if encoding not in [
19
65
  "utf-8",
@@ -32,8 +78,9 @@ def process_single_file(args, analyzer, path, format):
32
78
  else:
33
79
  imgs = [load_image(path)]
34
80
 
81
+ results = []
35
82
  for page, img in enumerate(imgs):
36
- results, ocr, layout = analyzer(img)
83
+ result, ocr, layout = analyzer(img)
37
84
  dirname = path.parent.name
38
85
  filename = path.stem
39
86
 
@@ -56,7 +103,7 @@ def process_single_file(args, analyzer, path, format):
56
103
  out_path = os.path.join(args.outdir, f"{dirname}_{filename}_p{page+1}.{format}")
57
104
 
58
105
  if format == "json":
59
- results.to_json(
106
+ json = result.to_json(
60
107
  out_path,
61
108
  ignore_line_break=args.ignore_line_break,
62
109
  encoding=args.encoding,
@@ -64,8 +111,19 @@ def process_single_file(args, analyzer, path, format):
64
111
  export_figure=args.figure,
65
112
  figure_dir=args.figure_dir,
66
113
  )
114
+
115
+ results.append(
116
+ {
117
+ "format": format,
118
+ "data": json,
119
+ }
120
+ )
121
+
122
+ if not args.combine:
123
+ save_json(out_path, args.encoding, json)
124
+
67
125
  elif format == "csv":
68
- results.to_csv(
126
+ csv = result.to_csv(
69
127
  out_path,
70
128
  ignore_line_break=args.ignore_line_break,
71
129
  encoding=args.encoding,
@@ -73,8 +131,19 @@ def process_single_file(args, analyzer, path, format):
73
131
  export_figure=args.figure,
74
132
  figure_dir=args.figure_dir,
75
133
  )
134
+
135
+ results.append(
136
+ {
137
+ "format": format,
138
+ "data": csv,
139
+ }
140
+ )
141
+
142
+ if not args.combine:
143
+ save_csv(out_path, args.encoding, csv)
144
+
76
145
  elif format == "html":
77
- results.to_html(
146
+ html = result.to_html(
78
147
  out_path,
79
148
  ignore_line_break=args.ignore_line_break,
80
149
  img=img,
@@ -84,8 +153,19 @@ def process_single_file(args, analyzer, path, format):
84
153
  figure_dir=args.figure_dir,
85
154
  encoding=args.encoding,
86
155
  )
156
+
157
+ results.append(
158
+ {
159
+ "format": format,
160
+ "data": html,
161
+ }
162
+ )
163
+
164
+ if not args.combine:
165
+ save_html(out_path, args.encoding, html)
166
+
87
167
  elif format == "md":
88
- results.to_markdown(
168
+ md = result.to_markdown(
89
169
  out_path,
90
170
  ignore_line_break=args.ignore_line_break,
91
171
  img=img,
@@ -96,7 +176,24 @@ def process_single_file(args, analyzer, path, format):
96
176
  encoding=args.encoding,
97
177
  )
98
178
 
99
- logger.info(f"Output file: {out_path}")
179
+ results.append(
180
+ {
181
+ "format": format,
182
+ "data": md,
183
+ }
184
+ )
185
+
186
+ if not args.combine:
187
+ save_markdown(out_path, args.encoding, md)
188
+
189
+ out = merge_all_pages(results)
190
+ if args.combine:
191
+ out_path = os.path.join(args.outdir, f"{dirname}_{filename}.{format}")
192
+ save_merged_file(
193
+ out_path,
194
+ args,
195
+ out,
196
+ )
100
197
 
101
198
 
102
199
  def main():
@@ -196,6 +293,16 @@ def main():
196
293
  default="utf-8",
197
294
  help="Specifies the character encoding for the output file to be exported. If unsupported characters are included, they will be ignored.",
198
295
  )
296
+ parser.add_argument(
297
+ "--combine",
298
+ action="store_true",
299
+ help="if set, merge all pages in the output",
300
+ )
301
+ parser.add_argument(
302
+ "--ignore_meta",
303
+ action="store_true",
304
+ help="if set, ignore meta information(header, footer) in the output",
305
+ )
199
306
 
200
307
  args = parser.parse_args()
201
308
 
@@ -248,6 +355,7 @@ def main():
248
355
  configs=configs,
249
356
  visualize=args.vis,
250
357
  device=args.device,
358
+ ignore_meta=args.ignore_meta,
251
359
  )
252
360
 
253
361
  os.makedirs(args.outdir, exist_ok=True)
@@ -132,7 +132,7 @@ def resize_shortest_edge(
132
132
  neww = max(int(new_w / 32) * 32, 32)
133
133
  newh = max(int(new_h / 32) * 32, 32)
134
134
 
135
- img = cv2.resize(img, (neww, newh))
135
+ img = cv2.resize(img, (neww, newh), interpolation=cv2.INTER_AREA)
136
136
  return img
137
137
 
138
138
 
@@ -275,7 +275,7 @@ def resize_with_padding(img, target_size, background_color=(0, 0, 0)):
275
275
  new_w = int(w * min(scale_w, scale_h))
276
276
  new_h = int(h * min(scale_w, scale_h))
277
277
 
278
- resized = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_LANCZOS4)
278
+ resized = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_AREA)
279
279
 
280
280
  canvas = np.zeros((target_size[0], target_size[1], 3), dtype=np.uint8)
281
281
  canvas[:, :] = background_color
@@ -3,25 +3,19 @@ from concurrent.futures import ThreadPoolExecutor
3
3
  from typing import List, Union
4
4
 
5
5
  import numpy as np
6
-
7
6
  from pydantic import conlist
8
7
 
8
+ from yomitoku.text_detector import TextDetector
9
+ from yomitoku.text_recognizer import TextRecognizer
10
+
9
11
  from .base import BaseSchema
10
12
  from .export import export_csv, export_html, export_markdown
11
13
  from .layout_analyzer import LayoutAnalyzer
12
14
  from .ocr import OCRSchema, WordPrediction, ocr_aggregate
13
15
  from .reading_order import prediction_reading_order
14
16
  from .table_structure_recognizer import TableStructureRecognizerSchema
15
- from .utils.misc import (
16
- is_contained,
17
- quad_to_xyxy,
18
- calc_overlap_ratio,
19
- )
20
- from .utils.visualizer import reading_order_visualizer
21
- from yomitoku.text_detector import TextDetector
22
- from yomitoku.text_recognizer import TextRecognizer
23
-
24
- from .utils.visualizer import det_visualizer
17
+ from .utils.misc import calc_overlap_ratio, is_contained, quad_to_xyxy
18
+ from .utils.visualizer import det_visualizer, reading_order_visualizer
25
19
 
26
20
 
27
21
  class ParagraphSchema(BaseSchema):
@@ -47,13 +41,13 @@ class DocumentAnalyzerSchema(BaseSchema):
47
41
  figures: List[FigureSchema]
48
42
 
49
43
  def to_html(self, out_path: str, **kwargs):
50
- export_html(self, out_path, **kwargs)
44
+ return export_html(self, out_path, **kwargs)
51
45
 
52
46
  def to_markdown(self, out_path: str, **kwargs):
53
- export_markdown(self, out_path, **kwargs)
47
+ return export_markdown(self, out_path, **kwargs)
54
48
 
55
49
  def to_csv(self, out_path: str, **kwargs):
56
- export_csv(self, out_path, **kwargs)
50
+ return export_csv(self, out_path, **kwargs)
57
51
 
58
52
 
59
53
  def combine_flags(flag1, flag2):
@@ -328,7 +322,13 @@ def _split_text_across_cells(results_det, results_layout):
328
322
 
329
323
 
330
324
  class DocumentAnalyzer:
331
- def __init__(self, configs={}, device="cuda", visualize=False):
325
+ def __init__(
326
+ self,
327
+ configs={},
328
+ device="cuda",
329
+ visualize=False,
330
+ ignore_meta=False,
331
+ ):
332
332
  default_configs = {
333
333
  "ocr": {
334
334
  "text_detector": {
@@ -371,6 +371,8 @@ class DocumentAnalyzer:
371
371
  )
372
372
  self.visualize = visualize
373
373
 
374
+ self.ignore_meta = ignore_meta
375
+
374
376
  def aggregate(self, ocr_res, layout_res):
375
377
  paragraphs = []
376
378
  check_list = [False] * len(ocr_res.words)
@@ -431,11 +433,15 @@ class DocumentAnalyzer:
431
433
  page_direction = judge_page_direction(paragraphs)
432
434
 
433
435
  headers = [
434
- paragraph for paragraph in paragraphs if paragraph.role == "page_header"
436
+ paragraph
437
+ for paragraph in paragraphs
438
+ if paragraph.role == "page_header" and not self.ignore_meta
435
439
  ]
436
440
 
437
441
  footers = [
438
- paragraph for paragraph in paragraphs if paragraph.role == "page_footer"
442
+ paragraph
443
+ for paragraph in paragraphs
444
+ if paragraph.role == "page_footer" and not self.ignore_meta
439
445
  ]
440
446
 
441
447
  page_contents = [
@@ -503,9 +509,9 @@ class DocumentAnalyzer:
503
509
 
504
510
  def __call__(self, img):
505
511
  self.img = img
506
- resutls, ocr, layout = asyncio.run(self.run(img))
512
+ results, ocr, layout = asyncio.run(self.run(img))
507
513
 
508
514
  if self.visualize:
509
- layout = reading_order_visualizer(layout, resutls)
515
+ layout = reading_order_visualizer(layout, results)
510
516
 
511
- return resutls, ocr, layout
517
+ return results, ocr, layout
@@ -1,6 +1,15 @@
1
- from .export_csv import export_csv
2
- from .export_html import export_html
3
- from .export_json import export_json
4
- from .export_markdown import export_markdown
1
+ from .export_csv import export_csv, save_csv
2
+ from .export_html import export_html, save_html
3
+ from .export_json import export_json, save_json
4
+ from .export_markdown import export_markdown, save_markdown
5
5
 
6
- __all__ = ["export_html", "export_markdown", "export_csv", "export_json"]
6
+ __all__ = [
7
+ "export_html",
8
+ "export_markdown",
9
+ "export_csv",
10
+ "export_json",
11
+ "save_html",
12
+ "save_markdown",
13
+ "save_csv",
14
+ "save_json",
15
+ ]
@@ -1,7 +1,8 @@
1
1
  import csv
2
- import cv2
3
2
  import os
4
3
 
4
+ import cv2
5
+
5
6
 
6
7
  def table_to_csv(table, ignore_line_break):
7
8
  num_rows = table.n_row
@@ -98,7 +99,10 @@ def export_csv(
98
99
  )
99
100
 
100
101
  elements = sorted(elements, key=lambda x: x["order"])
102
+ return elements
103
+
101
104
 
105
+ def save_csv(out_path, encoding, elements):
102
106
  with open(out_path, "w", newline="", encoding=encoding, errors="ignore") as f:
103
107
  writer = csv.writer(f, quoting=csv.QUOTE_MINIMAL)
104
108
  for element in elements:
@@ -1,9 +1,8 @@
1
- import re
2
1
  import os
3
- import cv2
4
-
2
+ import re
5
3
  from html import escape
6
4
 
5
+ import cv2
7
6
  from lxml import etree, html
8
7
 
9
8
 
@@ -182,10 +181,13 @@ def export_html(
182
181
  elements = sorted(elements, key=lambda x: x["order"])
183
182
 
184
183
  html_string = "".join([element["html"] for element in elements])
185
- html_string = add_html_tag(html_string)
184
+ # html_string = add_html_tag(html_string)
186
185
 
187
186
  parsed_html = html.fromstring(html_string)
188
187
  formatted_html = etree.tostring(parsed_html, pretty_print=True, encoding="unicode")
188
+ return formatted_html
189
+
189
190
 
191
+ def save_html(out_path, encoding, html):
190
192
  with open(out_path, "w", encoding=encoding, errors="ignore") as f:
191
- f.write(formatted_html)
193
+ f.write(html)
@@ -1,7 +1,7 @@
1
1
  import json
2
+ import os
2
3
 
3
4
  import cv2
4
- import os
5
5
 
6
6
 
7
7
  def paragraph_to_json(paragraph, ignore_line_break):
@@ -63,9 +63,13 @@ def export_json(
63
63
  figure_dir=figure_dir,
64
64
  )
65
65
 
66
+ return inputs.model_dump()
67
+
68
+
69
+ def save_json(out_path, encoding, data):
66
70
  with open(out_path, "w", encoding=encoding, errors="ignore") as f:
67
71
  json.dump(
68
- inputs.model_dump(),
72
+ data,
69
73
  f,
70
74
  ensure_ascii=False,
71
75
  indent=4,
@@ -1,10 +1,11 @@
1
+ import os
1
2
  import re
3
+
2
4
  import cv2
3
- import os
4
5
 
5
6
 
6
7
  def escape_markdown_special_chars(text):
7
- special_chars = r"([`*_{}[\]()#+.!|-])"
8
+ special_chars = r"([`*{}[\]()#+!~|-])"
8
9
  return re.sub(special_chars, r"\\\1", text)
9
10
 
10
11
 
@@ -144,5 +145,9 @@ def export_markdown(
144
145
  elements = sorted(elements, key=lambda x: x["order"])
145
146
  markdown = "\n".join([element["md"] for element in elements])
146
147
 
148
+ return markdown
149
+
150
+
151
+ def save_markdown(out_path, encoding, markdown):
147
152
  with open(out_path, "w", encoding=encoding, errors="ignore") as f:
148
153
  f.write(markdown)
yomitoku/models/parseq.py CHANGED
@@ -81,6 +81,8 @@ class PARSeq(nn.Module, PyTorchModelHubMixin):
81
81
  named_apply(partial(init_weights, exclude=["encoder"]), self)
82
82
  nn.init.trunc_normal_(self.pos_queries, std=0.02)
83
83
 
84
+ self.export_onnx = False
85
+
84
86
  @property
85
87
  def _device(self) -> torch.device:
86
88
  return next(self.head.parameters(recurse=False)).device
@@ -175,7 +177,11 @@ class PARSeq(nn.Module, PyTorchModelHubMixin):
175
177
  # greedy decode. add the next token index to the target input
176
178
  tgt_in[:, j] = p_i.squeeze().argmax(-1)
177
179
  # Efficient batch decoding: If all output words have at least one EOS token, end decoding.
178
- if testing and (tgt_in == self.tokenizer.eos_id).any(dim=-1).all():
180
+ if (
181
+ not self.export_onnx
182
+ and testing
183
+ and (tgt_in == self.tokenizer.eos_id).any(dim=-1).all()
184
+ ):
179
185
  break
180
186
 
181
187
  logits = torch.cat(logits, dim=1)
@@ -47,6 +47,7 @@ class TableStructureRecognizerSchema(BaseSchema):
47
47
  rows: List[TableLineSchema]
48
48
  cols: List[TableLineSchema]
49
49
  cells: List[TableCellSchema]
50
+ spans: List[TableLineSchema]
50
51
  order: int
51
52
 
52
53
 
@@ -242,7 +243,7 @@ class TableStructureRecognizer(BaseModule):
242
243
  category_elements
243
244
  )
244
245
 
245
- cells, rows, cols = self.extract_cell_elements(category_elements)
246
+ cells, rows, cols, spans = self.extract_cell_elements(category_elements)
246
247
 
247
248
  table_x, table_y = data["offset"]
248
249
  table_x2 = table_x + data["size"][1]
@@ -255,6 +256,7 @@ class TableStructureRecognizer(BaseModule):
255
256
  "n_col": len(cols),
256
257
  "rows": rows,
257
258
  "cols": cols,
259
+ "spans": spans,
258
260
  "cells": cells,
259
261
  "order": 0,
260
262
  }
@@ -276,8 +278,9 @@ class TableStructureRecognizer(BaseModule):
276
278
 
277
279
  rows = sorted(elements["row"], key=lambda x: x["box"][1])
278
280
  cols = sorted(elements["col"], key=lambda x: x["box"][0])
281
+ spans = sorted(elements["span"], key=lambda x: x["box"][1])
279
282
 
280
- return cells, rows, cols
283
+ return cells, rows, cols, spans
281
284
 
282
285
  def __call__(self, img, table_boxes, vis=None):
283
286
  img_tensors = self.preprocess(img, table_boxes)
@@ -118,6 +118,7 @@ class TextRecognizer(BaseModule):
118
118
  "output": {0: "batch_size"},
119
119
  }
120
120
 
121
+ self.model.export_onnx = True
121
122
  torch.onnx.export(
122
123
  self.model,
123
124
  input,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: yomitoku
3
- Version: 0.7.1
3
+ Version: 0.7.3
4
4
  Summary: Yomitoku is an AI-powered document image analysis package designed specifically for the Japanese language.
5
5
  Author-email: Kotaro Kinoshita <kotaro.kinoshita@mlism.com>
6
6
  License: CC BY-NC-SA 4.0
@@ -19,8 +19,6 @@ Requires-Dist: shapely>=2.0.6
19
19
  Requires-Dist: timm>=1.0.11
20
20
  Requires-Dist: torch>=2.5.0
21
21
  Requires-Dist: torchvision>=0.20.0
22
- Provides-Extra: gpu
23
- Requires-Dist: onnxruntime-gpu>=1.20.1; extra == 'gpu'
24
22
  Description-Content-Type: text/markdown
25
23
 
26
24
  日本語版 | [English](README_EN.md)
@@ -32,6 +30,7 @@ Description-Content-Type: text/markdown
32
30
  ![CUDA](https://img.shields.io/badge/CUDA->=11.8-76B900.svg?logo=NVIDIA&style=fla)
33
31
  ![OS](https://img.shields.io/badge/OS-Linux|Mac|Win-1793D1.svg?&style=fla)
34
32
  [![Document](https://img.shields.io/badge/docs-live-brightgreen)](https://kotaro-kinoshita.github.io/yomitoku/)
33
+ [![PyPI Downloads](https://static.pepy.tech/badge/yomitoku)](https://pepy.tech/projects/yomitoku)
35
34
 
36
35
  ## 🌟 概要
37
36
 
@@ -73,18 +72,13 @@ Markdown でエクスポートした結果は関してはリポジトリ内の[s
73
72
  pip install yomitoku
74
73
  ```
75
74
 
76
- onnxruntimeの実行にGPUを使用する場合
77
- ```
78
- pip install yomitoku[gpu]
79
- ```
80
-
81
75
  - pytorch はご自身の CUDA のバージョンにあったものをインストールしてください。デフォルトでは CUDA12.4 以上に対応したものがインストールされます。
82
76
  - pytorch は 2.5 以上のバージョンに対応しています。その関係で CUDA11.8 以上のバージョンが必要になります。対応できない場合は、リポジトリ内の Dockerfile を利用してください。
83
77
 
84
78
  ## 🚀 実行方法
85
79
 
86
80
  ```
87
- yomitoku ${path_data} -f md -o results -v --figure --lite
81
+ yomitoku ${path_data} -f md -o results -v --figure --lite
88
82
  ```
89
83
 
90
84
  - `${path_data}` 解析対象の画像が含まれたディレクトリか画像ファイルのパスを直接指定してください。ディレクトリを対象とした場合はディレクトリのサブディレクトリ内の画像も含めて処理を実行します。
@@ -97,6 +91,8 @@ yomitoku ${path_data} -f md -o results -v --figure --lite
97
91
  - `--figure_letter` 検出した図表に含まれる文字も出力ファイルにエクスポートします。
98
92
  - `--figure` 検出した図、画像を出力ファイルにエクスポートします。
99
93
  - `--encoding` エクスポートする出力ファイルの文字エンコーディングを指定します。サポートされていない文字コードが含まれる場合は、その文字を無視します。(utf-8, utf-8-sig, shift-jis, euc-jp, cp932)
94
+ - `--combine` PDFを入力に与えたときに、複数ページが含まれる場合に、それらの予測結果を一つのファイルに統合してエクスポートします。
95
+ - `--ignore_meta` 文章のheader, footerなどの文字情報を出力ファイルに含めません。
100
96
 
101
97
  その他のオプションに関しては、ヘルプを参照
102
98
 
@@ -1,16 +1,16 @@
1
1
  yomitoku/__init__.py,sha256=kXOM8RbpwwLABG3p3vPT3dJWBk4JX2MFGrOeBEW0hKM,543
2
- yomitoku/base.py,sha256=lzR_V8t87aRasmFdFwD-8KAeSahSTI3AZaEn6g8sOv8,3871
2
+ yomitoku/base.py,sha256=9U3sfe69O6vuO430JzzKQQNkgPsLM9WdLfOUUhp3Ljs,3878
3
3
  yomitoku/constants.py,sha256=zlW5QRc_u_F3C2RAgBFWyHJZexBnJT5N15GC-9d3iLo,686
4
- yomitoku/document_analyzer.py,sha256=B2F_MXFKbq58ePDCgcZKk_bgQUkno1ehYb6CZmAekCk,16234
4
+ yomitoku/document_analyzer.py,sha256=wQMmXACDsDmyaxg2OnG9Og5Nx53WPUkQdUmgYtljACQ,16412
5
5
  yomitoku/layout_analyzer.py,sha256=VhNf1ZQFoozj6WUGk5ll1p2p1jk5X3j-JPcDbTAoSl4,1856
6
6
  yomitoku/layout_parser.py,sha256=V_mAkZxke1gwHfnxBFMTOJ8hnz2X_kfZu2lLiMd8cAs,7610
7
7
  yomitoku/ocr.py,sha256=JSTjkupcxHITQm6ERnzU7As0c3KWf8-oxc0AqNoWHXo,2272
8
8
  yomitoku/reading_order.py,sha256=OfhOS9ttPDoPSuHrIRKyOzG19GGeRufbuSKDqhsohh4,6404
9
- yomitoku/table_structure_recognizer.py,sha256=Eam9t7OjW4a-UWk_dl-ylbOcinN_Te_ovuri2naldL0,9482
9
+ yomitoku/table_structure_recognizer.py,sha256=tHjex6deT_FjRK5ePz9bUXA_QIhgv_vYtK-ynm4ALxg,9625
10
10
  yomitoku/text_detector.py,sha256=XgqhtbNcJww2x3BrH8EFz45qC6kqPKCX9hsa-dzRoIA,4274
11
- yomitoku/text_recognizer.py,sha256=LVMjy-PaGlDQqfJrjKX_7vOQXDyFg6FaCeIQIyWUJX8,5833
11
+ yomitoku/text_recognizer.py,sha256=t95sbxve-E9VOCaU9CFGZIlk_a4my9KfFfr9tXws9As,5871
12
12
  yomitoku/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
- yomitoku/cli/main.py,sha256=Dcu-jBql5dP4DiN0QWbPAQscnJ7tTtbWUnaohz3FFbs,7868
13
+ yomitoku/cli/main.py,sha256=WvQO9V5HzxxvRAIsGkrDl9OGrmaKsAbDBrg4ApCSy_c,10527
14
14
  yomitoku/configs/__init__.py,sha256=e1Alss5QJLZSNfD6zLEG6xu5vDQDw-4Jayiqq8bq52s,571
15
15
  yomitoku/configs/cfg_layout_parser_rtdtrv2.py,sha256=8PRxB2Ar9UF7-DLtbgSokhrzdXb0veWI6Wc-X8qigRw,2329
16
16
  yomitoku/configs/cfg_table_structure_recognizer_rtdtrv2.py,sha256=o70GMHD8k-zeBeJtuhPS8x7vVB-ffucnJXeSyn-0AXo,2116
@@ -19,15 +19,15 @@ yomitoku/configs/cfg_text_recognizer_parseq.py,sha256=hpFs3nKqh4XdU3BZMTultegtLE
19
19
  yomitoku/configs/cfg_text_recognizer_parseq_small.py,sha256=uCm_VC_G79IbZpOiK8fgYzAJ4b98H5pf328wyQomtfo,1259
20
20
  yomitoku/data/__init__.py,sha256=KAofFc9rk9ZdTKBjemu9RM8Vj9XnKbWC2MPZ2RWtOdE,82
21
21
  yomitoku/data/dataset.py,sha256=-I4f-FDtgsPnJ2MnXB7FtwihMW3koDaSI1OEoqKneIg,1014
22
- yomitoku/data/functions.py,sha256=eOyxo8S6EoAf1xGSPLWQFb9-t5Rg52NggD9MFIrOSpY,7506
23
- yomitoku/export/__init__.py,sha256=aANEfuovH2aevFjb2pGrBLFP-4iRzEzD9wcriCR-M7I,229
24
- yomitoku/export/export_csv.py,sha256=gKIhhFKOyZaxwmjbtxk8rkFOv3uPaRwfOnKwTC8d7K0,2935
25
- yomitoku/export/export_html.py,sha256=RsFU-IMtBOJWCdqHhp5btswf2fzfow01ypY1h6E6Vvo,4979
26
- yomitoku/export/export_json.py,sha256=-57hcT2ENTa1HcT7YMjXiv6tZZf_Y_1q1xu2Jt9T1P4,1976
27
- yomitoku/export/export_markdown.py,sha256=kGOyopq_vT2NqM4LoAu4JIPbjV_SLg9H0MIOAKpS-Gk,4099
22
+ yomitoku/data/functions.py,sha256=7a_3xDKAQVdWfzQwFcdyJBojoyzUa3ePZOnG4pX1dpI,7532
23
+ yomitoku/export/__init__.py,sha256=fkwOtqH0lh6eZQW5b4EMSjIH1FmWYLKKszahR-jQYSg,366
24
+ yomitoku/export/export_csv.py,sha256=B234jlNeO4n5kQ_lwxxAZe_O2ipTbeDYlWU1zyyaVrw,3001
25
+ yomitoku/export/export_html.py,sha256=pCLoxV10_SzRWmZlDnHuyfPFIuUGB3ZkqSdABVU7DTs,5038
26
+ yomitoku/export/export_json.py,sha256=D6dD04gcPR5lmfHFVX-iGOYapsOVaJ_kH1Qhs6d2O0M,2035
27
+ yomitoku/export/export_markdown.py,sha256=D1kX3X8odWa0pf4AFZ6gik5EKMKK7pgpQXaHHv6pWDI,4170
28
28
  yomitoku/models/__init__.py,sha256=Enxq9sjJWusZuxecTori8IQa8NEYKaiiptDluHX1avg,144
29
29
  yomitoku/models/dbnet_plus.py,sha256=jeWJZm0ihbxoJeAXBFK7uVIwoosx2IUNk7Ut5wRH0vA,7998
30
- yomitoku/models/parseq.py,sha256=-DQMQuON2jwtb4Ib2V0O19un9w-WG4rXS0SiscydrXU,8593
30
+ yomitoku/models/parseq.py,sha256=psCPjP3eKjOFAUZJPQQhbD0nWEV5FeOZ0tTK27Rvvbw,8748
31
31
  yomitoku/models/rtdetr.py,sha256=oJsr8RHz3frslhLfXdVJve47lUsrmqLjfdTrZ41tlQ0,687
32
32
  yomitoku/models/layers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
33
33
  yomitoku/models/layers/activate.py,sha256=S54GPssZBMloM2oFAXeDVMmBBZOWyjwU98Niq758txE,1244
@@ -48,7 +48,7 @@ yomitoku/utils/graph.py,sha256=LKNB8ZhSQwOZMfeAimPMF5UCVVr2ZaUWoGDkz8z-uGU,456
48
48
  yomitoku/utils/logger.py,sha256=uOmtQDr0A0JD7wyFshedL08BiNrQorHnpktRXba8bjU,424
49
49
  yomitoku/utils/misc.py,sha256=FbwPLeIYYBvNf9wQh2RoEonTM5BF7_IwaEqmRsYHKA8,2673
50
50
  yomitoku/utils/visualizer.py,sha256=DjDwHiAu1iFRKh96H3Egq4vuI2s_-9dLCDeykhKi8jo,5251
51
- yomitoku-0.7.1.dist-info/METADATA,sha256=NgweyozOKBkIn9-yxX0hOV313-dBh29SsFQ22BSvoD8,8488
52
- yomitoku-0.7.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
53
- yomitoku-0.7.1.dist-info/entry_points.txt,sha256=nFV3S11zgBNW0Qq_D0XQNg2R4lNXU_9XUFr6rdJoyF8,52
54
- yomitoku-0.7.1.dist-info/RECORD,,
51
+ yomitoku-0.7.3.dist-info/METADATA,sha256=pHT4lQyl9cN4KbsOTooiJAaEEJqXhmAl9SVZKVaPkR0,8717
52
+ yomitoku-0.7.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
53
+ yomitoku-0.7.3.dist-info/entry_points.txt,sha256=nFV3S11zgBNW0Qq_D0XQNg2R4lNXU_9XUFr6rdJoyF8,52
54
+ yomitoku-0.7.3.dist-info/RECORD,,