yomitoku 0.5.3__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
yomitoku/table_structure_recognizer.py CHANGED
@@ -1,11 +1,16 @@
  from typing import List, Union
 
  import cv2
+ import os
+ import onnx
+ import onnxruntime
  import torch
  import torchvision.transforms as T
  from PIL import Image
  from pydantic import conlist
 
+ from .constants import ROOT_DIR
+
  from .base import BaseModelCatalog, BaseModule, BaseSchema
  from .configs import TableStructureRecognizerRTDETRv2Config
  from .layout_parser import filter_contained_rectangles_within_category
@@ -30,10 +35,17 @@ class TableCellSchema(BaseSchema):
      contents: Union[str, None]
 
 
+ class TableLineSchema(BaseSchema):
+     box: conlist(int, min_length=4, max_length=4)
+     score: float
+
+
  class TableStructureRecognizerSchema(BaseSchema):
      box: conlist(int, min_length=4, max_length=4)
      n_row: int
      n_col: int
+     rows: List[TableLineSchema]
+     cols: List[TableLineSchema]
      cells: List[TableCellSchema]
      order: int
 
@@ -109,12 +121,13 @@ class TableStructureRecognizer(BaseModule):
          device="cuda",
          visualize=False,
          from_pretrained=True,
+         infer_onnx=False,
      ):
          super().__init__()
          self.load_model(
              model_name,
              path_cfg,
-             from_pretrained=True,
+             from_pretrained=from_pretrained,
          )
          self.device = device
          self.visualize = visualize
@@ -140,6 +153,45 @@ class TableStructureRecognizer(BaseModule):
              id: category for id, category in enumerate(self._cfg.category)
          }
 
+         self.infer_onnx = infer_onnx
+         if infer_onnx:
+             name = self._cfg.hf_hub_repo.split("/")[-1]
+             path_onnx = f"{ROOT_DIR}/onnx/{name}.onnx"
+             if not os.path.exists(path_onnx):
+                 self.convert_onnx(path_onnx)
+
+             self.model = None
+
+             model = onnx.load(path_onnx)
+             if torch.cuda.is_available() and device == "cuda":
+                 self.sess = onnxruntime.InferenceSession(
+                     model.SerializeToString(), providers=["CUDAExecutionProvider"]
+                 )
+             else:
+                 self.sess = onnxruntime.InferenceSession(model.SerializeToString())
+
+         if self.model is not None:
+             self.model.to(self.device)
+
+     def convert_onnx(self, path_onnx):
+         dynamic_axes = {
+             "input": {0: "batch_size"},
+             "output": {0: "batch_size"},
+         }
+
+         img_size = self._cfg.data.img_size
+         dummy_input = torch.randn(1, 3, *img_size, requires_grad=True)
+
+         torch.onnx.export(
+             self.model,
+             dummy_input,
+             path_onnx,
+             opset_version=16,
+             input_names=["input"],
+             output_names=["pred_logits", "pred_boxes"],
+             dynamic_axes=dynamic_axes,
+         )
+
      def preprocess(self, img, boxes):
          cv_img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
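The ONNX path introduced above follows a common pattern: export the PyTorch module once with `torch.onnx.export`, then run it through an `onnxruntime.InferenceSession`, preferring the CUDA provider when one is available. A self-contained sketch of that pattern with a toy model (not the actual RT-DETRv2 table recognizer; the file name and sizes are arbitrary):

```python
import os
import torch
import onnxruntime

# Toy stand-in for the exported network; the real model and input size come from the config.
model = torch.nn.Sequential(torch.nn.Conv2d(3, 8, 3, padding=1), torch.nn.ReLU()).eval()
path_onnx = "toy_model.onnx"

if not os.path.exists(path_onnx):
    torch.onnx.export(
        model,
        torch.randn(1, 3, 64, 64),
        path_onnx,
        opset_version=16,
        input_names=["input"],
        output_names=["output"],
        dynamic_axes={"input": {0: "batch_size"}, "output": {0: "batch_size"}},
    )

# Prefer the CUDA provider only when the installed onnxruntime build actually offers it.
providers = (
    ["CUDAExecutionProvider"]
    if "CUDAExecutionProvider" in onnxruntime.get_available_providers()
    else ["CPUExecutionProvider"]
)
sess = onnxruntime.InferenceSession(path_onnx, providers=providers)
outputs = sess.run(None, {"input": torch.randn(2, 3, 64, 64).numpy()})
print(outputs[0].shape)  # the batch dimension follows the input thanks to dynamic_axes
```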
 
@@ -149,7 +201,7 @@ class TableStructureRecognizer(BaseModule):
              table_img = cv_img[y1:y2, x1:x2, :]
              th, hw = table_img.shape[:2]
              table_img = Image.fromarray(table_img)
-             img_tensor = self.transforms(table_img)[None].to(self.device)
+             img_tensor = self.transforms(table_img)[None]
              table_imgs.append(
                  {
                      "tensor": img_tensor,
@@ -190,7 +242,7 @@ class TableStructureRecognizer(BaseModule):
              category_elements
          )
 
-         cells, n_row, n_col = self.extract_cell_elements(category_elements)
+         cells, rows, cols = self.extract_cell_elements(category_elements)
 
          table_x, table_y = data["offset"]
          table_x2 = table_x + data["size"][1]
@@ -199,8 +251,10 @@ class TableStructureRecognizer(BaseModule):
 
          table = {
              "box": table_box,
-             "n_row": n_row,
-             "n_col": n_col,
+             "n_row": len(rows),
+             "n_col": len(cols),
+             "rows": rows,
+             "cols": cols,
              "cells": cells,
              "order": 0,
          }
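Each table now carries the detected row and column bands (`rows`, `cols`) alongside the derived counts, with every band described by a `TableLineSchema` holding a 4-integer box and a confidence score. A minimal standalone sketch of that shape, assuming `BaseSchema` behaves like a pydantic `BaseModel` (the values are illustrative only):

```python
from pydantic import BaseModel, conlist

# Stand-in for the package's BaseSchema, assuming it behaves like a pydantic BaseModel.
class TableLineSchema(BaseModel):
    box: conlist(int, min_length=4, max_length=4)  # x1, y1, x2, y2 in image coordinates
    score: float

# One detected row band; the table-level "rows" / "cols" fields hold lists of these,
# and "n_row" / "n_col" are simply the lengths of those lists.
row = TableLineSchema(box=[12, 40, 880, 72], score=0.98)
print(row.model_dump())
```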
@@ -220,16 +274,32 @@ class TableStructureRecognizer(BaseModule):
          cells = extract_cells(row_boxes, col_boxes)
          cells = filter_contained_cells_within_spancell(cells, span_boxes)
 
-         return cells, len(row_boxes), len(col_boxes)
+         rows = sorted(elements["row"], key=lambda x: x["box"][1])
+         cols = sorted(elements["col"], key=lambda x: x["box"][0])
+
+         return cells, rows, cols
 
      def __call__(self, img, table_boxes, vis=None):
          img_tensors = self.preprocess(img, table_boxes)
          outputs = []
          for data in img_tensors:
-             with torch.inference_mode():
-                 pred = self.model(data["tensor"])
+             if self.infer_onnx:
+                 input = data["tensor"].numpy()
+                 results = self.sess.run(None, {"input": input})
+                 pred = {
+                     "pred_logits": torch.tensor(results[0]).to(self.device),
+                     "pred_boxes": torch.tensor(results[1]).to(self.device),
+                 }
+
+             else:
+                 with torch.inference_mode():
+                     data["tensor"] = data["tensor"].to(self.device)
+                     pred = self.model(data["tensor"])
+
              table = self.postprocess(pred, data)
-             outputs.append(table)
+
+             if table.n_row > 0 and table.n_col > 0:
+                 outputs.append(table)
 
          if vis is None and self.visualize:
              vis = img.copy()
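`InferenceSession.run(None, ...)` returns a list of numpy arrays in the session's declared output order, which is what lets the branch above treat `results[0]` as `pred_logits` and `results[1]` as `pred_boxes`. When that ordering is not obvious, the output names can be read back from the session; a small helper sketch (assuming a session whose graph declares an `input` feed, as in the export above):

```python
import numpy as np
import torch

def run_named(sess, image_tensor):
    """Run an onnxruntime session and key its outputs by their declared names."""
    output_names = [o.name for o in sess.get_outputs()]  # e.g. ["pred_logits", "pred_boxes"]
    results = sess.run(output_names, {"input": image_tensor.numpy()})
    return {name: torch.from_numpy(np.asarray(r)) for name, r in zip(output_names, results)}
```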
yomitoku/text_detector.py CHANGED
@@ -2,6 +2,7 @@ from typing import List
 
  import numpy as np
  import torch
+ import os
  from pydantic import conlist
 
  from .base import BaseModelCatalog, BaseModule, BaseSchema
@@ -14,6 +15,10 @@ from .data.functions import (
  from .models import DBNet
  from .postprocessor import DBnetPostProcessor
  from .utils.visualizer import det_visualizer
+ from .constants import ROOT_DIR
+
+ import onnx
+ import onnxruntime
 
 
  class TextDetectorModelCatalog(BaseModelCatalog):
@@ -43,21 +48,60 @@ class TextDetector(BaseModule):
          device="cuda",
          visualize=False,
          from_pretrained=True,
+         infer_onnx=False,
      ):
          super().__init__()
          self.load_model(
              model_name,
              path_cfg,
-             from_pretrained=True,
+             from_pretrained=from_pretrained,
          )
 
          self.device = device
          self.visualize = visualize
 
          self.model.eval()
-         self.model.to(self.device)
-
          self.post_processor = DBnetPostProcessor(**self._cfg.post_process)
+         self.infer_onnx = infer_onnx
+
+         if infer_onnx:
+             name = self._cfg.hf_hub_repo.split("/")[-1]
+             path_onnx = f"{ROOT_DIR}/onnx/{name}.onnx"
+             if not os.path.exists(path_onnx):
+                 self.convert_onnx(path_onnx)
+
+             self.model = None
+
+             model = onnx.load(path_onnx)
+             if torch.cuda.is_available() and device == "cuda":
+                 self.sess = onnxruntime.InferenceSession(
+                     model.SerializeToString(), providers=["CUDAExecutionProvider"]
+                 )
+             else:
+                 self.sess = onnxruntime.InferenceSession(model.SerializeToString())
+
+             self.model = None
+
+         if self.model is not None:
+             self.model.to(self.device)
+
+     def convert_onnx(self, path_onnx):
+         dynamic_axes = {
+             "input": {0: "batch_size", 2: "height", 3: "width"},
+             "output": {0: "batch_size", 2: "height", 3: "width"},
+         }
+
+         dummy_input = torch.randn(1, 3, 256, 256, requires_grad=True)
+
+         torch.onnx.export(
+             self.model,
+             dummy_input,
+             path_onnx,
+             opset_version=14,
+             input_names=["input"],
+             output_names=["output"],
+             dynamic_axes=dynamic_axes,
+         )
 
      def preprocess(self, img):
          img = img.copy()
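Unlike the table recognizer, the detector is exported with dynamic height and width axes as well as a dynamic batch dimension, so one ONNX file serves images of any size. A standalone sketch of that behaviour with a toy fully-convolutional stand-in for DBNet (file name and sizes are arbitrary):

```python
import torch
import onnxruntime

# A tiny fully-convolutional model stands in for DBNet: it accepts any spatial size.
model = torch.nn.Conv2d(3, 1, kernel_size=3, padding=1).eval()

torch.onnx.export(
    model,
    torch.randn(1, 3, 256, 256),
    "toy_detector.onnx",
    opset_version=14,
    input_names=["input"],
    output_names=["output"],
    dynamic_axes={
        "input": {0: "batch_size", 2: "height", 3: "width"},
        "output": {0: "batch_size", 2: "height", 3: "width"},
    },
)

sess = onnxruntime.InferenceSession("toy_detector.onnx", providers=["CPUExecutionProvider"])
for h, w in [(256, 256), (320, 480)]:
    out = sess.run(["output"], {"input": torch.randn(1, 3, h, w).numpy()})[0]
    print(out.shape)  # spatial dimensions follow the input because of the dynamic axes
```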
@@ -81,9 +125,15 @@ class TextDetector(BaseModule):
 
          ori_h, ori_w = img.shape[:2]
          tensor = self.preprocess(img)
-         tensor = tensor.to(self.device)
-         with torch.inference_mode():
-             preds = self.model(tensor)
+
+         if self.infer_onnx:
+             input = tensor.numpy()
+             results = self.sess.run(["output"], {"input": input})
+             preds = {"binary": torch.tensor(results[0])}
+         else:
+             with torch.inference_mode():
+                 tensor = tensor.to(self.device)
+                 preds = self.model(tensor)
 
          quads, scores = self.postprocess(preds, (ori_h, ori_w))
          outputs = {"points": quads, "scores": scores}
@@ -93,9 +143,9 @@ class TextDetector(BaseModule):
          vis = None
          if self.visualize:
              vis = det_visualizer(
-                 preds,
                  img,
                  quads,
+                 preds=preds,
                  vis_heatmap=self._cfg.visualize.heatmap,
                  line_color=tuple(self._cfg.visualize.color[::-1]),
              )
yomitoku/text_recognizer.py CHANGED
@@ -2,22 +2,28 @@ from typing import List
 
  import numpy as np
  import torch
+ import os
  import unicodedata
  from pydantic import conlist
 
  from .base import BaseModelCatalog, BaseModule, BaseSchema
- from .configs import TextRecognizerPARSeqConfig
+ from .configs import TextRecognizerPARSeqConfig, TextRecognizerPARSeqSmallConfig
  from .data.dataset import ParseqDataset
  from .models import PARSeq
  from .postprocessor import ParseqTokenizer as Tokenizer
  from .utils.misc import load_charset
  from .utils.visualizer import rec_visualizer
 
+ from .constants import ROOT_DIR
+ import onnx
+ import onnxruntime
+
 
  class TextRecognizerModelCatalog(BaseModelCatalog):
      def __init__(self):
          super().__init__()
          self.register("parseq", TextRecognizerPARSeqConfig, PARSeq)
+         self.register("parseq-small", TextRecognizerPARSeqSmallConfig, PARSeq)
 
 
  class TextRecognizerSchema(BaseSchema):
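The catalog maps a model name to a (config, model class) pair, and 0.7.0 registers a second recognizer entry, `parseq-small`, backed by the new `TextRecognizerPARSeqSmallConfig`. A toy sketch of the registry pattern itself (a simplified stand-in, not the package's `BaseModelCatalog`):

```python
class SimpleCatalog:
    """Toy name -> (config class, model class) registry mirroring the register() calls above."""

    def __init__(self):
        self._entries = {}

    def register(self, name, config_cls, model_cls):
        self._entries[name] = (config_cls, model_cls)

    def get(self, name):
        if name not in self._entries:
            raise KeyError(f"unknown model name: {name}")
        return self._entries[name]


catalog = SimpleCatalog()
catalog.register("parseq", dict, object)        # placeholders for the real config / model classes
catalog.register("parseq-small", dict, object)
print(sorted(catalog._entries))                 # ['parseq', 'parseq-small']
```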
@@ -43,34 +49,86 @@ class TextRecognizer(BaseModule):
          device="cuda",
          visualize=False,
          from_pretrained=True,
+         infer_onnx=False,
      ):
          super().__init__()
          self.load_model(
              model_name,
              path_cfg,
-             from_pretrained=True,
+             from_pretrained=from_pretrained,
          )
          self.charset = load_charset(self._cfg.charset)
          self.tokenizer = Tokenizer(self.charset)
 
          self.device = device
 
+         self.model.tokenizer = self.tokenizer
          self.model.eval()
-         self.model.to(self.device)
 
          self.visualize = visualize
 
+         self.infer_onnx = infer_onnx
+
+         if infer_onnx:
+             name = self._cfg.hf_hub_repo.split("/")[-1]
+             path_onnx = f"{ROOT_DIR}/onnx/{name}.onnx"
+             if not os.path.exists(path_onnx):
+                 self.convert_onnx(path_onnx)
+
+             self.model = None
+
+             model = onnx.load(path_onnx)
+             if torch.cuda.is_available() and device == "cuda":
+                 self.sess = onnxruntime.InferenceSession(
+                     model.SerializeToString(), providers=["CUDAExecutionProvider"]
+                 )
+             else:
+                 self.sess = onnxruntime.InferenceSession(model.SerializeToString())
+
+         if self.model is not None:
+             self.model.to(self.device)
+
      def preprocess(self, img, polygons):
          dataset = ParseqDataset(self._cfg, img, polygons)
-         dataloader = torch.utils.data.DataLoader(
-             dataset,
-             batch_size=self._cfg.data.batch_size,
-             shuffle=False,
-             num_workers=self._cfg.data.num_workers,
-         )
+         dataloader = self._make_mini_batch(dataset)
 
          return dataloader
 
+     def _make_mini_batch(self, dataset):
+         mini_batches = []
+         mini_batch = []
+         for data in dataset:
+             data = torch.unsqueeze(data, 0)
+             mini_batch.append(data)
+
+             if len(mini_batch) == self._cfg.data.batch_size:
+                 mini_batches.append(torch.cat(mini_batch, 0))
+                 mini_batch = []
+         else:
+             if len(mini_batch) > 0:
+                 mini_batches.append(torch.cat(mini_batch, 0))
+
+         return mini_batches
+
+     def convert_onnx(self, path_onnx):
+         img_size = self._cfg.data.img_size
+         input = torch.randn(1, 3, *img_size, requires_grad=True)
+         dynamic_axes = {
+             "input": {0: "batch_size"},
+             "output": {0: "batch_size"},
+         }
+
+         torch.onnx.export(
+             self.model,
+             input,
+             path_onnx,
+             opset_version=14,
+             input_names=["input"],
+             output_names=["output"],
+             do_constant_folding=True,
+             dynamic_axes=dynamic_axes,
+         )
+
      def postprocess(self, p, points):
          pred, score = self.tokenizer.decode(p)
          pred = [unicodedata.normalize("NFKC", x) for x in pred]
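`_make_mini_batch` replaces the `DataLoader` with a plain loop that stacks samples into fixed-size batches and flushes the remainder once iteration ends (the `for`/`else` clause runs after normal loop completion). A standalone sketch of the same batching logic on dummy tensors (the batch size of 4 is arbitrary):

```python
import torch

def make_mini_batches(dataset, batch_size=4):
    """Group an iterable of CHW tensors into NCHW batches; the last batch may be smaller."""
    mini_batches, mini_batch = [], []
    for data in dataset:
        mini_batch.append(torch.unsqueeze(data, 0))
        if len(mini_batch) == batch_size:
            mini_batches.append(torch.cat(mini_batch, 0))
            mini_batch = []
    else:
        # for/else: runs after the loop finishes, flushing any leftover samples.
        if len(mini_batch) > 0:
            mini_batches.append(torch.cat(mini_batch, 0))
    return mini_batches

batches = make_mini_batches([torch.randn(3, 32, 128) for _ in range(10)], batch_size=4)
print([b.shape[0] for b in batches])  # [4, 4, 2]
```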
@@ -101,13 +159,19 @@ class TextRecognizer(BaseModule):
          scores = []
          directions = []
          for data in dataloader:
-             data = data.to(self.device)
-             with torch.inference_mode():
-                 p = self.model(self.tokenizer, data).softmax(-1)
-             pred, score, direction = self.postprocess(p, points)
-             preds.extend(pred)
-             scores.extend(score)
-             directions.extend(direction)
+             if self.infer_onnx:
+                 input = data.numpy()
+                 results = self.sess.run(["output"], {"input": input})
+                 p = torch.tensor(results[0])
+             else:
+                 with torch.inference_mode():
+                     data = data.to(self.device)
+                     p = self.model(data).softmax(-1)
+
+             pred, score, direction = self.postprocess(p, points)
+             preds.extend(pred)
+             scores.extend(score)
+             directions.extend(direction)
 
          outputs = {
              "contents": preds,
yomitoku/utils/misc.py CHANGED
@@ -9,6 +9,24 @@ def filter_by_flag(elements, flags):
      return [element for element, flag in zip(elements, flags) if flag]
 
 
+ def calc_overlap_ratio(rect_a, rect_b):
+     intersection = calc_intersection(rect_a, rect_b)
+     if intersection is None:
+         return 0, None
+
+     ix1, iy1, ix2, iy2 = intersection
+
+     overlap_width = ix2 - ix1
+     overlap_height = iy2 - iy1
+     bx1, by1, bx2, by2 = rect_b
+
+     b_area = (bx2 - bx1) * (by2 - by1)
+     overlap_area = overlap_width * overlap_height
+
+     overlap_ratio = overlap_area / b_area
+     return overlap_ratio, intersection
+
+
  def is_contained(rect_a, rect_b, threshold=0.8):
      """Given two rectangles A and B, determine whether rectangle B is contained in rectangle A.
      To tolerate slight misalignment, the overlap ratio is computed and True is returned when it exceeds the threshold.
@@ -23,20 +41,9 @@ def is_contained(rect_a, rect_b, threshold=0.8):
          bool: True if rectangle B is contained in rectangle A
      """
 
-     intersection = calc_intersection(rect_a, rect_b)
-     if intersection is None:
-         return False
-
-     ix1, iy1, ix2, iy2 = intersection
-
-     overlap_width = ix2 - ix1
-     overlap_height = iy2 - iy1
-     bx1, by1, bx2, by2 = rect_b
-
-     b_area = (bx2 - bx1) * (by2 - by1)
-     overlap_area = overlap_width * overlap_height
+     overlap_ratio, _ = calc_overlap_ratio(rect_a, rect_b)
 
-     if overlap_area / b_area > threshold:
+     if overlap_ratio > threshold:
          return True
 
      return False
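`calc_overlap_ratio` returns the fraction of rectangle B's area covered by the intersection, i.e. area(A ∩ B) / area(B), and `is_contained` now simply compares that ratio to the threshold. A worked standalone example (the intersection helper below is a simplified stand-in for the package's `calc_intersection`):

```python
def intersection(rect_a, rect_b):
    """Return the intersection of two (x1, y1, x2, y2) boxes, or None if they are disjoint."""
    ax1, ay1, ax2, ay2 = rect_a
    bx1, by1, bx2, by2 = rect_b
    ix1, iy1 = max(ax1, bx1), max(ay1, by1)
    ix2, iy2 = min(ax2, bx2), min(ay2, by2)
    return (ix1, iy1, ix2, iy2) if ix1 < ix2 and iy1 < iy2 else None

def overlap_ratio(rect_a, rect_b):
    inter = intersection(rect_a, rect_b)
    if inter is None:
        return 0.0
    ix1, iy1, ix2, iy2 = inter
    bx1, by1, bx2, by2 = rect_b
    return ((ix2 - ix1) * (iy2 - iy1)) / ((bx2 - bx1) * (by2 - by1))

# A 10x10 box that is half inside A: ratio 0.5, so not "contained" at the default threshold 0.8.
print(overlap_ratio((0, 0, 100, 100), (95, 0, 105, 10)))  # 0.5
# A 10x10 box sticking 1 unit outside A: 90 of its 100 area units overlap, so it counts as contained.
print(overlap_ratio((0, 0, 100, 100), (-1, 0, 9, 10)))    # 0.9
```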
yomitoku/utils/visualizer.py CHANGED
@@ -66,14 +66,14 @@ def reading_order_visualizer(
      return out
 
 
- def det_visualizer(preds, img, quads, vis_heatmap=False, line_color=(0, 255, 0)):
-     preds = preds["binary"][0]
-     binary = preds.detach().cpu().numpy()
+ def det_visualizer(img, quads, preds=None, vis_heatmap=False, line_color=(0, 255, 0)):
      out = img.copy()
      h, w = out.shape[:2]
-     binary = binary.squeeze(0)
-     binary = (binary * 255).astype(np.uint8)
      if vis_heatmap:
+         preds = preds["binary"][0]
+         binary = preds.detach().cpu().numpy()
+         binary = binary.squeeze(0)
+         binary = (binary * 255).astype(np.uint8)
          binary = cv2.resize(binary, (w, h), interpolation=cv2.INTER_LINEAR)
          heatmap = cv2.applyColorMap(binary, cv2.COLORMAP_JET)
          out = cv2.addWeighted(out, 0.5, heatmap, 0.5, 0)
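With the new signature, `preds` is optional and only consulted when `vis_heatmap` is requested, so callers that only want the detected quadrilaterals drawn no longer pass raw predictions. The heatmap branch itself is a standard color-map blend; a standalone sketch of that blend on a synthetic probability map (shapes and weights mirror the context lines above):

```python
import cv2
import numpy as np

out = np.full((480, 640, 3), 255, dtype=np.uint8)        # stand-in for the page image
prob_map = np.random.rand(120, 160).astype(np.float32)   # stand-in for preds["binary"][0][0]

binary = (prob_map * 255).astype(np.uint8)
binary = cv2.resize(binary, (out.shape[1], out.shape[0]), interpolation=cv2.INTER_LINEAR)
heatmap = cv2.applyColorMap(binary, cv2.COLORMAP_JET)
blended = cv2.addWeighted(out, 0.5, heatmap, 0.5, 0)     # 50/50 overlay, as in det_visualizer
print(blended.shape)  # (480, 640, 3)
```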
yomitoku-0.5.3.dist-info/METADATA → yomitoku-0.7.0.dist-info/METADATA
@@ -1,14 +1,16 @@
- Metadata-Version: 2.3
+ Metadata-Version: 2.4
  Name: yomitoku
- Version: 0.5.3
+ Version: 0.7.0
  Summary: Yomitoku is an AI-powered document image analysis package designed specifically for the Japanese language.
  Author-email: Kotaro Kinoshita <kotaro.kinoshita@mlism.com>
  License: CC BY-NC-SA 4.0
  Keywords: Deep Learning,Japanese,OCR
- Requires-Python: <3.13,>=3.9
+ Requires-Python: <3.13,>=3.10
  Requires-Dist: huggingface-hub>=0.26.1
  Requires-Dist: lxml>=5.3.0
  Requires-Dist: omegaconf>=2.3.0
+ Requires-Dist: onnx>=1.17.0
+ Requires-Dist: onnxruntime>=1.20.1
  Requires-Dist: opencv-python>=4.10.0.84
  Requires-Dist: pyclipper>=1.3.0.post6
  Requires-Dist: pydantic>=2.9.2
@@ -17,13 +19,15 @@ Requires-Dist: shapely>=2.0.6
  Requires-Dist: timm>=1.0.11
  Requires-Dist: torch>=2.5.0
  Requires-Dist: torchvision>=0.20.0
+ Provides-Extra: gpu
+ Requires-Dist: onnxruntime-gpu>=1.20.1; extra == 'gpu'
  Description-Content-Type: text/markdown
 
  Japanese version | [English](README_EN.md)
 
  <img src="static/logo/horizontal.png" width="800px">
 
- ![Python](https://img.shields.io/badge/Python-3.9|3.10|3.11|3.12-F9DC3E.svg?logo=python&logoColor=&style=flat)
+ ![Python](https://img.shields.io/badge/Python-3.10|3.11|3.12-F9DC3E.svg?logo=python&logoColor=&style=flat)
  ![Pytorch](https://img.shields.io/badge/Pytorch-2.5-EE4C2C.svg?logo=Pytorch&style=fla)
  ![CUDA](https://img.shields.io/badge/CUDA->=11.8-76B900.svg?logo=NVIDIA&style=fla)
  ![OS](https://img.shields.io/badge/OS-Linux|Mac|Win-1793D1.svg?&style=fla)
@@ -69,23 +73,30 @@ For results exported as Markdown, see the [s
  pip install yomitoku
  ```
 
- - Install the PyTorch build that matches your own CUDA version. By default, a build targeting CUDA 12.4 or later is installed.
- - PyTorch 2.5 or later is supported, which in turn requires CUDA 11.8 or later. If your environment cannot meet this, use the Dockerfile in the repository.
+ To use a GPU for onnxruntime execution:
+ ```
+ pip install yomitoku[gpu]
+ ```
+
+ - Install the PyTorch build that matches your own CUDA version. By default, a build targeting CUDA 12.4 or later is installed.
+ - PyTorch 2.5 or later is supported, which in turn requires CUDA 11.8 or later. If your environment cannot meet this, use the Dockerfile in the repository.
 
  ## 🚀 Usage
 
  ```
- yomitoku ${path_data} -f md -o results -v --figure
+ yomitoku ${path_data} -f md -o results -v --figure --lite
  ```
 
  - `${path_data}` Specify a directory containing the images to analyze, or the path to an image file directly. When a directory is given, images in its subdirectories are processed as well.
  - `-f`, `--format` Specify the output file format. (json, csv, html, and md are supported)
  - `-o`, `--outdir` Specify the output directory name. It is created if it does not exist.
  - `-v`, `--vis` Output images visualizing the analysis results.
+ - `-l`, `--lite` Run inference with lightweight models. Inference is faster than usual, but accuracy may drop slightly.
  - `-d`, `--device` Specify the device on which to run the models. If a GPU is not available, inference runs on the CPU. (default: cuda)
  - `--ignore_line_break` Ignore the line-break positions in the image and return the text within each paragraph concatenated. (default: line breaks follow the image.)
  - `--figure_letter` Also export the text contained in detected figures and tables to the output file.
- - `--figure` Export detected figures and images to the output file. (html and markdown only)
+ - `--figure` Export detected figures and images to the output file.
+ - `--encoding` Specify the character encoding of the exported output file. Characters in unsupported encodings are ignored. (utf-8, utf-8-sig, shift-jis, enc-jp, cp932)
 
  For other options, see the help:
 
  ```
@@ -94,6 +105,7 @@ yomitoku --help
  ```
 
  **NOTE**
+
  - Running on a GPU is recommended. The models are not optimized for CPU inference, and processing times will be long.
  - Only printed text is supported. Handwritten text can sometimes be read, but it is not officially supported.
  - Yomitoku is optimized for document OCR and is not optimized for scene OCR (reading text printed on things other than paper, such as signs).
@@ -107,6 +119,6 @@ yomitoku --help
 
  The source code stored in this repository and the model weight files on HuggingFace Hub related to this project are licensed under CC BY-NC-SA 4.0.
  Feel free to use them for non-commercial personal use and research purposes.
- For commercial use, a separate commercial license is available; please contact the developers.
+ For commercial use, a separate commercial license is available; please contact us at https://www.mlism.com/.
 
  YomiToku © 2024 by Kotaro Kinoshita is licensed under CC BY-NC-SA 4.0. To view a copy of this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
yomitoku-0.5.3.dist-info/RECORD → yomitoku-0.7.0.dist-info/RECORD
@@ -1,52 +1,54 @@
  yomitoku/__init__.py,sha256=kXOM8RbpwwLABG3p3vPT3dJWBk4JX2MFGrOeBEW0hKM,543
  yomitoku/base.py,sha256=lzR_V8t87aRasmFdFwD-8KAeSahSTI3AZaEn6g8sOv8,3871
  yomitoku/constants.py,sha256=zlW5QRc_u_F3C2RAgBFWyHJZexBnJT5N15GC-9d3iLo,686
- yomitoku/document_analyzer.py,sha256=HIg-nVzDhJIP-h-tn4uU86KakgHdlAhosEqK_i-SWe4,9906
- yomitoku/layout_analyzer.py,sha256=QTeRcVd8aySz8u6dg2ikET77ar3sqlukRLBwYfTyMPM,2033
- yomitoku/layout_parser.py,sha256=V2jCNHE61jNp8ytYdKwPV34V5qEK7y-7-Mq7-AkoQhU,5898
- yomitoku/ocr.py,sha256=Rcojw0aGA6yDF2RjqfK23_rMw-xm61KGd8JmTCTOOVU,2516
+ yomitoku/document_analyzer.py,sha256=85j93l-6rvvRZsL0FD7EQG--84ZLPiKoNm2CE1Ss8LM,16271
+ yomitoku/layout_analyzer.py,sha256=VhNf1ZQFoozj6WUGk5ll1p2p1jk5X3j-JPcDbTAoSl4,1856
+ yomitoku/layout_parser.py,sha256=V_mAkZxke1gwHfnxBFMTOJ8hnz2X_kfZu2lLiMd8cAs,7610
+ yomitoku/ocr.py,sha256=JSTjkupcxHITQm6ERnzU7As0c3KWf8-oxc0AqNoWHXo,2272
  yomitoku/reading_order.py,sha256=OfhOS9ttPDoPSuHrIRKyOzG19GGeRufbuSKDqhsohh4,6404
- yomitoku/table_structure_recognizer.py,sha256=CouRzfdO_toZKUQbzQqocKdMcgA3Pr7glkZuqD5itpg,7280
- yomitoku/text_detector.py,sha256=okp0xuq4lXgEDcfgCzeJcrj8hfSI4NvAgorsNwi_NYI,2682
- yomitoku/text_recognizer.py,sha256=RHdq1M3-e3C1RECgbaoqPngtxicG3izAma12juD2ICQ,3789
+ yomitoku/table_structure_recognizer.py,sha256=Eam9t7OjW4a-UWk_dl-ylbOcinN_Te_ovuri2naldL0,9482
+ yomitoku/text_detector.py,sha256=XgqhtbNcJww2x3BrH8EFz45qC6kqPKCX9hsa-dzRoIA,4274
+ yomitoku/text_recognizer.py,sha256=LVMjy-PaGlDQqfJrjKX_7vOQXDyFg6FaCeIQIyWUJX8,5833
  yomitoku/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- yomitoku/cli/main.py,sha256=MBD0S4sXgquJ8P2egkZjJcglXvCke5Uw46C28SDtr8g,6252
- yomitoku/configs/__init__.py,sha256=KBhb9S7xt22HZaIcoWSgZHfscXXj9YlimOwLH5z9CRo,454
+ yomitoku/cli/main.py,sha256=N0X4-z_jfFM5_buUpiLHHA68B5oPVVdmvwzXWn7qoUs,7822
+ yomitoku/configs/__init__.py,sha256=e1Alss5QJLZSNfD6zLEG6xu5vDQDw-4Jayiqq8bq52s,571
  yomitoku/configs/cfg_layout_parser_rtdtrv2.py,sha256=8PRxB2Ar9UF7-DLtbgSokhrzdXb0veWI6Wc-X8qigRw,2329
  yomitoku/configs/cfg_table_structure_recognizer_rtdtrv2.py,sha256=o70GMHD8k-zeBeJtuhPS8x7vVB-ffucnJXeSyn-0AXo,2116
  yomitoku/configs/cfg_text_detector_dbnet.py,sha256=U9k48PON7haoOaytiELhbZRpv9RMiUm6nnfHmdxIa9Q,1153
  yomitoku/configs/cfg_text_recognizer_parseq.py,sha256=hpFs3nKqh4XdU3BZMTultegtLEGahEsCaZdjfKC_MO8,1247
+ yomitoku/configs/cfg_text_recognizer_parseq_small.py,sha256=uCm_VC_G79IbZpOiK8fgYzAJ4b98H5pf328wyQomtfo,1259
  yomitoku/data/__init__.py,sha256=KAofFc9rk9ZdTKBjemu9RM8Vj9XnKbWC2MPZ2RWtOdE,82
  yomitoku/data/dataset.py,sha256=-I4f-FDtgsPnJ2MnXB7FtwihMW3koDaSI1OEoqKneIg,1014
  yomitoku/data/functions.py,sha256=eOyxo8S6EoAf1xGSPLWQFb9-t5Rg52NggD9MFIrOSpY,7506
  yomitoku/export/__init__.py,sha256=aANEfuovH2aevFjb2pGrBLFP-4iRzEzD9wcriCR-M7I,229
- yomitoku/export/export_csv.py,sha256=-n8eYPIzDQuiixeqpTbWaN9aQ5oFyl7XRfpv51oKPTI,1979
- yomitoku/export/export_html.py,sha256=X3H_orkS1BRlQo8Z1NzgrFwsIboDzRAx9etmqj90k2Y,4866
- yomitoku/export/export_json.py,sha256=1ChvCAHfCmMQvCfcAb1p3fSpr4elNAs3xBSIbpfn3bc,998
- yomitoku/export/export_markdown.py,sha256=mCcsXUWBLrYc1NcRSBFfBT28d6eCddAF1oHp0qdBEnE,3986
+ yomitoku/export/export_csv.py,sha256=MzGS1Y6kiHo7vZV3heKkd_v5gdxJBrpa8Zt9gFMwG88,2869
+ yomitoku/export/export_html.py,sha256=ezj96wQNqkBOCUOIPHFJW_BCh1I4Ij_8RDiKUxqaFok,4913
+ yomitoku/export/export_json.py,sha256=Kz8MgWM0bd6SNaSiHZjs-IjhsvX19Y0ovlIxGcm1vIw,1910
+ yomitoku/export/export_markdown.py,sha256=w9jT-A0__4rw1PaeGtRicuLu1rqeZO-ZLwyJm5F5PXQ,4033
  yomitoku/models/__init__.py,sha256=Enxq9sjJWusZuxecTori8IQa8NEYKaiiptDluHX1avg,144
  yomitoku/models/dbnet_plus.py,sha256=jeWJZm0ihbxoJeAXBFK7uVIwoosx2IUNk7Ut5wRH0vA,7998
- yomitoku/models/parseq.py,sha256=7QT-q5_oWqXTDXobRk1R6Lpap_AxdC4AzkSsOgXjOwM,8611
+ yomitoku/models/parseq.py,sha256=-DQMQuON2jwtb4Ib2V0O19un9w-WG4rXS0SiscydrXU,8593
  yomitoku/models/rtdetr.py,sha256=oJsr8RHz3frslhLfXdVJve47lUsrmqLjfdTrZ41tlQ0,687
  yomitoku/models/layers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  yomitoku/models/layers/activate.py,sha256=S54GPssZBMloM2oFAXeDVMmBBZOWyjwU98Niq758txE,1244
  yomitoku/models/layers/dbnet_feature_attention.py,sha256=Vpp_PiLVuI7Zs30TTg4RNRn16KTb81ewonADpUHd4aE,6060
  yomitoku/models/layers/parseq_transformer.py,sha256=33eroJf8rmgIptP-NpZLJMhG7XOTwV4rXsq674VrKnU,6704
- yomitoku/models/layers/rtdetr_backbone.py,sha256=wn1jOI8oB0V4GWKaCB-WCNMJ7CqmjvkeO8v2GB3gtAQ,10054
- yomitoku/models/layers/rtdetr_hybrid_encoder.py,sha256=fCXNw8DmD5rXAQZkiVy2Ohj3v19TFUTUCohGlj7V408,14321
- yomitoku/models/layers/rtdetrv2_decoder.py,sha256=iAxZ-TknFuEcI1B6-UU8o0rvWnuBr20idqTWCpc-u7A,28456
+ yomitoku/models/layers/rtdetr_backbone.py,sha256=VOWFW7XFfJl4cvPaupqqP4-I-YHdwlVltQEgliD69As,9904
+ yomitoku/models/layers/rtdetr_hybrid_encoder.py,sha256=ZnpEzJLzHgu_hrx7YK6myXZ4F1CDHRM501RbAPQdzdQ,14125
+ yomitoku/models/layers/rtdetrv2_decoder.py,sha256=ggUwTdWpBfyYHnZuLx8vyH8n0XfZkQFtxgpY-1YI2sI,28070
+ yomitoku/onnx/.gitkeep,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  yomitoku/postprocessor/__init__.py,sha256=W4vUuqBaFtH5dlSBIYgyaCroGLMjpV6RrNGIBQ8NFVw,243
  yomitoku/postprocessor/dbnet_postporcessor.py,sha256=o_y8b5REd2dFEdIpRcr6o-XBfOCHo9rBYGwokP_uhTc,4948
  yomitoku/postprocessor/parseq_tokenizer.py,sha256=e89_g_bc4Au3SchuxoJfJNATJTxFmVYetzXyAzPWm28,4315
- yomitoku/postprocessor/rtdetr_postprocessor.py,sha256=ADT620nCs_OvHmoWwH01ylmbHuCNtVUN8pVYYq-vy0Q,4065
+ yomitoku/postprocessor/rtdetr_postprocessor.py,sha256=TCv1t1zCxg2rSirsLm4sXlaltGubH-roVdEqnUoRs-8,3905
  yomitoku/resource/MPLUS1p-Medium.ttf,sha256=KLL1KkCumIBkgQtx1n4SffdaFuCNffThktEAbkB1OU8,1758908
  yomitoku/resource/charset.txt,sha256=sU91kSi-9Wk4733bCXy4j_UDmvcsj96sHOq1ppUJlOY,21672
  yomitoku/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  yomitoku/utils/graph.py,sha256=LKNB8ZhSQwOZMfeAimPMF5UCVVr2ZaUWoGDkz8z-uGU,456
  yomitoku/utils/logger.py,sha256=uOmtQDr0A0JD7wyFshedL08BiNrQorHnpktRXba8bjU,424
- yomitoku/utils/misc.py,sha256=2Eyy7-9K_h4Mal1zGXq6OlxubfNzhS0mEYwn_xt7xl8,2497
- yomitoku/utils/visualizer.py,sha256=2pSmbhUPylzVVJ0bXtGDoNmMdArAByab4Py7Xavvs_A,5230
- yomitoku-0.5.3.dist-info/METADATA,sha256=qLwgVjKd3AELsZu8k1JGbX2-VHgHq3Tn-eCaw11c0_s,7819
- yomitoku-0.5.3.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
- yomitoku-0.5.3.dist-info/entry_points.txt,sha256=nFV3S11zgBNW0Qq_D0XQNg2R4lNXU_9XUFr6rdJoyF8,52
- yomitoku-0.5.3.dist-info/RECORD,,
+ yomitoku/utils/misc.py,sha256=FbwPLeIYYBvNf9wQh2RoEonTM5BF7_IwaEqmRsYHKA8,2673
+ yomitoku/utils/visualizer.py,sha256=DjDwHiAu1iFRKh96H3Egq4vuI2s_-9dLCDeykhKi8jo,5251
+ yomitoku-0.7.0.dist-info/METADATA,sha256=Yvpxy_oWORSz_db4yzledIhFHbuQbORz0DrMisf59zQ,8488
+ yomitoku-0.7.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ yomitoku-0.7.0.dist-info/entry_points.txt,sha256=nFV3S11zgBNW0Qq_D0XQNg2R4lNXU_9XUFr6rdJoyF8,52
+ yomitoku-0.7.0.dist-info/RECORD,,
yomitoku-0.5.3.dist-info/WHEEL → yomitoku-0.7.0.dist-info/WHEEL
@@ -1,4 +1,4 @@
  Wheel-Version: 1.0
- Generator: hatchling 1.26.3
+ Generator: hatchling 1.27.0
  Root-Is-Purelib: true
  Tag: py3-none-any