yomitoku 0.4.0.post1.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. yomitoku/__init__.py +20 -0
  2. yomitoku/base.py +136 -0
  3. yomitoku/cli/__init__.py +0 -0
  4. yomitoku/cli/main.py +230 -0
  5. yomitoku/configs/__init__.py +13 -0
  6. yomitoku/configs/cfg_layout_parser_rtdtrv2.py +89 -0
  7. yomitoku/configs/cfg_table_structure_recognizer_rtdtrv2.py +80 -0
  8. yomitoku/configs/cfg_text_detector_dbnet.py +49 -0
  9. yomitoku/configs/cfg_text_recognizer_parseq.py +51 -0
  10. yomitoku/constants.py +32 -0
  11. yomitoku/data/__init__.py +3 -0
  12. yomitoku/data/dataset.py +40 -0
  13. yomitoku/data/functions.py +279 -0
  14. yomitoku/document_analyzer.py +315 -0
  15. yomitoku/export/__init__.py +6 -0
  16. yomitoku/export/export_csv.py +71 -0
  17. yomitoku/export/export_html.py +188 -0
  18. yomitoku/export/export_json.py +34 -0
  19. yomitoku/export/export_markdown.py +145 -0
  20. yomitoku/layout_analyzer.py +66 -0
  21. yomitoku/layout_parser.py +189 -0
  22. yomitoku/models/__init__.py +9 -0
  23. yomitoku/models/dbnet_plus.py +272 -0
  24. yomitoku/models/layers/__init__.py +0 -0
  25. yomitoku/models/layers/activate.py +38 -0
  26. yomitoku/models/layers/dbnet_feature_attention.py +160 -0
  27. yomitoku/models/layers/parseq_transformer.py +218 -0
  28. yomitoku/models/layers/rtdetr_backbone.py +333 -0
  29. yomitoku/models/layers/rtdetr_hybrid_encoder.py +433 -0
  30. yomitoku/models/layers/rtdetrv2_decoder.py +811 -0
  31. yomitoku/models/parseq.py +243 -0
  32. yomitoku/models/rtdetr.py +22 -0
  33. yomitoku/ocr.py +87 -0
  34. yomitoku/postprocessor/__init__.py +9 -0
  35. yomitoku/postprocessor/dbnet_postporcessor.py +137 -0
  36. yomitoku/postprocessor/parseq_tokenizer.py +128 -0
  37. yomitoku/postprocessor/rtdetr_postprocessor.py +107 -0
  38. yomitoku/reading_order.py +214 -0
  39. yomitoku/resource/MPLUS1p-Medium.ttf +0 -0
  40. yomitoku/resource/charset.txt +1 -0
  41. yomitoku/table_structure_recognizer.py +244 -0
  42. yomitoku/text_detector.py +103 -0
  43. yomitoku/text_recognizer.py +128 -0
  44. yomitoku/utils/__init__.py +0 -0
  45. yomitoku/utils/graph.py +20 -0
  46. yomitoku/utils/logger.py +15 -0
  47. yomitoku/utils/misc.py +102 -0
  48. yomitoku/utils/visualizer.py +179 -0
  49. yomitoku-0.4.0.post1.dev0.dist-info/METADATA +127 -0
  50. yomitoku-0.4.0.post1.dev0.dist-info/RECORD +52 -0
  51. yomitoku-0.4.0.post1.dev0.dist-info/WHEEL +4 -0
  52. yomitoku-0.4.0.post1.dev0.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,333 @@
1
+ """Copyright(c) 2023 lyuwenyu. All Rights Reserved.
2
+ """
3
+
4
+ from collections import OrderedDict
5
+
6
+ import torch
7
+ import torch.nn as nn
8
+ import torch.nn.functional as F
9
+
10
+ from .activate import get_activation
11
+
12
# Number of residual blocks in each of the four stages, keyed by ResNet depth.
# PResNet looks its ``depth`` argument up here, so only the listed depths are
# accepted.
ResNet_cfg = {
    depth: list(blocks_per_stage)
    for depth, blocks_per_stage in (
        (18, (2, 2, 2, 2)),
        (34, (3, 4, 6, 3)),
        (50, (3, 4, 6, 3)),
        (101, (3, 4, 23, 3)),
        # Depth 152 is deliberately left disabled: (152, (3, 8, 36, 3)),
    )
}
19
+
20
+
21
class ConvNormLayer(nn.Module):
    """Convolution followed by batch normalisation and an optional activation.

    ``forward`` applies ``conv`` -> ``norm`` -> ``act`` in that order.

    Args:
        ch_in: Number of input channels of the convolution.
        ch_out: Number of output channels; also the BatchNorm2d feature count.
        kernel_size: Convolution kernel size. Must be an int when ``padding``
            is left as ``None`` because the default padding is derived from it.
        stride: Convolution stride.
        padding: Explicit convolution padding. When ``None``,
            ``(kernel_size - 1) // 2`` is used.
        bias: Whether the convolution carries a bias term.
        act: Activation spec handed to ``get_activation``. ``None`` disables
            the activation (an ``nn.Identity`` is used instead).
    """

    def __init__(
        self,
        ch_in,
        ch_out,
        kernel_size,
        stride,
        padding=None,
        bias=False,
        act=None,
    ):
        super().__init__()
        if padding is None:
            padding = (kernel_size - 1) // 2
        self.conv = nn.Conv2d(
            ch_in,
            ch_out,
            kernel_size,
            stride,
            padding=padding,
            bias=bias,
        )
        self.norm = nn.BatchNorm2d(ch_out)
        # Same guard as BasicBlock / BottleNeck: the "no activation" case is
        # resolved here instead of relying on get_activation accepting None.
        self.act = nn.Identity() if act is None else get_activation(act)

    def forward(self, x):
        """Return ``act(norm(conv(x)))``."""
        return self.act(self.norm(self.conv(x)))
46
+
47
+
48
class BasicBlock(nn.Module):
    """Residual block made of two 3x3 ConvNormLayers plus a skip path.

    Args:
        ch_in: Input channel count.
        ch_out: Output channel count (``expansion`` is 1 for this block).
        stride: Stride of the first 3x3 convolution.
        shortcut: When true the input itself is added back; when false the
            input first goes through a projection stored in ``self.short``.
        act: Activation spec for the first convolution and for the output.
            ``None`` disables both.
        variant: With ``"d"`` and ``stride == 2`` the projection is a 2x2
            average pool followed by a stride-1 1x1 ConvNormLayer; in every
            other case it is a single 1x1 ConvNormLayer using ``stride``.
    """

    expansion = 1

    def __init__(
        self, ch_in, ch_out, stride, shortcut, act="relu", variant="b"
    ):
        super().__init__()
        self.shortcut = shortcut

        # A projection is only built when the input is not reused unchanged.
        if not shortcut:
            pooled_projection = variant == "d" and stride == 2
            if pooled_projection:
                layers = OrderedDict()
                layers["pool"] = nn.AvgPool2d(2, 2, 0, ceil_mode=True)
                layers["conv"] = ConvNormLayer(ch_in, ch_out, 1, 1)
                self.short = nn.Sequential(layers)
            else:
                self.short = ConvNormLayer(ch_in, ch_out, 1, stride)

        self.branch2a = ConvNormLayer(ch_in, ch_out, 3, stride, act=act)
        # No activation here: it is applied after the residual addition.
        self.branch2b = ConvNormLayer(ch_out, ch_out, 3, 1, act=None)

        if act is None:
            self.act = nn.Identity()
        else:
            self.act = get_activation(act)

    def forward(self, x):
        """Return ``act(branch2b(branch2a(x)) + skip(x))``."""
        residual = self.branch2b(self.branch2a(x))
        skip = x if self.shortcut else self.short(x)
        return self.act(residual + skip)
87
+
88
+
89
class BottleNeck(nn.Module):
    """Residual bottleneck block: 1x1 -> 3x3 -> 1x1 ConvNormLayers plus a skip path.

    The block outputs ``ch_out * expansion`` channels.

    Args:
        ch_in: Input channel count.
        ch_out: Channel count of the two inner convolutions.
        stride: Spatial stride of the block. With ``variant == "a"`` it is
            applied by the first 1x1 convolution, otherwise by the 3x3 one.
        shortcut: When true the input itself is added back; when false the
            input first goes through a projection stored in ``self.short``.
        act: Activation spec for the first two convolutions and for the
            output. ``None`` disables them.
        variant: Besides the stride placement described above, ``"d"`` with
            ``stride == 2`` makes the projection a 2x2 average pool followed
            by a stride-1 1x1 ConvNormLayer; otherwise the projection is a
            single 1x1 ConvNormLayer using ``stride``.
    """

    expansion = 4

    def __init__(
        self, ch_in, ch_out, stride, shortcut, act="relu", variant="b"
    ):
        super().__init__()

        stride1, stride2 = (stride, 1) if variant == "a" else (1, stride)
        expanded = ch_out * self.expansion

        self.branch2a = ConvNormLayer(ch_in, ch_out, 1, stride1, act=act)
        self.branch2b = ConvNormLayer(ch_out, ch_out, 3, stride2, act=act)
        # No activation here: it is applied after the residual addition.
        self.branch2c = ConvNormLayer(ch_out, expanded, 1, 1)

        self.shortcut = shortcut
        # A projection is only built when the input is not reused unchanged.
        if not shortcut:
            pooled_projection = variant == "d" and stride == 2
            if pooled_projection:
                layers = OrderedDict()
                layers["pool"] = nn.AvgPool2d(2, 2, 0, ceil_mode=True)
                layers["conv"] = ConvNormLayer(ch_in, expanded, 1, 1)
                self.short = nn.Sequential(layers)
            else:
                self.short = ConvNormLayer(ch_in, expanded, 1, stride)

        if act is None:
            self.act = nn.Identity()
        else:
            self.act = get_activation(act)

    def forward(self, x):
        """Return ``act(branch2c(branch2b(branch2a(x))) + skip(x))``."""
        residual = self.branch2c(self.branch2b(self.branch2a(x)))
        skip = x if self.shortcut else self.short(x)
        return self.act(residual + skip)
145
+
146
+
147
class Blocks(nn.Module):
    """One backbone stage: ``count`` residual blocks applied in sequence.

    Args:
        block: Block class to instantiate. It is called as
            ``block(ch_in, ch_out, stride=, shortcut=, variant=, act=)`` and
            must expose an ``expansion`` class attribute.
        ch_in: Input channel count of the first block.
        ch_out: Base channel count passed to every block.
        count: Number of blocks in the stage.
        stage_num: Stage number; only stage 2 keeps stride 1 in its first
            block, every other stage starts with a stride-2 block.
        act: Activation spec forwarded to every block.
        variant: Variant flag forwarded to every block.
    """

    def __init__(
        self, block, ch_in, ch_out, count, stage_num, act="relu", variant="b"
    ):
        super().__init__()

        leading_stride = 1 if stage_num == 2 else 2
        self.blocks = nn.ModuleList()
        block_in = ch_in
        for position in range(count):
            is_first = position == 0
            self.blocks.append(
                block(
                    block_in,
                    ch_out,
                    stride=leading_stride if is_first else 1,
                    # Only the first block changes shape, so only it needs a
                    # projected skip path.
                    shortcut=not is_first,
                    variant=variant,
                    act=act,
                )
            )
            if is_first:
                # Every later block consumes the first block's expanded output.
                block_in = ch_out * block.expansion

    def forward(self, x):
        """Feed ``x`` through every block in order and return the result."""
        for layer in self.blocks:
            x = layer(x)
        return x
174
+
175
+
176
class FrozenBatchNorm2d(nn.Module):
    """BatchNorm2d whose batch statistics and affine parameters are fixed.

    Copied and modified from
    https://github.com/facebookresearch/detr/blob/master/models/backbone.py,
    which is itself a copy of the torchvision.misc.ops version with eps added
    before the rsqrt; without that eps, models other than
    torchvision.models.resnet[18,34,50,101] produce NaNs.

    ``weight``, ``bias``, ``running_mean`` and ``running_var`` are registered
    as buffers, so the module has no trainable parameters.

    Args:
        num_features: Length of each of the four buffers.
        eps: Value added to ``running_var`` before the reciprocal square root.
    """

    def __init__(self, num_features, eps=1e-5):
        super().__init__()
        # Identity normalisation until a state dict is loaded.
        initial_buffers = (
            ("weight", torch.ones),
            ("bias", torch.zeros),
            ("running_mean", torch.zeros),
            ("running_var", torch.ones),
        )
        for buffer_name, factory in initial_buffers:
            self.register_buffer(buffer_name, factory(num_features))
        self.eps = eps
        self.num_features = num_features

    def _load_from_state_dict(
        self,
        state_dict,
        prefix,
        local_metadata,
        strict,
        missing_keys,
        unexpected_keys,
        error_msgs,
    ):
        # This module has no ``num_batches_tracked`` buffer; drop the entry
        # from the incoming dict before the regular loading logic runs.
        state_dict.pop(prefix + "num_batches_tracked", None)

        super()._load_from_state_dict(
            state_dict,
            prefix,
            local_metadata,
            strict,
            missing_keys,
            unexpected_keys,
            error_msgs,
        )

    def forward(self, x):
        """Return ``x * scale + shift`` computed from the frozen buffers."""
        # Reshape first so the arithmetic below stays fuser-friendly.
        broadcast_shape = (1, -1, 1, 1)
        gamma = self.weight.reshape(broadcast_shape)
        beta = self.bias.reshape(broadcast_shape)
        var = self.running_var.reshape(broadcast_shape)
        mean = self.running_mean.reshape(broadcast_shape)

        scale = gamma * torch.rsqrt(var + self.eps)
        shift = beta - mean * scale
        return x * scale + shift

    def extra_repr(self):
        """Return the ``num_features, eps=...`` summary shown in ``repr``."""
        return "{num_features}, eps={eps}".format(
            num_features=self.num_features, eps=self.eps
        )
231
+
232
+
233
def freeze_batch_norm2d(module: nn.Module) -> nn.Module:
    """Replace every ``nn.BatchNorm2d`` under ``module`` with ``FrozenBatchNorm2d``.

    The replacement keeps the source layer's ``eps`` and copies whichever of
    ``weight``, ``bias``, ``running_mean`` and ``running_var`` the source layer
    actually has, so converting an already initialised or trained layer does
    not reset it to an identity transform. For a freshly constructed
    ``nn.BatchNorm2d`` with default ``eps`` the result equals a default
    ``FrozenBatchNorm2d``.

    Args:
        module: Root of the module tree to convert. Child modules are
            replaced in place.

    Returns:
        A new ``FrozenBatchNorm2d`` when ``module`` itself is an
        ``nn.BatchNorm2d``; otherwise ``module`` with its descendants
        converted.
    """
    if isinstance(module, nn.BatchNorm2d):
        frozen = FrozenBatchNorm2d(module.num_features, eps=module.eps)
        for name in ("weight", "bias", "running_mean", "running_var"):
            # These are None when the source layer was built with
            # affine=False or track_running_stats=False; keep the defaults then.
            source = getattr(module, name, None)
            if source is not None:
                # Assigning a plain tensor to an existing buffer name replaces
                # that buffer; detach().clone() keeps the copy independent.
                setattr(frozen, name, source.detach().clone())
        return frozen

    for name, child in module.named_children():
        converted = freeze_batch_norm2d(child)
        if converted is not child:
            setattr(module, name, converted)
    return module
242
+
243
+
244
class PResNet(nn.Module):
    """ResNet-style backbone that returns feature maps from selected stages.

    The network is a convolutional stem (``conv1``), a 3x3 stride-2 max pool,
    and ``num_stages`` residual stages built from ``Blocks``.

    Args:
        depth: Key into ``ResNet_cfg`` selecting the per-stage block counts.
            Depths of 50 and above use ``BottleNeck``; smaller ones use
            ``BasicBlock``.
        variant: ``"c"`` or ``"d"`` builds a stem of three 3x3 convolutions;
            any other value builds a single 7x7 convolution. The value is
            also forwarded to every residual block.
        num_stages: Number of residual stages to build (at most four, the
            length of the internal channel table).
        return_idx: Indices of the stages whose outputs ``forward`` returns.
            The value is copied, so later changes to the caller's sequence do
            not affect the model.
        act: Activation spec used by the stem and all residual blocks.
        freeze_at: When ``>= 0``, the stem parameters and those of the first
            ``min(freeze_at, num_stages)`` stages get ``requires_grad = False``.
        freeze_norm: When true, every ``nn.BatchNorm2d`` in the model is
            replaced by ``FrozenBatchNorm2d``.

    Attributes:
        return_idx: List copy of the ``return_idx`` argument.
        out_channels: Channel count of each returned feature map.
        out_strides: Entry of ``[4, 8, 16, 32]`` for each returned stage.
    """

    def __init__(
        self,
        depth,
        variant="d",
        num_stages=4,
        return_idx=(0, 1, 2, 3),
        act="relu",
        freeze_at=-1,
        freeze_norm=True,
    ):
        super().__init__()

        block_nums = ResNet_cfg[depth]
        ch_in = 64
        if variant in ["c", "d"]:
            conv_def = [
                [3, ch_in // 2, 3, 2, "conv1_1"],
                [ch_in // 2, ch_in // 2, 3, 1, "conv1_2"],
                [ch_in // 2, ch_in, 3, 1, "conv1_3"],
            ]
        else:
            conv_def = [[3, ch_in, 7, 2, "conv1_1"]]

        self.conv1 = nn.Sequential(
            OrderedDict(
                [
                    (name, ConvNormLayer(cin, cout, k, s, act=act))
                    for cin, cout, k, s, name in conv_def
                ]
            )
        )

        ch_out_list = [64, 128, 256, 512]
        block = BottleNeck if depth >= 50 else BasicBlock

        _out_channels = [block.expansion * v for v in ch_out_list]
        _out_strides = [4, 8, 16, 32]

        self.res_layers = nn.ModuleList()
        for i in range(num_stages):
            # Stages are numbered from 2; Blocks keeps stride 1 for stage 2.
            stage_num = i + 2
            self.res_layers.append(
                Blocks(
                    block,
                    ch_in,
                    ch_out_list[i],
                    block_nums[i],
                    stage_num,
                    act=act,
                    variant=variant,
                )
            )
            ch_in = _out_channels[i]

        # Copy so instances never share (or mutate) the caller's sequence or
        # the signature default.
        self.return_idx = list(return_idx)
        self.out_channels = [_out_channels[_i] for _i in self.return_idx]
        self.out_strides = [_out_strides[_i] for _i in self.return_idx]

        if freeze_at >= 0:
            self._freeze_parameters(self.conv1)
            for i in range(min(freeze_at, num_stages)):
                self._freeze_parameters(self.res_layers[i])

        if freeze_norm:
            self._freeze_norm(self)

    def _freeze_parameters(self, m: nn.Module):
        """Set ``requires_grad = False`` on every parameter of ``m``."""
        for p in m.parameters():
            p.requires_grad = False

    def _freeze_norm(self, m: nn.Module):
        """Replace BatchNorm2d layers under ``m`` with FrozenBatchNorm2d.

        Delegates to the module-level ``freeze_batch_norm2d`` so both entry
        points share one implementation. Returns the converted module.
        """
        return freeze_batch_norm2d(m)

    def forward(self, x):
        """Return the outputs of the stages listed in ``return_idx``.

        The outputs are collected in stage order, one entry per stage whose
        index appears in ``return_idx``.
        """
        conv1 = self.conv1(x)
        x = F.max_pool2d(conv1, kernel_size=3, stride=2, padding=1)
        outs = []
        for idx, stage in enumerate(self.res_layers):
            x = stage(x)
            if idx in self.return_idx:
                outs.append(x)
        return outs