yomitoku-0.4.0.post1.dev0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- yomitoku/__init__.py +20 -0
- yomitoku/base.py +136 -0
- yomitoku/cli/__init__.py +0 -0
- yomitoku/cli/main.py +230 -0
- yomitoku/configs/__init__.py +13 -0
- yomitoku/configs/cfg_layout_parser_rtdtrv2.py +89 -0
- yomitoku/configs/cfg_table_structure_recognizer_rtdtrv2.py +80 -0
- yomitoku/configs/cfg_text_detector_dbnet.py +49 -0
- yomitoku/configs/cfg_text_recognizer_parseq.py +51 -0
- yomitoku/constants.py +32 -0
- yomitoku/data/__init__.py +3 -0
- yomitoku/data/dataset.py +40 -0
- yomitoku/data/functions.py +279 -0
- yomitoku/document_analyzer.py +315 -0
- yomitoku/export/__init__.py +6 -0
- yomitoku/export/export_csv.py +71 -0
- yomitoku/export/export_html.py +188 -0
- yomitoku/export/export_json.py +34 -0
- yomitoku/export/export_markdown.py +145 -0
- yomitoku/layout_analyzer.py +66 -0
- yomitoku/layout_parser.py +189 -0
- yomitoku/models/__init__.py +9 -0
- yomitoku/models/dbnet_plus.py +272 -0
- yomitoku/models/layers/__init__.py +0 -0
- yomitoku/models/layers/activate.py +38 -0
- yomitoku/models/layers/dbnet_feature_attention.py +160 -0
- yomitoku/models/layers/parseq_transformer.py +218 -0
- yomitoku/models/layers/rtdetr_backbone.py +333 -0
- yomitoku/models/layers/rtdetr_hybrid_encoder.py +433 -0
- yomitoku/models/layers/rtdetrv2_decoder.py +811 -0
- yomitoku/models/parseq.py +243 -0
- yomitoku/models/rtdetr.py +22 -0
- yomitoku/ocr.py +87 -0
- yomitoku/postprocessor/__init__.py +9 -0
- yomitoku/postprocessor/dbnet_postporcessor.py +137 -0
- yomitoku/postprocessor/parseq_tokenizer.py +128 -0
- yomitoku/postprocessor/rtdetr_postprocessor.py +107 -0
- yomitoku/reading_order.py +214 -0
- yomitoku/resource/MPLUS1p-Medium.ttf +0 -0
- yomitoku/resource/charset.txt +1 -0
- yomitoku/table_structure_recognizer.py +244 -0
- yomitoku/text_detector.py +103 -0
- yomitoku/text_recognizer.py +128 -0
- yomitoku/utils/__init__.py +0 -0
- yomitoku/utils/graph.py +20 -0
- yomitoku/utils/logger.py +15 -0
- yomitoku/utils/misc.py +102 -0
- yomitoku/utils/visualizer.py +179 -0
- yomitoku-0.4.0.post1.dev0.dist-info/METADATA +127 -0
- yomitoku-0.4.0.post1.dev0.dist-info/RECORD +52 -0
- yomitoku-0.4.0.post1.dev0.dist-info/WHEEL +4 -0
- yomitoku-0.4.0.post1.dev0.dist-info/entry_points.txt +2 -0
- yomitoku/models/layers/rtdetr_backbone.py
@@ -0,0 +1,333 @@
+"""Copyright(c) 2023 lyuwenyu. All Rights Reserved.
+"""
+
+from collections import OrderedDict
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from .activate import get_activation
+
+ResNet_cfg = {
+    18: [2, 2, 2, 2],
+    34: [3, 4, 6, 3],
+    50: [3, 4, 6, 3],
+    101: [3, 4, 23, 3],
+    # 152: [3, 8, 36, 3],
+}
+
+
+class ConvNormLayer(nn.Module):
+    def __init__(
+        self,
+        ch_in,
+        ch_out,
+        kernel_size,
+        stride,
+        padding=None,
+        bias=False,
+        act=None,
+    ):
+        super().__init__()
+        self.conv = nn.Conv2d(
+            ch_in,
+            ch_out,
+            kernel_size,
+            stride,
+            padding=(kernel_size - 1) // 2 if padding is None else padding,
+            bias=bias,
+        )
+        self.norm = nn.BatchNorm2d(ch_out)
+        self.act = get_activation(act)
+
+    def forward(self, x):
+        return self.act(self.norm(self.conv(x)))
+
+
+class BasicBlock(nn.Module):
+    expansion = 1
+
+    def __init__(
+        self, ch_in, ch_out, stride, shortcut, act="relu", variant="b"
+    ):
+        super().__init__()
+
+        self.shortcut = shortcut
+
+        if not shortcut:
+            if variant == "d" and stride == 2:
+                self.short = nn.Sequential(
+                    OrderedDict(
+                        [
+                            ("pool", nn.AvgPool2d(2, 2, 0, ceil_mode=True)),
+                            ("conv", ConvNormLayer(ch_in, ch_out, 1, 1)),
+                        ]
+                    )
+                )
+            else:
+                self.short = ConvNormLayer(ch_in, ch_out, 1, stride)
+
+        self.branch2a = ConvNormLayer(ch_in, ch_out, 3, stride, act=act)
+        self.branch2b = ConvNormLayer(ch_out, ch_out, 3, 1, act=None)
+        self.act = nn.Identity() if act is None else get_activation(act)
+
+    def forward(self, x):
+        out = self.branch2a(x)
+        out = self.branch2b(out)
+        if self.shortcut:
+            short = x
+        else:
+            short = self.short(x)
+
+        out = out + short
+        out = self.act(out)
+
+        return out
+
+
+class BottleNeck(nn.Module):
+    expansion = 4
+
+    def __init__(
+        self, ch_in, ch_out, stride, shortcut, act="relu", variant="b"
+    ):
+        super().__init__()
+
+        if variant == "a":
+            stride1, stride2 = stride, 1
+        else:
+            stride1, stride2 = 1, stride
+
+        width = ch_out
+
+        self.branch2a = ConvNormLayer(ch_in, width, 1, stride1, act=act)
+        self.branch2b = ConvNormLayer(width, width, 3, stride2, act=act)
+        self.branch2c = ConvNormLayer(width, ch_out * self.expansion, 1, 1)
+
+        self.shortcut = shortcut
+        if not shortcut:
+            if variant == "d" and stride == 2:
+                self.short = nn.Sequential(
+                    OrderedDict(
+                        [
+                            ("pool", nn.AvgPool2d(2, 2, 0, ceil_mode=True)),
+                            (
+                                "conv",
+                                ConvNormLayer(
+                                    ch_in, ch_out * self.expansion, 1, 1
+                                ),
+                            ),
+                        ]
+                    )
+                )
+            else:
+                self.short = ConvNormLayer(
+                    ch_in, ch_out * self.expansion, 1, stride
+                )
+
+        self.act = nn.Identity() if act is None else get_activation(act)
+
+    def forward(self, x):
+        out = self.branch2a(x)
+        out = self.branch2b(out)
+        out = self.branch2c(out)
+
+        if self.shortcut:
+            short = x
+        else:
+            short = self.short(x)
+
+        out = out + short
+        out = self.act(out)
+
+        return out
+
+
+class Blocks(nn.Module):
+    def __init__(
+        self, block, ch_in, ch_out, count, stage_num, act="relu", variant="b"
+    ):
+        super().__init__()
+
+        self.blocks = nn.ModuleList()
+        for i in range(count):
+            self.blocks.append(
+                block(
+                    ch_in,
+                    ch_out,
+                    stride=2 if i == 0 and stage_num != 2 else 1,
+                    shortcut=False if i == 0 else True,
+                    variant=variant,
+                    act=act,
+                )
+            )
+
+            if i == 0:
+                ch_in = ch_out * block.expansion
+
+    def forward(self, x):
+        out = x
+        for block in self.blocks:
+            out = block(out)
+        return out
+
+
+class FrozenBatchNorm2d(nn.Module):
+    """copy and modified from https://github.com/facebookresearch/detr/blob/master/models/backbone.py
+    BatchNorm2d where the batch statistics and the affine parameters are fixed.
+    Copy-paste from torchvision.misc.ops with added eps before rsqrt,
+    without which any other models than torchvision.models.resnet[18,34,50,101]
+    produce nans.
+    """
+
+    def __init__(self, num_features, eps=1e-5):
+        super(FrozenBatchNorm2d, self).__init__()
+        n = num_features
+        self.register_buffer("weight", torch.ones(n))
+        self.register_buffer("bias", torch.zeros(n))
+        self.register_buffer("running_mean", torch.zeros(n))
+        self.register_buffer("running_var", torch.ones(n))
+        self.eps = eps
+        self.num_features = n
+
+    def _load_from_state_dict(
+        self,
+        state_dict,
+        prefix,
+        local_metadata,
+        strict,
+        missing_keys,
+        unexpected_keys,
+        error_msgs,
+    ):
+        num_batches_tracked_key = prefix + "num_batches_tracked"
+        if num_batches_tracked_key in state_dict:
+            del state_dict[num_batches_tracked_key]
+
+        super(FrozenBatchNorm2d, self)._load_from_state_dict(
+            state_dict,
+            prefix,
+            local_metadata,
+            strict,
+            missing_keys,
+            unexpected_keys,
+            error_msgs,
+        )
+
+    def forward(self, x):
+        # move reshapes to the beginning
+        # to make it fuser-friendly
+        w = self.weight.reshape(1, -1, 1, 1)
+        b = self.bias.reshape(1, -1, 1, 1)
+        rv = self.running_var.reshape(1, -1, 1, 1)
+        rm = self.running_mean.reshape(1, -1, 1, 1)
+        scale = w * (rv + self.eps).rsqrt()
+        bias = b - rm * scale
+        return x * scale + bias
+
+    def extra_repr(self):
+        return "{num_features}, eps={eps}".format(**self.__dict__)
+
+
+def freeze_batch_norm2d(module: nn.Module) -> nn.Module:
+    if isinstance(module, nn.BatchNorm2d):
+        module = FrozenBatchNorm2d(module.num_features)
+    else:
+        for name, child in module.named_children():
+            _child = freeze_batch_norm2d(child)
+            if _child is not child:
+                setattr(module, name, _child)
+    return module
+
+
+class PResNet(nn.Module):
+    def __init__(
+        self,
+        depth,
+        variant="d",
+        num_stages=4,
+        return_idx=[0, 1, 2, 3],
+        act="relu",
+        freeze_at=-1,
+        freeze_norm=True,
+    ):
+        super().__init__()
+
+        block_nums = ResNet_cfg[depth]
+        ch_in = 64
+        if variant in ["c", "d"]:
+            conv_def = [
+                [3, ch_in // 2, 3, 2, "conv1_1"],
+                [ch_in // 2, ch_in // 2, 3, 1, "conv1_2"],
+                [ch_in // 2, ch_in, 3, 1, "conv1_3"],
+            ]
+        else:
+            conv_def = [[3, ch_in, 7, 2, "conv1_1"]]
+
+        self.conv1 = nn.Sequential(
+            OrderedDict(
+                [
+                    (name, ConvNormLayer(cin, cout, k, s, act=act))
+                    for cin, cout, k, s, name in conv_def
+                ]
+            )
+        )
+
+        ch_out_list = [64, 128, 256, 512]
+        block = BottleNeck if depth >= 50 else BasicBlock
+
+        _out_channels = [block.expansion * v for v in ch_out_list]
+        _out_strides = [4, 8, 16, 32]
+
+        self.res_layers = nn.ModuleList()
+        for i in range(num_stages):
+            stage_num = i + 2
+            self.res_layers.append(
+                Blocks(
+                    block,
+                    ch_in,
+                    ch_out_list[i],
+                    block_nums[i],
+                    stage_num,
+                    act=act,
+                    variant=variant,
+                )
+            )
+            ch_in = _out_channels[i]
+
+        self.return_idx = return_idx
+        self.out_channels = [_out_channels[_i] for _i in return_idx]
+        self.out_strides = [_out_strides[_i] for _i in return_idx]
+
+        if freeze_at >= 0:
+            self._freeze_parameters(self.conv1)
+            for i in range(min(freeze_at, num_stages)):
+                self._freeze_parameters(self.res_layers[i])
+
+        if freeze_norm:
+            self._freeze_norm(self)
+
+    def _freeze_parameters(self, m: nn.Module):
+        for p in m.parameters():
+            p.requires_grad = False
+
+    def _freeze_norm(self, m: nn.Module):
+        if isinstance(m, nn.BatchNorm2d):
+            m = FrozenBatchNorm2d(m.num_features)
+        else:
+            for name, child in m.named_children():
+                _child = self._freeze_norm(child)
+                if _child is not child:
+                    setattr(m, name, _child)
+        return m
+
+    def forward(self, x):
+        conv1 = self.conv1(x)
+        x = F.max_pool2d(conv1, kernel_size=3, stride=2, padding=1)
+        outs = []
+        for idx, stage in enumerate(self.res_layers):
+            x = stage(x)
+            if idx in self.return_idx:
+                outs.append(x)
+        return outs
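
For orientation, the sketch below shows how this added backbone behaves end to end. It is illustrative only, not code shipped in the package: the import path assumes the wheel is installed as published, and the depth=50 / return_idx=[1, 2, 3] arguments are assumptions picked to mirror a typical RT-DETR setup rather than values taken from yomitoku's config files.

import torch

from yomitoku.models.layers.rtdetr_backbone import FrozenBatchNorm2d, PResNet

# Hypothetical configuration for demonstration; the package's real defaults
# live in yomitoku/configs/cfg_*.py.
backbone = PResNet(depth=50, variant="d", return_idx=[1, 2, 3])

# freeze_norm=True (the default) has already replaced every nn.BatchNorm2d
# with FrozenBatchNorm2d, so batch statistics stay fixed even in train mode.
assert any(isinstance(m, FrozenBatchNorm2d) for m in backbone.modules())

with torch.no_grad():
    feats = backbone(torch.randn(1, 3, 640, 640))

# return_idx=[1, 2, 3] selects strides 8/16/32; for a 640x640 input the
# returned maps are (1, 512, 80, 80), (1, 1024, 40, 40), (1, 2048, 20, 20).
for stride, f in zip(backbone.out_strides, feats):
    print(stride, tuple(f.shape))

Note that checkpoints trained with ordinary BatchNorm2d still load into the frozen layers: FrozenBatchNorm2d._load_from_state_dict drops the num_batches_tracked key, which is the only buffer a frozen layer does not keep.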