xfmr-zem 0.2.2__py3-none-any.whl → 0.2.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. xfmr_zem/cli.py +32 -3
  2. xfmr_zem/client.py +59 -8
  3. xfmr_zem/server.py +21 -4
  4. xfmr_zem/servers/data_juicer/server.py +1 -1
  5. xfmr_zem/servers/instruction_gen/server.py +1 -1
  6. xfmr_zem/servers/io/server.py +1 -1
  7. xfmr_zem/servers/llm/parameters.yml +10 -0
  8. xfmr_zem/servers/nemo_curator/server.py +1 -1
  9. xfmr_zem/servers/ocr/deepdoc_vietocr/__init__.py +90 -0
  10. xfmr_zem/servers/ocr/deepdoc_vietocr/implementations.py +1286 -0
  11. xfmr_zem/servers/ocr/deepdoc_vietocr/layout_recognizer.py +562 -0
  12. xfmr_zem/servers/ocr/deepdoc_vietocr/ocr.py +512 -0
  13. xfmr_zem/servers/ocr/deepdoc_vietocr/onnx/.gitattributes +35 -0
  14. xfmr_zem/servers/ocr/deepdoc_vietocr/onnx/README.md +5 -0
  15. xfmr_zem/servers/ocr/deepdoc_vietocr/onnx/ocr.res +6623 -0
  16. xfmr_zem/servers/ocr/deepdoc_vietocr/operators.py +725 -0
  17. xfmr_zem/servers/ocr/deepdoc_vietocr/phases.py +191 -0
  18. xfmr_zem/servers/ocr/deepdoc_vietocr/pipeline.py +561 -0
  19. xfmr_zem/servers/ocr/deepdoc_vietocr/postprocess.py +370 -0
  20. xfmr_zem/servers/ocr/deepdoc_vietocr/recognizer.py +436 -0
  21. xfmr_zem/servers/ocr/deepdoc_vietocr/table_structure_recognizer.py +569 -0
  22. xfmr_zem/servers/ocr/deepdoc_vietocr/utils/__init__.py +81 -0
  23. xfmr_zem/servers/ocr/deepdoc_vietocr/utils/file_utils.py +246 -0
  24. xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/__init__.py +0 -0
  25. xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/config/base.yml +58 -0
  26. xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/config/vgg-seq2seq.yml +38 -0
  27. xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/model/__init__.py +0 -0
  28. xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/model/backbone/cnn.py +25 -0
  29. xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/model/backbone/vgg.py +51 -0
  30. xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/model/seqmodel/seq2seq.py +175 -0
  31. xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/model/transformerocr.py +29 -0
  32. xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/model/vocab.py +36 -0
  33. xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/tool/config.py +37 -0
  34. xfmr_zem/servers/ocr/deepdoc_vietocr/vietocr/tool/translate.py +111 -0
  35. xfmr_zem/servers/ocr/engines.py +242 -0
  36. xfmr_zem/servers/ocr/install_models.py +63 -0
  37. xfmr_zem/servers/ocr/parameters.yml +4 -0
  38. xfmr_zem/servers/ocr/server.py +44 -0
  39. xfmr_zem/servers/profiler/parameters.yml +4 -0
  40. xfmr_zem/servers/sinks/parameters.yml +6 -0
  41. xfmr_zem/servers/unstructured/parameters.yml +6 -0
  42. xfmr_zem/servers/unstructured/server.py +62 -0
  43. xfmr_zem/zenml_wrapper.py +20 -7
  44. {xfmr_zem-0.2.2.dist-info → xfmr_zem-0.2.5.dist-info}/METADATA +19 -1
  45. xfmr_zem-0.2.5.dist-info/RECORD +58 -0
  46. xfmr_zem-0.2.2.dist-info/RECORD +0 -23
  47. /xfmr_zem/servers/data_juicer/{parameter.yaml → parameters.yml} +0 -0
  48. /xfmr_zem/servers/instruction_gen/{parameter.yaml → parameters.yml} +0 -0
  49. /xfmr_zem/servers/io/{parameter.yaml → parameters.yml} +0 -0
  50. /xfmr_zem/servers/nemo_curator/{parameter.yaml → parameters.yml} +0 -0
  51. {xfmr_zem-0.2.2.dist-info → xfmr_zem-0.2.5.dist-info}/WHEEL +0 -0
  52. {xfmr_zem-0.2.2.dist-info → xfmr_zem-0.2.5.dist-info}/entry_points.txt +0 -0
  53. {xfmr_zem-0.2.2.dist-info → xfmr_zem-0.2.5.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,725 @@
1
+ #
2
+ # Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ #
16
+
17
+ import logging
18
+ import sys
19
+ import six
20
+ import cv2
21
+ import numpy as np
22
+ import math
23
+ from PIL import Image
24
+
25
+
26
class DecodeImage:
    """Decode raw image bytes into a numpy array.

    Args:
        img_mode (str): target channel layout; 'RGB' flips the decoded BGR
            image, 'GRAY' runs a GRAY2BGR conversion, anything else keeps
            the cv2 default (BGR).
        channel_first (bool): if True, transpose the decoded HWC image to CHW.
        ignore_orientation (bool): if True, decode ignoring the EXIF
            orientation flag.
    """

    def __init__(self,
                 img_mode='RGB',
                 channel_first=False,
                 ignore_orientation=False,
                 **kwargs):
        self.img_mode = img_mode
        self.channel_first = channel_first
        self.ignore_orientation = ignore_orientation

    def __call__(self, data):
        """Decode data['image'] (bytes) in place; returns None on decode failure."""
        img = data['image']
        # Python 2 is no longer supported, so the former six.PY2 str-branch
        # is dead code: the payload must be a non-empty bytes object.
        assert isinstance(img, bytes) and len(
            img) > 0, "invalid input 'img' in DecodeImage"
        img = np.frombuffer(img, dtype='uint8')
        if self.ignore_orientation:
            img = cv2.imdecode(img, cv2.IMREAD_IGNORE_ORIENTATION |
                               cv2.IMREAD_COLOR)
        else:
            img = cv2.imdecode(img, 1)  # 1 == cv2.IMREAD_COLOR (BGR)
        if img is None:
            return None
        if self.img_mode == 'GRAY':
            img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
        elif self.img_mode == 'RGB':
            assert img.shape[2] == 3, 'invalid shape of image[%s]' % (
                img.shape)
            img = img[:, :, ::-1]  # BGR -> RGB

        if self.channel_first:
            img = img.transpose((2, 0, 1))

        data['image'] = img
        return data
66
+
67
+
68
class StandardizeImag:
    """Normalise an image with per-channel mean/std statistics.

    Args:
        mean (list): per-channel mean subtracted from the image
        std (list): per-channel std the image is divided by
        is_scale (bool): whether to scale pixel values by 1/255 first
        norm_type (str): one of ['mean_std', 'none']
    """

    def __init__(self, mean, std, is_scale=True, norm_type='mean_std'):
        self.mean = mean
        self.std = std
        self.is_scale = is_scale
        self.norm_type = norm_type

    def __call__(self, im, im_info):
        """Normalise *im* and pass *im_info* through unchanged.

        Args:
            im (np.ndarray): HWC image
            im_info (dict): image metadata (returned as-is)
        Returns:
            tuple: (normalised float32 image, im_info)
        """
        out = im.astype(np.float32, copy=False)
        if self.is_scale:
            out *= 1.0 / 255.0
        if self.norm_type == 'mean_std':
            # Broadcast (C,) statistics over the H and W axes.
            channel_mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
            channel_std = np.array(self.std)[np.newaxis, np.newaxis, :]
            out -= channel_mean
            out /= channel_std
        return out, im_info
103
+
104
+
105
class NormalizeImage:
    """ normalize image such as subtract mean, divide std
    """

    def __init__(self, scale=None, mean=None, std=None, order='chw', **kwargs):
        # NOTE(review): eval() on a config-supplied string (e.g. "1./255.")
        # executes arbitrary code if the config is untrusted -- consider a
        # safe numeric parser instead.
        if isinstance(scale, str):
            scale = eval(scale)
        self.scale = np.float32(scale if scale is not None else 1.0 / 255.0)
        # Defaults are the standard ImageNet statistics.
        mean = mean if mean is not None else [0.485, 0.456, 0.406]
        std = std if std is not None else [0.229, 0.224, 0.225]

        # Broadcast shape depends on the channel order ('chw' vs 'hwc').
        shape = (3, 1, 1) if order == 'chw' else (1, 1, 3)
        self.mean = np.array(mean).reshape(shape).astype('float32')
        self.std = np.array(std).reshape(shape).astype('float32')

    def __call__(self, data):
        """Replace data['image'] with (img * scale - mean) / std as float32."""
        img = data['image']
        from PIL import Image
        if isinstance(img, Image.Image):
            img = np.array(img)
        assert isinstance(img,
                          np.ndarray), "invalid input 'img' in NormalizeImage"
        data['image'] = (
            img.astype('float32') * self.scale - self.mean) / self.std
        return data
130
+
131
+
132
class ToCHWImage:
    """Transpose an HWC image (or PIL image) to CHW layout."""

    def __init__(self, **kwargs):
        pass

    def __call__(self, data):
        from PIL import Image
        img = data['image']
        if isinstance(img, Image.Image):
            img = np.array(img)
        data['image'] = img.transpose((2, 0, 1))
        return data
146
+
147
+
148
class KeepKeys:
    """Project a data dict onto an ordered list of values for *keep_keys*."""

    def __init__(self, keep_keys, **kwargs):
        self.keep_keys = keep_keys

    def __call__(self, data):
        # Preserve the configured key order in the output list.
        return [data[key] for key in self.keep_keys]
157
+
158
+
159
class Pad:
    """Pad an image (bottom/right, zero fill) to a fixed size or to a
    multiple of *size_div*.

    Args:
        size (int|list|tuple|None): target (h, w); an int means a square.
            When None, each side is rounded up to a multiple of *size_div*.
        size_div (int): stride the padded dimensions must be divisible by.
    """

    def __init__(self, size=None, size_div=32, **kwargs):
        if size is not None and not isinstance(size, (int, list, tuple)):
            raise TypeError("Type of target_size is invalid. Now is {}".format(
                type(size)))
        if isinstance(size, int):
            size = [size, size]
        self.size = size
        self.size_div = size_div

    def __call__(self, data):
        img = data['image']
        img_h, img_w = img.shape[0], img.shape[1]
        if self.size:
            resize_h2, resize_w2 = self.size
            # Allow an image that already matches the target exactly
            # (zero padding); the previous strict '<' rejected that
            # perfectly valid case.
            assert (
                img_h <= resize_h2 and img_w <= resize_w2
            ), '(h, w) of target size should be no smaller than (img_h, img_w)'
        else:
            # Round each side up to the next multiple of size_div
            # (at least one full stride).
            resize_h2 = max(
                int(math.ceil(img.shape[0] / self.size_div) * self.size_div),
                self.size_div)
            resize_w2 = max(
                int(math.ceil(img.shape[1] / self.size_div) * self.size_div),
                self.size_div)
        img = cv2.copyMakeBorder(
            img,
            0,
            resize_h2 - img_h,
            0,
            resize_w2 - img_w,
            cv2.BORDER_CONSTANT,
            value=0)
        data['image'] = img
        return data
195
+
196
+
197
class LinearResize:
    """resize image by target_size and max_size
    Args:
        target_size (int): the target size of image
        keep_ratio (bool): whether keep_ratio or not, default true
        interp (int): method of resize
    """

    def __init__(self, target_size, keep_ratio=True, interp=cv2.INTER_LINEAR):
        if isinstance(target_size, int):
            target_size = [target_size, target_size]
        self.target_size = target_size
        self.keep_ratio = keep_ratio
        self.interp = interp

    def __call__(self, im, im_info):
        """
        Args:
            im (np.ndarray): image (np.ndarray)
            im_info (dict): info of image
        Returns:
            im (np.ndarray): processed image (np.ndarray)
            im_info (dict): info of processed image
        """
        assert len(self.target_size) == 2
        assert self.target_size[0] > 0 and self.target_size[1] > 0
        _im_channel = im.shape[2]
        im_scale_y, im_scale_x = self.generate_scale(im)
        # Scale via fx/fy rather than an explicit size, so both axes use
        # the ratios computed from the original shape.
        im = cv2.resize(
            im,
            None,
            None,
            fx=im_scale_x,
            fy=im_scale_y,
            interpolation=self.interp)
        im_info['im_shape'] = np.array(im.shape[:2]).astype('float32')
        im_info['scale_factor'] = np.array(
            [im_scale_y, im_scale_x]).astype('float32')
        return im, im_info

    def generate_scale(self, im):
        """
        Args:
            im (np.ndarray): image (np.ndarray)
        Returns:
            im_scale_x: the resize ratio of X
            im_scale_y: the resize ratio of Y
        """
        origin_shape = im.shape[:2]
        _im_c = im.shape[2]
        if self.keep_ratio:
            # Fit the short side to the small target, then shrink further if
            # the long side would exceed the large target.
            im_size_min = np.min(origin_shape)
            im_size_max = np.max(origin_shape)
            target_size_min = np.min(self.target_size)
            target_size_max = np.max(self.target_size)
            im_scale = float(target_size_min) / float(im_size_min)
            if np.round(im_scale * im_size_max) > target_size_max:
                im_scale = float(target_size_max) / float(im_size_max)
            im_scale_x = im_scale
            im_scale_y = im_scale
        else:
            # Independent per-axis scaling straight to (h, w).
            resize_h, resize_w = self.target_size
            im_scale_y = resize_h / float(origin_shape[0])
            im_scale_x = resize_w / float(origin_shape[1])
        return im_scale_y, im_scale_x
262
+
263
+
264
class Resize:
    """Resize an image to a fixed (h, w) and rescale any text polygons
    ('polys') by the same ratios."""

    def __init__(self, size=(640, 640), **kwargs):
        self.size = size

    def resize_image(self, img):
        """Return the resized image and the [ratio_h, ratio_w] applied."""
        target_h, target_w = self.size
        src_h, src_w = img.shape[:2]  # (h, w, c)
        scaled = cv2.resize(img, (int(target_w), int(target_h)))
        return scaled, [float(target_h) / src_h, float(target_w) / src_w]

    def __call__(self, data):
        img_resized, (ratio_h, ratio_w) = self.resize_image(data['image'])
        if 'polys' in data:
            # Scale every polygon point with the same ratios as the image.
            rescaled = [
                [[pt[0] * ratio_w, pt[1] * ratio_h] for pt in box]
                for box in data['polys']
            ]
            data['polys'] = np.array(rescaled, dtype=np.float32)
        data['image'] = img_resized
        return data
292
+
293
+
294
class DetResizeForTest:
    """Inference-time resize for text detection.

    The resize strategy is selected from the kwargs:
      * 'image_shape'     -> type 1: resize to a fixed (h, w)
      * 'limit_side_len'  -> type 0: constrain a side, snap to multiples of 32
      * 'resize_long'     -> type 2: fix the long side, snap to multiples of 128
      * none of the above -> type 0 with limit_side_len=736, limit_type='min'
    """

    def __init__(self, **kwargs):
        super(DetResizeForTest, self).__init__()
        self.resize_type = 0
        self.keep_ratio = False
        if 'image_shape' in kwargs:
            self.image_shape = kwargs['image_shape']
            self.resize_type = 1
            if 'keep_ratio' in kwargs:
                self.keep_ratio = kwargs['keep_ratio']
        elif 'limit_side_len' in kwargs:
            self.limit_side_len = kwargs['limit_side_len']
            self.limit_type = kwargs.get('limit_type', 'min')
        elif 'resize_long' in kwargs:
            self.resize_type = 2
            self.resize_long = kwargs.get('resize_long', 960)
        else:
            self.limit_side_len = 736
            self.limit_type = 'min'

    def __call__(self, data):
        """Resize data['image'] and record [src_h, src_w, ratio_h, ratio_w]
        in data['shape']."""
        img = data['image']
        src_h, src_w, _ = img.shape
        # Tiny images are first zero-padded to at least 32x32.
        if sum([src_h, src_w]) < 64:
            img = self.image_padding(img)

        if self.resize_type == 0:
            # img, shape = self.resize_image_type0(img)
            img, [ratio_h, ratio_w] = self.resize_image_type0(img)
        elif self.resize_type == 2:
            img, [ratio_h, ratio_w] = self.resize_image_type2(img)
        else:
            # img, shape = self.resize_image_type1(img)
            img, [ratio_h, ratio_w] = self.resize_image_type1(img)
        data['image'] = img
        data['shape'] = np.array([src_h, src_w, ratio_h, ratio_w])
        return data

    def image_padding(self, im, value=0):
        """Pad *im* (bottom/right) with *value* so each side is >= 32."""
        h, w, c = im.shape
        im_pad = np.zeros((max(32, h), max(32, w), c), np.uint8) + value
        im_pad[:h, :w, :] = im
        return im_pad

    def resize_image_type1(self, img):
        """Resize to self.image_shape; with keep_ratio, the width follows the
        aspect ratio and is rounded up to a multiple of 32."""
        resize_h, resize_w = self.image_shape
        ori_h, ori_w = img.shape[:2]  # (h, w, c)
        if self.keep_ratio is True:
            resize_w = ori_w * resize_h / ori_h
            N = math.ceil(resize_w / 32)
            resize_w = N * 32
        ratio_h = float(resize_h) / ori_h
        ratio_w = float(resize_w) / ori_w
        img = cv2.resize(img, (int(resize_w), int(resize_h)))
        # return img, np.array([ori_h, ori_w])
        return img, [ratio_h, ratio_w]

    def resize_image_type0(self, img):
        """
        resize image to a size multiple of 32 which is required by the network
        args:
            img(array): array with shape [h, w, c]
        return(tuple):
            img, (ratio_h, ratio_w)
        """
        limit_side_len = self.limit_side_len
        h, w, c = img.shape

        # limit the max side
        if self.limit_type == 'max':
            if max(h, w) > limit_side_len:
                if h > w:
                    ratio = float(limit_side_len) / h
                else:
                    ratio = float(limit_side_len) / w
            else:
                ratio = 1.
        elif self.limit_type == 'min':
            # Grow the image until the short side reaches the limit.
            if min(h, w) < limit_side_len:
                if h < w:
                    ratio = float(limit_side_len) / h
                else:
                    ratio = float(limit_side_len) / w
            else:
                ratio = 1.
        elif self.limit_type == 'resize_long':
            ratio = float(limit_side_len) / max(h, w)
        else:
            raise Exception('not support limit type, image ')
        resize_h = int(h * ratio)
        resize_w = int(w * ratio)

        # Snap both sides to multiples of 32 (minimum 32).
        resize_h = max(int(round(resize_h / 32) * 32), 32)
        resize_w = max(int(round(resize_w / 32) * 32), 32)

        try:
            if int(resize_w) <= 0 or int(resize_h) <= 0:
                return None, (None, None)
            img = cv2.resize(img, (int(resize_w), int(resize_h)))
        except BaseException:
            # NOTE(review): a failed resize aborts the whole process here.
            logging.exception("{} {} {}".format(img.shape, resize_w, resize_h))
            sys.exit(0)
        ratio_h = resize_h / float(h)
        ratio_w = resize_w / float(w)
        return img, [ratio_h, ratio_w]

    def resize_image_type2(self, img):
        """Fix the long side at self.resize_long, then round both sides up to
        a multiple of 128."""
        h, w, _ = img.shape

        resize_w = w
        resize_h = h

        if resize_h > resize_w:
            ratio = float(self.resize_long) / resize_h
        else:
            ratio = float(self.resize_long) / resize_w

        resize_h = int(resize_h * ratio)
        resize_w = int(resize_w * ratio)

        max_stride = 128
        resize_h = (resize_h + max_stride - 1) // max_stride * max_stride
        resize_w = (resize_w + max_stride - 1) // max_stride * max_stride
        img = cv2.resize(img, (int(resize_w), int(resize_h)))
        ratio_h = resize_h / float(h)
        ratio_w = resize_w / float(w)

        return img, [ratio_h, ratio_w]
422
+
423
+
424
class E2EResizeForTest:
    """Inference-time resize for end-to-end text spotting; the strategy
    depends on the dataset named in kwargs['valid_set']."""

    def __init__(self, **kwargs):
        super(E2EResizeForTest, self).__init__()
        self.max_side_len = kwargs['max_side_len']
        self.valid_set = kwargs['valid_set']

    def __call__(self, data):
        """Resize data['image'] and record [src_h, src_w, ratio_h, ratio_w]
        in data['shape']."""
        img = data['image']
        src_h, src_w, _ = img.shape
        if self.valid_set == 'totaltext':
            im_resized, [ratio_h, ratio_w] = self.resize_image_for_totaltext(
                img, max_side_len=self.max_side_len)
        else:
            im_resized, (ratio_h, ratio_w) = self.resize_image(
                img, max_side_len=self.max_side_len)
        data['image'] = im_resized
        data['shape'] = np.array([src_h, src_w, ratio_h, ratio_w])
        return data

    def resize_image_for_totaltext(self, im, max_side_len=512):
        """Upscale by 1.25x (capped so the height stays within max_side_len),
        then round both sides up to a multiple of 128."""
        h, w, _ = im.shape
        resize_w = w
        resize_h = h
        ratio = 1.25
        if h * ratio > max_side_len:
            ratio = float(max_side_len) / resize_h
        resize_h = int(resize_h * ratio)
        resize_w = int(resize_w * ratio)

        max_stride = 128
        resize_h = (resize_h + max_stride - 1) // max_stride * max_stride
        resize_w = (resize_w + max_stride - 1) // max_stride * max_stride
        im = cv2.resize(im, (int(resize_w), int(resize_h)))
        ratio_h = resize_h / float(h)
        ratio_w = resize_w / float(w)
        return im, (ratio_h, ratio_w)

    def resize_image(self, im, max_side_len=512):
        """
        resize image to a size multiple of max_stride which is required by the network
        :param im: the resized image
        :param max_side_len: limit of max image size to avoid out of memory in gpu
        :return: the resized image and the resize ratio
        """
        h, w, _ = im.shape

        resize_w = w
        resize_h = h

        # Fix the longer side
        if resize_h > resize_w:
            ratio = float(max_side_len) / resize_h
        else:
            ratio = float(max_side_len) / resize_w

        resize_h = int(resize_h * ratio)
        resize_w = int(resize_w * ratio)

        max_stride = 128
        resize_h = (resize_h + max_stride - 1) // max_stride * max_stride
        resize_w = (resize_w + max_stride - 1) // max_stride * max_stride
        im = cv2.resize(im, (int(resize_w), int(resize_h)))
        ratio_h = resize_h / float(h)
        ratio_w = resize_w / float(w)

        return im, (ratio_h, ratio_w)
490
+
491
+
492
class KieResize:
    """Resize for key-information-extraction models: the image is scaled to
    fit a fixed 1024x1024 canvas and its boxes are rescaled to match."""

    def __init__(self, **kwargs):
        super(KieResize, self).__init__()
        # kwargs['img_scale'] is (max_side, min_side); note resize_image below
        # uses a hard-coded [512, 1024] scale rather than these values.
        self.max_side, self.min_side = kwargs['img_scale'][0], kwargs[
            'img_scale'][1]

    def __call__(self, data):
        """Resize data['image'] and data['points']; keeps the originals in
        'ori_image' / 'ori_boxes' and stores the new (h, w) in 'shape'."""
        img = data['image']
        points = data['points']
        src_h, src_w, _ = img.shape
        im_resized, scale_factor, [ratio_h, ratio_w], [new_h, new_w] = \
            self.resize_image(img)
        resize_points = self.resize_boxes(img, points, scale_factor)
        data['ori_image'] = img
        data['ori_boxes'] = points
        data['points'] = resize_points
        data['image'] = im_resized
        data['shape'] = np.array([new_h, new_w])
        return data

    def resize_image(self, img):
        """Scale to fit within 512 (short side) / 1024 (long side), snap to a
        multiple of 32, and paste into a zeroed 1024x1024 canvas."""
        norm_img = np.zeros([1024, 1024, 3], dtype='float32')
        scale = [512, 1024]
        h, w = img.shape[:2]
        max_long_edge = max(scale)
        max_short_edge = min(scale)
        # Keep aspect ratio: limited by whichever constraint is tighter.
        scale_factor = min(max_long_edge / max(h, w),
                           max_short_edge / min(h, w))
        resize_w, resize_h = int(w * float(scale_factor) + 0.5), int(h * float(
            scale_factor) + 0.5)
        max_stride = 32
        resize_h = (resize_h + max_stride - 1) // max_stride * max_stride
        resize_w = (resize_w + max_stride - 1) // max_stride * max_stride
        im = cv2.resize(img, (resize_w, resize_h))
        new_h, new_w = im.shape[:2]
        w_scale = new_w / w
        h_scale = new_h / h
        # Per-coordinate factors in [x, y, x, y] order for box rescaling.
        scale_factor = np.array(
            [w_scale, h_scale, w_scale, h_scale], dtype=np.float32)
        norm_img[:new_h, :new_w, :] = im
        return norm_img, scale_factor, [h_scale, w_scale], [new_h, new_w]

    def resize_boxes(self, im, points, scale_factor):
        """Scale boxes by *scale_factor* and clip them to the image bounds."""
        points = points * scale_factor
        img_shape = im.shape[:2]
        points[:, 0::2] = np.clip(points[:, 0::2], 0, img_shape[1])
        points[:, 1::2] = np.clip(points[:, 1::2], 0, img_shape[0])
        return points
540
+
541
+
542
class SRResize:
    """Prepare image pairs for super-resolution text recognition: the
    low-resolution input is normalised at (imgW/scale, imgH/scale) and, unless
    in inference mode, the high-resolution target at (imgW, imgH)."""

    def __init__(self,
                 imgH=32,
                 imgW=128,
                 down_sample_scale=4,
                 keep_ratio=False,
                 min_ratio=1,
                 mask=False,
                 infer_mode=False,
                 **kwargs):
        self.imgH = imgH
        self.imgW = imgW
        self.keep_ratio = keep_ratio
        self.min_ratio = min_ratio
        self.down_sample_scale = down_sample_scale
        self.mask = mask
        self.infer_mode = infer_mode

    def __call__(self, data):
        lr_transform = ResizeNormalize(
            (self.imgW // self.down_sample_scale,
             self.imgH // self.down_sample_scale))
        data["img_lr"] = lr_transform(data["image_lr"])
        if self.infer_mode:
            return data

        images_hr = data["image_hr"]
        # Read but unused downstream; kept so a missing 'label' still raises,
        # as in the original implementation.
        _label_strs = data["label"]
        hr_transform = ResizeNormalize((self.imgW, self.imgH))
        data["img_hr"] = hr_transform(images_hr)
        return data
577
+
578
+
579
class ResizeNormalize:
    """Resize a PIL image and return it as a CHW float32 array in [0, 1]."""

    def __init__(self, size, interpolation=Image.BICUBIC):
        self.size = size
        self.interpolation = interpolation

    def __call__(self, img):
        resized = img.resize(self.size, self.interpolation)
        arr = np.array(resized).astype("float32")
        return arr.transpose((2, 0, 1)) / 255
589
+
590
+
591
class GrayImageChannelFormat:
    """
    format gray scale image's channel: (3,h,w) -> (1,h,w)
    Args:
        inverse: inverse gray image
    """

    def __init__(self, inverse=False, **kwargs):
        self.inverse = inverse

    def __call__(self, data):
        src = data['image']
        gray = cv2.cvtColor(src, cv2.COLOR_BGR2GRAY)
        gray = np.expand_dims(gray, 0)  # (h, w) -> (1, h, w)
        data['image'] = np.abs(gray - 1) if self.inverse else gray
        data['src_image'] = src  # keep the original for downstream use
        return data
613
+
614
+
615
class Permute:
    """Transpose an HWC image to CHW; the im_info dict passes through
    unchanged."""

    def __init__(self, ):
        super(Permute, self).__init__()

    def __call__(self, im, im_info):
        """
        Args:
            im (np.ndarray): HWC image
            im_info (dict): image metadata, returned as-is
        Returns:
            tuple: (contiguous CHW copy of the image, im_info)
        """
        return im.transpose((2, 0, 1)).copy(), im_info
636
+
637
+
638
class PadStride:
    """Zero-pad a CHW image so h and w become multiples of the stride
    (needed by FPN models); a stride <= 0 disables padding entirely.

    Args:
        stride (int): required divisor of the padded height/width
    """

    def __init__(self, stride=0):
        self.coarsest_stride = stride

    def __call__(self, im, im_info):
        """
        Args:
            im (np.ndarray): CHW image
            im_info (dict): image metadata, returned as-is
        Returns:
            tuple: (padded float32 image, im_info)
        """
        stride = self.coarsest_stride
        if stride <= 0:
            return im, im_info
        c, h, w = im.shape
        padded_h = int(np.ceil(float(h) / stride) * stride)
        padded_w = int(np.ceil(float(w) / stride) * stride)
        padded = np.zeros((c, padded_h, padded_w), dtype=np.float32)
        padded[:, :h, :w] = im  # original content sits in the top-left corner
        return padded, im_info
665
+
666
+
667
def decode_image(im_file, im_info):
    """Load an RGB image and record its shape in *im_info*.

    Args:
        im_file (str|np.ndarray): image path, or an already-decoded array
        im_info (dict): metadata dict; 'im_shape' and 'scale_factor' are set
    Returns:
        tuple: (image array, updated im_info)
    """
    if isinstance(im_file, str):
        with open(im_file, 'rb') as f:
            raw = f.read()
        buf = np.frombuffer(raw, dtype='uint8')
        # cv2 decodes as BGR; convert to the RGB the models expect.
        im = cv2.imdecode(buf, 1)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
    else:
        im = im_file
    im_info['im_shape'] = np.array(im.shape[:2], dtype=np.float32)
    im_info['scale_factor'] = np.array([1., 1.], dtype=np.float32)
    return im, im_info
687
+
688
+
689
def preprocess(im, preprocess_ops):
    """Decode *im* and thread it through every operator in *preprocess_ops*.

    Each operator takes and returns (im, im_info), so they compose in order.
    """
    im_info = {
        'scale_factor': np.array([1., 1.], dtype=np.float32),
        'im_shape': None,
    }
    im, im_info = decode_image(im, im_info)
    for op in preprocess_ops:
        im, im_info = op(im, im_info)
    return im, im_info
700
+
701
+
702
def nms(bboxes, scores, iou_thresh):
    """Greedy non-maximum suppression.

    Args:
        bboxes (np.ndarray): (N, 4) boxes as [x1, y1, x2, y2]
        scores (np.ndarray): (N,) confidence scores
        iou_thresh (float): IoU above which a lower-scored box is suppressed
    Returns:
        list: indices of the kept boxes, highest score first
    """
    # np is already imported at module level; the former function-local
    # `import numpy as np` was redundant.
    x1 = bboxes[:, 0]
    y1 = bboxes[:, 1]
    x2 = bboxes[:, 2]
    y2 = bboxes[:, 3]
    # NOTE(review): areas omit the +1 used in the intersection below, so IoU
    # can slightly exceed 1 for integer boxes; kept as-is for compatibility.
    areas = (y2 - y1) * (x2 - x1)

    indices = []
    index = scores.argsort()[::-1]  # candidates ordered best-first
    while index.size > 0:
        i = index[0]
        indices.append(i)
        # Intersection of the current best box with every remaining candidate.
        x11 = np.maximum(x1[i], x1[index[1:]])
        y11 = np.maximum(y1[i], y1[index[1:]])
        x22 = np.minimum(x2[i], x2[index[1:]])
        y22 = np.minimum(y2[i], y2[index[1:]])
        w = np.maximum(0, x22 - x11 + 1)
        h = np.maximum(0, y22 - y11 + 1)
        overlaps = w * h
        ious = overlaps / (areas[i] + areas[index[1:]] - overlaps)
        # Drop candidates that overlap box i too much; +1 realigns indices
        # because ious was computed against index[1:].
        idx = np.where(ious <= iou_thresh)[0]
        index = index[idx + 1]
    return indices