yomitoku 0.5.2__tar.gz → 0.5.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (161)
  1. {yomitoku-0.5.2 → yomitoku-0.5.3}/PKG-INFO +1 -1
  2. {yomitoku-0.5.2 → yomitoku-0.5.3}/src/yomitoku/models/layers/activate.py +13 -0
  3. {yomitoku-0.5.2 → yomitoku-0.5.3}/src/yomitoku/models/layers/rtdetr_backbone.py +28 -6
  4. {yomitoku-0.5.2 → yomitoku-0.5.3}/src/yomitoku/models/layers/rtdetr_hybrid_encoder.py +31 -7
  5. {yomitoku-0.5.2 → yomitoku-0.5.3}/src/yomitoku/models/layers/rtdetrv2_decoder.py +56 -18
  6. {yomitoku-0.5.2 → yomitoku-0.5.3}/src/yomitoku/postprocessor/rtdetr_postprocessor.py +27 -5
  7. {yomitoku-0.5.2 → yomitoku-0.5.3}/uv.lock +1 -1
  8. {yomitoku-0.5.2 → yomitoku-0.5.3}/.github/release-drafter.yml +0 -0
  9. {yomitoku-0.5.2 → yomitoku-0.5.3}/.github/workflows/build-and-publish-docs.yaml +0 -0
  10. {yomitoku-0.5.2 → yomitoku-0.5.3}/.github/workflows/build-and-publish.yml +0 -0
  11. {yomitoku-0.5.2 → yomitoku-0.5.3}/.github/workflows/create-release.yml +0 -0
  12. {yomitoku-0.5.2 → yomitoku-0.5.3}/.github/workflows/lint-and-test.yml +0 -0
  13. {yomitoku-0.5.2 → yomitoku-0.5.3}/.gitignore +0 -0
  14. {yomitoku-0.5.2 → yomitoku-0.5.3}/.pre-commit-config.yaml +0 -0
  15. {yomitoku-0.5.2 → yomitoku-0.5.3}/.python-version +0 -0
  16. {yomitoku-0.5.2 → yomitoku-0.5.3}/README.md +0 -0
  17. {yomitoku-0.5.2 → yomitoku-0.5.3}/README_EN.md +0 -0
  18. {yomitoku-0.5.2 → yomitoku-0.5.3}/configs/layout_parser_rtdetrv2.yaml +0 -0
  19. {yomitoku-0.5.2 → yomitoku-0.5.3}/configs/table_structure_recognitizer.yaml +0 -0
  20. {yomitoku-0.5.2 → yomitoku-0.5.3}/configs/text_detector.yaml +0 -0
  21. {yomitoku-0.5.2 → yomitoku-0.5.3}/configs/text_recognizer.yaml +0 -0
  22. {yomitoku-0.5.2 → yomitoku-0.5.3}/demo/sample.pdf +0 -0
  23. {yomitoku-0.5.2 → yomitoku-0.5.3}/demo/setting_document_anaysis.py +0 -0
  24. {yomitoku-0.5.2 → yomitoku-0.5.3}/demo/simple_document_analysis.py +0 -0
  25. {yomitoku-0.5.2 → yomitoku-0.5.3}/demo/simple_layout.py +0 -0
  26. {yomitoku-0.5.2 → yomitoku-0.5.3}/demo/simple_ocr.py +0 -0
  27. {yomitoku-0.5.2 → yomitoku-0.5.3}/demo/text_detector.yaml +0 -0
  28. {yomitoku-0.5.2 → yomitoku-0.5.3}/dockerfile +0 -0
  29. {yomitoku-0.5.2 → yomitoku-0.5.3}/docs/assets/logo.svg +0 -0
  30. {yomitoku-0.5.2 → yomitoku-0.5.3}/docs/index.en.md +0 -0
  31. {yomitoku-0.5.2 → yomitoku-0.5.3}/docs/index.ja.md +0 -0
  32. {yomitoku-0.5.2 → yomitoku-0.5.3}/docs/installation.en.md +0 -0
  33. {yomitoku-0.5.2 → yomitoku-0.5.3}/docs/installation.ja.md +0 -0
  34. {yomitoku-0.5.2 → yomitoku-0.5.3}/docs/usage.en.md +0 -0
  35. {yomitoku-0.5.2 → yomitoku-0.5.3}/docs/usage.ja.md +0 -0
  36. {yomitoku-0.5.2 → yomitoku-0.5.3}/gallery.md +0 -0
  37. {yomitoku-0.5.2 → yomitoku-0.5.3}/mkdocs.yml +0 -0
  38. {yomitoku-0.5.2 → yomitoku-0.5.3}/pyproject.toml +0 -0
  39. {yomitoku-0.5.2 → yomitoku-0.5.3}/pytest.ini +0 -0
  40. {yomitoku-0.5.2 → yomitoku-0.5.3}/scripts/register_hugging_face_hub.py +0 -0
  41. {yomitoku-0.5.2 → yomitoku-0.5.3}/src/yomitoku/__init__.py +0 -0
  42. {yomitoku-0.5.2 → yomitoku-0.5.3}/src/yomitoku/base.py +0 -0
  43. {yomitoku-0.5.2 → yomitoku-0.5.3}/src/yomitoku/cli/__init__.py +0 -0
  44. {yomitoku-0.5.2 → yomitoku-0.5.3}/src/yomitoku/cli/main.py +0 -0
  45. {yomitoku-0.5.2 → yomitoku-0.5.3}/src/yomitoku/configs/__init__.py +0 -0
  46. {yomitoku-0.5.2 → yomitoku-0.5.3}/src/yomitoku/configs/cfg_layout_parser_rtdtrv2.py +0 -0
  47. {yomitoku-0.5.2 → yomitoku-0.5.3}/src/yomitoku/configs/cfg_table_structure_recognizer_rtdtrv2.py +0 -0
  48. {yomitoku-0.5.2 → yomitoku-0.5.3}/src/yomitoku/configs/cfg_text_detector_dbnet.py +0 -0
  49. {yomitoku-0.5.2 → yomitoku-0.5.3}/src/yomitoku/configs/cfg_text_recognizer_parseq.py +0 -0
  50. {yomitoku-0.5.2 → yomitoku-0.5.3}/src/yomitoku/constants.py +0 -0
  51. {yomitoku-0.5.2 → yomitoku-0.5.3}/src/yomitoku/data/__init__.py +0 -0
  52. {yomitoku-0.5.2 → yomitoku-0.5.3}/src/yomitoku/data/dataset.py +0 -0
  53. {yomitoku-0.5.2 → yomitoku-0.5.3}/src/yomitoku/data/functions.py +0 -0
  54. {yomitoku-0.5.2 → yomitoku-0.5.3}/src/yomitoku/document_analyzer.py +0 -0
  55. {yomitoku-0.5.2 → yomitoku-0.5.3}/src/yomitoku/export/__init__.py +0 -0
  56. {yomitoku-0.5.2 → yomitoku-0.5.3}/src/yomitoku/export/export_csv.py +0 -0
  57. {yomitoku-0.5.2 → yomitoku-0.5.3}/src/yomitoku/export/export_html.py +0 -0
  58. {yomitoku-0.5.2 → yomitoku-0.5.3}/src/yomitoku/export/export_json.py +0 -0
  59. {yomitoku-0.5.2 → yomitoku-0.5.3}/src/yomitoku/export/export_markdown.py +0 -0
  60. {yomitoku-0.5.2 → yomitoku-0.5.3}/src/yomitoku/layout_analyzer.py +0 -0
  61. {yomitoku-0.5.2 → yomitoku-0.5.3}/src/yomitoku/layout_parser.py +0 -0
  62. {yomitoku-0.5.2 → yomitoku-0.5.3}/src/yomitoku/models/__init__.py +0 -0
  63. {yomitoku-0.5.2 → yomitoku-0.5.3}/src/yomitoku/models/dbnet_plus.py +0 -0
  64. {yomitoku-0.5.2 → yomitoku-0.5.3}/src/yomitoku/models/layers/__init__.py +0 -0
  65. {yomitoku-0.5.2 → yomitoku-0.5.3}/src/yomitoku/models/layers/dbnet_feature_attention.py +0 -0
  66. {yomitoku-0.5.2 → yomitoku-0.5.3}/src/yomitoku/models/layers/parseq_transformer.py +0 -0
  67. {yomitoku-0.5.2 → yomitoku-0.5.3}/src/yomitoku/models/parseq.py +0 -0
  68. {yomitoku-0.5.2 → yomitoku-0.5.3}/src/yomitoku/models/rtdetr.py +0 -0
  69. {yomitoku-0.5.2 → yomitoku-0.5.3}/src/yomitoku/ocr.py +0 -0
  70. {yomitoku-0.5.2 → yomitoku-0.5.3}/src/yomitoku/postprocessor/__init__.py +0 -0
  71. {yomitoku-0.5.2 → yomitoku-0.5.3}/src/yomitoku/postprocessor/dbnet_postporcessor.py +0 -0
  72. {yomitoku-0.5.2 → yomitoku-0.5.3}/src/yomitoku/postprocessor/parseq_tokenizer.py +0 -0
  73. {yomitoku-0.5.2 → yomitoku-0.5.3}/src/yomitoku/reading_order.py +0 -0
  74. {yomitoku-0.5.2 → yomitoku-0.5.3}/src/yomitoku/resource/MPLUS1p-Medium.ttf +0 -0
  75. {yomitoku-0.5.2 → yomitoku-0.5.3}/src/yomitoku/resource/charset.txt +0 -0
  76. {yomitoku-0.5.2 → yomitoku-0.5.3}/src/yomitoku/table_structure_recognizer.py +0 -0
  77. {yomitoku-0.5.2 → yomitoku-0.5.3}/src/yomitoku/text_detector.py +0 -0
  78. {yomitoku-0.5.2 → yomitoku-0.5.3}/src/yomitoku/text_recognizer.py +0 -0
  79. {yomitoku-0.5.2 → yomitoku-0.5.3}/src/yomitoku/utils/__init__.py +0 -0
  80. {yomitoku-0.5.2 → yomitoku-0.5.3}/src/yomitoku/utils/graph.py +0 -0
  81. {yomitoku-0.5.2 → yomitoku-0.5.3}/src/yomitoku/utils/logger.py +0 -0
  82. {yomitoku-0.5.2 → yomitoku-0.5.3}/src/yomitoku/utils/misc.py +0 -0
  83. {yomitoku-0.5.2 → yomitoku-0.5.3}/src/yomitoku/utils/visualizer.py +0 -0
  84. {yomitoku-0.5.2 → yomitoku-0.5.3}/static/in/demo.jpg +0 -0
  85. {yomitoku-0.5.2 → yomitoku-0.5.3}/static/in/gallery1.jpg +0 -0
  86. {yomitoku-0.5.2 → yomitoku-0.5.3}/static/in/gallery2.jpg +0 -0
  87. {yomitoku-0.5.2 → yomitoku-0.5.3}/static/in/gallery3.jpg +0 -0
  88. {yomitoku-0.5.2 → yomitoku-0.5.3}/static/in/gallery4.jpg +0 -0
  89. {yomitoku-0.5.2 → yomitoku-0.5.3}/static/in/gallery5.jpg +0 -0
  90. {yomitoku-0.5.2 → yomitoku-0.5.3}/static/in/gallery6.jpg +0 -0
  91. {yomitoku-0.5.2 → yomitoku-0.5.3}/static/logo/horizontal.png +0 -0
  92. {yomitoku-0.5.2 → yomitoku-0.5.3}/static/out/demo_html.png +0 -0
  93. {yomitoku-0.5.2 → yomitoku-0.5.3}/static/out/figures/in_demo_p1_figure_0.png +0 -0
  94. {yomitoku-0.5.2 → yomitoku-0.5.3}/static/out/figures/in_gallery1_p1_figure_0.png +0 -0
  95. {yomitoku-0.5.2 → yomitoku-0.5.3}/static/out/figures/in_gallery1_p1_figure_1.png +0 -0
  96. {yomitoku-0.5.2 → yomitoku-0.5.3}/static/out/figures/in_gallery1_p1_figure_10.png +0 -0
  97. {yomitoku-0.5.2 → yomitoku-0.5.3}/static/out/figures/in_gallery1_p1_figure_2.png +0 -0
  98. {yomitoku-0.5.2 → yomitoku-0.5.3}/static/out/figures/in_gallery1_p1_figure_3.png +0 -0
  99. {yomitoku-0.5.2 → yomitoku-0.5.3}/static/out/figures/in_gallery1_p1_figure_4.png +0 -0
  100. {yomitoku-0.5.2 → yomitoku-0.5.3}/static/out/figures/in_gallery1_p1_figure_5.png +0 -0
  101. {yomitoku-0.5.2 → yomitoku-0.5.3}/static/out/figures/in_gallery1_p1_figure_6.png +0 -0
  102. {yomitoku-0.5.2 → yomitoku-0.5.3}/static/out/figures/in_gallery1_p1_figure_7.png +0 -0
  103. {yomitoku-0.5.2 → yomitoku-0.5.3}/static/out/figures/in_gallery1_p1_figure_8.png +0 -0
  104. {yomitoku-0.5.2 → yomitoku-0.5.3}/static/out/figures/in_gallery1_p1_figure_9.png +0 -0
  105. {yomitoku-0.5.2 → yomitoku-0.5.3}/static/out/figures/in_gallery3_p1_figure_0.png +0 -0
  106. {yomitoku-0.5.2 → yomitoku-0.5.3}/static/out/figures/in_gallery3_p1_figure_1.png +0 -0
  107. {yomitoku-0.5.2 → yomitoku-0.5.3}/static/out/figures/in_gallery5_p1_figure_0.png +0 -0
  108. {yomitoku-0.5.2 → yomitoku-0.5.3}/static/out/figures/in_gallery5_p1_figure_1.png +0 -0
  109. {yomitoku-0.5.2 → yomitoku-0.5.3}/static/out/figures/in_gallery6_p1_figure_0.png +0 -0
  110. {yomitoku-0.5.2 → yomitoku-0.5.3}/static/out/figures/in_gallery6_p1_figure_1.png +0 -0
  111. {yomitoku-0.5.2 → yomitoku-0.5.3}/static/out/in_demo_p1.html +0 -0
  112. {yomitoku-0.5.2 → yomitoku-0.5.3}/static/out/in_demo_p1.md +0 -0
  113. {yomitoku-0.5.2 → yomitoku-0.5.3}/static/out/in_demo_p1_layout.jpg +0 -0
  114. {yomitoku-0.5.2 → yomitoku-0.5.3}/static/out/in_demo_p1_ocr.jpg +0 -0
  115. {yomitoku-0.5.2 → yomitoku-0.5.3}/static/out/in_gallery1_p1.html +0 -0
  116. {yomitoku-0.5.2 → yomitoku-0.5.3}/static/out/in_gallery1_p1.md +0 -0
  117. {yomitoku-0.5.2 → yomitoku-0.5.3}/static/out/in_gallery1_p1_layout.jpg +0 -0
  118. {yomitoku-0.5.2 → yomitoku-0.5.3}/static/out/in_gallery1_p1_ocr.jpg +0 -0
  119. {yomitoku-0.5.2 → yomitoku-0.5.3}/static/out/in_gallery2_p1.html +0 -0
  120. {yomitoku-0.5.2 → yomitoku-0.5.3}/static/out/in_gallery2_p1.md +0 -0
  121. {yomitoku-0.5.2 → yomitoku-0.5.3}/static/out/in_gallery2_p1_layout.jpg +0 -0
  122. {yomitoku-0.5.2 → yomitoku-0.5.3}/static/out/in_gallery2_p1_ocr.jpg +0 -0
  123. {yomitoku-0.5.2 → yomitoku-0.5.3}/static/out/in_gallery3_p1.html +0 -0
  124. {yomitoku-0.5.2 → yomitoku-0.5.3}/static/out/in_gallery3_p1.md +0 -0
  125. {yomitoku-0.5.2 → yomitoku-0.5.3}/static/out/in_gallery3_p1_layout.jpg +0 -0
  126. {yomitoku-0.5.2 → yomitoku-0.5.3}/static/out/in_gallery3_p1_ocr.jpg +0 -0
  127. {yomitoku-0.5.2 → yomitoku-0.5.3}/static/out/in_gallery4_p1.html +0 -0
  128. {yomitoku-0.5.2 → yomitoku-0.5.3}/static/out/in_gallery4_p1.md +0 -0
  129. {yomitoku-0.5.2 → yomitoku-0.5.3}/static/out/in_gallery4_p1_layout.jpg +0 -0
  130. {yomitoku-0.5.2 → yomitoku-0.5.3}/static/out/in_gallery4_p1_ocr.jpg +0 -0
  131. {yomitoku-0.5.2 → yomitoku-0.5.3}/static/out/in_gallery5_p1.html +0 -0
  132. {yomitoku-0.5.2 → yomitoku-0.5.3}/static/out/in_gallery5_p1.md +0 -0
  133. {yomitoku-0.5.2 → yomitoku-0.5.3}/static/out/in_gallery5_p1_layout.jpg +0 -0
  134. {yomitoku-0.5.2 → yomitoku-0.5.3}/static/out/in_gallery5_p1_ocr.jpg +0 -0
  135. {yomitoku-0.5.2 → yomitoku-0.5.3}/static/out/in_gallery6_p1.html +0 -0
  136. {yomitoku-0.5.2 → yomitoku-0.5.3}/static/out/in_gallery6_p1.md +0 -0
  137. {yomitoku-0.5.2 → yomitoku-0.5.3}/static/out/in_gallery6_p1_layout.jpg +0 -0
  138. {yomitoku-0.5.2 → yomitoku-0.5.3}/static/out/in_gallery6_p1_ocr.jpg +0 -0
  139. {yomitoku-0.5.2 → yomitoku-0.5.3}/tests/data/invalid.jpg +0 -0
  140. {yomitoku-0.5.2 → yomitoku-0.5.3}/tests/data/invalid.pdf +0 -0
  141. {yomitoku-0.5.2 → yomitoku-0.5.3}/tests/data/rgba.png +0 -0
  142. {yomitoku-0.5.2 → yomitoku-0.5.3}/tests/data/small.jpg +0 -0
  143. {yomitoku-0.5.2 → yomitoku-0.5.3}/tests/data/subdir/test.jpg +0 -0
  144. {yomitoku-0.5.2 → yomitoku-0.5.3}/tests/data/test.bmp +0 -0
  145. {yomitoku-0.5.2 → yomitoku-0.5.3}/tests/data/test.jpg +0 -0
  146. {yomitoku-0.5.2 → yomitoku-0.5.3}/tests/data/test.pdf +0 -0
  147. {yomitoku-0.5.2 → yomitoku-0.5.3}/tests/data/test.png +0 -0
  148. {yomitoku-0.5.2 → yomitoku-0.5.3}/tests/data/test.tiff +0 -0
  149. {yomitoku-0.5.2 → yomitoku-0.5.3}/tests/data/test.txt +0 -0
  150. {yomitoku-0.5.2 → yomitoku-0.5.3}/tests/data/test_gray.jpg +0 -0
  151. {yomitoku-0.5.2 → yomitoku-0.5.3}/tests/test_base.py +0 -0
  152. {yomitoku-0.5.2 → yomitoku-0.5.3}/tests/test_cli.py +0 -0
  153. {yomitoku-0.5.2 → yomitoku-0.5.3}/tests/test_data.py +0 -0
  154. {yomitoku-0.5.2 → yomitoku-0.5.3}/tests/test_document_analyzer.py +0 -0
  155. {yomitoku-0.5.2 → yomitoku-0.5.3}/tests/test_export.py +0 -0
  156. {yomitoku-0.5.2 → yomitoku-0.5.3}/tests/test_layout_analyzer.py +0 -0
  157. {yomitoku-0.5.2 → yomitoku-0.5.3}/tests/test_ocr.py +0 -0
  158. {yomitoku-0.5.2 → yomitoku-0.5.3}/tests/yaml/layout_parser.yaml +0 -0
  159. {yomitoku-0.5.2 → yomitoku-0.5.3}/tests/yaml/table_structure_recognizer.yaml +0 -0
  160. {yomitoku-0.5.2 → yomitoku-0.5.3}/tests/yaml/text_detector.yaml +0 -0
  161. {yomitoku-0.5.2 → yomitoku-0.5.3}/tests/yaml/text_recognizer.yaml +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: yomitoku
-Version: 0.5.2
+Version: 0.5.3
 Summary: Yomitoku is an AI-powered document image analysis package designed specifically for the Japanese language.
 Author-email: Kotaro Kinoshita <kotaro.kinoshita@mlism.com>
 License: CC BY-NC-SA 4.0
src/yomitoku/models/layers/activate.py
@@ -1,3 +1,16 @@
+# Copyright(c) 2023 lyuwenyu
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import torch.nn as nn
 
 
src/yomitoku/models/layers/rtdetr_backbone.py
@@ -1,4 +1,16 @@
-"""Copyright(c) 2023 lyuwenyu. All Rights Reserved."""
+# Copyright 2023 lyuwenyu
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 
 from collections import OrderedDict
 
@@ -47,7 +59,9 @@ class ConvNormLayer(nn.Module):
 class BasicBlock(nn.Module):
     expansion = 1
 
-    def __init__(self, ch_in, ch_out, stride, shortcut, act="relu", variant="b"):
+    def __init__(
+        self, ch_in, ch_out, stride, shortcut, act="relu", variant="b"
+    ):
         super().__init__()
 
         self.shortcut = shortcut
@@ -86,7 +100,9 @@ class BasicBlock(nn.Module):
 class BottleNeck(nn.Module):
     expansion = 4
 
-    def __init__(self, ch_in, ch_out, stride, shortcut, act="relu", variant="b"):
+    def __init__(
+        self, ch_in, ch_out, stride, shortcut, act="relu", variant="b"
+    ):
         super().__init__()
 
         if variant == "a":
@@ -109,13 +125,17 @@ class BottleNeck(nn.Module):
                             ("pool", nn.AvgPool2d(2, 2, 0, ceil_mode=True)),
                             (
                                 "conv",
-                                ConvNormLayer(ch_in, ch_out * self.expansion, 1, 1),
+                                ConvNormLayer(
+                                    ch_in, ch_out * self.expansion, 1, 1
+                                ),
                             ),
                         ]
                     )
                 )
             else:
-                self.short = ConvNormLayer(ch_in, ch_out * self.expansion, 1, stride)
+                self.short = ConvNormLayer(
+                    ch_in, ch_out * self.expansion, 1, stride
+                )
 
         self.act = nn.Identity() if act is None else get_activation(act)
 
@@ -136,7 +156,9 @@ class BottleNeck(nn.Module):
 
 
 class Blocks(nn.Module):
-    def __init__(self, block, ch_in, ch_out, count, stage_num, act="relu", variant="b"):
+    def __init__(
+        self, block, ch_in, ch_out, count, stage_num, act="relu", variant="b"
+    ):
         super().__init__()
 
         self.blocks = nn.ModuleList()
src/yomitoku/models/layers/rtdetr_hybrid_encoder.py
@@ -1,4 +1,16 @@
-"""Copyright(c) 2023 lyuwenyu. All Rights Reserved."""
+# Copyright 2023 lyuwenyu
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 
 import copy
 from collections import OrderedDict
@@ -240,7 +252,9 @@ class HybridEncoder(nn.Module):
         for in_channel in in_channels:
             if version == "v1":
                 proj = nn.Sequential(
-                    nn.Conv2d(in_channel, hidden_dim, kernel_size=1, bias=False),
+                    nn.Conv2d(
+                        in_channel, hidden_dim, kernel_size=1, bias=False
+                    ),
                     nn.BatchNorm2d(hidden_dim),
                 )
             elif version == "v2":
@@ -276,7 +290,9 @@ class HybridEncoder(nn.Module):
 
         self.encoder = nn.ModuleList(
             [
-                TransformerEncoder(copy.deepcopy(encoder_layer), num_encoder_layers)
+                TransformerEncoder(
+                    copy.deepcopy(encoder_layer), num_encoder_layers
+                )
                 for _ in range(len(use_encoder_idx))
             ]
         )
@@ -331,7 +347,9 @@ class HybridEncoder(nn.Module):
                 # self.register_buffer(f'pos_embed{idx}', pos_embed)
 
     @staticmethod
-    def build_2d_sincos_position_embedding(w, h, embed_dim=256, temperature=10000.0):
+    def build_2d_sincos_position_embedding(
+        w, h, embed_dim=256, temperature=10000.0
+    ):
         """ """
         grid_w = torch.arange(int(w), dtype=torch.float32)
         grid_h = torch.arange(int(h), dtype=torch.float32)
@@ -369,7 +387,9 @@ class HybridEncoder(nn.Module):
                         src_flatten.device
                     )
 
-                memory: torch.Tensor = self.encoder[i](src_flatten, pos_embed=pos_embed)
+                memory: torch.Tensor = self.encoder[i](
+                    src_flatten, pos_embed=pos_embed
+                )
                 proj_feats[enc_ind] = (
                     memory.permute(0, 2, 1)
                     .reshape(-1, self.hidden_dim, h, w)
@@ -381,9 +401,13 @@ class HybridEncoder(nn.Module):
         for idx in range(len(self.in_channels) - 1, 0, -1):
             feat_heigh = inner_outs[0]
             feat_low = proj_feats[idx - 1]
-            feat_heigh = self.lateral_convs[len(self.in_channels) - 1 - idx](feat_heigh)
+            feat_heigh = self.lateral_convs[len(self.in_channels) - 1 - idx](
+                feat_heigh
+            )
             inner_outs[0] = feat_heigh
-            upsample_feat = F.interpolate(feat_heigh, scale_factor=2.0, mode="nearest")
+            upsample_feat = F.interpolate(
+                feat_heigh, scale_factor=2.0, mode="nearest"
+            )
             inner_out = self.fpn_blocks[len(self.in_channels) - 1 - idx](
                 torch.concat([upsample_feat, feat_low], dim=1)
             )
src/yomitoku/models/layers/rtdetrv2_decoder.py
@@ -1,4 +1,17 @@
-"""Copyright(c) 2023 lyuwenyu. All Rights Reserved."""
+# Scene Text Recognition Model Hub
+# Copyright 2023 lyuwenyu
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 
 import copy
 import functools
@@ -27,7 +40,9 @@ def inverse_sigmoid(x: torch.Tensor, eps: float = 1e-5) -> torch.Tensor:
 
 
 class MLP(nn.Module):
-    def __init__(self, input_dim, hidden_dim, output_dim, num_layers, act="relu"):
+    def __init__(
+        self, input_dim, hidden_dim, output_dim, num_layers, act="relu"
+    ):
         super().__init__()
         self.num_layers = num_layers
         h = [hidden_dim] * (num_layers - 1)
@@ -178,7 +193,9 @@ class MSDeformableAttention(nn.Module):
         elif reference_points.shape[-1] == 4:
             # reference_points [8, 480, None, 1, 4]
             # sampling_offsets [8, 480, 8, 12, 2]
-            num_points_scale = self.num_points_scale.to(dtype=query.dtype).unsqueeze(-1)
+            num_points_scale = self.num_points_scale.to(
+                dtype=query.dtype
+            ).unsqueeze(-1)
             offset = (
                 sampling_offsets
                 * num_points_scale
@@ -313,7 +330,9 @@ def deformable_attention_core_func_v2(
     _, Len_q, _, _, _ = sampling_locations.shape
 
     split_shape = [h * w for h, w in value_spatial_shapes]
-    value_list = value.permute(0, 2, 3, 1).flatten(0, 1).split(split_shape, dim=-1)
+    value_list = (
+        value.permute(0, 2, 3, 1).flatten(0, 1).split(split_shape, dim=-1)
+    )
 
     # sampling_offsets [8, 480, 8, 12, 2]
     if method == "default":
@@ -342,7 +361,8 @@
         elif method == "discrete":
             # n * m, seq, n, 2
             sampling_coord = (
-                sampling_grid_l * torch.tensor([[w, h]], device=value.device) + 0.5
+                sampling_grid_l * torch.tensor([[w, h]], device=value.device)
+                + 0.5
             ).to(torch.int64)
 
             # FIX ME? for rectangle input
@@ -369,7 +389,9 @@
     attn_weights = attention_weights.permute(0, 2, 1, 3).reshape(
         bs * n_head, 1, Len_q, sum(num_points_list)
     )
-    weighted_sample_locs = torch.concat(sampling_value_list, dim=-1) * attn_weights
+    weighted_sample_locs = (
+        torch.concat(sampling_value_list, dim=-1) * attn_weights
+    )
     output = weighted_sample_locs.sum(-1).reshape(bs, n_head * c, Len_q)
 
     return output.permute(0, 2, 1)
@@ -584,7 +606,9 @@ class RTDETRTransformerv2(nn.Module):
                         [
                             (
                                 "conv",
-                                nn.Conv2d(in_channels, self.hidden_dim, 1, bias=False),
+                                nn.Conv2d(
+                                    in_channels, self.hidden_dim, 1, bias=False
+                                ),
                             ),
                             (
                                 "norm",
@@ -665,9 +689,13 @@
                 torch.arange(h), torch.arange(w), indexing="ij"
             )
             grid_xy = torch.stack([grid_x, grid_y], dim=-1)
-            grid_xy = (grid_xy.unsqueeze(0) + 0.5) / torch.tensor([w, h], dtype=dtype)
+            grid_xy = (grid_xy.unsqueeze(0) + 0.5) / torch.tensor(
+                [w, h], dtype=dtype
+            )
             wh = torch.ones_like(grid_xy) * grid_size * (2.0**lvl)
-            lvl_anchors = torch.concat([grid_xy, wh], dim=-1).reshape(-1, h * w, 4)
+            lvl_anchors = torch.concat([grid_xy, wh], dim=-1).reshape(
+                -1, h * w, 4
+            )
             anchors.append(lvl_anchors)
 
         anchors = torch.concat(anchors, dim=1).to(device)
@@ -701,18 +729,22 @@
         )
 
         enc_topk_bboxes_list, enc_topk_logits_list = [], []
-        enc_topk_memory, enc_topk_logits, enc_topk_bbox_unact = self._select_topk(
-            output_memory,
-            enc_outputs_logits,
-            enc_outputs_coord_unact,
-            self.num_queries,
+        enc_topk_memory, enc_topk_logits, enc_topk_bbox_unact = (
+            self._select_topk(
+                output_memory,
+                enc_outputs_logits,
+                enc_outputs_coord_unact,
+                self.num_queries,
+            )
         )
 
         # if self.num_select_queries != self.num_queries:
         #     raise NotImplementedError('')
 
         if self.learn_query_content:
-            content = self.tgt_embed.weight.unsqueeze(0).tile([memory.shape[0], 1, 1])
+            content = self.tgt_embed.weight.unsqueeze(0).tile(
+                [memory.shape[0], 1, 1]
+            )
         else:
             content = enc_topk_memory.detach()
@@ -739,7 +771,9 @@
         topk: int,
     ):
         if self.query_select_method == "default":
-            _, topk_ind = torch.topk(outputs_logits.max(-1).values, topk, dim=-1)
+            _, topk_ind = torch.topk(
+                outputs_logits.max(-1).values, topk, dim=-1
+            )
 
         elif self.query_select_method == "one2many":
             _, topk_ind = torch.topk(outputs_logits.flatten(1), topk, dim=-1)
@@ -752,12 +786,16 @@
 
         topk_coords = outputs_coords_unact.gather(
             dim=1,
-            index=topk_ind.unsqueeze(-1).repeat(1, 1, outputs_coords_unact.shape[-1]),
+            index=topk_ind.unsqueeze(-1).repeat(
+                1, 1, outputs_coords_unact.shape[-1]
+            ),
         )
 
         topk_logits = outputs_logits.gather(
             dim=1,
-            index=topk_ind.unsqueeze(-1).repeat(1, 1, outputs_logits.shape[-1]),
+            index=topk_ind.unsqueeze(-1).repeat(
+                1, 1, outputs_logits.shape[-1]
+            ),
        )
 
         topk_memory = memory.gather(
src/yomitoku/postprocessor/rtdetr_postprocessor.py
@@ -1,4 +1,17 @@
-"""Copyright(c) 2023 lyuwenyu. All Rights Reserved."""
+# Copyright 2023 lyuwenyu
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 
 import torch
 import torch.nn as nn
@@ -41,12 +54,16 @@ class RTDETRPostProcessor(nn.Module):
         logits, boxes = outputs["pred_logits"], outputs["pred_boxes"]
         # orig_target_sizes = torch.stack([t["orig_size"] for t in targets], dim=0)
 
-        bbox_pred = torchvision.ops.box_convert(boxes, in_fmt="cxcywh", out_fmt="xyxy")
+        bbox_pred = torchvision.ops.box_convert(
+            boxes, in_fmt="cxcywh", out_fmt="xyxy"
+        )
         bbox_pred *= orig_target_sizes.repeat(1, 2).unsqueeze(1)
 
         if self.use_focal_loss:
             scores = F.sigmoid(logits)
-            scores, index = torch.topk(scores.flatten(1), self.num_top_queries, dim=-1)
+            scores, index = torch.topk(
+                scores.flatten(1), self.num_top_queries, dim=-1
+            )
             # TODO for older tensorrt
             # labels = index % self.num_classes
             labels = mod(index, self.num_classes)
@@ -60,7 +77,9 @@ class RTDETRPostProcessor(nn.Module):
             scores = F.softmax(logits)[:, :, :-1]
             scores, labels = scores.max(dim=-1)
             if scores.shape[1] > self.num_top_queries:
-                scores, index = torch.topk(scores, self.num_top_queries, dim=-1)
+                scores, index = torch.topk(
+                    scores, self.num_top_queries, dim=-1
+                )
                 labels = torch.gather(labels, dim=1, index=index)
                 boxes = torch.gather(
                     boxes,
@@ -78,7 +97,10 @@ class RTDETRPostProcessor(nn.Module):
 
             labels = (
                 torch.tensor(
-                    [mscoco_label2category[int(x.item())] for x in labels.flatten()]
+                    [
+                        mscoco_label2category[int(x.item())]
+                        for x in labels.flatten()
+                    ]
                 )
                 .to(boxes.device)
                 .reshape(labels.shape)
uv.lock
@@ -1530,7 +1530,7 @@ wheels = [
 
 [[package]]
 name = "yomitoku"
-version = "0.5.2"
+version = "0.5.3"
 source = { editable = "." }
 dependencies = [
     { name = "huggingface-hub" },