yomitoku 0.7.0__tar.gz → 0.7.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (164)
  1. {yomitoku-0.7.0 → yomitoku-0.7.1}/PKG-INFO +1 -1
  2. {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/cli/main.py +2 -1
  3. {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/document_analyzer.py +0 -1
  4. {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/export/export_csv.py +2 -0
  5. {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/export/export_html.py +2 -0
  6. {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/export/export_json.py +2 -0
  7. {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/export/export_markdown.py +2 -0
  8. {yomitoku-0.7.0 → yomitoku-0.7.1}/tests/test_cli.py +1 -0
  9. yomitoku-0.7.1/tests/test_document_analyzer.py +596 -0
  10. {yomitoku-0.7.0 → yomitoku-0.7.1}/uv.lock +1 -1
  11. yomitoku-0.7.0/tests/test_document_analyzer.py +0 -88
  12. {yomitoku-0.7.0 → yomitoku-0.7.1}/.github/release-drafter.yml +0 -0
  13. {yomitoku-0.7.0 → yomitoku-0.7.1}/.github/workflows/build-and-publish-docs.yaml +0 -0
  14. {yomitoku-0.7.0 → yomitoku-0.7.1}/.github/workflows/build-and-publish.yml +0 -0
  15. {yomitoku-0.7.0 → yomitoku-0.7.1}/.github/workflows/create-release.yml +0 -0
  16. {yomitoku-0.7.0 → yomitoku-0.7.1}/.github/workflows/lint-and-test.yml +0 -0
  17. {yomitoku-0.7.0 → yomitoku-0.7.1}/.gitignore +0 -0
  18. {yomitoku-0.7.0 → yomitoku-0.7.1}/.pre-commit-config.yaml +0 -0
  19. {yomitoku-0.7.0 → yomitoku-0.7.1}/.python-version +0 -0
  20. {yomitoku-0.7.0 → yomitoku-0.7.1}/README.md +0 -0
  21. {yomitoku-0.7.0 → yomitoku-0.7.1}/README_EN.md +0 -0
  22. {yomitoku-0.7.0 → yomitoku-0.7.1}/configs/layout_parser_rtdetrv2.yaml +0 -0
  23. {yomitoku-0.7.0 → yomitoku-0.7.1}/configs/table_structure_recognitizer.yaml +0 -0
  24. {yomitoku-0.7.0 → yomitoku-0.7.1}/configs/text_detector.yaml +0 -0
  25. {yomitoku-0.7.0 → yomitoku-0.7.1}/configs/text_recognizer.yaml +0 -0
  26. {yomitoku-0.7.0 → yomitoku-0.7.1}/demo/sample.pdf +0 -0
  27. {yomitoku-0.7.0 → yomitoku-0.7.1}/demo/setting_document_anaysis.py +0 -0
  28. {yomitoku-0.7.0 → yomitoku-0.7.1}/demo/simple_document_analysis.py +0 -0
  29. {yomitoku-0.7.0 → yomitoku-0.7.1}/demo/simple_layout.py +0 -0
  30. {yomitoku-0.7.0 → yomitoku-0.7.1}/demo/simple_ocr.py +0 -0
  31. {yomitoku-0.7.0 → yomitoku-0.7.1}/demo/text_detector.yaml +0 -0
  32. {yomitoku-0.7.0 → yomitoku-0.7.1}/dockerfile +0 -0
  33. {yomitoku-0.7.0 → yomitoku-0.7.1}/docs/assets/logo.svg +0 -0
  34. {yomitoku-0.7.0 → yomitoku-0.7.1}/docs/index.en.md +0 -0
  35. {yomitoku-0.7.0 → yomitoku-0.7.1}/docs/index.ja.md +0 -0
  36. {yomitoku-0.7.0 → yomitoku-0.7.1}/docs/installation.en.md +0 -0
  37. {yomitoku-0.7.0 → yomitoku-0.7.1}/docs/installation.ja.md +0 -0
  38. {yomitoku-0.7.0 → yomitoku-0.7.1}/docs/usage.en.md +0 -0
  39. {yomitoku-0.7.0 → yomitoku-0.7.1}/docs/usage.ja.md +0 -0
  40. {yomitoku-0.7.0 → yomitoku-0.7.1}/gallery.md +0 -0
  41. {yomitoku-0.7.0 → yomitoku-0.7.1}/mkdocs.yml +0 -0
  42. {yomitoku-0.7.0 → yomitoku-0.7.1}/pyproject.toml +0 -0
  43. {yomitoku-0.7.0 → yomitoku-0.7.1}/pytest.ini +0 -0
  44. {yomitoku-0.7.0 → yomitoku-0.7.1}/scripts/register_hugging_face_hub.py +0 -0
  45. {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/__init__.py +0 -0
  46. {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/base.py +0 -0
  47. {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/cli/__init__.py +0 -0
  48. {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/configs/__init__.py +0 -0
  49. {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/configs/cfg_layout_parser_rtdtrv2.py +0 -0
  50. {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/configs/cfg_table_structure_recognizer_rtdtrv2.py +0 -0
  51. {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/configs/cfg_text_detector_dbnet.py +0 -0
  52. {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/configs/cfg_text_recognizer_parseq.py +0 -0
  53. {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/configs/cfg_text_recognizer_parseq_small.py +0 -0
  54. {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/constants.py +0 -0
  55. {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/data/__init__.py +0 -0
  56. {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/data/dataset.py +0 -0
  57. {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/data/functions.py +0 -0
  58. {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/export/__init__.py +0 -0
  59. {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/layout_analyzer.py +0 -0
  60. {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/layout_parser.py +0 -0
  61. {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/models/__init__.py +0 -0
  62. {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/models/dbnet_plus.py +0 -0
  63. {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/models/layers/__init__.py +0 -0
  64. {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/models/layers/activate.py +0 -0
  65. {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/models/layers/dbnet_feature_attention.py +0 -0
  66. {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/models/layers/parseq_transformer.py +0 -0
  67. {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/models/layers/rtdetr_backbone.py +0 -0
  68. {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/models/layers/rtdetr_hybrid_encoder.py +0 -0
  69. {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/models/layers/rtdetrv2_decoder.py +0 -0
  70. {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/models/parseq.py +0 -0
  71. {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/models/rtdetr.py +0 -0
  72. {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/ocr.py +0 -0
  73. {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/onnx/.gitkeep +0 -0
  74. {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/postprocessor/__init__.py +0 -0
  75. {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/postprocessor/dbnet_postporcessor.py +0 -0
  76. {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/postprocessor/parseq_tokenizer.py +0 -0
  77. {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/postprocessor/rtdetr_postprocessor.py +0 -0
  78. {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/reading_order.py +0 -0
  79. {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/resource/MPLUS1p-Medium.ttf +0 -0
  80. {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/resource/charset.txt +0 -0
  81. {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/table_structure_recognizer.py +0 -0
  82. {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/text_detector.py +0 -0
  83. {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/text_recognizer.py +0 -0
  84. {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/utils/__init__.py +0 -0
  85. {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/utils/graph.py +0 -0
  86. {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/utils/logger.py +0 -0
  87. {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/utils/misc.py +0 -0
  88. {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/utils/visualizer.py +0 -0
  89. {yomitoku-0.7.0 → yomitoku-0.7.1}/static/in/demo.jpg +0 -0
  90. {yomitoku-0.7.0 → yomitoku-0.7.1}/static/in/gallery1.jpg +0 -0
  91. {yomitoku-0.7.0 → yomitoku-0.7.1}/static/in/gallery2.jpg +0 -0
  92. {yomitoku-0.7.0 → yomitoku-0.7.1}/static/in/gallery3.jpg +0 -0
  93. {yomitoku-0.7.0 → yomitoku-0.7.1}/static/in/gallery4.jpg +0 -0
  94. {yomitoku-0.7.0 → yomitoku-0.7.1}/static/in/gallery5.jpg +0 -0
  95. {yomitoku-0.7.0 → yomitoku-0.7.1}/static/in/gallery6.jpg +0 -0
  96. {yomitoku-0.7.0 → yomitoku-0.7.1}/static/logo/horizontal.png +0 -0
  97. {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/demo_html.png +0 -0
  98. {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/figures/in_demo_p1_figure_0.png +0 -0
  99. {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/figures/in_gallery1_p1_figure_0.png +0 -0
  100. {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/figures/in_gallery1_p1_figure_1.png +0 -0
  101. {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/figures/in_gallery1_p1_figure_10.png +0 -0
  102. {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/figures/in_gallery1_p1_figure_2.png +0 -0
  103. {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/figures/in_gallery1_p1_figure_3.png +0 -0
  104. {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/figures/in_gallery1_p1_figure_4.png +0 -0
  105. {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/figures/in_gallery1_p1_figure_5.png +0 -0
  106. {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/figures/in_gallery1_p1_figure_6.png +0 -0
  107. {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/figures/in_gallery1_p1_figure_7.png +0 -0
  108. {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/figures/in_gallery1_p1_figure_8.png +0 -0
  109. {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/figures/in_gallery1_p1_figure_9.png +0 -0
  110. {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/figures/in_gallery3_p1_figure_0.png +0 -0
  111. {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/figures/in_gallery3_p1_figure_1.png +0 -0
  112. {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/figures/in_gallery5_p1_figure_0.png +0 -0
  113. {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/figures/in_gallery5_p1_figure_1.png +0 -0
  114. {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/figures/in_gallery6_p1_figure_0.png +0 -0
  115. {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/figures/in_gallery6_p1_figure_1.png +0 -0
  116. {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/in_demo_p1.html +0 -0
  117. {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/in_demo_p1.md +0 -0
  118. {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/in_demo_p1_layout.jpg +0 -0
  119. {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/in_demo_p1_ocr.jpg +0 -0
  120. {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/in_gallery1_p1.html +0 -0
  121. {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/in_gallery1_p1.md +0 -0
  122. {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/in_gallery1_p1_layout.jpg +0 -0
  123. {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/in_gallery1_p1_ocr.jpg +0 -0
  124. {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/in_gallery2_p1.html +0 -0
  125. {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/in_gallery2_p1.md +0 -0
  126. {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/in_gallery2_p1_layout.jpg +0 -0
  127. {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/in_gallery2_p1_ocr.jpg +0 -0
  128. {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/in_gallery3_p1.html +0 -0
  129. {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/in_gallery3_p1.md +0 -0
  130. {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/in_gallery3_p1_layout.jpg +0 -0
  131. {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/in_gallery3_p1_ocr.jpg +0 -0
  132. {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/in_gallery4_p1.html +0 -0
  133. {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/in_gallery4_p1.md +0 -0
  134. {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/in_gallery4_p1_layout.jpg +0 -0
  135. {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/in_gallery4_p1_ocr.jpg +0 -0
  136. {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/in_gallery5_p1.html +0 -0
  137. {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/in_gallery5_p1.md +0 -0
  138. {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/in_gallery5_p1_layout.jpg +0 -0
  139. {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/in_gallery5_p1_ocr.jpg +0 -0
  140. {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/in_gallery6_p1.html +0 -0
  141. {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/in_gallery6_p1.md +0 -0
  142. {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/in_gallery6_p1_layout.jpg +0 -0
  143. {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/in_gallery6_p1_ocr.jpg +0 -0
  144. {yomitoku-0.7.0 → yomitoku-0.7.1}/tests/data/invalid.jpg +0 -0
  145. {yomitoku-0.7.0 → yomitoku-0.7.1}/tests/data/invalid.pdf +0 -0
  146. {yomitoku-0.7.0 → yomitoku-0.7.1}/tests/data/rgba.png +0 -0
  147. {yomitoku-0.7.0 → yomitoku-0.7.1}/tests/data/small.jpg +0 -0
  148. {yomitoku-0.7.0 → yomitoku-0.7.1}/tests/data/subdir/test.jpg +0 -0
  149. {yomitoku-0.7.0 → yomitoku-0.7.1}/tests/data/test.bmp +0 -0
  150. {yomitoku-0.7.0 → yomitoku-0.7.1}/tests/data/test.jpg +0 -0
  151. {yomitoku-0.7.0 → yomitoku-0.7.1}/tests/data/test.pdf +0 -0
  152. {yomitoku-0.7.0 → yomitoku-0.7.1}/tests/data/test.png +0 -0
  153. {yomitoku-0.7.0 → yomitoku-0.7.1}/tests/data/test.tiff +0 -0
  154. {yomitoku-0.7.0 → yomitoku-0.7.1}/tests/data/test.txt +0 -0
  155. {yomitoku-0.7.0 → yomitoku-0.7.1}/tests/data/test_gray.jpg +0 -0
  156. {yomitoku-0.7.0 → yomitoku-0.7.1}/tests/test_base.py +0 -0
  157. {yomitoku-0.7.0 → yomitoku-0.7.1}/tests/test_data.py +0 -0
  158. {yomitoku-0.7.0 → yomitoku-0.7.1}/tests/test_export.py +0 -0
  159. {yomitoku-0.7.0 → yomitoku-0.7.1}/tests/test_layout_analyzer.py +0 -0
  160. {yomitoku-0.7.0 → yomitoku-0.7.1}/tests/test_ocr.py +0 -0
  161. {yomitoku-0.7.0 → yomitoku-0.7.1}/tests/yaml/layout_parser.yaml +0 -0
  162. {yomitoku-0.7.0 → yomitoku-0.7.1}/tests/yaml/table_structure_recognizer.yaml +0 -0
  163. {yomitoku-0.7.0 → yomitoku-0.7.1}/tests/yaml/text_detector.yaml +0 -0
  164. {yomitoku-0.7.0 → yomitoku-0.7.1}/tests/yaml/text_recognizer.yaml +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: yomitoku
3
- Version: 0.7.0
3
+ Version: 0.7.1
4
4
  Summary: Yomitoku is an AI-powered document image analysis package designed specifically for the Japanese language.
5
5
  Author-email: Kotaro Kinoshita <kotaro.kinoshita@mlism.com>
6
6
  License: CC BY-NC-SA 4.0
@@ -1,5 +1,6 @@
1
1
  import argparse
2
2
  import os
3
+ import torch
3
4
  from pathlib import Path
4
5
 
5
6
  import cv2
@@ -235,7 +236,7 @@ def main():
235
236
  if args.lite:
236
237
  configs["ocr"]["text_recognizer"]["model_name"] = "parseq-small"
237
238
 
238
- if args.device == "cpu":
239
+ if args.device == "cpu" or not torch.cuda.is_available():
239
240
  configs["ocr"]["text_detector"]["infer_onnx"] = True
240
241
 
241
242
  # Note: Text Detector以外はONNX推論よりもPyTorch推論の方が速いため、ONNX推論は行わない
@@ -127,7 +127,6 @@ def extract_words_within_element(pred_words, element):
127
127
  if len(contained_words) == 0:
128
128
  return None, None, check_list
129
129
 
130
- element_direction = "horizontal"
131
130
  word_direction = [word.direction for word in contained_words]
132
131
  cnt_horizontal = word_direction.count("horizontal")
133
132
  cnt_vertical = word_direction.count("vertical")
@@ -41,6 +41,8 @@ def save_figure(
41
41
  out_path,
42
42
  figure_dir="figures",
43
43
  ):
44
+ assert img is not None, "img is required for saving figures"
45
+
44
46
  for i, figure in enumerate(figures):
45
47
  x1, y1, x2, y2 = map(int, figure.box)
46
48
  figure_img = img[y1:y2, x1:x2, :]
@@ -110,6 +110,8 @@ def figure_to_html(
110
110
  figure_dir="figures",
111
111
  width=200,
112
112
  ):
113
+ assert img is not None, "img is required for saving figures"
114
+
113
115
  elements = []
114
116
  for i, figure in enumerate(figures):
115
117
  x1, y1, x2, y2 = map(int, figure.box)
@@ -21,6 +21,8 @@ def save_figure(
21
21
  out_path,
22
22
  figure_dir="figures",
23
23
  ):
24
+ assert img is not None, "img is required for saving figures"
25
+
24
26
  for i, figure in enumerate(figures):
25
27
  x1, y1, x2, y2 = map(int, figure.box)
26
28
  figure_img = img[y1:y2, x1:x2, :]
@@ -75,6 +75,8 @@ def figure_to_md(
75
75
  width=200,
76
76
  figure_dir="figures",
77
77
  ):
78
+ assert img is not None, "img is required for saving figures"
79
+
78
80
  elements = []
79
81
  for i, figure in enumerate(figures):
80
82
  x1, y1, x2, y2 = map(int, figure.box)
@@ -210,6 +210,7 @@ def test_validate_encoding():
210
210
  validate_encoding("utf-9")
211
211
 
212
212
  assert validate_encoding("utf-8")
213
+ assert validate_encoding("utf-8-sig")
213
214
  assert validate_encoding("shift-jis")
214
215
  assert validate_encoding("euc-jp")
215
216
  assert validate_encoding("cp932")
@@ -0,0 +1,596 @@
1
+ import pytest
2
+ import torch
3
+ from omegaconf import OmegaConf
4
+
5
+ from yomitoku import DocumentAnalyzer
6
+ from yomitoku.document_analyzer import (
7
+ ParagraphSchema,
8
+ FigureSchema,
9
+ DocumentAnalyzerSchema,
10
+ extract_paragraph_within_figure,
11
+ combine_flags,
12
+ judge_page_direction,
13
+ extract_words_within_element,
14
+ is_vertical,
15
+ is_noise,
16
+ recursive_update,
17
+ _extract_words_within_table,
18
+ _calc_overlap_words_on_lines,
19
+ _correct_vertical_word_boxes,
20
+ _correct_horizontal_word_boxes,
21
+ _split_text_across_cells,
22
+ )
23
+
24
+
25
+ from yomitoku.text_detector import TextDetectorSchema
26
+
27
+ from yomitoku.table_structure_recognizer import (
28
+ TableStructureRecognizerSchema,
29
+ TableLineSchema,
30
+ TableCellSchema,
31
+ )
32
+
33
+ from yomitoku.ocr import (
34
+ WordPrediction,
35
+ )
36
+
37
+
38
+ def test_initialize():
39
+ device = "cpu"
40
+ visualize = True
41
+ config = {
42
+ "ocr": {
43
+ "text_detector": {
44
+ "path_cfg": "tests/yaml/text_detector.yaml",
45
+ },
46
+ "text_recognizer": {
47
+ "path_cfg": "tests/yaml/text_recognizer.yaml",
48
+ },
49
+ },
50
+ "layout_analyzer": {
51
+ "layout_parser": {
52
+ "path_cfg": "tests/yaml/layout_parser.yaml",
53
+ },
54
+ "table_structure_recognizer": {
55
+ "path_cfg": "tests/yaml/table_structure_recognizer.yaml",
56
+ },
57
+ },
58
+ }
59
+
60
+ analyzer = DocumentAnalyzer(configs=config, device=device, visualize=visualize)
61
+
62
+ # サブモジュールのパラメータが更新されているか確認
63
+ assert analyzer.text_detector.device == torch.device(device)
64
+ assert analyzer.text_recognizer.device == torch.device(device)
65
+ assert analyzer.layout.layout_parser.device == torch.device(device)
66
+ assert analyzer.layout.table_structure_recognizer.device == torch.device(device)
67
+
68
+ assert analyzer.text_detector.visualize == visualize
69
+ assert analyzer.text_recognizer.visualize == visualize
70
+ assert analyzer.layout.layout_parser.visualize == visualize
71
+ assert analyzer.layout.table_structure_recognizer.visualize == visualize
72
+
73
+ text_detector_cfg = OmegaConf.load(config["ocr"]["text_detector"]["path_cfg"])
74
+ text_recognizer_cfg = OmegaConf.load(config["ocr"]["text_recognizer"]["path_cfg"])
75
+ layout_parser_cfg = OmegaConf.load(
76
+ config["layout_analyzer"]["layout_parser"]["path_cfg"]
77
+ )
78
+ table_structure_recognizer_cfg = OmegaConf.load(
79
+ config["layout_analyzer"]["table_structure_recognizer"]["path_cfg"]
80
+ )
81
+
82
+ assert (
83
+ analyzer.text_detector.post_processor.thresh
84
+ == text_detector_cfg.post_process.thresh
85
+ )
86
+
87
+ assert (
88
+ analyzer.text_recognizer.model.refine_iters == text_recognizer_cfg.refine_iters
89
+ )
90
+
91
+ assert analyzer.layout.layout_parser.thresh_score == layout_parser_cfg.thresh_score
92
+
93
+ assert (
94
+ analyzer.layout.table_structure_recognizer.thresh_score
95
+ == table_structure_recognizer_cfg.thresh_score
96
+ )
97
+
98
+
99
+ def test_invalid_path():
100
+ config = {
101
+ "ocr": {
102
+ "text_detector": {
103
+ "path_cfg": "tests/yaml/dummy.yaml",
104
+ },
105
+ }
106
+ }
107
+
108
+ with pytest.raises(FileNotFoundError):
109
+ DocumentAnalyzer(
110
+ configs=config,
111
+ )
112
+
113
+
114
+ def test_invalid_config():
115
+ with pytest.raises(ValueError):
116
+ DocumentAnalyzer(
117
+ configs="invalid",
118
+ )
119
+
120
+
121
+ def test_extract_paragraph_within_figure():
122
+ paragraphs = [
123
+ {
124
+ "box": [0, 0, 2, 1],
125
+ "contents": "This is a test.",
126
+ "direction": "horizontal",
127
+ "order": 1,
128
+ "role": None,
129
+ },
130
+ {
131
+ "box": [0, 0, 1, 2],
132
+ "contents": "This is a test.",
133
+ "direction": "vertical",
134
+ "order": 1,
135
+ "role": None,
136
+ },
137
+ {
138
+ "box": [10, 10, 1, 2],
139
+ "contents": "This is a test.",
140
+ "direction": "horizontal",
141
+ "order": 1,
142
+ "role": None,
143
+ },
144
+ ]
145
+
146
+ figures = [
147
+ {
148
+ "box": [0, 0, 2, 2],
149
+ "order": 1,
150
+ "paragraphs": [],
151
+ "direction": None,
152
+ }
153
+ ]
154
+
155
+ paragraphs = [ParagraphSchema(**paragraph) for paragraph in paragraphs]
156
+ figures = [FigureSchema(**figure) for figure in figures]
157
+
158
+ figures, checklist = extract_paragraph_within_figure(paragraphs, figures)
159
+
160
+ assert checklist == [True, True, False]
161
+ assert len(figures[0].paragraphs) == 2
162
+
163
+
164
+ def test_combile_flags():
165
+ flags1 = [True, False, True]
166
+ flags2 = [False, False, True]
167
+
168
+ assert combine_flags(flags1, flags2) == [True, False, True]
169
+
170
+
171
+ def test_judge_page_direction():
172
+ paragraphs = [
173
+ {
174
+ "box": [0, 0, 2, 1],
175
+ "contents": "This is a test.",
176
+ "direction": "horizontal",
177
+ "order": 1,
178
+ "role": None,
179
+ },
180
+ {
181
+ "box": [0, 0, 1, 2],
182
+ "contents": "This is a test.",
183
+ "direction": "vertical",
184
+ "order": 1,
185
+ "role": None,
186
+ },
187
+ {
188
+ "box": [10, 10, 1, 2],
189
+ "contents": "This is a test.",
190
+ "direction": "horizontal",
191
+ "order": 1,
192
+ "role": None,
193
+ },
194
+ ]
195
+
196
+ paragraphs = [ParagraphSchema(**paragraph) for paragraph in paragraphs]
197
+ assert judge_page_direction(paragraphs) == "horizontal"
198
+
199
+ paragraphs = [
200
+ {
201
+ "box": [0, 0, 2, 1],
202
+ "contents": "This is a test.",
203
+ "direction": "horizontal",
204
+ "order": 1,
205
+ "role": None,
206
+ },
207
+ {
208
+ "box": [0, 0, 1, 2],
209
+ "contents": "This is a test.",
210
+ "direction": "vertical",
211
+ "order": 1,
212
+ "role": None,
213
+ },
214
+ {
215
+ "box": [10, 10, 2, 1],
216
+ "contents": "This is a test.",
217
+ "direction": "vertical",
218
+ "order": 1,
219
+ "role": None,
220
+ },
221
+ ]
222
+
223
+ paragraphs = [ParagraphSchema(**paragraph) for paragraph in paragraphs]
224
+ assert judge_page_direction(paragraphs) == "vertical"
225
+
226
+
227
+ def test_extract_words_within_element():
228
+ paragraph = {
229
+ "box": [0, 0, 1, 1],
230
+ "contents": "This is a test.",
231
+ "direction": "horizontal",
232
+ "order": 1,
233
+ "role": None,
234
+ }
235
+
236
+ element = ParagraphSchema(**paragraph)
237
+
238
+ words = [
239
+ {
240
+ "points": [[10, 10], [11, 10], [11, 11], [10, 11]],
241
+ "content": "This",
242
+ "direction": "horizontal",
243
+ "rec_score": 0.9,
244
+ "det_score": 0.9,
245
+ }
246
+ ]
247
+
248
+ words = [WordPrediction(**word) for word in words]
249
+
250
+ words, direction, checklist = extract_words_within_element(words, element)
251
+
252
+ assert words is None
253
+ assert direction is None
254
+ assert checklist == [False]
255
+
256
+ paragraph = {
257
+ "box": [0, 0, 5, 5],
258
+ "contents": "This is a test.",
259
+ "direction": "horizontal",
260
+ "order": 1,
261
+ "role": None,
262
+ }
263
+
264
+ element = ParagraphSchema(**paragraph)
265
+
266
+ words = [
267
+ {
268
+ "points": [[0, 0], [1, 0], [1, 1], [0, 1]],
269
+ "content": "Hello",
270
+ "direction": "horizontal",
271
+ "rec_score": 0.9,
272
+ "det_score": 0.9,
273
+ },
274
+ {
275
+ "points": [[0, 1], [1, 1], [1, 2], [0, 2]],
276
+ "content": "World",
277
+ "direction": "horizontal",
278
+ "rec_score": 0.9,
279
+ "det_score": 0.9,
280
+ },
281
+ ]
282
+
283
+ words = [WordPrediction(**word) for word in words]
284
+
285
+ words, direction, checklist = extract_words_within_element(words, element)
286
+
287
+ assert words == "Hello\nWorld"
288
+ assert direction == "horizontal"
289
+ assert checklist == [True, True]
290
+
291
+ paragraph = {
292
+ "box": [0, 0, 5, 5],
293
+ "contents": "This is a test.",
294
+ "direction": "horizontal",
295
+ "order": 1,
296
+ "role": None,
297
+ }
298
+
299
+ element = ParagraphSchema(**paragraph)
300
+
301
+ words = [
302
+ {
303
+ "points": [[2, 0], [3, 0], [3, 1], [2, 1]],
304
+ "content": "Hello",
305
+ "direction": "vertical",
306
+ "rec_score": 0.9,
307
+ "det_score": 0.9,
308
+ },
309
+ {
310
+ "points": [[0, 1], [1, 1], [1, 2], [0, 2]],
311
+ "content": "World",
312
+ "direction": "vertical",
313
+ "rec_score": 0.9,
314
+ "det_score": 0.9,
315
+ },
316
+ ]
317
+
318
+ words = [WordPrediction(**word) for word in words]
319
+
320
+ words, direction, checklist = extract_words_within_element(words, element)
321
+
322
+ assert words == "Hello\nWorld"
323
+ assert direction == "vertical"
324
+ assert checklist == [True, True]
325
+
326
+
327
+ def test_is_vertical():
328
+ quad = [[0, 0], [1, 0], [1, 1], [0, 1]]
329
+ assert not is_vertical(quad)
330
+ quad = [[0, 0], [1, 0], [1, 3], [0, 3]]
331
+ assert is_vertical(quad)
332
+
333
+
334
+ def test_is_noise():
335
+ quad = [[0, 0], [1, 0], [1, 1], [0, 1]]
336
+ assert is_noise(quad)
337
+
338
+ quad = [[0, 0], [20, 0], [20, 20], [0, 20]]
339
+ assert not is_noise(quad)
340
+
341
+
342
+ def test_recursive_update():
343
+ original = {"a": {"b": {"c": 1, "d": 2}}}
344
+ update = {"a": {"b": {"d": 3, "e": 4}}}
345
+
346
+ updated = recursive_update(original, update)
347
+
348
+ assert updated == {"a": {"b": {"c": 1, "d": 3, "e": 4}}}
349
+
350
+
351
+ def test_extract_words_within_table():
352
+ points = [
353
+ [[0, 0], [3, 0], [3, 1], [0, 1]],
354
+ [[3, 0], [5, 0], [5, 1], [3, 1]],
355
+ [[0, 1], [1, 1], [1, 4], [0, 4]],
356
+ [[3, 1], [3, 1], [4, 4], [4, 4]],
357
+ ]
358
+
359
+ scores = [0.9, 0.9, 0.9, 0.9]
360
+
361
+ words = TextDetectorSchema(points=points, scores=scores)
362
+
363
+ table = {
364
+ "box": [0, 0, 3, 3],
365
+ "n_row": 2,
366
+ "n_col": 2,
367
+ "rows": [],
368
+ "cols": [],
369
+ "cells": [],
370
+ "order": 0,
371
+ }
372
+
373
+ table = TableStructureRecognizerSchema(**table)
374
+ checklist = [False, False, False, False]
375
+ h_words, v_words, checklist = _extract_words_within_table(words, table, checklist)
376
+
377
+ assert len(h_words) == 1
378
+ assert len(v_words) == 1
379
+ assert checklist == [True, False, True, False]
380
+
381
+
382
+ def test_calc_overlap_words_on_lines():
383
+ lines = [
384
+ {
385
+ "box": [0, 0, 2, 1],
386
+ "score": 0.9,
387
+ },
388
+ {
389
+ "box": [0, 1, 1, 1],
390
+ "score": 0.9,
391
+ },
392
+ ]
393
+
394
+ lines = [TableLineSchema(**line) for line in lines]
395
+
396
+ words = [
397
+ {
398
+ "points": [[0, 0], [1, 0], [1, 1], [0, 1]],
399
+ },
400
+ {
401
+ "points": [[1, 0], [3, 0], [3, 1], [1, 1]],
402
+ },
403
+ ]
404
+
405
+ overrap_ratios = _calc_overlap_words_on_lines(lines, words)
406
+
407
+ assert overrap_ratios == [[1.0, 0.0], [0.5, 0.0]]
408
+
409
+
410
+ def test_correct_vertical_word_boxes():
411
+ words = [
412
+ {
413
+ "points": [[0, 0], [20, 0], [20, 100], [0, 100]],
414
+ "score": 0.9,
415
+ },
416
+ ]
417
+
418
+ cols = [TableLineSchema(box=[0, 0, 20, 100], score=0.9)]
419
+ rows = [
420
+ TableLineSchema(box=[0, 0, 20, 50], score=0.9),
421
+ TableLineSchema(box=[0, 50, 20, 100], score=0.9),
422
+ ]
423
+
424
+ cells = [
425
+ {
426
+ "col": 1,
427
+ "row": 1,
428
+ "col_span": 1,
429
+ "row_span": 1,
430
+ "box": [0, 0, 20, 50],
431
+ "contents": None,
432
+ },
433
+ {
434
+ "col": 1,
435
+ "row": 2,
436
+ "col_span": 1,
437
+ "row_span": 1,
438
+ "box": [0, 50, 20, 100],
439
+ "contents": None,
440
+ },
441
+ ]
442
+
443
+ cells = [TableCellSchema(**cell) for cell in cells]
444
+
445
+ table = {
446
+ "box": [0, 0, 100, 20],
447
+ "n_row": 2,
448
+ "n_col": 1,
449
+ "rows": rows,
450
+ "cols": cols,
451
+ "cells": cells,
452
+ "order": 0,
453
+ }
454
+
455
+ table = TableStructureRecognizerSchema(**table)
456
+
457
+ overrap_ratios = _calc_overlap_words_on_lines(cols, words)
458
+
459
+ points, scores = _correct_vertical_word_boxes(
460
+ overrap_ratios,
461
+ table,
462
+ words,
463
+ )
464
+
465
+ assert len(points) == 2
466
+ assert len(scores) == 2
467
+ assert points[0] == [[0, 0], [20, 0], [20, 50], [0, 50]]
468
+ assert points[1] == [[0, 50], [20, 50], [20, 100], [0, 100]]
469
+
470
+
471
+ def test_correct_horizontal_word_boxes():
472
+ words = [
473
+ {
474
+ "points": [[0, 0], [100, 0], [100, 20], [0, 20]],
475
+ "score": 0.9,
476
+ },
477
+ ]
478
+
479
+ cols = [
480
+ TableLineSchema(box=[0, 0, 50, 20], score=0.9),
481
+ TableLineSchema(box=[50, 0, 100, 20], score=0.9),
482
+ ]
483
+ rows = [
484
+ TableLineSchema(box=[0, 0, 100, 20], score=0.9),
485
+ ]
486
+
487
+ cells = [
488
+ {
489
+ "col": 1,
490
+ "row": 1,
491
+ "col_span": 1,
492
+ "row_span": 1,
493
+ "box": [0, 0, 50, 20],
494
+ "contents": None,
495
+ },
496
+ {
497
+ "col": 2,
498
+ "row": 1,
499
+ "col_span": 1,
500
+ "row_span": 1,
501
+ "box": [50, 0, 100, 20],
502
+ "contents": None,
503
+ },
504
+ ]
505
+
506
+ cells = [TableCellSchema(**cell) for cell in cells]
507
+
508
+ table = {
509
+ "box": [0, 0, 20, 100],
510
+ "n_row": 2,
511
+ "n_col": 1,
512
+ "rows": rows,
513
+ "cols": cols,
514
+ "cells": cells,
515
+ "order": 0,
516
+ }
517
+
518
+ table = TableStructureRecognizerSchema(**table)
519
+
520
+ overrap_ratios = _calc_overlap_words_on_lines(cols, words)
521
+
522
+ points, scores = _correct_horizontal_word_boxes(
523
+ overrap_ratios,
524
+ table,
525
+ words,
526
+ )
527
+
528
+ assert len(points) == 2
529
+ assert len(scores) == 2
530
+ assert points[0] == [[0, 0], [50, 0], [50, 20], [0, 20]]
531
+ assert points[1] == [[50, 0], [100, 0], [100, 20], [50, 20]]
532
+
533
+
534
+ def test_split_text_across_cells():
535
+ points = [
536
+ [[0, 0], [100, 0], [100, 20], [0, 20]],
537
+ ]
538
+
539
+ scores = [0.9]
540
+
541
+ words = TextDetectorSchema(points=points, scores=scores)
542
+
543
+ cols = [
544
+ TableLineSchema(box=[0, 0, 50, 20], score=0.9),
545
+ TableLineSchema(box=[50, 0, 100, 20], score=0.9),
546
+ ]
547
+ rows = [
548
+ TableLineSchema(box=[0, 0, 100, 20], score=0.9),
549
+ ]
550
+
551
+ cells = [
552
+ {
553
+ "col": 1,
554
+ "row": 1,
555
+ "col_span": 1,
556
+ "row_span": 1,
557
+ "box": [0, 0, 50, 20],
558
+ "contents": None,
559
+ },
560
+ {
561
+ "col": 2,
562
+ "row": 1,
563
+ "col_span": 1,
564
+ "row_span": 1,
565
+ "box": [50, 0, 100, 20],
566
+ "contents": None,
567
+ },
568
+ ]
569
+
570
+ cells = [TableCellSchema(**cell) for cell in cells]
571
+
572
+ table = {
573
+ "box": [0, 0, 100, 20],
574
+ "n_row": 2,
575
+ "n_col": 1,
576
+ "rows": rows,
577
+ "cols": cols,
578
+ "cells": cells,
579
+ "order": 0,
580
+ }
581
+
582
+ table = TableStructureRecognizerSchema(**table)
583
+
584
+ Layout = DocumentAnalyzerSchema(
585
+ paragraphs=[],
586
+ figures=[],
587
+ tables=[table],
588
+ words=[],
589
+ )
590
+
591
+ results = _split_text_across_cells(words, Layout)
592
+
593
+ assert len(results.points) == 2
594
+ assert len(results.scores) == 2
595
+ assert results.points[0] == [[0, 0], [50, 0], [50, 20], [0, 20]]
596
+ assert results.points[1] == [[50, 0], [100, 0], [100, 20], [50, 20]]
@@ -1578,7 +1578,7 @@ wheels = [
1578
1578
 
1579
1579
  [[package]]
1580
1580
  name = "yomitoku"
1581
- version = "0.7.0"
1581
+ version = "0.7.1"
1582
1582
  source = { editable = "." }
1583
1583
  dependencies = [
1584
1584
  { name = "huggingface-hub" },