yomitoku 0.9.0__tar.gz → 0.9.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184) hide show
  1. {yomitoku-0.9.0 → yomitoku-0.9.1}/PKG-INFO +2 -1
  2. {yomitoku-0.9.0 → yomitoku-0.9.1}/README.md +1 -0
  3. {yomitoku-0.9.0 → yomitoku-0.9.1}/demo/simple_ocr.py +4 -1
  4. {yomitoku-0.9.0 → yomitoku-0.9.1}/docs/cli.en.md +15 -1
  5. {yomitoku-0.9.0 → yomitoku-0.9.1}/docs/cli.ja.md +15 -1
  6. {yomitoku-0.9.0 → yomitoku-0.9.1}/src/yomitoku/cli/main.py +14 -5
  7. {yomitoku-0.9.0 → yomitoku-0.9.1}/src/yomitoku/data/dataset.py +20 -10
  8. {yomitoku-0.9.0 → yomitoku-0.9.1}/src/yomitoku/data/functions.py +19 -20
  9. {yomitoku-0.9.0 → yomitoku-0.9.1}/src/yomitoku/document_analyzer.py +21 -6
  10. {yomitoku-0.9.0 → yomitoku-0.9.1}/src/yomitoku/reading_order.py +38 -8
  11. {yomitoku-0.9.0 → yomitoku-0.9.1}/src/yomitoku/utils/misc.py +49 -2
  12. {yomitoku-0.9.0 → yomitoku-0.9.1}/static/out/in_gallery4_p1.html +6 -6
  13. {yomitoku-0.9.0 → yomitoku-0.9.1}/static/out/in_gallery4_p1.md +9 -9
  14. yomitoku-0.9.1/static/out/in_gallery4_p1_layout.jpg +0 -0
  15. yomitoku-0.9.1/static/out/in_gallery4_p1_ocr.jpg +0 -0
  16. {yomitoku-0.9.0 → yomitoku-0.9.1}/tests/test_data.py +14 -31
  17. {yomitoku-0.9.0 → yomitoku-0.9.1}/uv.lock +700 -700
  18. yomitoku-0.9.0/static/out/in_gallery4_p1_layout.jpg +0 -0
  19. yomitoku-0.9.0/static/out/in_gallery4_p1_ocr.jpg +0 -0
  20. {yomitoku-0.9.0 → yomitoku-0.9.1}/.github/FUNDING.yml +0 -0
  21. {yomitoku-0.9.0 → yomitoku-0.9.1}/.github/release-drafter.yml +0 -0
  22. {yomitoku-0.9.0 → yomitoku-0.9.1}/.github/workflows/build-and-publish-docs.yaml +0 -0
  23. {yomitoku-0.9.0 → yomitoku-0.9.1}/.github/workflows/build-and-publish.yml +0 -0
  24. {yomitoku-0.9.0 → yomitoku-0.9.1}/.github/workflows/create-release.yml +0 -0
  25. {yomitoku-0.9.0 → yomitoku-0.9.1}/.github/workflows/lint-and-test.yml +0 -0
  26. {yomitoku-0.9.0 → yomitoku-0.9.1}/.gitignore +0 -0
  27. {yomitoku-0.9.0 → yomitoku-0.9.1}/.pre-commit-config.yaml +0 -0
  28. {yomitoku-0.9.0 → yomitoku-0.9.1}/.python-version +0 -0
  29. {yomitoku-0.9.0 → yomitoku-0.9.1}/README_EN.md +0 -0
  30. {yomitoku-0.9.0 → yomitoku-0.9.1}/configs/yomitoku-layout-parser-rtdtrv2-open-beta.yaml +0 -0
  31. {yomitoku-0.9.0 → yomitoku-0.9.1}/configs/yomitoku-table-structure-recognizer-rtdtrv2-open-beta.yaml +0 -0
  32. {yomitoku-0.9.0 → yomitoku-0.9.1}/configs/yomitoku-text-detector-dbnet-open-beta.yaml +0 -0
  33. {yomitoku-0.9.0 → yomitoku-0.9.1}/configs/yomitoku-text-recognizer-parseq-open-beta.yaml +0 -0
  34. {yomitoku-0.9.0 → yomitoku-0.9.1}/configs/yomitoku-text-recognizer-parseq-small-open-beta.yaml +0 -0
  35. {yomitoku-0.9.0 → yomitoku-0.9.1}/demo/sample.pdf +0 -0
  36. {yomitoku-0.9.0 → yomitoku-0.9.1}/demo/setting_document_anaysis.py +0 -0
  37. {yomitoku-0.9.0 → yomitoku-0.9.1}/demo/simple_document_analysis.py +0 -0
  38. {yomitoku-0.9.0 → yomitoku-0.9.1}/demo/simple_layout.py +0 -0
  39. {yomitoku-0.9.0 → yomitoku-0.9.1}/demo/text_detector.yaml +0 -0
  40. {yomitoku-0.9.0 → yomitoku-0.9.1}/dockerfile +0 -0
  41. {yomitoku-0.9.0 → yomitoku-0.9.1}/docs/assets/logo.svg +0 -0
  42. {yomitoku-0.9.0 → yomitoku-0.9.1}/docs/configuration.en.md +0 -0
  43. {yomitoku-0.9.0 → yomitoku-0.9.1}/docs/configuration.ja.md +0 -0
  44. {yomitoku-0.9.0 → yomitoku-0.9.1}/docs/index.en.md +0 -0
  45. {yomitoku-0.9.0 → yomitoku-0.9.1}/docs/index.ja.md +0 -0
  46. {yomitoku-0.9.0 → yomitoku-0.9.1}/docs/installation.en.md +0 -0
  47. {yomitoku-0.9.0 → yomitoku-0.9.1}/docs/installation.ja.md +0 -0
  48. {yomitoku-0.9.0 → yomitoku-0.9.1}/docs/mcp.en.md +0 -0
  49. {yomitoku-0.9.0 → yomitoku-0.9.1}/docs/mcp.ja.md +0 -0
  50. {yomitoku-0.9.0 → yomitoku-0.9.1}/docs/module.en.md +0 -0
  51. {yomitoku-0.9.0 → yomitoku-0.9.1}/docs/module.ja.md +0 -0
  52. {yomitoku-0.9.0 → yomitoku-0.9.1}/gallery.md +0 -0
  53. {yomitoku-0.9.0 → yomitoku-0.9.1}/mkdocs.yml +0 -0
  54. {yomitoku-0.9.0 → yomitoku-0.9.1}/pyproject.toml +0 -0
  55. {yomitoku-0.9.0 → yomitoku-0.9.1}/pytest.ini +0 -0
  56. {yomitoku-0.9.0 → yomitoku-0.9.1}/scripts/register_hugging_face_hub.py +0 -0
  57. {yomitoku-0.9.0 → yomitoku-0.9.1}/src/yomitoku/__init__.py +0 -0
  58. {yomitoku-0.9.0 → yomitoku-0.9.1}/src/yomitoku/base.py +0 -0
  59. {yomitoku-0.9.0 → yomitoku-0.9.1}/src/yomitoku/cli/__init__.py +0 -0
  60. {yomitoku-0.9.0 → yomitoku-0.9.1}/src/yomitoku/cli/mcp.py +0 -0
  61. {yomitoku-0.9.0 → yomitoku-0.9.1}/src/yomitoku/configs/__init__.py +0 -0
  62. {yomitoku-0.9.0 → yomitoku-0.9.1}/src/yomitoku/configs/cfg_layout_parser_rtdtrv2.py +0 -0
  63. {yomitoku-0.9.0 → yomitoku-0.9.1}/src/yomitoku/configs/cfg_layout_parser_rtdtrv2_v2.py +0 -0
  64. {yomitoku-0.9.0 → yomitoku-0.9.1}/src/yomitoku/configs/cfg_table_structure_recognizer_rtdtrv2.py +0 -0
  65. {yomitoku-0.9.0 → yomitoku-0.9.1}/src/yomitoku/configs/cfg_text_detector_dbnet.py +0 -0
  66. {yomitoku-0.9.0 → yomitoku-0.9.1}/src/yomitoku/configs/cfg_text_detector_dbnet_v2.py +0 -0
  67. {yomitoku-0.9.0 → yomitoku-0.9.1}/src/yomitoku/configs/cfg_text_recognizer_parseq.py +0 -0
  68. {yomitoku-0.9.0 → yomitoku-0.9.1}/src/yomitoku/configs/cfg_text_recognizer_parseq_small.py +0 -0
  69. {yomitoku-0.9.0 → yomitoku-0.9.1}/src/yomitoku/configs/cfg_text_recognizer_parseq_v2.py +0 -0
  70. {yomitoku-0.9.0 → yomitoku-0.9.1}/src/yomitoku/constants.py +0 -0
  71. {yomitoku-0.9.0 → yomitoku-0.9.1}/src/yomitoku/data/__init__.py +0 -0
  72. {yomitoku-0.9.0 → yomitoku-0.9.1}/src/yomitoku/export/__init__.py +0 -0
  73. {yomitoku-0.9.0 → yomitoku-0.9.1}/src/yomitoku/export/export_csv.py +0 -0
  74. {yomitoku-0.9.0 → yomitoku-0.9.1}/src/yomitoku/export/export_html.py +0 -0
  75. {yomitoku-0.9.0 → yomitoku-0.9.1}/src/yomitoku/export/export_json.py +0 -0
  76. {yomitoku-0.9.0 → yomitoku-0.9.1}/src/yomitoku/export/export_markdown.py +0 -0
  77. {yomitoku-0.9.0 → yomitoku-0.9.1}/src/yomitoku/layout_analyzer.py +0 -0
  78. {yomitoku-0.9.0 → yomitoku-0.9.1}/src/yomitoku/layout_parser.py +0 -0
  79. {yomitoku-0.9.0 → yomitoku-0.9.1}/src/yomitoku/models/__init__.py +0 -0
  80. {yomitoku-0.9.0 → yomitoku-0.9.1}/src/yomitoku/models/dbnet_plus.py +0 -0
  81. {yomitoku-0.9.0 → yomitoku-0.9.1}/src/yomitoku/models/layers/__init__.py +0 -0
  82. {yomitoku-0.9.0 → yomitoku-0.9.1}/src/yomitoku/models/layers/activate.py +0 -0
  83. {yomitoku-0.9.0 → yomitoku-0.9.1}/src/yomitoku/models/layers/dbnet_feature_attention.py +0 -0
  84. {yomitoku-0.9.0 → yomitoku-0.9.1}/src/yomitoku/models/layers/parseq_transformer.py +0 -0
  85. {yomitoku-0.9.0 → yomitoku-0.9.1}/src/yomitoku/models/layers/rtdetr_backbone.py +0 -0
  86. {yomitoku-0.9.0 → yomitoku-0.9.1}/src/yomitoku/models/layers/rtdetr_hybrid_encoder.py +0 -0
  87. {yomitoku-0.9.0 → yomitoku-0.9.1}/src/yomitoku/models/layers/rtdetrv2_decoder.py +0 -0
  88. {yomitoku-0.9.0 → yomitoku-0.9.1}/src/yomitoku/models/parseq.py +0 -0
  89. {yomitoku-0.9.0 → yomitoku-0.9.1}/src/yomitoku/models/rtdetr.py +0 -0
  90. {yomitoku-0.9.0 → yomitoku-0.9.1}/src/yomitoku/ocr.py +0 -0
  91. {yomitoku-0.9.0 → yomitoku-0.9.1}/src/yomitoku/onnx/.gitkeep +0 -0
  92. {yomitoku-0.9.0 → yomitoku-0.9.1}/src/yomitoku/postprocessor/__init__.py +0 -0
  93. {yomitoku-0.9.0 → yomitoku-0.9.1}/src/yomitoku/postprocessor/dbnet_postporcessor.py +0 -0
  94. {yomitoku-0.9.0 → yomitoku-0.9.1}/src/yomitoku/postprocessor/parseq_tokenizer.py +0 -0
  95. {yomitoku-0.9.0 → yomitoku-0.9.1}/src/yomitoku/postprocessor/rtdetr_postprocessor.py +0 -0
  96. {yomitoku-0.9.0 → yomitoku-0.9.1}/src/yomitoku/resource/MPLUS1p-Medium.ttf +0 -0
  97. {yomitoku-0.9.0 → yomitoku-0.9.1}/src/yomitoku/resource/charset.txt +0 -0
  98. {yomitoku-0.9.0 → yomitoku-0.9.1}/src/yomitoku/table_structure_recognizer.py +0 -0
  99. {yomitoku-0.9.0 → yomitoku-0.9.1}/src/yomitoku/text_detector.py +0 -0
  100. {yomitoku-0.9.0 → yomitoku-0.9.1}/src/yomitoku/text_recognizer.py +0 -0
  101. {yomitoku-0.9.0 → yomitoku-0.9.1}/src/yomitoku/utils/__init__.py +0 -0
  102. {yomitoku-0.9.0 → yomitoku-0.9.1}/src/yomitoku/utils/graph.py +0 -0
  103. {yomitoku-0.9.0 → yomitoku-0.9.1}/src/yomitoku/utils/logger.py +0 -0
  104. {yomitoku-0.9.0 → yomitoku-0.9.1}/src/yomitoku/utils/visualizer.py +0 -0
  105. {yomitoku-0.9.0 → yomitoku-0.9.1}/static/in/demo.jpg +0 -0
  106. {yomitoku-0.9.0 → yomitoku-0.9.1}/static/in/gallery1.jpg +0 -0
  107. {yomitoku-0.9.0 → yomitoku-0.9.1}/static/in/gallery2.jpg +0 -0
  108. {yomitoku-0.9.0 → yomitoku-0.9.1}/static/in/gallery3.jpg +0 -0
  109. {yomitoku-0.9.0 → yomitoku-0.9.1}/static/in/gallery4.jpg +0 -0
  110. {yomitoku-0.9.0 → yomitoku-0.9.1}/static/in/gallery5.jpg +0 -0
  111. {yomitoku-0.9.0 → yomitoku-0.9.1}/static/in/gallery6.jpg +0 -0
  112. {yomitoku-0.9.0 → yomitoku-0.9.1}/static/in/gallery7.jpeg +0 -0
  113. {yomitoku-0.9.0 → yomitoku-0.9.1}/static/logo/horizontal.png +0 -0
  114. {yomitoku-0.9.0 → yomitoku-0.9.1}/static/out/demo_html.png +0 -0
  115. {yomitoku-0.9.0 → yomitoku-0.9.1}/static/out/figures/in_demo_p1_figure_0.png +0 -0
  116. {yomitoku-0.9.0 → yomitoku-0.9.1}/static/out/figures/in_gallery1_p1_figure_0.png +0 -0
  117. {yomitoku-0.9.0 → yomitoku-0.9.1}/static/out/figures/in_gallery1_p1_figure_1.png +0 -0
  118. {yomitoku-0.9.0 → yomitoku-0.9.1}/static/out/figures/in_gallery1_p1_figure_10.png +0 -0
  119. {yomitoku-0.9.0 → yomitoku-0.9.1}/static/out/figures/in_gallery1_p1_figure_2.png +0 -0
  120. {yomitoku-0.9.0 → yomitoku-0.9.1}/static/out/figures/in_gallery1_p1_figure_3.png +0 -0
  121. {yomitoku-0.9.0 → yomitoku-0.9.1}/static/out/figures/in_gallery1_p1_figure_4.png +0 -0
  122. {yomitoku-0.9.0 → yomitoku-0.9.1}/static/out/figures/in_gallery1_p1_figure_5.png +0 -0
  123. {yomitoku-0.9.0 → yomitoku-0.9.1}/static/out/figures/in_gallery1_p1_figure_6.png +0 -0
  124. {yomitoku-0.9.0 → yomitoku-0.9.1}/static/out/figures/in_gallery1_p1_figure_7.png +0 -0
  125. {yomitoku-0.9.0 → yomitoku-0.9.1}/static/out/figures/in_gallery1_p1_figure_8.png +0 -0
  126. {yomitoku-0.9.0 → yomitoku-0.9.1}/static/out/figures/in_gallery1_p1_figure_9.png +0 -0
  127. {yomitoku-0.9.0 → yomitoku-0.9.1}/static/out/figures/in_gallery3_p1_figure_0.png +0 -0
  128. {yomitoku-0.9.0 → yomitoku-0.9.1}/static/out/figures/in_gallery3_p1_figure_1.png +0 -0
  129. {yomitoku-0.9.0 → yomitoku-0.9.1}/static/out/figures/in_gallery5_p1_figure_0.png +0 -0
  130. {yomitoku-0.9.0 → yomitoku-0.9.1}/static/out/figures/in_gallery5_p1_figure_1.png +0 -0
  131. {yomitoku-0.9.0 → yomitoku-0.9.1}/static/out/figures/in_gallery6_p1_figure_0.png +0 -0
  132. {yomitoku-0.9.0 → yomitoku-0.9.1}/static/out/figures/in_gallery6_p1_figure_1.png +0 -0
  133. {yomitoku-0.9.0 → yomitoku-0.9.1}/static/out/figures/in_gallery7_p1_figure_0.png +0 -0
  134. {yomitoku-0.9.0 → yomitoku-0.9.1}/static/out/in_demo_p1.html +0 -0
  135. {yomitoku-0.9.0 → yomitoku-0.9.1}/static/out/in_demo_p1.md +0 -0
  136. {yomitoku-0.9.0 → yomitoku-0.9.1}/static/out/in_demo_p1_layout.jpg +0 -0
  137. {yomitoku-0.9.0 → yomitoku-0.9.1}/static/out/in_demo_p1_ocr.jpg +0 -0
  138. {yomitoku-0.9.0 → yomitoku-0.9.1}/static/out/in_gallery1_p1.html +0 -0
  139. {yomitoku-0.9.0 → yomitoku-0.9.1}/static/out/in_gallery1_p1.md +0 -0
  140. {yomitoku-0.9.0 → yomitoku-0.9.1}/static/out/in_gallery1_p1_layout.jpg +0 -0
  141. {yomitoku-0.9.0 → yomitoku-0.9.1}/static/out/in_gallery1_p1_ocr.jpg +0 -0
  142. {yomitoku-0.9.0 → yomitoku-0.9.1}/static/out/in_gallery2_p1.html +0 -0
  143. {yomitoku-0.9.0 → yomitoku-0.9.1}/static/out/in_gallery2_p1.md +0 -0
  144. {yomitoku-0.9.0 → yomitoku-0.9.1}/static/out/in_gallery2_p1_layout.jpg +0 -0
  145. {yomitoku-0.9.0 → yomitoku-0.9.1}/static/out/in_gallery2_p1_ocr.jpg +0 -0
  146. {yomitoku-0.9.0 → yomitoku-0.9.1}/static/out/in_gallery3_p1.html +0 -0
  147. {yomitoku-0.9.0 → yomitoku-0.9.1}/static/out/in_gallery3_p1.md +0 -0
  148. {yomitoku-0.9.0 → yomitoku-0.9.1}/static/out/in_gallery3_p1_layout.jpg +0 -0
  149. {yomitoku-0.9.0 → yomitoku-0.9.1}/static/out/in_gallery3_p1_ocr.jpg +0 -0
  150. {yomitoku-0.9.0 → yomitoku-0.9.1}/static/out/in_gallery5_p1.html +0 -0
  151. {yomitoku-0.9.0 → yomitoku-0.9.1}/static/out/in_gallery5_p1.md +0 -0
  152. {yomitoku-0.9.0 → yomitoku-0.9.1}/static/out/in_gallery5_p1_layout.jpg +0 -0
  153. {yomitoku-0.9.0 → yomitoku-0.9.1}/static/out/in_gallery5_p1_ocr.jpg +0 -0
  154. {yomitoku-0.9.0 → yomitoku-0.9.1}/static/out/in_gallery6_p1.html +0 -0
  155. {yomitoku-0.9.0 → yomitoku-0.9.1}/static/out/in_gallery6_p1.md +0 -0
  156. {yomitoku-0.9.0 → yomitoku-0.9.1}/static/out/in_gallery6_p1_layout.jpg +0 -0
  157. {yomitoku-0.9.0 → yomitoku-0.9.1}/static/out/in_gallery6_p1_ocr.jpg +0 -0
  158. {yomitoku-0.9.0 → yomitoku-0.9.1}/static/out/in_gallery7_p1.html +0 -0
  159. {yomitoku-0.9.0 → yomitoku-0.9.1}/static/out/in_gallery7_p1.md +0 -0
  160. {yomitoku-0.9.0 → yomitoku-0.9.1}/static/out/in_gallery7_p1_layout.jpg +0 -0
  161. {yomitoku-0.9.0 → yomitoku-0.9.1}/static/out/in_gallery7_p1_ocr.jpg +0 -0
  162. {yomitoku-0.9.0 → yomitoku-0.9.1}/tests/data/invalid.jpg +0 -0
  163. {yomitoku-0.9.0 → yomitoku-0.9.1}/tests/data/invalid.pdf +0 -0
  164. {yomitoku-0.9.0 → yomitoku-0.9.1}/tests/data/rgba.png +0 -0
  165. {yomitoku-0.9.0 → yomitoku-0.9.1}/tests/data/sampldoc.tif +0 -0
  166. {yomitoku-0.9.0 → yomitoku-0.9.1}/tests/data/small.jpg +0 -0
  167. {yomitoku-0.9.0 → yomitoku-0.9.1}/tests/data/subdir/test.jpg +0 -0
  168. {yomitoku-0.9.0 → yomitoku-0.9.1}/tests/data/test.bmp +0 -0
  169. {yomitoku-0.9.0 → yomitoku-0.9.1}/tests/data/test.jpg +0 -0
  170. {yomitoku-0.9.0 → yomitoku-0.9.1}/tests/data/test.pdf +0 -0
  171. {yomitoku-0.9.0 → yomitoku-0.9.1}/tests/data/test.png +0 -0
  172. {yomitoku-0.9.0 → yomitoku-0.9.1}/tests/data/test.tiff +0 -0
  173. {yomitoku-0.9.0 → yomitoku-0.9.1}/tests/data/test.txt +0 -0
  174. {yomitoku-0.9.0 → yomitoku-0.9.1}/tests/data/test_gray.jpg +0 -0
  175. {yomitoku-0.9.0 → yomitoku-0.9.1}/tests/test_base.py +0 -0
  176. {yomitoku-0.9.0 → yomitoku-0.9.1}/tests/test_cli.py +0 -0
  177. {yomitoku-0.9.0 → yomitoku-0.9.1}/tests/test_document_analyzer.py +0 -0
  178. {yomitoku-0.9.0 → yomitoku-0.9.1}/tests/test_export.py +0 -0
  179. {yomitoku-0.9.0 → yomitoku-0.9.1}/tests/test_layout_analyzer.py +0 -0
  180. {yomitoku-0.9.0 → yomitoku-0.9.1}/tests/test_ocr.py +0 -0
  181. {yomitoku-0.9.0 → yomitoku-0.9.1}/tests/yaml/layout_parser.yaml +0 -0
  182. {yomitoku-0.9.0 → yomitoku-0.9.1}/tests/yaml/table_structure_recognizer.yaml +0 -0
  183. {yomitoku-0.9.0 → yomitoku-0.9.1}/tests/yaml/text_detector.yaml +0 -0
  184. {yomitoku-0.9.0 → yomitoku-0.9.1}/tests/yaml/text_recognizer.yaml +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: yomitoku
3
- Version: 0.9.0
3
+ Version: 0.9.1
4
4
  Summary: Yomitoku is an AI-powered document image analysis package designed specifically for the Japanese language.
5
5
  Author-email: Kotaro Kinoshita <kotaro.kinoshita@mlism.com>
6
6
  License: CC BY-NC-SA 4.0
@@ -66,6 +66,7 @@ Markdown でエクスポートした結果は関してはリポジトリ内の[s
66
66
 
67
67
  ## 📣 リリース情報
68
68
 
69
+ - 2025 年 4 月 4 日 YomiToku v0.8.0 手書き文字認識のサポート
69
70
  - 2024 年 11 月 26 日 YomiToku v0.5.1 (beta) を公開
70
71
 
71
72
  ## 💡 インストールの方法
@@ -41,6 +41,7 @@ Markdown でエクスポートした結果は関してはリポジトリ内の[s
41
41
 
42
42
  ## 📣 リリース情報
43
43
 
44
+ - 2025 年 4 月 4 日 YomiToku v0.8.0 手書き文字認識のサポート
44
45
  - 2024 年 11 月 26 日 YomiToku v0.5.1 (beta) を公開
45
46
 
46
47
  ## 💡 インストールの方法
@@ -4,9 +4,12 @@ from yomitoku import OCR
4
4
  from yomitoku.data.functions import load_pdf
5
5
 
6
6
  if __name__ == "__main__":
7
- ocr = OCR(visualize=True, device="cpu")
7
+ ocr = OCR(visualize=True, device="cuda")
8
8
  # PDFファイルを読み込み
9
9
  imgs = load_pdf("demo/sample.pdf")
10
+ import time
11
+
12
+ start = time.time()
10
13
  for i, img in enumerate(imgs):
11
14
  results, ocr_vis = ocr(img)
12
15
 
@@ -107,4 +107,18 @@ If the PDF contains multiple pages, you can export them as a single file.
107
107
 
108
108
  ```
109
109
  yomitoku ${path_data} -f md --combine
110
- ```
110
+ ```
111
+
112
+ ## Specifying Reading Order
113
+
114
+ By default, *Auto* mode automatically detects whether a document is written horizontally or vertically and estimates the appropriate reading order. However, you can explicitly specify a reading order. In *Auto* mode, horizontal documents use `top2bottom` and vertical documents use `right2left`.
115
+
116
+ ```
117
+ yomitoku ${path_data} --reading_order left2right
118
+ ```
119
+
120
+ * `top2bottom`: Prioritizes reading from top to bottom. Useful for multi-column documents such as word processor files with vertical flow.
121
+
122
+ * `left2right`: Prioritizes reading from left to right. Suitable for layouts like receipts or health insurance cards, where key-value text pairs are arranged in columns.
123
+
124
+ * `right2left`: Prioritizes reading from right to left. Effective for vertically written documents.
@@ -104,4 +104,18 @@ PDFに複数ページが含まれる場合に複数ページを一つのファ
104
104
 
105
105
  ```
106
106
  yomitoku ${path_data} -f md --combine
107
- ```
107
+ ```
108
+
109
+ ## 読み取り順を指定する
110
+ Autoでは、横書きのドキュメント、縦書きのドキュメントを識別し、自動で読み取り順を推定しますが、任意の読み取り順を指定することも可能です。デフォルトでは横書きの文書は`top2bottom`、縦書きは`right2left`になります。
111
+
112
+ ```
113
+ yomitoku ${path_data} --reading_order left2right
114
+ ```
115
+
116
+ - `top2bottom`: 上から下方向に優先的に読み取り順を推定します。段組みのワードドキュメントなどに対して、有効です。
117
+
118
+ - `left2right`: 左から右方向に優先的に読み取り順を推定します。レシートや保険証などキーに対して、値を示すテキストが段組みになっているようなレイアウトに有効です。
119
+
120
+ - `right2left`: 右から左方向に優先的に読み取り順を推定します。縦書きのドキュメントに対して有効です。
121
+
@@ -92,7 +92,7 @@ def process_single_file(args, analyzer, path, format):
92
92
 
93
93
  if ocr is not None:
94
94
  out_path = os.path.join(
95
- args.outdir, f"{dirname}_{filename}_p{page+1}_ocr.jpg"
95
+ args.outdir, f"{dirname}_{filename}_p{page + 1}_ocr.jpg"
96
96
  )
97
97
 
98
98
  save_image(ocr, out_path)
@@ -100,13 +100,15 @@ def process_single_file(args, analyzer, path, format):
100
100
 
101
101
  if layout is not None:
102
102
  out_path = os.path.join(
103
- args.outdir, f"{dirname}_{filename}_p{page+1}_layout.jpg"
103
+ args.outdir, f"{dirname}_{filename}_p{page + 1}_layout.jpg"
104
104
  )
105
105
 
106
106
  save_image(layout, out_path)
107
107
  logger.info(f"Output file: {out_path}")
108
108
 
109
- out_path = os.path.join(args.outdir, f"{dirname}_{filename}_p{page+1}.{format}")
109
+ out_path = os.path.join(
110
+ args.outdir, f"{dirname}_{filename}_p{page + 1}.{format}"
111
+ )
110
112
 
111
113
  if format == "json":
112
114
  if args.combine:
@@ -341,6 +343,12 @@ def main():
341
343
  action="store_true",
342
344
  help="if set, ignore meta information(header, footer) in the output",
343
345
  )
346
+ parser.add_argument(
347
+ "--reading_order",
348
+ default="auto",
349
+ type=str,
350
+ choices=["auto", "left2right", "top2bottom", "right2left"],
351
+ )
344
352
 
345
353
  args = parser.parse_args()
346
354
 
@@ -394,6 +402,7 @@ def main():
394
402
  visualize=args.vis,
395
403
  device=args.device,
396
404
  ignore_meta=args.ignore_meta,
405
+ reading_order=args.reading_order,
397
406
  )
398
407
 
399
408
  os.makedirs(args.outdir, exist_ok=True)
@@ -408,7 +417,7 @@ def main():
408
417
  logger.info(f"Processing file: {file_path}")
409
418
  process_single_file(args, analyzer, file_path, format)
410
419
  end = time.time()
411
- logger.info(f"Total Processing time: {end-start:.2f} sec")
420
+ logger.info(f"Total Processing time: {end - start:.2f} sec")
412
421
  except Exception:
413
422
  continue
414
423
  else:
@@ -416,7 +425,7 @@ def main():
416
425
  logger.info(f"Processing file: {path}")
417
426
  process_single_file(args, analyzer, path, format)
418
427
  end = time.time()
419
- logger.info(f"Total Processing time: {end-start:.2f} sec")
428
+ logger.info(f"Total Processing time: {end - start:.2f} sec")
420
429
 
421
430
 
422
431
  if __name__ == "__main__":
@@ -8,9 +8,11 @@ from .functions import (
8
8
  validate_quads,
9
9
  )
10
10
 
11
+ from concurrent.futures import ThreadPoolExecutor
12
+
11
13
 
12
14
  class ParseqDataset(Dataset):
13
- def __init__(self, cfg, img, quads):
15
+ def __init__(self, cfg, img, quads, num_workers=8):
14
16
  self.img = img[:, :, ::-1]
15
17
  self.quads = quads
16
18
  self.cfg = cfg
@@ -22,19 +24,27 @@ class ParseqDataset(Dataset):
22
24
  ]
23
25
  )
24
26
 
25
- validate_quads(self.img, self.quads)
27
+ with ThreadPoolExecutor(max_workers=num_workers) as executor:
28
+ data = list(executor.map(self.preprocess, self.quads))
26
29
 
27
- def __len__(self):
28
- return len(self.quads)
30
+ self.data = [tensor for tensor in data if tensor is not None]
31
+
32
+ def preprocess(self, quad):
33
+ if validate_quads(self.img, quad) is None:
34
+ return None
35
+
36
+ roi_img = extract_roi_with_perspective(self.img, quad)
29
37
 
30
- def __getitem__(self, index):
31
- polygon = self.quads[index]
32
- roi_img = extract_roi_with_perspective(self.img, polygon)
33
38
  if roi_img is None:
34
- return
39
+ return None
35
40
 
36
41
  roi_img = rotate_text_image(roi_img, thresh_aspect=2)
37
42
  resized = resize_with_padding(roi_img, self.cfg.data.img_size)
38
- tensor = self.transform(resized)
39
43
 
40
- return tensor
44
+ return resized
45
+
46
+ def __len__(self):
47
+ return len(self.data)
48
+
49
+ def __getitem__(self, index):
50
+ return self.transform(self.data[index])
@@ -191,7 +191,7 @@ def array_to_tensor(img: np.ndarray) -> torch.Tensor:
191
191
  return tensor
192
192
 
193
193
 
194
- def validate_quads(img: np.ndarray, quads: list[list[list[int]]]):
194
+ def validate_quads(img: np.ndarray, quad: list[list[list[int]]]):
195
195
  """
196
196
  Validate the vertices of the quadrilateral.
197
197
 
@@ -204,23 +204,23 @@ def validate_quads(img: np.ndarray, quads: list[list[list[int]]]):
204
204
  """
205
205
 
206
206
  h, w = img.shape[:2]
207
- for quad in quads:
208
- if len(quad) != 4:
209
- raise ValueError("The number of vertices must be 4.")
210
-
211
- for point in quad:
212
- if len(point) != 2:
213
- raise ValueError("The number of coordinates must be 2.")
214
-
215
- quad = np.array(quad, dtype=int)
216
- x1 = np.min(quad[:, 0])
217
- x2 = np.max(quad[:, 0])
218
- y1 = np.min(quad[:, 1])
219
- y2 = np.max(quad[:, 1])
220
- h, w = img.shape[:2]
207
+ if len(quad) != 4:
208
+ # raise ValueError("The number of vertices must be 4.")
209
+ return None
210
+
211
+ for point in quad:
212
+ if len(point) != 2:
213
+ return None
214
+
215
+ quad = np.array(quad, dtype=int)
216
+ x1 = np.min(quad[:, 0])
217
+ x2 = np.max(quad[:, 0])
218
+ y1 = np.min(quad[:, 1])
219
+ y2 = np.max(quad[:, 1])
220
+ h, w = img.shape[:2]
221
221
 
222
- if x1 < 0 or x2 > w or y1 < 0 or y2 > h:
223
- raise ValueError(f"The vertices are out of the image. {quad.tolist()}")
222
+ if x1 < 0 or x2 > w or y1 < 0 or y2 > h:
223
+ return None
224
224
 
225
225
  return True
226
226
 
@@ -237,19 +237,18 @@ def extract_roi_with_perspective(img, quad):
237
237
  np.ndarray: extracted image
238
238
  """
239
239
  dst = img.copy()
240
- quad = np.array(quad, dtype=np.float32)
240
+ quad = np.array(quad, dtype=np.int64)
241
+
241
242
  width = np.linalg.norm(quad[0] - quad[1])
242
243
  height = np.linalg.norm(quad[1] - quad[2])
243
244
 
244
245
  width = int(width)
245
246
  height = int(height)
246
-
247
247
  pts1 = np.float32(quad)
248
248
  pts2 = np.float32([[0, 0], [width, 0], [width, height], [0, height]])
249
249
 
250
250
  M = cv2.getPerspectiveTransform(pts1, pts2)
251
251
  dst = cv2.warpPerspective(dst, M, (width, height))
252
-
253
252
  return dst
254
253
 
255
254
 
@@ -86,8 +86,12 @@ def extract_paragraph_within_figure(paragraphs, figures):
86
86
  check_list[i] = True
87
87
 
88
88
  figure["direction"] = judge_page_direction(contained_paragraphs)
89
+ reading_order = (
90
+ "left2right" if figure["direction"] == "horizontal" else "right2left"
91
+ )
92
+
89
93
  figure_paragraphs = prediction_reading_order(
90
- contained_paragraphs, figure["direction"]
94
+ contained_paragraphs, reading_order
91
95
  )
92
96
  figure["paragraphs"] = sorted(figure_paragraphs, key=lambda x: x.order)
93
97
  figure = FigureSchema(**figure)
@@ -126,8 +130,8 @@ def extract_words_within_element(pred_words, element):
126
130
  cnt_vertical = word_direction.count("vertical")
127
131
 
128
132
  element_direction = "horizontal" if cnt_horizontal > cnt_vertical else "vertical"
129
-
130
- prediction_reading_order(contained_words, element_direction)
133
+ order = "left2right" if element_direction == "horizontal" else "right2left"
134
+ prediction_reading_order(contained_words, order)
131
135
  contained_words = sorted(contained_words, key=lambda x: x.order)
132
136
 
133
137
  contained_words = "\n".join([content.contents for content in contained_words])
@@ -328,6 +332,7 @@ class DocumentAnalyzer:
328
332
  device="cuda",
329
333
  visualize=False,
330
334
  ignore_meta=False,
335
+ reading_order="auto",
331
336
  ):
332
337
  default_configs = {
333
338
  "ocr": {
@@ -352,6 +357,8 @@ class DocumentAnalyzer:
352
357
  },
353
358
  }
354
359
 
360
+ self.reading_order = reading_order
361
+
355
362
  if isinstance(configs, dict):
356
363
  recursive_update(default_configs, configs)
357
364
  else:
@@ -452,9 +459,17 @@ class DocumentAnalyzer:
452
459
 
453
460
  elements = page_contents + layout_res.tables + figures
454
461
 
455
- prediction_reading_order(headers, page_direction)
456
- prediction_reading_order(footers, page_direction)
457
- prediction_reading_order(elements, page_direction, self.img)
462
+ prediction_reading_order(headers, "left2right")
463
+ prediction_reading_order(footers, "left2right")
464
+
465
+ if self.reading_order == "auto":
466
+ reading_order = (
467
+ "right2left" if page_direction == "vertical" else "top2bottom"
468
+ )
469
+ else:
470
+ reading_order = self.reading_order
471
+
472
+ prediction_reading_order(elements, reading_order, self.img)
458
473
 
459
474
  for i, element in enumerate(elements):
460
475
  element.order += len(headers)
@@ -17,7 +17,6 @@ def _priority_dfs(nodes, direction):
17
17
 
18
18
  pending_nodes = sorted(nodes, key=lambda x: x.prop["distance"])
19
19
  visited = [False] * len(nodes)
20
-
21
20
  start = pending_nodes.pop(0)
22
21
  stack = [start]
23
22
 
@@ -53,11 +52,11 @@ def _priority_dfs(nodes, direction):
53
52
  children.append(node)
54
53
  stack.remove(node)
55
54
 
56
- if direction == "horizontal":
55
+ if direction in "top2bottom":
57
56
  children = sorted(
58
57
  children, key=lambda x: x.prop["box"][0], reverse=True
59
58
  )
60
- else:
59
+ elif direction in ["right2left", "left2right"]:
61
60
  children = sorted(
62
61
  children, key=lambda x: x.prop["box"][1], reverse=True
63
62
  )
@@ -121,7 +120,7 @@ def _exist_other_node_between_horizontal(node, other_node, nodes):
121
120
  return False
122
121
 
123
122
 
124
- def _create_graph_horizontal(nodes):
123
+ def _create_graph_top2bottom(nodes):
125
124
  for i, node in enumerate(nodes):
126
125
  for j, other_node in enumerate(nodes):
127
126
  if i == j:
@@ -146,7 +145,7 @@ def _create_graph_horizontal(nodes):
146
145
  node.children = sorted(node.children, key=lambda x: x.prop["box"][0])
147
146
 
148
147
 
149
- def _create_graph_vertical(nodes):
148
+ def _create_graph_right2left(nodes):
150
149
  max_x = max([node.prop["box"][2] for node in nodes])
151
150
 
152
151
  for i, node in enumerate(nodes):
@@ -172,15 +171,46 @@ def _create_graph_vertical(nodes):
172
171
  node.children = sorted(node.children, key=lambda x: x.prop["box"][1])
173
172
 
174
173
 
174
+ def _create_graph_left2right(nodes, x_weight=1, y_weight=5):
175
+ for i, node in enumerate(nodes):
176
+ for j, other_node in enumerate(nodes):
177
+ if i == j:
178
+ continue
179
+
180
+ if is_intersected_horizontal(node.prop["box"], other_node.prop["box"]):
181
+ tx = node.prop["box"][2]
182
+ ox = other_node.prop["box"][2]
183
+
184
+ if _exist_other_node_between_horizontal(node, other_node, nodes):
185
+ continue
186
+
187
+ if ox < tx:
188
+ other_node.add_link(node)
189
+ else:
190
+ node.add_link(other_node)
191
+
192
+ node_distance = (
193
+ node.prop["box"][0] * x_weight + node.prop["box"][1] * y_weight
194
+ )
195
+ node.prop["distance"] = node_distance
196
+
197
+ for node in nodes:
198
+ node.children = sorted(node.children, key=lambda x: x.prop["box"][1])
199
+
200
+
175
201
  def prediction_reading_order(elements, direction, img=None):
176
202
  if len(elements) < 2:
177
203
  return elements
178
204
 
179
205
  nodes = [Node(i, element.dict()) for i, element in enumerate(elements)]
180
- if direction == "horizontal":
181
- _create_graph_horizontal(nodes)
206
+ if direction == "top2bottom":
207
+ _create_graph_top2bottom(nodes)
208
+ elif direction == "right2left":
209
+ _create_graph_right2left(nodes)
210
+ elif direction == "left2right":
211
+ _create_graph_left2right(nodes)
182
212
  else:
183
- _create_graph_vertical(nodes)
213
+ raise ValueError(f"Invalid direction: {direction}")
184
214
 
185
215
  # For debugging
186
216
  # if img is not None:
@@ -80,7 +80,7 @@ def calc_intersection(rect_a, rect_b):
80
80
  return [ix1, iy1, ix2, iy2]
81
81
 
82
82
 
83
- def is_intersected_horizontal(rect_a, rect_b):
83
+ def is_intersected_horizontal(rect_a, rect_b, threshold=0.5):
84
84
  _, ay1, _, ay2 = map(int, rect_a)
85
85
  _, by1, _, by2 = map(int, rect_b)
86
86
 
@@ -88,9 +88,11 @@ def is_intersected_horizontal(rect_a, rect_b):
88
88
  iy1 = max(ay1, by1)
89
89
  iy2 = min(ay2, by2)
90
90
 
91
+ min_height = min(ay2 - ay1, by2 - by1)
92
+
91
93
  overlap_height = max(0, iy2 - iy1)
92
94
 
93
- if overlap_height == 0:
95
+ if (overlap_height / min_height) < threshold:
94
96
  return False
95
97
 
96
98
  return True
@@ -119,3 +121,48 @@ def quad_to_xyxy(quad):
119
121
  y2 = max([y for _, y in quad])
120
122
 
121
123
  return x1, y1, x2, y2
124
+
125
+
126
+ def convert_table_array(table):
127
+ n_rows = table.n_row
128
+ n_cols = table.n_col
129
+
130
+ table_array = [["" for _ in range(n_cols)] for _ in range(n_rows)]
131
+
132
+ for cell in table.cells:
133
+ row = cell.row - 1
134
+ col = cell.col - 1
135
+ row_span = cell.row_span
136
+ col_span = cell.col_span
137
+ contents = cell.contents
138
+
139
+ for i in range(row, row + row_span):
140
+ for j in range(col, col + col_span):
141
+ table_array[i][j] = contents
142
+
143
+ return table_array
144
+
145
+
146
+ def convert_table_array_to_dict(table_array, header_row=1):
147
+ n_cols = len(table_array[0])
148
+ n_rows = len(table_array)
149
+
150
+ header_cols = []
151
+ for i in range(n_cols):
152
+ header = []
153
+ for j in range(header_row):
154
+ header.append(table_array[j][i])
155
+
156
+ if len(header) > 0:
157
+ header_cols.append("_".join(header))
158
+ else:
159
+ header_cols.append(f"col_{i}")
160
+
161
+ table_dict = []
162
+ for i in range(header_row, n_rows):
163
+ row_dict = {}
164
+ for j in range(n_cols):
165
+ row_dict[header_cols[j]] = table_array[i][j]
166
+ table_dict.append(row_dict)
167
+
168
+ return table_dict
@@ -11,20 +11,20 @@
11
11
  <p>AM9:00~PM7:00</p>
12
12
  <p>お買い上げ、誠にありがとうございます。</p>
13
13
  <p>またのお越しをお待ちしております。</p>
14
- <p>端末取引ID:50631</p>
14
+ <p>2024-11-13 13:26:15</p>
15
+ <p/>
16
+ <h1>むこたまソフト ※</h1>
15
17
  <p>¥529</p>
18
+ <p>端末取引ID:50631</p>
16
19
  <p>小計<br/>¥529</p>
17
20
  <p>合計<br/>¥529</p>
18
21
  <p>内消費税<br/>(¥39)</p>
19
22
  <p>(8%対象 ¥529 内消費税 ¥39)</p>
20
23
  <p>合計点数<br/>1点</p>
21
24
  <p>お預り金額<br/>¥1,000</p>
22
- <p>お釣り<br/>¥471</p>
23
- <p>2024-11-13 13:26:15</p>
24
- <p/>
25
- <h1>むこたまソフト ※</h1>
26
25
  <p>注) ※は軽減税率(8%)適用</p>
26
+ <p>お釣り<br/>¥471</p>
27
27
  <p>オンラインでもご購入いただけます!</p>
28
- <p>No.7314719750041</p>
29
28
  <p>http://www.mukotama.com/</p>
29
+ <p>No. 7314719750041</p>
30
30
  </div>
@@ -4,7 +4,7 @@
4
4
 
5
5
  〒2770871
6
6
 
7
- 千葉県柏市若柴186番地 中央146街区1<br>ARAGE 112<br>KOIL LINK G
7
+ ARAGE 112<br>千葉県柏市若柴186番地 中央146街区1<br>KOIL LINK G
8
8
 
9
9
  TEL:0471\-28\-8905
10
10
 
@@ -20,8 +20,12 @@ AM9:00\~PM7:00
20
20
 
21
21
  またのお越しをお待ちしております。
22
22
 
23
+ 2024\-11\-13 13:26:15
24
+
23
25
  端末取引ID:50631
24
26
 
27
+ # むこたまソフト ※
28
+
25
29
  ¥529
26
30
 
27
31
  小計<br>¥529
@@ -36,16 +40,12 @@ AM9:00\~PM7:00
36
40
 
37
41
  お預り金額<br>¥1,000
38
42
 
39
- お釣り<br>¥471
40
-
41
- 2024\-11\-13 13:26:15
42
-
43
- # むこたまソフト ※
44
-
45
43
  注\) ※は軽減税率\(8%\)適用
46
44
 
47
- オンラインでもご購入いただけます\!
45
+ お釣り<br>¥471
48
46
 
49
- No.7314719750041
47
+ オンラインでもご購入いただけます\!
50
48
 
51
49
  http://www.mukotama.com/
50
+
51
+ No. 7314719750041
@@ -140,47 +140,29 @@ def test_resize_with_padding():
140
140
 
141
141
  def test_validate_quads():
142
142
  img = np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8)
143
- quads = [
144
- [[0, 0], [0, 10], [10, 10]],
145
- ]
143
+ quad = [[0, 0], [0, 10], [10, 10]]
146
144
 
147
- with pytest.raises(ValueError):
148
- validate_quads(img, quads)
145
+ assert validate_quads(img, quad) is None
149
146
 
150
- quads = [
151
- [[0], [0, 10], [10, 10], [10, 0]],
152
- ]
147
+ quad = [[0], [0, 10], [10, 10], [10, 0]]
153
148
 
154
- with pytest.raises(ValueError):
155
- validate_quads(img, quads)
149
+ assert validate_quads(img, quad) is None
156
150
 
157
- quads = [
158
- [[0, 0], [0, 150], [10, 150], [10, 0]],
159
- ]
151
+ quad = [[0, 0], [0, 150], [10, 150], [10, 0]]
160
152
 
161
- with pytest.raises(ValueError):
162
- validate_quads(img, quads)
153
+ assert validate_quads(img, quad) is None
163
154
 
164
- quads = [
165
- [[150, 0], [150, 10], [10, 10], [10, 0]],
166
- ]
155
+ quad = [[150, 0], [150, 10], [10, 10], [10, 0]]
167
156
 
168
- with pytest.raises(ValueError):
169
- validate_quads(img, quads)
157
+ assert validate_quads(img, quad) is None
170
158
 
171
- quads = [
172
- [[-1, 0], [-1, 10], [10, 10], [10, 0]],
173
- ]
159
+ quad = [[-1, 0], [-1, 10], [10, 10], [10, 0]]
174
160
 
175
- with pytest.raises(ValueError):
176
- validate_quads(img, quads)
161
+ assert validate_quads(img, quad) is None
177
162
 
178
- quads = [
179
- [[0, -1], [0, 10], [10, 10], [10, -1]],
180
- ]
163
+ quad = [[0, -1], [0, 10], [10, 10], [10, -1]]
181
164
 
182
- with pytest.raises(ValueError):
183
- validate_quads(img, quads)
165
+ assert validate_quads(img, quad) is None
184
166
 
185
167
  quads = [
186
168
  [[0, 0], [0, 10], [10, 10], [10, 0]],
@@ -188,4 +170,5 @@ def test_validate_quads():
188
170
  [[10, 0], [10, 30], [80, 30], [80, 0]],
189
171
  ]
190
172
 
191
- assert validate_quads(img, quads)
173
+ for quad in quads:
174
+ assert validate_quads(img, quad)