yomitoku 0.7.0__tar.gz → 0.7.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {yomitoku-0.7.0 → yomitoku-0.7.1}/PKG-INFO +1 -1
- {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/cli/main.py +2 -1
- {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/document_analyzer.py +0 -1
- {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/export/export_csv.py +2 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/export/export_html.py +2 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/export/export_json.py +2 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/export/export_markdown.py +2 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/tests/test_cli.py +1 -0
- yomitoku-0.7.1/tests/test_document_analyzer.py +596 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/uv.lock +1 -1
- yomitoku-0.7.0/tests/test_document_analyzer.py +0 -88
- {yomitoku-0.7.0 → yomitoku-0.7.1}/.github/release-drafter.yml +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/.github/workflows/build-and-publish-docs.yaml +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/.github/workflows/build-and-publish.yml +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/.github/workflows/create-release.yml +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/.github/workflows/lint-and-test.yml +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/.gitignore +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/.pre-commit-config.yaml +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/.python-version +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/README.md +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/README_EN.md +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/configs/layout_parser_rtdetrv2.yaml +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/configs/table_structure_recognitizer.yaml +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/configs/text_detector.yaml +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/configs/text_recognizer.yaml +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/demo/sample.pdf +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/demo/setting_document_anaysis.py +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/demo/simple_document_analysis.py +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/demo/simple_layout.py +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/demo/simple_ocr.py +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/demo/text_detector.yaml +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/dockerfile +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/docs/assets/logo.svg +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/docs/index.en.md +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/docs/index.ja.md +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/docs/installation.en.md +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/docs/installation.ja.md +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/docs/usage.en.md +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/docs/usage.ja.md +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/gallery.md +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/mkdocs.yml +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/pyproject.toml +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/pytest.ini +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/scripts/register_hugging_face_hub.py +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/__init__.py +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/base.py +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/cli/__init__.py +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/configs/__init__.py +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/configs/cfg_layout_parser_rtdtrv2.py +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/configs/cfg_table_structure_recognizer_rtdtrv2.py +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/configs/cfg_text_detector_dbnet.py +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/configs/cfg_text_recognizer_parseq.py +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/configs/cfg_text_recognizer_parseq_small.py +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/constants.py +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/data/__init__.py +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/data/dataset.py +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/data/functions.py +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/export/__init__.py +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/layout_analyzer.py +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/layout_parser.py +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/models/__init__.py +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/models/dbnet_plus.py +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/models/layers/__init__.py +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/models/layers/activate.py +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/models/layers/dbnet_feature_attention.py +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/models/layers/parseq_transformer.py +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/models/layers/rtdetr_backbone.py +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/models/layers/rtdetr_hybrid_encoder.py +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/models/layers/rtdetrv2_decoder.py +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/models/parseq.py +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/models/rtdetr.py +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/ocr.py +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/onnx/.gitkeep +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/postprocessor/__init__.py +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/postprocessor/dbnet_postporcessor.py +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/postprocessor/parseq_tokenizer.py +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/postprocessor/rtdetr_postprocessor.py +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/reading_order.py +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/resource/MPLUS1p-Medium.ttf +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/resource/charset.txt +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/table_structure_recognizer.py +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/text_detector.py +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/text_recognizer.py +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/utils/__init__.py +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/utils/graph.py +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/utils/logger.py +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/utils/misc.py +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/src/yomitoku/utils/visualizer.py +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/static/in/demo.jpg +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/static/in/gallery1.jpg +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/static/in/gallery2.jpg +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/static/in/gallery3.jpg +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/static/in/gallery4.jpg +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/static/in/gallery5.jpg +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/static/in/gallery6.jpg +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/static/logo/horizontal.png +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/demo_html.png +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/figures/in_demo_p1_figure_0.png +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/figures/in_gallery1_p1_figure_0.png +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/figures/in_gallery1_p1_figure_1.png +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/figures/in_gallery1_p1_figure_10.png +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/figures/in_gallery1_p1_figure_2.png +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/figures/in_gallery1_p1_figure_3.png +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/figures/in_gallery1_p1_figure_4.png +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/figures/in_gallery1_p1_figure_5.png +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/figures/in_gallery1_p1_figure_6.png +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/figures/in_gallery1_p1_figure_7.png +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/figures/in_gallery1_p1_figure_8.png +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/figures/in_gallery1_p1_figure_9.png +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/figures/in_gallery3_p1_figure_0.png +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/figures/in_gallery3_p1_figure_1.png +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/figures/in_gallery5_p1_figure_0.png +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/figures/in_gallery5_p1_figure_1.png +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/figures/in_gallery6_p1_figure_0.png +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/figures/in_gallery6_p1_figure_1.png +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/in_demo_p1.html +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/in_demo_p1.md +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/in_demo_p1_layout.jpg +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/in_demo_p1_ocr.jpg +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/in_gallery1_p1.html +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/in_gallery1_p1.md +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/in_gallery1_p1_layout.jpg +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/in_gallery1_p1_ocr.jpg +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/in_gallery2_p1.html +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/in_gallery2_p1.md +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/in_gallery2_p1_layout.jpg +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/in_gallery2_p1_ocr.jpg +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/in_gallery3_p1.html +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/in_gallery3_p1.md +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/in_gallery3_p1_layout.jpg +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/in_gallery3_p1_ocr.jpg +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/in_gallery4_p1.html +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/in_gallery4_p1.md +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/in_gallery4_p1_layout.jpg +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/in_gallery4_p1_ocr.jpg +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/in_gallery5_p1.html +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/in_gallery5_p1.md +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/in_gallery5_p1_layout.jpg +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/in_gallery5_p1_ocr.jpg +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/in_gallery6_p1.html +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/in_gallery6_p1.md +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/in_gallery6_p1_layout.jpg +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/static/out/in_gallery6_p1_ocr.jpg +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/tests/data/invalid.jpg +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/tests/data/invalid.pdf +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/tests/data/rgba.png +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/tests/data/small.jpg +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/tests/data/subdir/test.jpg +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/tests/data/test.bmp +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/tests/data/test.jpg +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/tests/data/test.pdf +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/tests/data/test.png +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/tests/data/test.tiff +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/tests/data/test.txt +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/tests/data/test_gray.jpg +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/tests/test_base.py +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/tests/test_data.py +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/tests/test_export.py +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/tests/test_layout_analyzer.py +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/tests/test_ocr.py +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/tests/yaml/layout_parser.yaml +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/tests/yaml/table_structure_recognizer.yaml +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/tests/yaml/text_detector.yaml +0 -0
- {yomitoku-0.7.0 → yomitoku-0.7.1}/tests/yaml/text_recognizer.yaml +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: yomitoku
|
3
|
-
Version: 0.7.0
|
3
|
+
Version: 0.7.1
|
4
4
|
Summary: Yomitoku is an AI-powered document image analysis package designed specifically for the Japanese language.
|
5
5
|
Author-email: Kotaro Kinoshita <kotaro.kinoshita@mlism.com>
|
6
6
|
License: CC BY-NC-SA 4.0
|
@@ -1,5 +1,6 @@
|
|
1
1
|
import argparse
|
2
2
|
import os
|
3
|
+
import torch
|
3
4
|
from pathlib import Path
|
4
5
|
|
5
6
|
import cv2
|
@@ -235,7 +236,7 @@ def main():
|
|
235
236
|
if args.lite:
|
236
237
|
configs["ocr"]["text_recognizer"]["model_name"] = "parseq-small"
|
237
238
|
|
238
|
-
if args.device == "cpu":
|
239
|
+
if args.device == "cpu" or not torch.cuda.is_available():
|
239
240
|
configs["ocr"]["text_detector"]["infer_onnx"] = True
|
240
241
|
|
241
242
|
# Note: Text Detector以外はONNX推論よりもPyTorch推論の方が速いため、ONNX推論は行わない
|
@@ -127,7 +127,6 @@ def extract_words_within_element(pred_words, element):
|
|
127
127
|
if len(contained_words) == 0:
|
128
128
|
return None, None, check_list
|
129
129
|
|
130
|
-
element_direction = "horizontal"
|
131
130
|
word_direction = [word.direction for word in contained_words]
|
132
131
|
cnt_horizontal = word_direction.count("horizontal")
|
133
132
|
cnt_vertical = word_direction.count("vertical")
|
@@ -210,6 +210,7 @@ def test_validate_encoding():
|
|
210
210
|
validate_encoding("utf-9")
|
211
211
|
|
212
212
|
assert validate_encoding("utf-8")
|
213
|
+
assert validate_encoding("utf-8-sig")
|
213
214
|
assert validate_encoding("shift-jis")
|
214
215
|
assert validate_encoding("euc-jp")
|
215
216
|
assert validate_encoding("cp932")
|
@@ -0,0 +1,596 @@
|
|
1
|
+
import pytest
|
2
|
+
import torch
|
3
|
+
from omegaconf import OmegaConf
|
4
|
+
|
5
|
+
from yomitoku import DocumentAnalyzer
|
6
|
+
from yomitoku.document_analyzer import (
|
7
|
+
ParagraphSchema,
|
8
|
+
FigureSchema,
|
9
|
+
DocumentAnalyzerSchema,
|
10
|
+
extract_paragraph_within_figure,
|
11
|
+
combine_flags,
|
12
|
+
judge_page_direction,
|
13
|
+
extract_words_within_element,
|
14
|
+
is_vertical,
|
15
|
+
is_noise,
|
16
|
+
recursive_update,
|
17
|
+
_extract_words_within_table,
|
18
|
+
_calc_overlap_words_on_lines,
|
19
|
+
_correct_vertical_word_boxes,
|
20
|
+
_correct_horizontal_word_boxes,
|
21
|
+
_split_text_across_cells,
|
22
|
+
)
|
23
|
+
|
24
|
+
|
25
|
+
from yomitoku.text_detector import TextDetectorSchema
|
26
|
+
|
27
|
+
from yomitoku.table_structure_recognizer import (
|
28
|
+
TableStructureRecognizerSchema,
|
29
|
+
TableLineSchema,
|
30
|
+
TableCellSchema,
|
31
|
+
)
|
32
|
+
|
33
|
+
from yomitoku.ocr import (
|
34
|
+
WordPrediction,
|
35
|
+
)
|
36
|
+
|
37
|
+
|
38
|
+
def test_initialize():
|
39
|
+
device = "cpu"
|
40
|
+
visualize = True
|
41
|
+
config = {
|
42
|
+
"ocr": {
|
43
|
+
"text_detector": {
|
44
|
+
"path_cfg": "tests/yaml/text_detector.yaml",
|
45
|
+
},
|
46
|
+
"text_recognizer": {
|
47
|
+
"path_cfg": "tests/yaml/text_recognizer.yaml",
|
48
|
+
},
|
49
|
+
},
|
50
|
+
"layout_analyzer": {
|
51
|
+
"layout_parser": {
|
52
|
+
"path_cfg": "tests/yaml/layout_parser.yaml",
|
53
|
+
},
|
54
|
+
"table_structure_recognizer": {
|
55
|
+
"path_cfg": "tests/yaml/table_structure_recognizer.yaml",
|
56
|
+
},
|
57
|
+
},
|
58
|
+
}
|
59
|
+
|
60
|
+
analyzer = DocumentAnalyzer(configs=config, device=device, visualize=visualize)
|
61
|
+
|
62
|
+
# サブモジュールのパラメータが更新されているか確認
|
63
|
+
assert analyzer.text_detector.device == torch.device(device)
|
64
|
+
assert analyzer.text_recognizer.device == torch.device(device)
|
65
|
+
assert analyzer.layout.layout_parser.device == torch.device(device)
|
66
|
+
assert analyzer.layout.table_structure_recognizer.device == torch.device(device)
|
67
|
+
|
68
|
+
assert analyzer.text_detector.visualize == visualize
|
69
|
+
assert analyzer.text_recognizer.visualize == visualize
|
70
|
+
assert analyzer.layout.layout_parser.visualize == visualize
|
71
|
+
assert analyzer.layout.table_structure_recognizer.visualize == visualize
|
72
|
+
|
73
|
+
text_detector_cfg = OmegaConf.load(config["ocr"]["text_detector"]["path_cfg"])
|
74
|
+
text_recognizer_cfg = OmegaConf.load(config["ocr"]["text_recognizer"]["path_cfg"])
|
75
|
+
layout_parser_cfg = OmegaConf.load(
|
76
|
+
config["layout_analyzer"]["layout_parser"]["path_cfg"]
|
77
|
+
)
|
78
|
+
table_structure_recognizer_cfg = OmegaConf.load(
|
79
|
+
config["layout_analyzer"]["table_structure_recognizer"]["path_cfg"]
|
80
|
+
)
|
81
|
+
|
82
|
+
assert (
|
83
|
+
analyzer.text_detector.post_processor.thresh
|
84
|
+
== text_detector_cfg.post_process.thresh
|
85
|
+
)
|
86
|
+
|
87
|
+
assert (
|
88
|
+
analyzer.text_recognizer.model.refine_iters == text_recognizer_cfg.refine_iters
|
89
|
+
)
|
90
|
+
|
91
|
+
assert analyzer.layout.layout_parser.thresh_score == layout_parser_cfg.thresh_score
|
92
|
+
|
93
|
+
assert (
|
94
|
+
analyzer.layout.table_structure_recognizer.thresh_score
|
95
|
+
== table_structure_recognizer_cfg.thresh_score
|
96
|
+
)
|
97
|
+
|
98
|
+
|
99
|
+
def test_invalid_path():
|
100
|
+
config = {
|
101
|
+
"ocr": {
|
102
|
+
"text_detector": {
|
103
|
+
"path_cfg": "tests/yaml/dummy.yaml",
|
104
|
+
},
|
105
|
+
}
|
106
|
+
}
|
107
|
+
|
108
|
+
with pytest.raises(FileNotFoundError):
|
109
|
+
DocumentAnalyzer(
|
110
|
+
configs=config,
|
111
|
+
)
|
112
|
+
|
113
|
+
|
114
|
+
def test_invalid_config():
|
115
|
+
with pytest.raises(ValueError):
|
116
|
+
DocumentAnalyzer(
|
117
|
+
configs="invalid",
|
118
|
+
)
|
119
|
+
|
120
|
+
|
121
|
+
def test_extract_paragraph_within_figure():
|
122
|
+
paragraphs = [
|
123
|
+
{
|
124
|
+
"box": [0, 0, 2, 1],
|
125
|
+
"contents": "This is a test.",
|
126
|
+
"direction": "horizontal",
|
127
|
+
"order": 1,
|
128
|
+
"role": None,
|
129
|
+
},
|
130
|
+
{
|
131
|
+
"box": [0, 0, 1, 2],
|
132
|
+
"contents": "This is a test.",
|
133
|
+
"direction": "vertical",
|
134
|
+
"order": 1,
|
135
|
+
"role": None,
|
136
|
+
},
|
137
|
+
{
|
138
|
+
"box": [10, 10, 1, 2],
|
139
|
+
"contents": "This is a test.",
|
140
|
+
"direction": "horizontal",
|
141
|
+
"order": 1,
|
142
|
+
"role": None,
|
143
|
+
},
|
144
|
+
]
|
145
|
+
|
146
|
+
figures = [
|
147
|
+
{
|
148
|
+
"box": [0, 0, 2, 2],
|
149
|
+
"order": 1,
|
150
|
+
"paragraphs": [],
|
151
|
+
"direction": None,
|
152
|
+
}
|
153
|
+
]
|
154
|
+
|
155
|
+
paragraphs = [ParagraphSchema(**paragraph) for paragraph in paragraphs]
|
156
|
+
figures = [FigureSchema(**figure) for figure in figures]
|
157
|
+
|
158
|
+
figures, checklist = extract_paragraph_within_figure(paragraphs, figures)
|
159
|
+
|
160
|
+
assert checklist == [True, True, False]
|
161
|
+
assert len(figures[0].paragraphs) == 2
|
162
|
+
|
163
|
+
|
164
|
+
def test_combile_flags():
|
165
|
+
flags1 = [True, False, True]
|
166
|
+
flags2 = [False, False, True]
|
167
|
+
|
168
|
+
assert combine_flags(flags1, flags2) == [True, False, True]
|
169
|
+
|
170
|
+
|
171
|
+
def test_judge_page_direction():
|
172
|
+
paragraphs = [
|
173
|
+
{
|
174
|
+
"box": [0, 0, 2, 1],
|
175
|
+
"contents": "This is a test.",
|
176
|
+
"direction": "horizontal",
|
177
|
+
"order": 1,
|
178
|
+
"role": None,
|
179
|
+
},
|
180
|
+
{
|
181
|
+
"box": [0, 0, 1, 2],
|
182
|
+
"contents": "This is a test.",
|
183
|
+
"direction": "vertical",
|
184
|
+
"order": 1,
|
185
|
+
"role": None,
|
186
|
+
},
|
187
|
+
{
|
188
|
+
"box": [10, 10, 1, 2],
|
189
|
+
"contents": "This is a test.",
|
190
|
+
"direction": "horizontal",
|
191
|
+
"order": 1,
|
192
|
+
"role": None,
|
193
|
+
},
|
194
|
+
]
|
195
|
+
|
196
|
+
paragraphs = [ParagraphSchema(**paragraph) for paragraph in paragraphs]
|
197
|
+
assert judge_page_direction(paragraphs) == "horizontal"
|
198
|
+
|
199
|
+
paragraphs = [
|
200
|
+
{
|
201
|
+
"box": [0, 0, 2, 1],
|
202
|
+
"contents": "This is a test.",
|
203
|
+
"direction": "horizontal",
|
204
|
+
"order": 1,
|
205
|
+
"role": None,
|
206
|
+
},
|
207
|
+
{
|
208
|
+
"box": [0, 0, 1, 2],
|
209
|
+
"contents": "This is a test.",
|
210
|
+
"direction": "vertical",
|
211
|
+
"order": 1,
|
212
|
+
"role": None,
|
213
|
+
},
|
214
|
+
{
|
215
|
+
"box": [10, 10, 2, 1],
|
216
|
+
"contents": "This is a test.",
|
217
|
+
"direction": "vertical",
|
218
|
+
"order": 1,
|
219
|
+
"role": None,
|
220
|
+
},
|
221
|
+
]
|
222
|
+
|
223
|
+
paragraphs = [ParagraphSchema(**paragraph) for paragraph in paragraphs]
|
224
|
+
assert judge_page_direction(paragraphs) == "vertical"
|
225
|
+
|
226
|
+
|
227
|
+
def test_extract_words_within_element():
|
228
|
+
paragraph = {
|
229
|
+
"box": [0, 0, 1, 1],
|
230
|
+
"contents": "This is a test.",
|
231
|
+
"direction": "horizontal",
|
232
|
+
"order": 1,
|
233
|
+
"role": None,
|
234
|
+
}
|
235
|
+
|
236
|
+
element = ParagraphSchema(**paragraph)
|
237
|
+
|
238
|
+
words = [
|
239
|
+
{
|
240
|
+
"points": [[10, 10], [11, 10], [11, 11], [10, 11]],
|
241
|
+
"content": "This",
|
242
|
+
"direction": "horizontal",
|
243
|
+
"rec_score": 0.9,
|
244
|
+
"det_score": 0.9,
|
245
|
+
}
|
246
|
+
]
|
247
|
+
|
248
|
+
words = [WordPrediction(**word) for word in words]
|
249
|
+
|
250
|
+
words, direction, checklist = extract_words_within_element(words, element)
|
251
|
+
|
252
|
+
assert words is None
|
253
|
+
assert direction is None
|
254
|
+
assert checklist == [False]
|
255
|
+
|
256
|
+
paragraph = {
|
257
|
+
"box": [0, 0, 5, 5],
|
258
|
+
"contents": "This is a test.",
|
259
|
+
"direction": "horizontal",
|
260
|
+
"order": 1,
|
261
|
+
"role": None,
|
262
|
+
}
|
263
|
+
|
264
|
+
element = ParagraphSchema(**paragraph)
|
265
|
+
|
266
|
+
words = [
|
267
|
+
{
|
268
|
+
"points": [[0, 0], [1, 0], [1, 1], [0, 1]],
|
269
|
+
"content": "Hello",
|
270
|
+
"direction": "horizontal",
|
271
|
+
"rec_score": 0.9,
|
272
|
+
"det_score": 0.9,
|
273
|
+
},
|
274
|
+
{
|
275
|
+
"points": [[0, 1], [1, 1], [1, 2], [0, 2]],
|
276
|
+
"content": "World",
|
277
|
+
"direction": "horizontal",
|
278
|
+
"rec_score": 0.9,
|
279
|
+
"det_score": 0.9,
|
280
|
+
},
|
281
|
+
]
|
282
|
+
|
283
|
+
words = [WordPrediction(**word) for word in words]
|
284
|
+
|
285
|
+
words, direction, checklist = extract_words_within_element(words, element)
|
286
|
+
|
287
|
+
assert words == "Hello\nWorld"
|
288
|
+
assert direction == "horizontal"
|
289
|
+
assert checklist == [True, True]
|
290
|
+
|
291
|
+
paragraph = {
|
292
|
+
"box": [0, 0, 5, 5],
|
293
|
+
"contents": "This is a test.",
|
294
|
+
"direction": "horizontal",
|
295
|
+
"order": 1,
|
296
|
+
"role": None,
|
297
|
+
}
|
298
|
+
|
299
|
+
element = ParagraphSchema(**paragraph)
|
300
|
+
|
301
|
+
words = [
|
302
|
+
{
|
303
|
+
"points": [[2, 0], [3, 0], [3, 1], [2, 1]],
|
304
|
+
"content": "Hello",
|
305
|
+
"direction": "vertical",
|
306
|
+
"rec_score": 0.9,
|
307
|
+
"det_score": 0.9,
|
308
|
+
},
|
309
|
+
{
|
310
|
+
"points": [[0, 1], [1, 1], [1, 2], [0, 2]],
|
311
|
+
"content": "World",
|
312
|
+
"direction": "vertical",
|
313
|
+
"rec_score": 0.9,
|
314
|
+
"det_score": 0.9,
|
315
|
+
},
|
316
|
+
]
|
317
|
+
|
318
|
+
words = [WordPrediction(**word) for word in words]
|
319
|
+
|
320
|
+
words, direction, checklist = extract_words_within_element(words, element)
|
321
|
+
|
322
|
+
assert words == "Hello\nWorld"
|
323
|
+
assert direction == "vertical"
|
324
|
+
assert checklist == [True, True]
|
325
|
+
|
326
|
+
|
327
|
+
def test_is_vertical():
|
328
|
+
quad = [[0, 0], [1, 0], [1, 1], [0, 1]]
|
329
|
+
assert not is_vertical(quad)
|
330
|
+
quad = [[0, 0], [1, 0], [1, 3], [0, 3]]
|
331
|
+
assert is_vertical(quad)
|
332
|
+
|
333
|
+
|
334
|
+
def test_is_noise():
|
335
|
+
quad = [[0, 0], [1, 0], [1, 1], [0, 1]]
|
336
|
+
assert is_noise(quad)
|
337
|
+
|
338
|
+
quad = [[0, 0], [20, 0], [20, 20], [0, 20]]
|
339
|
+
assert not is_noise(quad)
|
340
|
+
|
341
|
+
|
342
|
+
def test_recursive_update():
|
343
|
+
original = {"a": {"b": {"c": 1, "d": 2}}}
|
344
|
+
update = {"a": {"b": {"d": 3, "e": 4}}}
|
345
|
+
|
346
|
+
updated = recursive_update(original, update)
|
347
|
+
|
348
|
+
assert updated == {"a": {"b": {"c": 1, "d": 3, "e": 4}}}
|
349
|
+
|
350
|
+
|
351
|
+
def test_extract_words_within_table():
|
352
|
+
points = [
|
353
|
+
[[0, 0], [3, 0], [3, 1], [0, 1]],
|
354
|
+
[[3, 0], [5, 0], [5, 1], [3, 1]],
|
355
|
+
[[0, 1], [1, 1], [1, 4], [0, 4]],
|
356
|
+
[[3, 1], [3, 1], [4, 4], [4, 4]],
|
357
|
+
]
|
358
|
+
|
359
|
+
scores = [0.9, 0.9, 0.9, 0.9]
|
360
|
+
|
361
|
+
words = TextDetectorSchema(points=points, scores=scores)
|
362
|
+
|
363
|
+
table = {
|
364
|
+
"box": [0, 0, 3, 3],
|
365
|
+
"n_row": 2,
|
366
|
+
"n_col": 2,
|
367
|
+
"rows": [],
|
368
|
+
"cols": [],
|
369
|
+
"cells": [],
|
370
|
+
"order": 0,
|
371
|
+
}
|
372
|
+
|
373
|
+
table = TableStructureRecognizerSchema(**table)
|
374
|
+
checklist = [False, False, False, False]
|
375
|
+
h_words, v_words, checklist = _extract_words_within_table(words, table, checklist)
|
376
|
+
|
377
|
+
assert len(h_words) == 1
|
378
|
+
assert len(v_words) == 1
|
379
|
+
assert checklist == [True, False, True, False]
|
380
|
+
|
381
|
+
|
382
|
+
def test_calc_overlap_words_on_lines():
|
383
|
+
lines = [
|
384
|
+
{
|
385
|
+
"box": [0, 0, 2, 1],
|
386
|
+
"score": 0.9,
|
387
|
+
},
|
388
|
+
{
|
389
|
+
"box": [0, 1, 1, 1],
|
390
|
+
"score": 0.9,
|
391
|
+
},
|
392
|
+
]
|
393
|
+
|
394
|
+
lines = [TableLineSchema(**line) for line in lines]
|
395
|
+
|
396
|
+
words = [
|
397
|
+
{
|
398
|
+
"points": [[0, 0], [1, 0], [1, 1], [0, 1]],
|
399
|
+
},
|
400
|
+
{
|
401
|
+
"points": [[1, 0], [3, 0], [3, 1], [1, 1]],
|
402
|
+
},
|
403
|
+
]
|
404
|
+
|
405
|
+
overrap_ratios = _calc_overlap_words_on_lines(lines, words)
|
406
|
+
|
407
|
+
assert overrap_ratios == [[1.0, 0.0], [0.5, 0.0]]
|
408
|
+
|
409
|
+
|
410
|
+
def test_correct_vertical_word_boxes():
|
411
|
+
words = [
|
412
|
+
{
|
413
|
+
"points": [[0, 0], [20, 0], [20, 100], [0, 100]],
|
414
|
+
"score": 0.9,
|
415
|
+
},
|
416
|
+
]
|
417
|
+
|
418
|
+
cols = [TableLineSchema(box=[0, 0, 20, 100], score=0.9)]
|
419
|
+
rows = [
|
420
|
+
TableLineSchema(box=[0, 0, 20, 50], score=0.9),
|
421
|
+
TableLineSchema(box=[0, 50, 20, 100], score=0.9),
|
422
|
+
]
|
423
|
+
|
424
|
+
cells = [
|
425
|
+
{
|
426
|
+
"col": 1,
|
427
|
+
"row": 1,
|
428
|
+
"col_span": 1,
|
429
|
+
"row_span": 1,
|
430
|
+
"box": [0, 0, 20, 50],
|
431
|
+
"contents": None,
|
432
|
+
},
|
433
|
+
{
|
434
|
+
"col": 1,
|
435
|
+
"row": 2,
|
436
|
+
"col_span": 1,
|
437
|
+
"row_span": 1,
|
438
|
+
"box": [0, 50, 20, 100],
|
439
|
+
"contents": None,
|
440
|
+
},
|
441
|
+
]
|
442
|
+
|
443
|
+
cells = [TableCellSchema(**cell) for cell in cells]
|
444
|
+
|
445
|
+
table = {
|
446
|
+
"box": [0, 0, 100, 20],
|
447
|
+
"n_row": 2,
|
448
|
+
"n_col": 1,
|
449
|
+
"rows": rows,
|
450
|
+
"cols": cols,
|
451
|
+
"cells": cells,
|
452
|
+
"order": 0,
|
453
|
+
}
|
454
|
+
|
455
|
+
table = TableStructureRecognizerSchema(**table)
|
456
|
+
|
457
|
+
overrap_ratios = _calc_overlap_words_on_lines(cols, words)
|
458
|
+
|
459
|
+
points, scores = _correct_vertical_word_boxes(
|
460
|
+
overrap_ratios,
|
461
|
+
table,
|
462
|
+
words,
|
463
|
+
)
|
464
|
+
|
465
|
+
assert len(points) == 2
|
466
|
+
assert len(scores) == 2
|
467
|
+
assert points[0] == [[0, 0], [20, 0], [20, 50], [0, 50]]
|
468
|
+
assert points[1] == [[0, 50], [20, 50], [20, 100], [0, 100]]
|
469
|
+
|
470
|
+
|
471
|
+
def test_correct_horizontal_word_boxes():
|
472
|
+
words = [
|
473
|
+
{
|
474
|
+
"points": [[0, 0], [100, 0], [100, 20], [0, 20]],
|
475
|
+
"score": 0.9,
|
476
|
+
},
|
477
|
+
]
|
478
|
+
|
479
|
+
cols = [
|
480
|
+
TableLineSchema(box=[0, 0, 50, 20], score=0.9),
|
481
|
+
TableLineSchema(box=[50, 0, 100, 20], score=0.9),
|
482
|
+
]
|
483
|
+
rows = [
|
484
|
+
TableLineSchema(box=[0, 0, 100, 20], score=0.9),
|
485
|
+
]
|
486
|
+
|
487
|
+
cells = [
|
488
|
+
{
|
489
|
+
"col": 1,
|
490
|
+
"row": 1,
|
491
|
+
"col_span": 1,
|
492
|
+
"row_span": 1,
|
493
|
+
"box": [0, 0, 50, 20],
|
494
|
+
"contents": None,
|
495
|
+
},
|
496
|
+
{
|
497
|
+
"col": 2,
|
498
|
+
"row": 1,
|
499
|
+
"col_span": 1,
|
500
|
+
"row_span": 1,
|
501
|
+
"box": [50, 0, 100, 20],
|
502
|
+
"contents": None,
|
503
|
+
},
|
504
|
+
]
|
505
|
+
|
506
|
+
cells = [TableCellSchema(**cell) for cell in cells]
|
507
|
+
|
508
|
+
table = {
|
509
|
+
"box": [0, 0, 20, 100],
|
510
|
+
"n_row": 2,
|
511
|
+
"n_col": 1,
|
512
|
+
"rows": rows,
|
513
|
+
"cols": cols,
|
514
|
+
"cells": cells,
|
515
|
+
"order": 0,
|
516
|
+
}
|
517
|
+
|
518
|
+
table = TableStructureRecognizerSchema(**table)
|
519
|
+
|
520
|
+
overrap_ratios = _calc_overlap_words_on_lines(cols, words)
|
521
|
+
|
522
|
+
points, scores = _correct_horizontal_word_boxes(
|
523
|
+
overrap_ratios,
|
524
|
+
table,
|
525
|
+
words,
|
526
|
+
)
|
527
|
+
|
528
|
+
assert len(points) == 2
|
529
|
+
assert len(scores) == 2
|
530
|
+
assert points[0] == [[0, 0], [50, 0], [50, 20], [0, 20]]
|
531
|
+
assert points[1] == [[50, 0], [100, 0], [100, 20], [50, 20]]
|
532
|
+
|
533
|
+
|
534
|
+
def test_split_text_across_cells():
|
535
|
+
points = [
|
536
|
+
[[0, 0], [100, 0], [100, 20], [0, 20]],
|
537
|
+
]
|
538
|
+
|
539
|
+
scores = [0.9]
|
540
|
+
|
541
|
+
words = TextDetectorSchema(points=points, scores=scores)
|
542
|
+
|
543
|
+
cols = [
|
544
|
+
TableLineSchema(box=[0, 0, 50, 20], score=0.9),
|
545
|
+
TableLineSchema(box=[50, 0, 100, 20], score=0.9),
|
546
|
+
]
|
547
|
+
rows = [
|
548
|
+
TableLineSchema(box=[0, 0, 100, 20], score=0.9),
|
549
|
+
]
|
550
|
+
|
551
|
+
cells = [
|
552
|
+
{
|
553
|
+
"col": 1,
|
554
|
+
"row": 1,
|
555
|
+
"col_span": 1,
|
556
|
+
"row_span": 1,
|
557
|
+
"box": [0, 0, 50, 20],
|
558
|
+
"contents": None,
|
559
|
+
},
|
560
|
+
{
|
561
|
+
"col": 2,
|
562
|
+
"row": 1,
|
563
|
+
"col_span": 1,
|
564
|
+
"row_span": 1,
|
565
|
+
"box": [50, 0, 100, 20],
|
566
|
+
"contents": None,
|
567
|
+
},
|
568
|
+
]
|
569
|
+
|
570
|
+
cells = [TableCellSchema(**cell) for cell in cells]
|
571
|
+
|
572
|
+
table = {
|
573
|
+
"box": [0, 0, 100, 20],
|
574
|
+
"n_row": 2,
|
575
|
+
"n_col": 1,
|
576
|
+
"rows": rows,
|
577
|
+
"cols": cols,
|
578
|
+
"cells": cells,
|
579
|
+
"order": 0,
|
580
|
+
}
|
581
|
+
|
582
|
+
table = TableStructureRecognizerSchema(**table)
|
583
|
+
|
584
|
+
Layout = DocumentAnalyzerSchema(
|
585
|
+
paragraphs=[],
|
586
|
+
figures=[],
|
587
|
+
tables=[table],
|
588
|
+
words=[],
|
589
|
+
)
|
590
|
+
|
591
|
+
results = _split_text_across_cells(words, Layout)
|
592
|
+
|
593
|
+
assert len(results.points) == 2
|
594
|
+
assert len(results.scores) == 2
|
595
|
+
assert results.points[0] == [[0, 0], [50, 0], [50, 20], [0, 20]]
|
596
|
+
assert results.points[1] == [[50, 0], [100, 0], [100, 20], [50, 20]]
|