PyPI - yomitoku - Versions diffs - 0.9.0__tar.gz → 0.9.1__tar.gz - Mend

yomitoku 0.9.0tar.gz → 0.9.1tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (184) hide show

{yomitoku-0.9.0 → yomitoku-0.9.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: yomitoku
-Version: 0.9.0
+Version: 0.9.1
 Summary: Yomitoku is an AI-powered document image analysis package designed specifically for the Japanese language.
 Author-email: Kotaro Kinoshita <kotaro.kinoshita@mlism.com>
 License: CC BY-NC-SA 4.0
@@ -66,6 +66,7 @@ Markdown でエクスポートした結果は関してはリポジトリ内の[s
 ## 📣 リリース情報
+- 2025 年  4 月  4 日 YomiToku v0.8.0 手書き文字認識のサポート
 - 2024 年 11 月 26 日 YomiToku v0.5.1 (beta) を公開
 ## 💡 インストールの方法

{yomitoku-0.9.0 → yomitoku-0.9.1}/README.md RENAMED Viewed

@@ -41,6 +41,7 @@ Markdown でエクスポートした結果は関してはリポジトリ内の[s
 ## 📣 リリース情報
+- 2025 年  4 月  4 日 YomiToku v0.8.0 手書き文字認識のサポート
 - 2024 年 11 月 26 日 YomiToku v0.5.1 (beta) を公開
 ## 💡 インストールの方法

{yomitoku-0.9.0 → yomitoku-0.9.1}/demo/simple_ocr.py RENAMED Viewed

@@ -4,9 +4,12 @@ from yomitoku import OCR
 from yomitoku.data.functions import load_pdf
 if __name__ == "__main__":
-    ocr = OCR(visualize=True, device="cpu")
+    ocr = OCR(visualize=True, device="cuda")
     # PDFファイルを読み込み
     imgs = load_pdf("demo/sample.pdf")
+    import time
+    start = time.time()
     for i, img in enumerate(imgs):
         results, ocr_vis = ocr(img)

{yomitoku-0.9.0 → yomitoku-0.9.1}/docs/cli.en.md RENAMED Viewed

@@ -107,4 +107,18 @@ If the PDF contains multiple pages, you can export them as a single file.
 ```
 yomitoku ${path_data} -f md --combine
-```
+```
+## Specifying Reading Order
+By default, *Auto* mode automatically detects whether a document is written horizontally or vertically and estimates the appropriate reading order. However, you can also specify the reading order explicitly. In *Auto* mode, horizontal documents use `top2bottom` and vertical documents use `right2left`.
+```
+yomitoku ${path_data} --reading_order left2right
+```
+* `top2bottom`: Prioritizes reading from top to bottom. Useful for multi-column documents such as word processor files with vertical flow.
+* `left2right`: Prioritizes reading from left to right. Suitable for layouts like receipts or health insurance cards, where key-value text pairs are arranged in columns.
+* `right2left`: Prioritizes reading from right to left. Effective for vertically written documents.

{yomitoku-0.9.0 → yomitoku-0.9.1}/docs/cli.ja.md RENAMED Viewed

@@ -104,4 +104,18 @@ PDFに複数ページが含まれる場合に複数ページを一つのファ
 ```
 yomitoku ${path_data} -f md --combine
-```
+```
+## 読み取り順を指定する
+Autoでは、横書きのドキュメント、縦書きのドキュメントを識別し、自動で読み取り順を推定しますが、任意の読み取り順を指定することも可能です。デフォルトでは横書きの文書は`top2bottom`、縦書きの文書は`right2left`になります。
+```
+yomitoku ${path_data} --reading_order left2right
+```
+- `top2bottom`: 上から下方向に優先的に読み取り順を推定します。段組みのワードドキュメントなどに対して、有効です。
+- `left2right`: 左から右方向に優先的に読み取り順を推定します。レシートや保険証などキーに対して、値を示すテキストが段組みになっているようなレイアウトに有効です。
+- `right2left`: 右から左方向に優先的に読み取り順を推定します。縦書きのドキュメントに対して有効です。

{yomitoku-0.9.0 → yomitoku-0.9.1}/src/yomitoku/cli/main.py RENAMED Viewed

@@ -92,7 +92,7 @@ def process_single_file(args, analyzer, path, format):
         if ocr is not None:
             out_path = os.path.join(
-                args.outdir, f"{dirname}_{filename}_p{page+1}_ocr.jpg"
+                args.outdir, f"{dirname}_{filename}_p{page + 1}_ocr.jpg"
             )
             save_image(ocr, out_path)
@@ -100,13 +100,15 @@ def process_single_file(args, analyzer, path, format):
         if layout is not None:
             out_path = os.path.join(
-                args.outdir, f"{dirname}_{filename}_p{page+1}_layout.jpg"
+                args.outdir, f"{dirname}_{filename}_p{page + 1}_layout.jpg"
             )
             save_image(layout, out_path)
             logger.info(f"Output file: {out_path}")
-        out_path = os.path.join(args.outdir, f"{dirname}_{filename}_p{page+1}.{format}")
+        out_path = os.path.join(
+            args.outdir, f"{dirname}_{filename}_p{page + 1}.{format}"
+        )
         if format == "json":
             if args.combine:
@@ -341,6 +343,12 @@ def main():
         action="store_true",
         help="if set, ignore meta information(header, footer) in the output",
     )
+    parser.add_argument(
+        "--reading_order",
+        default="auto",
+        type=str,
+        choices=["auto", "left2right", "top2bottom", "right2left"],
+    )
     args = parser.parse_args()
@@ -394,6 +402,7 @@ def main():
         visualize=args.vis,
         device=args.device,
         ignore_meta=args.ignore_meta,
+        reading_order=args.reading_order,
     )
     os.makedirs(args.outdir, exist_ok=True)
@@ -408,7 +417,7 @@ def main():
                 logger.info(f"Processing file: {file_path}")
                 process_single_file(args, analyzer, file_path, format)
                 end = time.time()
-                logger.info(f"Total Processing time: {end-start:.2f} sec")
+                logger.info(f"Total Processing time: {end - start:.2f} sec")
             except Exception:
                 continue
     else:
@@ -416,7 +425,7 @@ def main():
         logger.info(f"Processing file: {path}")
         process_single_file(args, analyzer, path, format)
         end = time.time()
-        logger.info(f"Total Processing time: {end-start:.2f} sec")
+        logger.info(f"Total Processing time: {end - start:.2f} sec")
 if __name__ == "__main__":

{yomitoku-0.9.0 → yomitoku-0.9.1}/src/yomitoku/data/dataset.py RENAMED Viewed

@@ -8,9 +8,11 @@ from .functions import (
     validate_quads,
 )
+from concurrent.futures import ThreadPoolExecutor
 class ParseqDataset(Dataset):
-    def __init__(self, cfg, img, quads):
+    def __init__(self, cfg, img, quads, num_workers=8):
         self.img = img[:, :, ::-1]
         self.quads = quads
         self.cfg = cfg
@@ -22,19 +24,27 @@ class ParseqDataset(Dataset):
             ]
         )
-        validate_quads(self.img, self.quads)
+        with ThreadPoolExecutor(max_workers=num_workers) as executor:
+            data = list(executor.map(self.preprocess, self.quads))
-    def __len__(self):
-        return len(self.quads)
+        self.data = [tensor for tensor in data if tensor is not None]
+    def preprocess(self, quad):
+        if validate_quads(self.img, quad) is None:
+            return None
+        roi_img = extract_roi_with_perspective(self.img, quad)
-    def __getitem__(self, index):
-        polygon = self.quads[index]
-        roi_img = extract_roi_with_perspective(self.img, polygon)
         if roi_img is None:
-            return
+            return None
         roi_img = rotate_text_image(roi_img, thresh_aspect=2)
         resized = resize_with_padding(roi_img, self.cfg.data.img_size)
-        tensor = self.transform(resized)
-        return tensor
+        return resized
+    def __len__(self):
+        return len(self.data)
+    def __getitem__(self, index):
+        return self.transform(self.data[index])

{yomitoku-0.9.0 → yomitoku-0.9.1}/src/yomitoku/data/functions.py RENAMED Viewed

@@ -191,7 +191,7 @@ def array_to_tensor(img: np.ndarray) -> torch.Tensor:
     return tensor
-def validate_quads(img: np.ndarray, quads: list[list[list[int]]]):
+def validate_quads(img: np.ndarray, quad: list[list[list[int]]]):
     """
     Validate the vertices of the quadrilateral.
@@ -204,23 +204,23 @@ def validate_quads(img: np.ndarray, quads: list[list[list[int]]]):
     """
     h, w = img.shape[:2]
-    for quad in quads:
-        if len(quad) != 4:
-            raise ValueError("The number of vertices must be 4.")
-        for point in quad:
-            if len(point) != 2:
-                raise ValueError("The number of coordinates must be 2.")
-        quad = np.array(quad, dtype=int)
-        x1 = np.min(quad[:, 0])
-        x2 = np.max(quad[:, 0])
-        y1 = np.min(quad[:, 1])
-        y2 = np.max(quad[:, 1])
-        h, w = img.shape[:2]
+    if len(quad) != 4:
+        # raise ValueError("The number of vertices must be 4.")
+        return None
+    for point in quad:
+        if len(point) != 2:
+            return None
+    quad = np.array(quad, dtype=int)
+    x1 = np.min(quad[:, 0])
+    x2 = np.max(quad[:, 0])
+    y1 = np.min(quad[:, 1])
+    y2 = np.max(quad[:, 1])
+    h, w = img.shape[:2]
-        if x1 < 0 or x2 > w or y1 < 0 or y2 > h:
-            raise ValueError(f"The vertices are out of the image. {quad.tolist()}")
+    if x1 < 0 or x2 > w or y1 < 0 or y2 > h:
+        return None
     return True
@@ -237,19 +237,18 @@ def extract_roi_with_perspective(img, quad):
         np.ndarray: extracted image
     """
     dst = img.copy()
-    quad = np.array(quad, dtype=np.float32)
+    quad = np.array(quad, dtype=np.int64)
     width = np.linalg.norm(quad[0] - quad[1])
     height = np.linalg.norm(quad[1] - quad[2])
     width = int(width)
     height = int(height)
     pts1 = np.float32(quad)
     pts2 = np.float32([[0, 0], [width, 0], [width, height], [0, height]])
     M = cv2.getPerspectiveTransform(pts1, pts2)
     dst = cv2.warpPerspective(dst, M, (width, height))
     return dst

{yomitoku-0.9.0 → yomitoku-0.9.1}/src/yomitoku/document_analyzer.py RENAMED Viewed

@@ -86,8 +86,12 @@ def extract_paragraph_within_figure(paragraphs, figures):
                 check_list[i] = True
         figure["direction"] = judge_page_direction(contained_paragraphs)
+        reading_order = (
+            "left2right" if figure["direction"] == "horizontal" else "right2left"
+        )
         figure_paragraphs = prediction_reading_order(
-            contained_paragraphs, figure["direction"]
+            contained_paragraphs, reading_order
         )
         figure["paragraphs"] = sorted(figure_paragraphs, key=lambda x: x.order)
         figure = FigureSchema(**figure)
@@ -126,8 +130,8 @@ def extract_words_within_element(pred_words, element):
     cnt_vertical = word_direction.count("vertical")
     element_direction = "horizontal" if cnt_horizontal > cnt_vertical else "vertical"
-    prediction_reading_order(contained_words, element_direction)
+    order = "left2right" if element_direction == "horizontal" else "right2left"
+    prediction_reading_order(contained_words, order)
     contained_words = sorted(contained_words, key=lambda x: x.order)
     contained_words = "\n".join([content.contents for content in contained_words])
@@ -328,6 +332,7 @@ class DocumentAnalyzer:
         device="cuda",
         visualize=False,
         ignore_meta=False,
+        reading_order="auto",
     ):
         default_configs = {
             "ocr": {
@@ -352,6 +357,8 @@ class DocumentAnalyzer:
             },
         }
+        self.reading_order = reading_order
         if isinstance(configs, dict):
             recursive_update(default_configs, configs)
         else:
@@ -452,9 +459,17 @@ class DocumentAnalyzer:
         elements = page_contents + layout_res.tables + figures
-        prediction_reading_order(headers, page_direction)
-        prediction_reading_order(footers, page_direction)
-        prediction_reading_order(elements, page_direction, self.img)
+        prediction_reading_order(headers, "left2right")
+        prediction_reading_order(footers, "left2right")
+        if self.reading_order == "auto":
+            reading_order = (
+                "right2left" if page_direction == "vertical" else "top2bottom"
+            )
+        else:
+            reading_order = self.reading_order
+        prediction_reading_order(elements, reading_order, self.img)
         for i, element in enumerate(elements):
             element.order += len(headers)

{yomitoku-0.9.0 → yomitoku-0.9.1}/src/yomitoku/reading_order.py RENAMED Viewed

@@ -17,7 +17,6 @@ def _priority_dfs(nodes, direction):
     pending_nodes = sorted(nodes, key=lambda x: x.prop["distance"])
     visited = [False] * len(nodes)
     start = pending_nodes.pop(0)
     stack = [start]
@@ -53,11 +52,11 @@ def _priority_dfs(nodes, direction):
                         children.append(node)
                         stack.remove(node)
-                if direction == "horizontal":
+                if direction in "top2bottom":
                     children = sorted(
                         children, key=lambda x: x.prop["box"][0], reverse=True
                     )
-                else:
+                elif direction in ["right2left", "left2right"]:
                     children = sorted(
                         children, key=lambda x: x.prop["box"][1], reverse=True
                     )
@@ -121,7 +120,7 @@ def _exist_other_node_between_horizontal(node, other_node, nodes):
     return False
-def _create_graph_horizontal(nodes):
+def _create_graph_top2bottom(nodes):
     for i, node in enumerate(nodes):
         for j, other_node in enumerate(nodes):
             if i == j:
@@ -146,7 +145,7 @@ def _create_graph_horizontal(nodes):
         node.children = sorted(node.children, key=lambda x: x.prop["box"][0])
-def _create_graph_vertical(nodes):
+def _create_graph_right2left(nodes):
     max_x = max([node.prop["box"][2] for node in nodes])
     for i, node in enumerate(nodes):
@@ -172,15 +171,46 @@ def _create_graph_vertical(nodes):
         node.children = sorted(node.children, key=lambda x: x.prop["box"][1])
+def _create_graph_left2right(nodes, x_weight=1, y_weight=5):
+    for i, node in enumerate(nodes):
+        for j, other_node in enumerate(nodes):
+            if i == j:
+                continue
+            if is_intersected_horizontal(node.prop["box"], other_node.prop["box"]):
+                tx = node.prop["box"][2]
+                ox = other_node.prop["box"][2]
+                if _exist_other_node_between_horizontal(node, other_node, nodes):
+                    continue
+                if ox < tx:
+                    other_node.add_link(node)
+                else:
+                    node.add_link(other_node)
+            node_distance = (
+                node.prop["box"][0] * x_weight + node.prop["box"][1] * y_weight
+            )
+            node.prop["distance"] = node_distance
+    for node in nodes:
+        node.children = sorted(node.children, key=lambda x: x.prop["box"][1])
 def prediction_reading_order(elements, direction, img=None):
     if len(elements) < 2:
         return elements
     nodes = [Node(i, element.dict()) for i, element in enumerate(elements)]
-    if direction == "horizontal":
-        _create_graph_horizontal(nodes)
+    if direction == "top2bottom":
+        _create_graph_top2bottom(nodes)
+    elif direction == "right2left":
+        _create_graph_right2left(nodes)
+    elif direction == "left2right":
+        _create_graph_left2right(nodes)
     else:
-        _create_graph_vertical(nodes)
+        raise ValueError(f"Invalid direction: {direction}")
     # For debugging
     # if img is not None:

{yomitoku-0.9.0 → yomitoku-0.9.1}/src/yomitoku/utils/misc.py RENAMED Viewed

@@ -80,7 +80,7 @@ def calc_intersection(rect_a, rect_b):
     return [ix1, iy1, ix2, iy2]
-def is_intersected_horizontal(rect_a, rect_b):
+def is_intersected_horizontal(rect_a, rect_b, threshold=0.5):
     _, ay1, _, ay2 = map(int, rect_a)
     _, by1, _, by2 = map(int, rect_b)
@@ -88,9 +88,11 @@ def is_intersected_horizontal(rect_a, rect_b):
     iy1 = max(ay1, by1)
     iy2 = min(ay2, by2)
+    min_height = min(ay2 - ay1, by2 - by1)
     overlap_height = max(0, iy2 - iy1)
-    if overlap_height == 0:
+    if (overlap_height / min_height) < threshold:
         return False
     return True
@@ -119,3 +121,48 @@ def quad_to_xyxy(quad):
     y2 = max([y for _, y in quad])
     return x1, y1, x2, y2
+def convert_table_array(table):
+    n_rows = table.n_row
+    n_cols = table.n_col
+    table_array = [["" for _ in range(n_cols)] for _ in range(n_rows)]
+    for cell in table.cells:
+        row = cell.row - 1
+        col = cell.col - 1
+        row_span = cell.row_span
+        col_span = cell.col_span
+        contents = cell.contents
+        for i in range(row, row + row_span):
+            for j in range(col, col + col_span):
+                table_array[i][j] = contents
+    return table_array
+def convert_table_array_to_dict(table_array, header_row=1):
+    n_cols = len(table_array[0])
+    n_rows = len(table_array)
+    header_cols = []
+    for i in range(n_cols):
+        header = []
+        for j in range(header_row):
+            header.append(table_array[j][i])
+        if len(header) > 0:
+            header_cols.append("_".join(header))
+        else:
+            header_cols.append(f"col_{i}")
+    table_dict = []
+    for i in range(header_row, n_rows):
+        row_dict = {}
+        for j in range(n_cols):
+            row_dict[header_cols[j]] = table_array[i][j]
+        table_dict.append(row_dict)
+    return table_dict

{yomitoku-0.9.0 → yomitoku-0.9.1}/static/out/in_gallery4_p1.html RENAMED Viewed

@@ -11,20 +11,20 @@
   <p>AM9:00~PM7:00</p>
   <p>お買い上げ、誠にありがとうございます。</p>
   <p>またのお越しをお待ちしております。</p>
-  <p>端末取引ID:50631</p>
+  <p>2024-11-13 13:26:15</p>
+  <p/>
+  <h1>むこたまソフト ※</h1>
   <p>¥529</p>
+  <p>端末取引ID:50631</p>
   <p>小計<br/>¥529</p>
   <p>合計<br/>¥529</p>
   <p>内消費税<br/>(¥39)</p>
   <p>(8%対象 ¥529 内消費税 ¥39)</p>
   <p>合計点数<br/>1点</p>
   <p>お預り金額<br/>¥1,000</p>
-  <p>お釣り<br/>¥471</p>
-  <p>2024-11-13 13:26:15</p>
-  <p/>
-  <h1>むこたまソフト ※</h1>
   <p>注) ※は軽減税率(8%)適用</p>
+  <p>お釣り<br/>¥471</p>
   <p>オンラインでもご購入いただけます!</p>
-  <p>No.7314719750041</p>
   <p>http://www.mukotama.com/</p>
+  <p>No. 7314719750041</p>
 </div>

{yomitoku-0.9.0 → yomitoku-0.9.1}/static/out/in_gallery4_p1.md RENAMED Viewed

@@ -4,7 +4,7 @@
 〒2770871
-千葉県柏市若柴186番地 中央146街区1<br>ARAGE 112<br>KOIL LINK G
+ARAGE 112<br>千葉県柏市若柴186番地 中央146街区1<br>KOIL LINK G
 TEL:0471\-28\-8905
@@ -20,8 +20,12 @@ AM9:00\~PM7:00
 またのお越しをお待ちしております。
+2024\-11\-13 13:26:15
 端末取引ID:50631
+# むこたまソフト ※
 ¥529
 小計<br>¥529
@@ -36,16 +40,12 @@ AM9:00\~PM7:00
 お預り金額<br>¥1,000
-お釣り<br>¥471
-2024\-11\-13 13:26:15
-# むこたまソフト ※
 注\) ※は軽減税率\(8%\)適用
-オンラインでもご購入いただけます\!
+お釣り<br>¥471
-No.7314719750041
+オンラインでもご購入いただけます\!
 http://www.mukotama.com/
+No. 7314719750041

yomitoku-0.9.1/static/out/in_gallery4_p1_layout.jpg ADDED Viewed

Binary file

yomitoku-0.9.1/static/out/in_gallery4_p1_ocr.jpg ADDED Viewed

Binary file

{yomitoku-0.9.0 → yomitoku-0.9.1}/tests/test_data.py RENAMED Viewed

@@ -140,47 +140,29 @@ def test_resize_with_padding():
 def test_validate_quads():
     img = np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8)
-    quads = [
-        [[0, 0], [0, 10], [10, 10]],
-    ]
+    quad = [[0, 0], [0, 10], [10, 10]]
-    with pytest.raises(ValueError):
-        validate_quads(img, quads)
+    assert validate_quads(img, quad) is None
-    quads = [
-        [[0], [0, 10], [10, 10], [10, 0]],
-    ]
+    quad = [[0], [0, 10], [10, 10], [10, 0]]
-    with pytest.raises(ValueError):
-        validate_quads(img, quads)
+    assert validate_quads(img, quad) is None
-    quads = [
-        [[0, 0], [0, 150], [10, 150], [10, 0]],
-    ]
+    quad = [[0, 0], [0, 150], [10, 150], [10, 0]]
-    with pytest.raises(ValueError):
-        validate_quads(img, quads)
+    assert validate_quads(img, quad) is None
-    quads = [
-        [[150, 0], [150, 10], [10, 10], [10, 0]],
-    ]
+    quad = [[150, 0], [150, 10], [10, 10], [10, 0]]
-    with pytest.raises(ValueError):
-        validate_quads(img, quads)
+    assert validate_quads(img, quad) is None
-    quads = [
-        [[-1, 0], [-1, 10], [10, 10], [10, 0]],
-    ]
+    quad = [[-1, 0], [-1, 10], [10, 10], [10, 0]]
-    with pytest.raises(ValueError):
-        validate_quads(img, quads)
+    assert validate_quads(img, quad) is None
-    quads = [
-        [[0, -1], [0, 10], [10, 10], [10, -1]],
-    ]
+    quad = [[0, -1], [0, 10], [10, 10], [10, -1]]
-    with pytest.raises(ValueError):
-        validate_quads(img, quads)
+    assert validate_quads(img, quad) is None
     quads = [
         [[0, 0], [0, 10], [10, 10], [10, 0]],
@@ -188,4 +170,5 @@ def test_validate_quads():
         [[10, 0], [10, 30], [80, 30], [80, 0]],
     ]
-    assert validate_quads(img, quads)
+    for quad in quads:
+        assert validate_quads(img, quad)

yomitoku 0.9.0__tar.gz → 0.9.1__tar.gz

yomitoku 0.9.0tar.gz → 0.9.1tar.gz