yomitoku 0.8.1__py3-none-any.whl → 0.9.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- yomitoku/cli/main.py +18 -8
- yomitoku/cli/mcp.py +165 -0
- yomitoku/data/dataset.py +20 -10
- yomitoku/data/functions.py +19 -20
- yomitoku/document_analyzer.py +21 -6
- yomitoku/export/export_csv.py +2 -2
- yomitoku/export/export_html.py +10 -5
- yomitoku/export/export_json.py +2 -2
- yomitoku/export/export_markdown.py +2 -2
- yomitoku/reading_order.py +38 -8
- yomitoku/utils/misc.py +61 -2
- {yomitoku-0.8.1.dist-info → yomitoku-0.9.1.dist-info}/METADATA +4 -1
- {yomitoku-0.8.1.dist-info → yomitoku-0.9.1.dist-info}/RECORD +15 -14
- {yomitoku-0.8.1.dist-info → yomitoku-0.9.1.dist-info}/entry_points.txt +1 -0
- {yomitoku-0.8.1.dist-info → yomitoku-0.9.1.dist-info}/WHEEL +0 -0
yomitoku/cli/main.py
CHANGED
```diff
@@ -3,7 +3,6 @@ import os
 import time
 from pathlib import Path
 
-import cv2
 import torch
 
 from ..constants import SUPPORT_OUTPUT_FORMAT
@@ -14,6 +13,8 @@ from ..utils.logger import set_logger
 from ..export import save_csv, save_html, save_json, save_markdown
 from ..export import convert_json, convert_csv, convert_html, convert_markdown
 
+from ..utils.misc import save_image
+
 logger = set_logger(__name__, "INFO")
 
 
@@ -91,21 +92,23 @@ def process_single_file(args, analyzer, path, format):
 
         if ocr is not None:
             out_path = os.path.join(
-                args.outdir, f"{dirname}_{filename}_p{page+1}_ocr.jpg"
+                args.outdir, f"{dirname}_{filename}_p{page + 1}_ocr.jpg"
             )
 
-
+            save_image(ocr, out_path)
             logger.info(f"Output file: {out_path}")
 
         if layout is not None:
             out_path = os.path.join(
-                args.outdir, f"{dirname}_{filename}_p{page+1}_layout.jpg"
+                args.outdir, f"{dirname}_{filename}_p{page + 1}_layout.jpg"
             )
 
-
+            save_image(layout, out_path)
             logger.info(f"Output file: {out_path}")
 
-        out_path = os.path.join(
+        out_path = os.path.join(
+            args.outdir, f"{dirname}_{filename}_p{page + 1}.{format}"
+        )
 
         if format == "json":
             if args.combine:
@@ -340,6 +343,12 @@ def main():
         action="store_true",
         help="if set, ignore meta information(header, footer) in the output",
     )
+    parser.add_argument(
+        "--reading_order",
+        default="auto",
+        type=str,
+        choices=["auto", "left2right", "top2bottom", "right2left"],
+    )
 
     args = parser.parse_args()
 
@@ -393,6 +402,7 @@ def main():
         visualize=args.vis,
         device=args.device,
         ignore_meta=args.ignore_meta,
+        reading_order=args.reading_order,
     )
 
     os.makedirs(args.outdir, exist_ok=True)
@@ -407,7 +417,7 @@ def main():
                 logger.info(f"Processing file: {file_path}")
                 process_single_file(args, analyzer, file_path, format)
                 end = time.time()
-                logger.info(f"Total Processing time: {end-start:.2f} sec")
+                logger.info(f"Total Processing time: {end - start:.2f} sec")
             except Exception:
                 continue
     else:
@@ -415,7 +425,7 @@ def main():
         logger.info(f"Processing file: {path}")
         process_single_file(args, analyzer, path, format)
         end = time.time()
-        logger.info(f"Total Processing time: {end-start:.2f} sec")
+        logger.info(f"Total Processing time: {end - start:.2f} sec")
 
 
 if __name__ == "__main__":
```
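The new `--reading_order` flag is passed straight through to `DocumentAnalyzer`. A minimal sketch of the equivalent library call, based only on the keyword arguments visible in this diff; values other than `reading_order` are illustrative:

```python
# Sketch only: mirrors the DocumentAnalyzer(...) call in main();
# device and the boolean flags here are just example values.
from yomitoku import DocumentAnalyzer

analyzer = DocumentAnalyzer(
    visualize=False,
    device="cpu",                # the CLI passes args.device here
    ignore_meta=False,
    reading_order="right2left",  # auto | left2right | top2bottom | right2left
)
```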
yomitoku/cli/mcp.py
ADDED
```diff
@@ -0,0 +1,165 @@
+import json
+import io
+import csv
+import os
+from pathlib import Path
+
+from mcp.server.fastmcp import Context, FastMCP
+
+from yomitoku import DocumentAnalyzer
+from yomitoku.data.functions import load_image, load_pdf
+from yomitoku.export import convert_json, convert_markdown, convert_csv, convert_html
+
+try:
+    RESOURCE_DIR = os.environ["RESOURCE_DIR"]
+except KeyError:
+    raise ValueError("Environment variable 'RESOURCE_DIR' is not set.")
+
+
+analyzer = None
+
+
+async def load_analyzer(ctx: Context) -> DocumentAnalyzer:
+    """
+    Load the DocumentAnalyzer instance if not already loaded.
+
+    Args:
+        ctx (Context): The context in which the analyzer is being loaded.
+
+    Returns:
+        DocumentAnalyzer: The loaded document analyzer instance.
+    """
+    global analyzer
+    if analyzer is None:
+        await ctx.info("Load document analyzer")
+        analyzer = DocumentAnalyzer(visualize=False, device="cuda")
+    return analyzer
+
+
+mcp = FastMCP("yomitoku")
+
+
+@mcp.tool()
+async def process_ocr(ctx: Context, filename: str, output_format: str) -> str:
+    """
+    Perform OCR on the specified file in the resource direcory and convert
+    the results to the desired format.
+
+    Args:
+        ctx (Context): The context in which the OCR processing is executed.
+        filename (str): The name of the file to process in the resource directory.
+        output_format (str): The desired format for the output. The available options are:
+            - json: Outputs the text as structured data along with positional information.
+            - markdown: Outputs texts and tables in Markdown format.
+            - html: Outputs texts and tables in HTML format.
+            - csv: Outputs texts and tables in CSV format.
+
+    Returns:
+        str: The OCR results converted to the specified format.
+    """
+    analyzer = await load_analyzer(ctx)
+
+    await ctx.info("Start ocr processing")
+
+    file_path = os.path.join(RESOURCE_DIR, filename)
+    if Path(file_path).suffix[1:].lower() in ["pdf"]:
+        imgs = load_pdf(file_path)
+    else:
+        imgs = load_image(file_path)
+
+    results = []
+    for page, img in enumerate(imgs):
+        analyzer.img = img
+        result, _, _ = await analyzer.run(img)
+        results.append(result)
+        await ctx.report_progress(page + 1, len(imgs))
+
+    if output_format == "json":
+        return json.dumps(
+            [
+                convert_json(
+                    result,
+                    out_path=None,
+                    ignore_line_break=True,
+                    img=img,
+                    export_figure=False,
+                    figure_dir=None,
+                ).model_dump()
+                for img, result in zip(imgs, results)
+            ],
+            ensure_ascii=False,
+            sort_keys=True,
+            separators=(",", ": "),
+        )
+    elif output_format == "markdown":
+        return "\n".join(
+            [
+                convert_markdown(
+                    result,
+                    out_path=None,
+                    ignore_line_break=True,
+                    img=img,
+                    export_figure=False,
+                )[0]
+                for img, result in zip(imgs, results)
+            ]
+        )
+    elif output_format == "html":
+        return "\n".join(
+            [
+                convert_html(
+                    result,
+                    out_path=None,
+                    ignore_line_break=True,
+                    img=img,
+                    export_figure=False,
+                    export_figure_letter="",
+                )[0]
+                for img, result in zip(imgs, results)
+            ]
+        )
+    elif output_format == "csv":
+        output = io.StringIO()
+        writer = csv.writer(output, quoting=csv.QUOTE_MINIMAL)
+        for img, result in zip(imgs, results):
+            elements = convert_csv(
+                result,
+                out_path=None,
+                ignore_line_break=True,
+                img=img,
+                export_figure=False,
+            )
+            for element in elements:
+                if element["type"] == "table":
+                    writer.writerows(element["element"])
+                else:
+                    writer.writerow([element["element"]])
+                writer.writerow([""])
+        return output.getvalue()
+    else:
+        raise ValueError(
+            f"Unsupported output format: {output_format}."
+            " Supported formats are json, markdown, html or csv."
+        )
+
+
+@mcp.resource("file://list")
+async def get_file_list() -> list[str]:
+    """
+    Retrieve a list of files in the resource directory.
+
+    Returns:
+        list[str]: A list of filenames in the resource directory.
+    """
+    return os.listdir(RESOURCE_DIR)
+
+
+def run_mcp_server():
+    """
+    Run the MCP server.
+    """
+    mcp.run(transport="stdio")
+
+
+if __name__ == "__main__":
+    run_mcp_server()
```
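The module resolves `RESOURCE_DIR` at import time and exposes `run_mcp_server()`; entry_points.txt also gains one line in this release, presumably registering it as a console script. A minimal launch sketch, assuming the optional `mcp` extra is installed; the directory path is a placeholder:

```python
# Sketch only: RESOURCE_DIR must be set *before* the module is imported,
# because yomitoku/cli/mcp.py reads it at import time and raises otherwise.
import os

os.environ.setdefault("RESOURCE_DIR", "/path/to/documents")  # placeholder path

from yomitoku.cli.mcp import run_mcp_server

run_mcp_server()  # serves the process_ocr tool and the file://list resource over stdio
```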
yomitoku/data/dataset.py
CHANGED
```diff
@@ -8,9 +8,11 @@ from .functions import (
     validate_quads,
 )
 
+from concurrent.futures import ThreadPoolExecutor
+
 
 class ParseqDataset(Dataset):
-    def __init__(self, cfg, img, quads):
+    def __init__(self, cfg, img, quads, num_workers=8):
         self.img = img[:, :, ::-1]
         self.quads = quads
         self.cfg = cfg
@@ -22,19 +24,27 @@ class ParseqDataset(Dataset):
             ]
         )
 
-
+        with ThreadPoolExecutor(max_workers=num_workers) as executor:
+            data = list(executor.map(self.preprocess, self.quads))
 
-
-
+        self.data = [tensor for tensor in data if tensor is not None]
+
+    def preprocess(self, quad):
+        if validate_quads(self.img, quad) is None:
+            return None
+
+        roi_img = extract_roi_with_perspective(self.img, quad)
 
-    def __getitem__(self, index):
-        polygon = self.quads[index]
-        roi_img = extract_roi_with_perspective(self.img, polygon)
         if roi_img is None:
-            return
+            return None
 
         roi_img = rotate_text_image(roi_img, thresh_aspect=2)
         resized = resize_with_padding(roi_img, self.cfg.data.img_size)
-        tensor = self.transform(resized)
 
-        return
+        return resized
+
+    def __len__(self):
+        return len(self.data)
+
+    def __getitem__(self, index):
+        return self.transform(self.data[index])
```
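`ParseqDataset` now crops, rotates, and resizes every quad up front in a thread pool and drops quads that fail validation, so `__len__` reflects only usable crops. A standalone sketch of that map-then-filter pattern, using toy data rather than the yomitoku classes:

```python
# Same shape as the new __init__: run preprocess concurrently, then keep only
# results that are not None (preprocess returns None for invalid items).
from concurrent.futures import ThreadPoolExecutor


def preprocess(item):
    # stand-in for validate -> crop -> rotate -> resize; None marks a bad item
    return item * 2 if item >= 0 else None


items = [3, -1, 5]
with ThreadPoolExecutor(max_workers=8) as executor:
    data = list(executor.map(preprocess, items))

data = [d for d in data if d is not None]
print(data)  # [6, 10]
```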
yomitoku/data/functions.py
CHANGED
```diff
@@ -191,7 +191,7 @@ def array_to_tensor(img: np.ndarray) -> torch.Tensor:
     return tensor
 
 
-def validate_quads(img: np.ndarray, quads: list[list[list[int]]]):
+def validate_quads(img: np.ndarray, quad: list[list[list[int]]]):
     """
     Validate the vertices of the quadrilateral.
 
@@ -204,23 +204,23 @@ def validate_quads(img: np.ndarray, quads: list[list[list[int]]]):
     """
 
     h, w = img.shape[:2]
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    if len(quad) != 4:
+        # raise ValueError("The number of vertices must be 4.")
+        return None
+
+    for point in quad:
+        if len(point) != 2:
+            return None
+
+    quad = np.array(quad, dtype=int)
+    x1 = np.min(quad[:, 0])
+    x2 = np.max(quad[:, 0])
+    y1 = np.min(quad[:, 1])
+    y2 = np.max(quad[:, 1])
+    h, w = img.shape[:2]
 
-
-
+    if x1 < 0 or x2 > w or y1 < 0 or y2 > h:
+        return None
 
     return True
 
@@ -237,19 +237,18 @@ def extract_roi_with_perspective(img, quad):
         np.ndarray: extracted image
     """
     dst = img.copy()
-    quad = np.array(quad, dtype=np.
+    quad = np.array(quad, dtype=np.int64)
+
     width = np.linalg.norm(quad[0] - quad[1])
     height = np.linalg.norm(quad[1] - quad[2])
 
     width = int(width)
     height = int(height)
-
     pts1 = np.float32(quad)
     pts2 = np.float32([[0, 0], [width, 0], [width, height], [0, height]])
 
     M = cv2.getPerspectiveTransform(pts1, pts2)
     dst = cv2.warpPerspective(dst, M, (width, height))
-
     return dst
 
 
```
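`validate_quads` now takes a single quad and reports failures by returning `None` (wrong vertex count, malformed points, or coordinates outside the image) instead of raising, which is what lets the dataset above silently filter bad quads. A behaviour sketch based on the added code; the image size is arbitrary:

```python
# Expected behaviour of the new validate_quads per the added lines above.
import numpy as np

from yomitoku.data.functions import validate_quads

img = np.zeros((100, 200, 3), dtype=np.uint8)  # h=100, w=200

print(validate_quads(img, [[0, 0], [50, 0], [50, 30], [0, 30]]))    # True
print(validate_quads(img, [[0, 0], [250, 0], [250, 30], [0, 30]]))  # None: x beyond width
print(validate_quads(img, [[0, 0], [50, 0], [50, 30]]))             # None: not 4 vertices
```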
yomitoku/document_analyzer.py
CHANGED
```diff
@@ -86,8 +86,12 @@ def extract_paragraph_within_figure(paragraphs, figures):
             check_list[i] = True
 
         figure["direction"] = judge_page_direction(contained_paragraphs)
+        reading_order = (
+            "left2right" if figure["direction"] == "horizontal" else "right2left"
+        )
+
         figure_paragraphs = prediction_reading_order(
-            contained_paragraphs,
+            contained_paragraphs, reading_order
         )
         figure["paragraphs"] = sorted(figure_paragraphs, key=lambda x: x.order)
         figure = FigureSchema(**figure)
@@ -126,8 +130,8 @@ def extract_words_within_element(pred_words, element):
     cnt_vertical = word_direction.count("vertical")
 
     element_direction = "horizontal" if cnt_horizontal > cnt_vertical else "vertical"
-
-    prediction_reading_order(contained_words,
+    order = "left2right" if element_direction == "horizontal" else "right2left"
+    prediction_reading_order(contained_words, order)
     contained_words = sorted(contained_words, key=lambda x: x.order)
 
     contained_words = "\n".join([content.contents for content in contained_words])
@@ -328,6 +332,7 @@ class DocumentAnalyzer:
         device="cuda",
         visualize=False,
         ignore_meta=False,
+        reading_order="auto",
     ):
         default_configs = {
             "ocr": {
@@ -352,6 +357,8 @@ class DocumentAnalyzer:
             },
         }
 
+        self.reading_order = reading_order
+
         if isinstance(configs, dict):
             recursive_update(default_configs, configs)
         else:
@@ -452,9 +459,17 @@
 
         elements = page_contents + layout_res.tables + figures
 
-        prediction_reading_order(headers,
-        prediction_reading_order(footers,
-
+        prediction_reading_order(headers, "left2right")
+        prediction_reading_order(footers, "left2right")
+
+        if self.reading_order == "auto":
+            reading_order = (
+                "right2left" if page_direction == "vertical" else "top2bottom"
+            )
+        else:
+            reading_order = self.reading_order
+
+        prediction_reading_order(elements, reading_order, self.img)
 
         for i, element in enumerate(elements):
             element.order += len(headers)
```
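When `reading_order` is left at its default of `"auto"`, the analyzer picks the direction from the detected page direction, matching how vertically set Japanese text is normally read. The rule, restated as a standalone helper for illustration; `resolve_reading_order` is our name, not part of the API:

```python
# Restates the "auto" branch added above; hypothetical helper for illustration only.
def resolve_reading_order(configured: str, page_direction: str) -> str:
    if configured == "auto":
        return "right2left" if page_direction == "vertical" else "top2bottom"
    return configured


print(resolve_reading_order("auto", "vertical"))        # right2left
print(resolve_reading_order("auto", "horizontal"))      # top2bottom
print(resolve_reading_order("left2right", "vertical"))  # explicit value wins
```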
yomitoku/export/export_csv.py
CHANGED
```diff
@@ -1,7 +1,7 @@
 import csv
 import os
 
-import cv2
+from ..utils.misc import save_image
 
 
 def table_to_csv(table, ignore_line_break):
@@ -54,7 +54,7 @@ def save_figure(
         filename = os.path.splitext(os.path.basename(out_path))[0]
         figure_name = f"{filename}_figure_{i}.png"
         figure_path = os.path.join(save_dir, figure_name)
-
+        save_image(figure_img, figure_path)
 
 
 def convert_csv(
```
yomitoku/export/export_html.py
CHANGED
```diff
@@ -1,10 +1,10 @@
 import os
 import re
 from html import escape
-
-import cv2
 from lxml import etree, html
 
+from ..utils.misc import save_image
+
 
 def convert_text_to_html(text):
     """
@@ -122,7 +122,7 @@ def figure_to_html(
         filename = os.path.splitext(os.path.basename(out_path))[0]
         figure_name = f"{filename}_figure_{i}.png"
         figure_path = os.path.join(save_dir, figure_name)
-
+        save_image(figure_img, figure_path)
 
         elements.append(
             {
@@ -180,8 +180,13 @@ def convert_html(
     elements = sorted(elements, key=lambda x: x["order"])
 
     html_string = "".join([element["html"] for element in elements])
-
-
+    if not len(html_string) == 0:
+        parsed_html = html.fromstring(html_string)
+        formatted_html = etree.tostring(
+            parsed_html, pretty_print=True, encoding="unicode"
+        )
+    else:
+        formatted_html = ""
 
     return formatted_html, elements
 
```
yomitoku/export/export_json.py
CHANGED
```diff
@@ -1,7 +1,7 @@
 import json
 import os
 
-import cv2
+from ..utils.misc import save_image
 
 
 def paragraph_to_json(paragraph, ignore_line_break):
@@ -33,7 +33,7 @@ def save_figure(
         filename = os.path.splitext(os.path.basename(out_path))[0]
         figure_name = f"{filename}_figure_{i}.png"
         figure_path = os.path.join(save_dir, figure_name)
-
+        save_image(figure_img, figure_path)
 
 
 def convert_json(inputs, out_path, ignore_line_break, img, export_figure, figure_dir):
```
yomitoku/export/export_markdown.py
CHANGED
```diff
@@ -1,7 +1,7 @@
 import os
 import re
 
-import cv2
+from ..utils.misc import save_image
 
 
 def escape_markdown_special_chars(text):
@@ -89,7 +89,7 @@ def figure_to_md(
         filename = os.path.splitext(os.path.basename(out_path))[0]
         figure_name = f"{filename}_figure_{i}.png"
         figure_path = os.path.join(save_dir, figure_name)
-
+        save_image(figure_img, figure_path)
 
         elements.append(
             {
```
yomitoku/reading_order.py
CHANGED
```diff
@@ -17,7 +17,6 @@ def _priority_dfs(nodes, direction):
 
     pending_nodes = sorted(nodes, key=lambda x: x.prop["distance"])
     visited = [False] * len(nodes)
-
     start = pending_nodes.pop(0)
     stack = [start]
 
@@ -53,11 +52,11 @@ def _priority_dfs(nodes, direction):
                 children.append(node)
                 stack.remove(node)
 
-        if direction
+        if direction in "top2bottom":
             children = sorted(
                 children, key=lambda x: x.prop["box"][0], reverse=True
             )
-
+        elif direction in ["right2left", "left2right"]:
             children = sorted(
                 children, key=lambda x: x.prop["box"][1], reverse=True
             )
@@ -121,7 +120,7 @@ def _exist_other_node_between_horizontal(node, other_node, nodes):
     return False
 
 
-def _create_graph_horizontal(nodes):
+def _create_graph_top2bottom(nodes):
     for i, node in enumerate(nodes):
         for j, other_node in enumerate(nodes):
             if i == j:
@@ -146,7 +145,7 @@ def _create_graph_horizontal(nodes):
         node.children = sorted(node.children, key=lambda x: x.prop["box"][0])
 
 
-def _create_graph_vertical(nodes):
+def _create_graph_right2left(nodes):
     max_x = max([node.prop["box"][2] for node in nodes])
 
     for i, node in enumerate(nodes):
@@ -172,15 +171,46 @@ def _create_graph_vertical(nodes):
         node.children = sorted(node.children, key=lambda x: x.prop["box"][1])
 
 
+def _create_graph_left2right(nodes, x_weight=1, y_weight=5):
+    for i, node in enumerate(nodes):
+        for j, other_node in enumerate(nodes):
+            if i == j:
+                continue
+
+            if is_intersected_horizontal(node.prop["box"], other_node.prop["box"]):
+                tx = node.prop["box"][2]
+                ox = other_node.prop["box"][2]
+
+                if _exist_other_node_between_horizontal(node, other_node, nodes):
+                    continue
+
+                if ox < tx:
+                    other_node.add_link(node)
+                else:
+                    node.add_link(other_node)
+
+        node_distance = (
+            node.prop["box"][0] * x_weight + node.prop["box"][1] * y_weight
+        )
+        node.prop["distance"] = node_distance
+
+    for node in nodes:
+        node.children = sorted(node.children, key=lambda x: x.prop["box"][1])
+
+
 def prediction_reading_order(elements, direction, img=None):
     if len(elements) < 2:
         return elements
 
     nodes = [Node(i, element.dict()) for i, element in enumerate(elements)]
-    if direction == "
-
+    if direction == "top2bottom":
+        _create_graph_top2bottom(nodes)
+    elif direction == "right2left":
+        _create_graph_right2left(nodes)
+    elif direction == "left2right":
+        _create_graph_left2right(nodes)
     else:
-
+        raise ValueError(f"Invalid direction: {direction}")
 
     # For debugging
     # if img is not None:
```
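The new `_create_graph_left2right` builds the same horizontal-intersection graph as the right-to-left variant but links blocks left to right, and it seeds the priority DFS with a weighted distance in which vertical position counts five times as much as horizontal position, so traversal starts near the top-left of the page. A small numeric illustration of that start-priority heuristic:

```python
# distance = x1 * x_weight + y1 * y_weight with the defaults from the diff
# (x_weight=1, y_weight=5); lower distance = higher start priority in the DFS.
x_weight, y_weight = 1, 5

boxes = {
    "left column, top block": (0, 0),
    "right column, top block": (500, 0),
    "left column, second block": (0, 120),
}

for name, (x1, y1) in boxes.items():
    print(name, x1 * x_weight + y1 * y_weight)
# left column, top block    -> 0   (visited first)
# right column, top block   -> 500
# left column, second block -> 600
```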
yomitoku/utils/misc.py
CHANGED
```diff
@@ -1,3 +1,6 @@
+import cv2
+
+
 def load_charset(charset_path):
     with open(charset_path, "r", encoding="utf-8") as f:
         charset = f.read()
@@ -9,6 +12,15 @@ def filter_by_flag(elements, flags):
     return [element for element, flag in zip(elements, flags) if flag]
 
 
+def save_image(img, path):
+    success, buffer = cv2.imencode(".jpg", img)
+    if not success:
+        raise ValueError("Failed to encode image")
+
+    with open(path, "wb") as f:
+        f.write(buffer.tobytes())
+
+
 def calc_overlap_ratio(rect_a, rect_b):
     intersection = calc_intersection(rect_a, rect_b)
     if intersection is None:
@@ -68,7 +80,7 @@ def calc_intersection(rect_a, rect_b):
     return [ix1, iy1, ix2, iy2]
 
 
-def is_intersected_horizontal(rect_a, rect_b):
+def is_intersected_horizontal(rect_a, rect_b, threshold=0.5):
     _, ay1, _, ay2 = map(int, rect_a)
     _, by1, _, by2 = map(int, rect_b)
 
@@ -76,9 +88,11 @@ def is_intersected_horizontal(rect_a, rect_b):
     iy1 = max(ay1, by1)
     iy2 = min(ay2, by2)
 
+    min_height = min(ay2 - ay1, by2 - by1)
+
     overlap_height = max(0, iy2 - iy1)
 
-    if overlap_height
+    if (overlap_height / min_height) < threshold:
        return False
 
     return True
@@ -107,3 +121,48 @@ def quad_to_xyxy(quad):
     y2 = max([y for _, y in quad])
 
     return x1, y1, x2, y2
+
+
+def convert_table_array(table):
+    n_rows = table.n_row
+    n_cols = table.n_col
+
+    table_array = [["" for _ in range(n_cols)] for _ in range(n_rows)]
+
+    for cell in table.cells:
+        row = cell.row - 1
+        col = cell.col - 1
+        row_span = cell.row_span
+        col_span = cell.col_span
+        contents = cell.contents
+
+        for i in range(row, row + row_span):
+            for j in range(col, col + col_span):
+                table_array[i][j] = contents
+
+    return table_array
+
+
+def convert_table_array_to_dict(table_array, header_row=1):
+    n_cols = len(table_array[0])
+    n_rows = len(table_array)
+
+    header_cols = []
+    for i in range(n_cols):
+        header = []
+        for j in range(header_row):
+            header.append(table_array[j][i])
+
+        if len(header) > 0:
+            header_cols.append("_".join(header))
+        else:
+            header_cols.append(f"col_{i}")
+
+    table_dict = []
+    for i in range(header_row, n_rows):
+        row_dict = {}
+        for j in range(n_cols):
+            row_dict[header_cols[j]] = table_array[i][j]
+        table_dict.append(row_dict)
+
+    return table_dict
```
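`save_image` replaces the direct `cv2.imwrite` usage in the CLI and exporters; encoding with `cv2.imencode` and writing the buffer manually also tends to cope better with non-ASCII output paths, which `cv2.imwrite` can mishandle. The two table helpers turn a recognized table into a plain 2D array and then into per-row dicts keyed by the header cells. A usage sketch with a hand-built stand-in for the table schema they expect (1-based `row`/`col`, plus spans and `contents`):

```python
# Sketch only: SimpleNamespace mimics the table/cell objects that
# convert_table_array reads (n_row, n_col, cells with row, col, spans, contents).
from types import SimpleNamespace

from yomitoku.utils.misc import convert_table_array, convert_table_array_to_dict

table = SimpleNamespace(
    n_row=2,
    n_col=2,
    cells=[
        SimpleNamespace(row=1, col=1, row_span=1, col_span=1, contents="Item"),
        SimpleNamespace(row=1, col=2, row_span=1, col_span=1, contents="Qty"),
        SimpleNamespace(row=2, col=1, row_span=1, col_span=1, contents="Apple"),
        SimpleNamespace(row=2, col=2, row_span=1, col_span=1, contents="3"),
    ],
)

array = convert_table_array(table)
print(array)                               # [['Item', 'Qty'], ['Apple', '3']]
print(convert_table_array_to_dict(array))  # [{'Item': 'Apple', 'Qty': '3'}]
```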
{yomitoku-0.8.1.dist-info → yomitoku-0.9.1.dist-info}/METADATA
CHANGED
```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: yomitoku
-Version: 0.8.1
+Version: 0.9.1
 Summary: Yomitoku is an AI-powered document image analysis package designed specifically for the Japanese language.
 Author-email: Kotaro Kinoshita <kotaro.kinoshita@mlism.com>
 License: CC BY-NC-SA 4.0
@@ -19,6 +19,8 @@ Requires-Dist: shapely>=2.0.6
 Requires-Dist: timm>=1.0.11
 Requires-Dist: torch>=2.5.0
 Requires-Dist: torchvision>=0.20.0
+Provides-Extra: mcp
+Requires-Dist: mcp[cli]>=1.6.0; extra == 'mcp'
 Description-Content-Type: text/markdown
 
 日本語版 | [English](README_EN.md)
@@ -64,6 +66,7 @@ Markdown でエクスポートした結果は関してはリポジトリ内の[s
 
 ## 📣 リリース情報
 
+- 2025 年 4 月 4 日 YomiToku v0.8.0 手書き文字認識のサポート
 - 2024 年 11 月 26 日 YomiToku v0.5.1 (beta) を公開
 
 ## 💡 インストールの方法
```
{yomitoku-0.8.1.dist-info → yomitoku-0.9.1.dist-info}/RECORD
CHANGED
```diff
@@ -1,16 +1,17 @@
 yomitoku/__init__.py,sha256=kXOM8RbpwwLABG3p3vPT3dJWBk4JX2MFGrOeBEW0hKM,543
 yomitoku/base.py,sha256=9U3sfe69O6vuO430JzzKQQNkgPsLM9WdLfOUUhp3Ljs,3878
 yomitoku/constants.py,sha256=zlW5QRc_u_F3C2RAgBFWyHJZexBnJT5N15GC-9d3iLo,686
-yomitoku/document_analyzer.py,sha256=
+yomitoku/document_analyzer.py,sha256=xliAelQdfsK64FtVuFvstDBr9uf2TwhqW31g2g91_CY,16888
 yomitoku/layout_analyzer.py,sha256=VhNf1ZQFoozj6WUGk5ll1p2p1jk5X3j-JPcDbTAoSl4,1856
 yomitoku/layout_parser.py,sha256=0MgbCsD90srQdsxkGEL0TgKm4rkmGzsQYx0sjKQ03yc,7718
 yomitoku/ocr.py,sha256=JSTjkupcxHITQm6ERnzU7As0c3KWf8-oxc0AqNoWHXo,2272
-yomitoku/reading_order.py,sha256=
+yomitoku/reading_order.py,sha256=_T09PqT7guk57zWo4HdSazLSQTwM91piyELA_wNHQAQ,7521
 yomitoku/table_structure_recognizer.py,sha256=tHjex6deT_FjRK5ePz9bUXA_QIhgv_vYtK-ynm4ALxg,9625
 yomitoku/text_detector.py,sha256=6IwEJJKp_F8YH0Oki0QV-Mqi--P2LGbNKo-_kxBB_eo,4383
 yomitoku/text_recognizer.py,sha256=eaxozNu-Ms6iv8efbKZzn8pJNW1Wo4f86bGhzSMtv3s,5992
 yomitoku/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-yomitoku/cli/main.py,sha256=
+yomitoku/cli/main.py,sha256=VZG8DZf-k_QytlDZtB91eBNY69MRpbryQg1rkn3fs20,12304
+yomitoku/cli/mcp.py,sha256=5h704SsUGNAqVnoO_5S-HY2-bApy_Rf8ajDxl1pkT2k,4888
 yomitoku/configs/__init__.py,sha256=x5-ccjGiP6xxRtDPT7f1Enl7SsE0hSk0G8f7eF9V85I,886
 yomitoku/configs/cfg_layout_parser_rtdtrv2.py,sha256=8PRxB2Ar9UF7-DLtbgSokhrzdXb0veWI6Wc-X8qigRw,2329
 yomitoku/configs/cfg_layout_parser_rtdtrv2_v2.py,sha256=nMrL3uvoVmyzZ909Bz2zmfp9b6AEBLKhIprOvQ5yiQE,2324
@@ -21,13 +22,13 @@ yomitoku/configs/cfg_text_recognizer_parseq.py,sha256=hpFs3nKqh4XdU3BZMTultegtLE
 yomitoku/configs/cfg_text_recognizer_parseq_small.py,sha256=uCm_VC_G79IbZpOiK8fgYzAJ4b98H5pf328wyQomtfo,1259
 yomitoku/configs/cfg_text_recognizer_parseq_v2.py,sha256=GfHzbByOKjH21PRTxT8x_fU4r4Mda6F750Z8pjNeb8g,1249
 yomitoku/data/__init__.py,sha256=KAofFc9rk9ZdTKBjemu9RM8Vj9XnKbWC2MPZ2RWtOdE,82
-yomitoku/data/dataset.py,sha256
-yomitoku/data/functions.py,sha256=
+yomitoku/data/dataset.py,sha256=lpBcpkMuQzRIyLJ4_mqtuhR9s2ZmzgBgc-XYuE_b2Sc,1326
+yomitoku/data/functions.py,sha256=RExCUxI3-gccIMw-H0ribX2jeGKkrJWhS4fNn_12c3Y,7878
 yomitoku/export/__init__.py,sha256=gmlikMHRXfzfJ_8q4fyDlnpGms-x1oggQOwJEWHMgBU,508
-yomitoku/export/export_csv.py,sha256=
-yomitoku/export/export_html.py,sha256=
-yomitoku/export/export_json.py,sha256=
-yomitoku/export/export_markdown.py,sha256=
+yomitoku/export/export_csv.py,sha256=VY8mntUCPDbDco_dyvq5O0_Q4wga9_GTyjHCS-y4UiQ,3399
+yomitoku/export/export_html.py,sha256=LQDyZgbzmI0qJ0-FEK-54r9816H3L9hD10ChMcw0KyA,5620
+yomitoku/export/export_json.py,sha256=iNG37tdIuYG2x3NiiZemKaB6-X45WrhVPZhbX7RUzRI,2410
+yomitoku/export/export_markdown.py,sha256=KrdxDmKzVP_LbTKuDNGGsT31QOPKVsNNlb6wtLEW-1Q,4705
 yomitoku/models/__init__.py,sha256=Enxq9sjJWusZuxecTori8IQa8NEYKaiiptDluHX1avg,144
 yomitoku/models/dbnet_plus.py,sha256=jeWJZm0ihbxoJeAXBFK7uVIwoosx2IUNk7Ut5wRH0vA,7998
 yomitoku/models/parseq.py,sha256=psCPjP3eKjOFAUZJPQQhbD0nWEV5FeOZ0tTK27Rvvbw,8748
@@ -49,9 +50,9 @@ yomitoku/resource/charset.txt,sha256=sU91kSi-9Wk4733bCXy4j_UDmvcsj96sHOq1ppUJlOY
 yomitoku/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 yomitoku/utils/graph.py,sha256=LKNB8ZhSQwOZMfeAimPMF5UCVVr2ZaUWoGDkz8z-uGU,456
 yomitoku/utils/logger.py,sha256=uOmtQDr0A0JD7wyFshedL08BiNrQorHnpktRXba8bjU,424
-yomitoku/utils/misc.py,sha256=
+yomitoku/utils/misc.py,sha256=r92x45kQR8lC5jO1MZaHBDtcCWBkQXg_WS9H4RXJzSY,4127
 yomitoku/utils/visualizer.py,sha256=DjDwHiAu1iFRKh96H3Egq4vuI2s_-9dLCDeykhKi8jo,5251
-yomitoku-0.
-yomitoku-0.
-yomitoku-0.
-yomitoku-0.
+yomitoku-0.9.1.dist-info/METADATA,sha256=ozEkYekTPuEP1GwnCCQKgJC9DzEQpyActU_DltQGMHc,8700
+yomitoku-0.9.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+yomitoku-0.9.1.dist-info/entry_points.txt,sha256=N3PzzSo-fdgri5liPpZ3ItMmRH6oVX14pIU_5pUJiAs,99
+yomitoku-0.9.1.dist-info/RECORD,,
```
{yomitoku-0.8.1.dist-info → yomitoku-0.9.1.dist-info}/WHEEL
File without changes