PyPI - yomitoku - Versions diffs - 0.9.3__py3-none-any.whl → 0.10.0__py3-none-any.whl - Mend

yomitoku 0.9.3__py3-none-any.whl → 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

yomitoku/cli/main.py +11 -1
yomitoku/document_analyzer.py +7 -40
yomitoku/export/export_csv.py +17 -0
yomitoku/layout_analyzer.py +3 -13
yomitoku/layout_parser.py +2 -15
yomitoku/ocr.py +1 -22
yomitoku/schemas.py +241 -0
yomitoku/table_structure_recognizer.py +2 -29
yomitoku/text_detector.py +2 -15
yomitoku/text_recognizer.py +17 -20
yomitoku/utils/searchable_pdf.py +0 -2
yomitoku/utils/visualizer.py +16 -5
{yomitoku-0.9.3.dist-info → yomitoku-0.10.0.dist-info}/METADATA +2 -2
{yomitoku-0.9.3.dist-info → yomitoku-0.10.0.dist-info}/RECORD +16 -15
{yomitoku-0.9.3.dist-info → yomitoku-0.10.0.dist-info}/WHEEL +0 -0
{yomitoku-0.9.3.dist-info → yomitoku-0.10.0.dist-info}/entry_points.txt +0 -0

yomitoku/cli/main.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import argparse
 import os
+import re
 import time
 from pathlib import Path
@@ -96,7 +97,7 @@ def process_single_file(args, analyzer, path, format):
     format_results = []
     for page, img in enumerate(imgs):
         result, ocr, layout = analyzer(img)
-        dirname = path.parent.name
+        dirname = _sanitize_path_component(path.parent.name)
         filename = path.stem
         # cv2.imwrite(
@@ -158,6 +159,7 @@ def process_single_file(args, analyzer, path, format):
                     args.ignore_line_break,
                     img,
                     args.figure,
+                    args.figure_letter,
                     args.figure_dir,
                 )
             else:
@@ -167,6 +169,7 @@ def process_single_file(args, analyzer, path, format):
                     encoding=args.encoding,
                     img=img,
                     export_figure=args.figure,
+                    export_figure_letter=args.figure_letter,
                     figure_dir=args.figure_dir,
                 )
@@ -469,5 +472,12 @@ def main():
         logger.info(f"Total Processing time: {end - start:.2f} sec")
+def _sanitize_path_component(component):
+    if not component:
+        return component
+    return re.sub(r"^\.+", lambda m: "_" * len(m.group(0)), component)
 if __name__ == "__main__":
     main()

yomitoku/document_analyzer.py CHANGED Viewed

@@ -1,53 +1,17 @@
 import asyncio
 from concurrent.futures import ThreadPoolExecutor
-from typing import List, Union
 import numpy as np
-from pydantic import conlist
 from yomitoku.text_detector import TextDetector
 from yomitoku.text_recognizer import TextRecognizer
-from .base import BaseSchema
-from .export import export_csv, export_html, export_markdown
 from .layout_analyzer import LayoutAnalyzer
-from .ocr import OCRSchema, WordPrediction, ocr_aggregate
+from .ocr import OCRSchema, ocr_aggregate
 from .reading_order import prediction_reading_order
-from .table_structure_recognizer import TableStructureRecognizerSchema
 from .utils.misc import calc_overlap_ratio, is_contained, quad_to_xyxy
 from .utils.visualizer import det_visualizer, reading_order_visualizer
-class ParagraphSchema(BaseSchema):
-    box: conlist(int, min_length=4, max_length=4)
-    contents: Union[str, None]
-    direction: Union[str, None]
-    order: Union[int, None]
-    role: Union[str, None]
-class FigureSchema(BaseSchema):
-    box: conlist(int, min_length=4, max_length=4)
-    order: Union[int, None]
-    paragraphs: List[ParagraphSchema]
-    order: Union[int, None]
-    direction: Union[str, None]
-class DocumentAnalyzerSchema(BaseSchema):
-    paragraphs: List[ParagraphSchema]
-    tables: List[TableStructureRecognizerSchema]
-    words: List[WordPrediction]
-    figures: List[FigureSchema]
-    def to_html(self, out_path: str, **kwargs):
-        return export_html(self, out_path, **kwargs)
-    def to_markdown(self, out_path: str, **kwargs):
-        return export_markdown(self, out_path, **kwargs)
-    def to_csv(self, out_path: str, **kwargs):
-        return export_csv(self, out_path, **kwargs)
+from .schemas import ParagraphSchema, FigureSchema, DocumentAnalyzerSchema
 def combine_flags(flag1, flag2):
@@ -333,6 +297,7 @@ class DocumentAnalyzer:
         visualize=False,
         ignore_meta=False,
         reading_order="auto",
+        split_text_across_cells=False,
     ):
         default_configs = {
             "ocr": {
@@ -363,7 +328,7 @@ class DocumentAnalyzer:
             recursive_update(default_configs, configs)
         else:
             raise ValueError(
-                "configs must be a dict. See the https://kotaro-kinoshita.github.io/yomitoku-dev/usage/"
+                "configs must be a dict. See the https://kotaro-kinoshita.github.io/yomitoku/module/#config"
             )
         self.text_detector = TextDetector(
@@ -379,6 +344,7 @@ class DocumentAnalyzer:
         self.visualize = visualize
         self.ignore_meta = ignore_meta
+        self.split_text_across_cells = split_text_across_cells
     def aggregate(self, ocr_res, layout_res):
         paragraphs = []
@@ -504,7 +470,8 @@ class DocumentAnalyzer:
             results_det, _ = results[0]
             results_layout, layout = results[1]
-            results_det = _split_text_across_cells(results_det, results_layout)
+            if self.split_text_across_cells:
+                results_det = _split_text_across_cells(results_det, results_layout)
             vis_det = None
             if self.visualize:

yomitoku/export/export_csv.py CHANGED Viewed

@@ -63,6 +63,7 @@ def convert_csv(
     ignore_line_break,
     img=None,
     export_figure: bool = True,
+    export_figure_letter: bool = False,
     figure_dir="figures",
 ):
     elements = []
@@ -89,6 +90,20 @@ def convert_csv(
             }
         )
+    if export_figure_letter:
+        for figure in inputs.figures:
+            paragraphs = sorted(figure.paragraphs, key=lambda x: x.order)
+            for paragraph in paragraphs:
+                contents = paragraph_to_csv(paragraph, ignore_line_break)
+                elements.append(
+                    {
+                        "type": "paragraph",
+                        "box": paragraph.box,
+                        "element": contents,
+                        "order": figure.order,
+                    }
+                )
     elements = sorted(elements, key=lambda x: x["order"])
     if export_figure:
@@ -109,6 +124,7 @@ def export_csv(
     encoding: str = "utf-8",
     img=None,
     export_figure: bool = True,
+    export_figure_letter: bool = False,
     figure_dir="figures",
 ):
     elements = convert_csv(
@@ -117,6 +133,7 @@ def export_csv(
         ignore_line_break,
         img,
         export_figure,
+        export_figure_letter,
         figure_dir,
     )

yomitoku/layout_analyzer.py CHANGED Viewed

@@ -1,17 +1,7 @@
-from typing import List
+from .layout_parser import LayoutParser
+from .table_structure_recognizer import TableStructureRecognizer
-from .base import BaseSchema
-from .layout_parser import Element, LayoutParser
-from .table_structure_recognizer import (
-    TableStructureRecognizer,
-    TableStructureRecognizerSchema,
-)
-class LayoutAnalyzerSchema(BaseSchema):
-    paragraphs: List[Element]
-    tables: List[TableStructureRecognizerSchema]
-    figures: List[Element]
+from .schemas import LayoutAnalyzerSchema
 class LayoutAnalyzer:

yomitoku/layout_parser.py CHANGED Viewed

@@ -1,5 +1,3 @@
-from typing import List, Union
 import cv2
 import os
 import onnx
@@ -7,28 +5,17 @@ import onnxruntime
 import torch
 import torchvision.transforms as T
 from PIL import Image
-from pydantic import conlist
 from .constants import ROOT_DIR
-from .base import BaseModelCatalog, BaseModule, BaseSchema
+from .base import BaseModelCatalog, BaseModule
 from .configs import LayoutParserRTDETRv2Config, LayoutParserRTDETRv2V2Config
 from .models import RTDETRv2
 from .postprocessor import RTDETRPostProcessor
 from .utils.misc import filter_by_flag, is_contained
 from .utils.visualizer import layout_visualizer
-class Element(BaseSchema):
-    box: conlist(int, min_length=4, max_length=4)
-    score: float
-    role: Union[str, None]
-class LayoutParserSchema(BaseSchema):
-    paragraphs: List[Element]
-    tables: List[Element]
-    figures: List[Element]
+from .schemas import LayoutParserSchema
 class LayoutParserModelCatalog(BaseModelCatalog):

yomitoku/ocr.py CHANGED Viewed

@@ -1,27 +1,6 @@
-from typing import List
-from pydantic import conlist
 from yomitoku.text_detector import TextDetector
 from yomitoku.text_recognizer import TextRecognizer
-from .base import BaseSchema
-class WordPrediction(BaseSchema):
-    points: conlist(
-        conlist(int, min_length=2, max_length=2),
-        min_length=4,
-        max_length=4,
-    )
-    content: str
-    direction: str
-    rec_score: float
-    det_score: float
-class OCRSchema(BaseSchema):
-    words: List[WordPrediction]
+from .schemas import OCRSchema
 def ocr_aggregate(det_outputs, rec_outputs):

yomitoku/schemas.py ADDED Viewed

@@ -0,0 +1,241 @@
+from typing import List, Union
+from pydantic import conlist, Field
+from .base import BaseSchema
+from .export import export_csv, export_html, export_markdown, export_json
+class Element(BaseSchema):
+    box: conlist(int, min_length=4, max_length=4) = Field(
+        ...,
+        description="Bounding box of the layout element in the format [x1, y1, x2, y2]",
+    )
+    score: float = Field(
+        ...,
+        description="Confidence score of the layout element detection",
+    )
+    role: Union[str, None] = Field(
+        ...,
+        description="Role of the element, e.g., ['section_headings', 'page_header', 'page_footer', 'list_item', 'caption', 'inline_formula', 'display_formula', 'index']",
+    )
+class ParagraphSchema(BaseSchema):
+    box: conlist(int, min_length=4, max_length=4) = Field(
+        ...,
+        description="Bounding box of the paragraph in the format [x1, y1, x2, y2]",
+    )
+    contents: Union[str, None] = Field(
+        ...,
+        description="Text content of the paragraph",
+    )
+    direction: Union[str, None] = Field(
+        ...,
+        description="Text direction, e.g., ['horizontal' or 'vertical']",
+    )
+    order: Union[int, None] = Field(
+        ...,
+        description="Order of the paragraph in the document",
+    )
+    role: Union[str, None] = Field(
+        ...,
+        description="Role of the paragraph, e.g., ['section_headings', 'page_header', 'page_footer'])",
+    )
+class TableCellSchema(BaseSchema):
+    col: int = Field(
+        ...,
+        description="Column index of the cell",
+    )
+    row: int = Field(
+        ...,
+        description="Row index of the cell",
+    )
+    col_span: int = Field(
+        ...,
+        description="Number of columns spanned by the cell",
+    )
+    row_span: int = Field(
+        ...,
+        description="Number of rows spanned by the cell",
+    )
+    box: conlist(int, min_length=4, max_length=4) = Field(
+        ...,
+        description="Bounding box of the cell in the format [x1, y1, x2, y2]",
+    )
+    contents: Union[str, None] = Field(
+        ...,
+        description="Text content of the cell",
+    )
+class TableLineSchema(BaseSchema):
+    box: conlist(int, min_length=4, max_length=4) = Field(
+        ...,
+        description="Bounding box of the table line in the format [x1, y1, x2, y2]",
+    )
+    score: float = Field(
+        ...,
+        description="Confidence score of the table line detection",
+    )
+class TableStructureRecognizerSchema(BaseSchema):
+    box: conlist(int, min_length=4, max_length=4) = Field(
+        ...,
+        description="Bounding box of the table in the format [x1, y1, x2, y2]",
+    )
+    n_row: int = Field(..., description="Number of rows in the table")
+    n_col: int = Field(..., description="Number of columns in the table")
+    rows: List[TableLineSchema] = Field(
+        ...,
+        description="List of table lines representing rows",
+    )
+    cols: List[TableLineSchema] = Field(
+        ...,
+        description="List of table lines representing columns",
+    )
+    spans: List[TableLineSchema] = Field(
+        ...,
+        description="List of table lines representing spans",
+    )
+    cells: List[TableCellSchema] = Field(
+        ...,
+        description="List of table cells",
+    )
+    order: int = Field(
+        ...,
+        description="Order of the table in the document",
+    )
+class LayoutAnalyzerSchema(BaseSchema):
+    paragraphs: List[Element] = Field(
+        ...,
+        description="List of detected paragraphs",
+    )
+    tables: List[TableStructureRecognizerSchema] = Field(
+        ...,
+        description="List of detected tables",
+    )
+    figures: List[Element] = Field(
+        ...,
+        description="List of detected figures",
+    )
+class WordPrediction(BaseSchema):
+    points: conlist(
+        conlist(int, min_length=2, max_length=2),
+        min_length=4,
+        max_length=4,
+    ) = Field(
+        ...,
+        description="Bounding box of the word in the format [[x1, y1], [x2, y2], [x3, y3], [x4, y4]]",
+    )
+    content: str = Field(..., description="Text content of the word")
+    direction: str = Field(
+        ..., description="Text direction, e.g., 'horizontal' or 'vertical'"
+    )
+    rec_score: float = Field(
+        ..., description="Confidence score of the word recognition"
+    )
+    det_score: float = Field(
+        ...,
+        description="Confidence score of the word detection",
+    )
+class TextDetectorSchema(BaseSchema):
+    points: List[
+        conlist(
+            conlist(int, min_length=2, max_length=2),
+            min_length=4,
+            max_length=4,
+        )
+    ] = Field(
+        ...,
+        description="List of bounding boxes of detected text regions in the format [[x1, y1], [x2, y2], [x3, y3], [x4, y4]]",
+    )
+    scores: List[float] = Field(
+        ...,
+        description="List of confidence scores for each detected text region",
+    )
+class OCRSchema(BaseSchema):
+    words: List[WordPrediction] = Field(
+        ...,
+        description="List of recognized words with their bounding boxes, content, direction, and scores",
+    )
+class LayoutParserSchema(BaseSchema):
+    paragraphs: List[Element] = Field(..., description="List of detected paragraphs")
+    tables: List[Element] = Field(..., description="List of detected tables")
+    figures: List[Element] = Field(..., description="List of detected figures")
+class FigureSchema(BaseSchema):
+    box: conlist(int, min_length=4, max_length=4) = Field(
+        ..., description="Bounding box of the figure in the format [x1, y1, x2, y2]"
+    )
+    order: Union[int, None] = Field(
+        ..., description="Order of the figure in the document"
+    )
+    paragraphs: List[ParagraphSchema] = Field(
+        ..., description="List of paragraphs associated with the figure"
+    )
+    order: Union[int, None] = Field(
+        ..., description="Order of the figure in the document"
+    )
+    direction: Union[str, None] = Field(
+        ..., description="Text direction, e.g., ['horizontal' or 'vertical']"
+    )
+class DocumentAnalyzerSchema(BaseSchema):
+    paragraphs: List[ParagraphSchema] = Field(
+        ..., description="List of detected paragraphs"
+    )
+    tables: List[TableStructureRecognizerSchema] = Field(
+        ..., description="List of detected tables"
+    )
+    words: List[WordPrediction] = Field(..., description="List of recognized words")
+    figures: List[FigureSchema] = Field(..., description="List of detected figures")
+    def to_html(self, out_path: str, **kwargs):
+        return export_html(self, out_path, **kwargs)
+    def to_markdown(self, out_path: str, **kwargs):
+        return export_markdown(self, out_path, **kwargs)
+    def to_csv(self, out_path: str, **kwargs):
+        return export_csv(self, out_path, **kwargs)
+    def to_json(self, out_path: str, **kwargs):
+        return export_json(self, out_path, **kwargs)
+class TextRecognizerSchema(BaseSchema):
+    contents: List[str] = Field(
+        ...,
+        description="List of recognized text contents",
+    )
+    directions: List[str] = Field(
+        ..., description="List of text directions, e.g., ['horizontal' or 'vertical']"
+    )
+    scores: List[float] = Field(
+        ..., description="List of confidence scores for each recognized text"
+    )
+    points: List[
+        conlist(
+            conlist(int, min_length=2, max_length=2),
+            min_length=4,
+            max_length=4,
+        )
+    ] = Field(
+        ...,
+        description="List of bounding boxes of recognized text in the format [[x1, y1], [x2, y2], [x3, y3], [x4, y4]]",
+    )

yomitoku/table_structure_recognizer.py CHANGED Viewed

@@ -1,5 +1,3 @@
-from typing import List, Union
 import cv2
 import os
 import onnx
@@ -7,17 +5,17 @@ import onnxruntime
 import torch
 import torchvision.transforms as T
 from PIL import Image
-from pydantic import conlist
 from .constants import ROOT_DIR
-from .base import BaseModelCatalog, BaseModule, BaseSchema
+from .base import BaseModelCatalog, BaseModule
 from .configs import TableStructureRecognizerRTDETRv2Config
 from .layout_parser import filter_contained_rectangles_within_category
 from .models import RTDETRv2
 from .postprocessor import RTDETRPostProcessor
 from .utils.misc import calc_intersection, filter_by_flag, is_contained
 from .utils.visualizer import table_visualizer
+from .schemas import TableStructureRecognizerSchema
 class TableStructureRecognizerModelCatalog(BaseModelCatalog):
@@ -26,31 +24,6 @@ class TableStructureRecognizerModelCatalog(BaseModelCatalog):
         self.register("rtdetrv2", TableStructureRecognizerRTDETRv2Config, RTDETRv2)
-class TableCellSchema(BaseSchema):
-    col: int
-    row: int
-    col_span: int
-    row_span: int
-    box: conlist(int, min_length=4, max_length=4)
-    contents: Union[str, None]
-class TableLineSchema(BaseSchema):
-    box: conlist(int, min_length=4, max_length=4)
-    score: float
-class TableStructureRecognizerSchema(BaseSchema):
-    box: conlist(int, min_length=4, max_length=4)
-    n_row: int
-    n_col: int
-    rows: List[TableLineSchema]
-    cols: List[TableLineSchema]
-    cells: List[TableCellSchema]
-    spans: List[TableLineSchema]
-    order: int
 def extract_cells(row_boxes, col_boxes):
     cells = []
     for i, row_box in enumerate(row_boxes):

yomitoku/text_detector.py CHANGED Viewed

@@ -1,11 +1,8 @@
-from typing import List
 import numpy as np
 import torch
 import os
-from pydantic import conlist
-from .base import BaseModelCatalog, BaseModule, BaseSchema
+from .base import BaseModelCatalog, BaseModule
 from .configs import (
     TextDetectorDBNetConfig,
     TextDetectorDBNetV2Config,
@@ -19,6 +16,7 @@ from .models import DBNet
 from .postprocessor import DBnetPostProcessor
 from .utils.visualizer import det_visualizer
 from .constants import ROOT_DIR
+from .schemas import TextDetectorSchema
 import onnx
 import onnxruntime
@@ -31,17 +29,6 @@ class TextDetectorModelCatalog(BaseModelCatalog):
         self.register("dbnetv2", TextDetectorDBNetV2Config, DBNet)
-class TextDetectorSchema(BaseSchema):
-    points: List[
-        conlist(
-            conlist(int, min_length=2, max_length=2),
-            min_length=4,
-            max_length=4,
-        )
-    ]
-    scores: List[float]
 class TextDetector(BaseModule):
     model_catalog = TextDetectorModelCatalog()

yomitoku/text_recognizer.py CHANGED Viewed

@@ -1,12 +1,9 @@
-from typing import List
 import numpy as np
 import torch
 import os
 import unicodedata
-from pydantic import conlist
-from .base import BaseModelCatalog, BaseModule, BaseSchema
+from .base import BaseModelCatalog, BaseModule
 from .configs import (
     TextRecognizerPARSeqConfig,
     TextRecognizerPARSeqSmallConfig,
@@ -19,6 +16,8 @@ from .utils.misc import load_charset
 from .utils.visualizer import rec_visualizer
 from .constants import ROOT_DIR
+from .schemas import TextRecognizerSchema
 import onnx
 import onnxruntime
@@ -31,19 +30,6 @@ class TextRecognizerModelCatalog(BaseModelCatalog):
         self.register("parseq-small", TextRecognizerPARSeqSmallConfig, PARSeq)
-class TextRecognizerSchema(BaseSchema):
-    contents: List[str]
-    directions: List[str]
-    scores: List[float]
-    points: List[
-        conlist(
-            conlist(int, min_length=2, max_length=2),
-            min_length=4,
-            max_length=4,
-        )
-    ]
 class TextRecognizer(BaseModule):
     model_catalog = TextRecognizerModelCatalog()
@@ -94,10 +80,21 @@ class TextRecognizer(BaseModule):
             self.model.to(self.device)
     def preprocess(self, img, polygons):
+        if polygons is None:
+            h, w = img.shape[:2]
+            polygons = [
+                [
+                    [0, 0],
+                    [w, 0],
+                    [w, h],
+                    [0, h],
+                ]
+            ]
         dataset = ParseqDataset(self._cfg, img, polygons)
         dataloader = self._make_mini_batch(dataset)
-        return dataloader
+        return dataloader, polygons
     def _make_mini_batch(self, dataset):
         mini_batches = []
@@ -150,7 +147,7 @@ class TextRecognizer(BaseModule):
         return pred, score, directions
-    def __call__(self, img, points, vis=None):
+    def __call__(self, img, points=None, vis=None):
         """
         Apply the recognition model to the input image.
@@ -160,7 +157,7 @@ class TextRecognizer(BaseModule):
             vis (np.ndarray, optional): rendering image. Defaults to None.
         """
-        dataloader = self.preprocess(img, points)
+        dataloader, points = self.preprocess(img, points)
         preds = []
         scores = []
         directions = []

yomitoku/utils/searchable_pdf.py CHANGED Viewed

@@ -72,8 +72,6 @@ def create_searchable_pdf(images, ocr_results, output_path, font_path=None):
     for i, (image, ocr_result) in enumerate(zip(images, ocr_results)):
         image = Image.fromarray(image[:, :, ::-1])  # Convert BGR to RGB
-        pdfmetrics.registerFont(TTFont("MPLUS1p-Medium", FONT_PATH))
         image_path = f"tmp_{i}.png"
         image.save(image_path)
         w, h = image.size

yomitoku/utils/visualizer.py CHANGED Viewed

@@ -10,17 +10,28 @@ logger = set_logger(__name__, "INFO")
 def _reading_order_visualizer(img, elements, line_color, tip_size):
     out = img.copy()
     for i, element in enumerate(elements):
-        if i == 0:
-            continue
-        prev_element = elements[i - 1]
         cur_x1, cur_y1, cur_x2, cur_y2 = element.box
-        prev_x1, prev_y1, prev_x2, prev_y2 = prev_element.box
         cur_center = (
             cur_x1 + (cur_x2 - cur_x1) / 2,
             cur_y1 + (cur_y2 - cur_y1) / 2,
         )
+        cv2.putText(
+            out,
+            str(i),
+            (int(cur_center[0]), int(cur_center[1])),
+            cv2.FONT_HERSHEY_SIMPLEX,
+            1,
+            (0, 200, 0),
+            2,
+        )
+        if i == 0:
+            continue
+        prev_element = elements[i - 1]
+        prev_x1, prev_y1, prev_x2, prev_y2 = prev_element.box
         prev_center = (
             prev_x1 + (prev_x2 - prev_x1) / 2,
             prev_y1 + (prev_y2 - prev_y1) / 2,

{yomitoku-0.9.3.dist-info → yomitoku-0.10.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: yomitoku
-Version: 0.9.3
+Version: 0.10.0
 Summary: Yomitoku is an AI-powered document image analysis package designed specifically for the Japanese language.
 Author-email: Kotaro Kinoshita <kotaro.kinoshita@mlism.com>
 License: CC BY-NC-SA 4.0
@@ -41,7 +41,7 @@ Description-Content-Type: text/markdown
 YomiToku は日本語に特化した AI 文章画像解析エンジン(Document AI)です。画像内の文字の全文 OCR およびレイアウト解析機能を有しており、画像内の文字情報や図表を認識、抽出、変換します。
 - 🤖 日本語データセットで学習した 4 種類(文字位置の検知、文字列認識、レイアウト解析、表の構造認識)の AI モデルを搭載しています。4 種類のモデルはすべて独自に学習されたモデルで日本語文書に対して、高精度に推論可能です。
-- 🇯🇵 各モデルは日本語の文書画像に特化して学習されており、7000 文字を超える日本語文字の認識をサーポート、手書き文字、縦書きなど日本語特有のレイアウト構造の文書画像の解析も可能です。（日本語以外にも英語の文書に対しても対応しています）。
+- 🇯🇵 各モデルは日本語の文書画像に特化して学習されており、7000 文字を超える日本語文字の認識をサポート、手書き文字、縦書きなど日本語特有のレイアウト構造の文書画像の解析も可能です。（日本語以外にも英語の文書に対しても対応しています）。
 - 📈 レイアウト解析、表の構造解析, 読み順推定機能により、文書画像のレイアウトの意味的構造を壊さずに情報を抽出することが可能です。
 - 📄 多様な出力形式をサポートしています。html やマークダウン、json、csv のいずれかのフォーマットに変換可能です。また、文書内に含まれる図表、画像の抽出の出力も可能です。文書画像をサーチャブルPDFに変換する処理もサポートしています。
 - ⚡ GPU 環境で高速に動作し、効率的に文書の文字起こし解析が可能です。また、VRAM も 8GB 以内で動作し、ハイエンドな GPU を用意する必要はありません。

{yomitoku-0.9.3.dist-info → yomitoku-0.10.0.dist-info}/RECORD RENAMED Viewed

@@ -1,16 +1,17 @@
 yomitoku/__init__.py,sha256=kXOM8RbpwwLABG3p3vPT3dJWBk4JX2MFGrOeBEW0hKM,543
 yomitoku/base.py,sha256=9U3sfe69O6vuO430JzzKQQNkgPsLM9WdLfOUUhp3Ljs,3878
 yomitoku/constants.py,sha256=2jya14UflDkMdYWMKc-ZllkWbJW2qh59Cnt2brrgNb4,693
-yomitoku/document_analyzer.py,sha256=xliAelQdfsK64FtVuFvstDBr9uf2TwhqW31g2g91_CY,16888
-yomitoku/layout_analyzer.py,sha256=VhNf1ZQFoozj6WUGk5ll1p2p1jk5X3j-JPcDbTAoSl4,1856
-yomitoku/layout_parser.py,sha256=0MgbCsD90srQdsxkGEL0TgKm4rkmGzsQYx0sjKQ03yc,7718
-yomitoku/ocr.py,sha256=JSTjkupcxHITQm6ERnzU7As0c3KWf8-oxc0AqNoWHXo,2272
+yomitoku/document_analyzer.py,sha256=FyF85m7k-BxzpOKb3sIfBRpxh_4NDPC7EC3x91hxoGo,15959
+yomitoku/layout_analyzer.py,sha256=soLDcX09NlNicRYenOhFLgq8L8ct9xo7N9Hsj1IWKZw,1643
+yomitoku/layout_parser.py,sha256=BSWiL8Xl7c0CY2CXNteLye5e-bLdR1hXKtps94kon9w,7440
+yomitoku/ocr.py,sha256=gKWNciOQIgUcYrNmKhksSK8TSNisK8wY2zG2ZPXh2Fk,1920
 yomitoku/reading_order.py,sha256=_T09PqT7guk57zWo4HdSazLSQTwM91piyELA_wNHQAQ,7521
-yomitoku/table_structure_recognizer.py,sha256=tHjex6deT_FjRK5ePz9bUXA_QIhgv_vYtK-ynm4ALxg,9625
-yomitoku/text_detector.py,sha256=6IwEJJKp_F8YH0Oki0QV-Mqi--P2LGbNKo-_kxBB_eo,4383
-yomitoku/text_recognizer.py,sha256=eaxozNu-Ms6iv8efbKZzn8pJNW1Wo4f86bGhzSMtv3s,5992
+yomitoku/schemas.py,sha256=azI9iVQ88-JPSuRmDVxCcdr2KNICJmLuMl0AQmfof-0,7582
+yomitoku/table_structure_recognizer.py,sha256=UjYdzY-9dIClWP9iz0HCLr1DU2UY7n7Rtr7L9vOJwDU,9043
+yomitoku/text_detector.py,sha256=gXofo7ywFsI3hNMKKfYoOwlYVDerJym2Zg_Eq7NNGv4,4136
+yomitoku/text_recognizer.py,sha256=hS_spLnINVkMFOWm1bBG3WVfI7rK4o7ONt_nTUnpMLM,5969
 yomitoku/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-yomitoku/cli/main.py,sha256=5An9usBfBYqNiBA6QqZTCaYI4b3W1j-efAsggK_HCss,13522
+yomitoku/cli/main.py,sha256=s7wxBtgxPu7P-ARtjVmQlOosus3srlfS4-RuV0BFpyM,13821
 yomitoku/cli/mcp_server.py,sha256=WnWzxd13HaemC3b-5i9B9NVBGc3WGfum2nYhoBolEnk,5641
 yomitoku/configs/__init__.py,sha256=x5-ccjGiP6xxRtDPT7f1Enl7SsE0hSk0G8f7eF9V85I,886
 yomitoku/configs/cfg_layout_parser_rtdtrv2.py,sha256=8PRxB2Ar9UF7-DLtbgSokhrzdXb0veWI6Wc-X8qigRw,2329
@@ -25,7 +26,7 @@ yomitoku/data/__init__.py,sha256=KAofFc9rk9ZdTKBjemu9RM8Vj9XnKbWC2MPZ2RWtOdE,82
 yomitoku/data/dataset.py,sha256=lpBcpkMuQzRIyLJ4_mqtuhR9s2ZmzgBgc-XYuE_b2Sc,1326
 yomitoku/data/functions.py,sha256=RExCUxI3-gccIMw-H0ribX2jeGKkrJWhS4fNn_12c3Y,7878
 yomitoku/export/__init__.py,sha256=gmlikMHRXfzfJ_8q4fyDlnpGms-x1oggQOwJEWHMgBU,508
-yomitoku/export/export_csv.py,sha256=VY8mntUCPDbDco_dyvq5O0_Q4wga9_GTyjHCS-y4UiQ,3399
+yomitoku/export/export_csv.py,sha256=4U4KQ2RcBQmyUZ9O7a4uLoB6RUw80HPL1EEJUDwQlcI,4044
 yomitoku/export/export_html.py,sha256=LQDyZgbzmI0qJ0-FEK-54r9816H3L9hD10ChMcw0KyA,5620
 yomitoku/export/export_json.py,sha256=iNG37tdIuYG2x3NiiZemKaB6-X45WrhVPZhbX7RUzRI,2410
 yomitoku/export/export_markdown.py,sha256=KrdxDmKzVP_LbTKuDNGGsT31QOPKVsNNlb6wtLEW-1Q,4705
@@ -51,9 +52,9 @@ yomitoku/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 yomitoku/utils/graph.py,sha256=LKNB8ZhSQwOZMfeAimPMF5UCVVr2ZaUWoGDkz8z-uGU,456
 yomitoku/utils/logger.py,sha256=uOmtQDr0A0JD7wyFshedL08BiNrQorHnpktRXba8bjU,424
 yomitoku/utils/misc.py,sha256=r92x45kQR8lC5jO1MZaHBDtcCWBkQXg_WS9H4RXJzSY,4127
-yomitoku/utils/searchable_pdf.py,sha256=7JQCFhwpBJVV1Fx9q4p6fFGlEsJ-SmR0arddI3NzEeo,3567
-yomitoku/utils/visualizer.py,sha256=DjDwHiAu1iFRKh96H3Egq4vuI2s_-9dLCDeykhKi8jo,5251
-yomitoku-0.9.3.dist-info/METADATA,sha256=0r3tOl0ohoegcYQXWM3ROCSOr5px3IK-0zwqyADc9Mc,8872
-yomitoku-0.9.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-yomitoku-0.9.3.dist-info/entry_points.txt,sha256=n3c8bQSj5Be5GHAOv_NZ8cldJFmWeigQxSmteFTmu_k,96
-yomitoku-0.9.3.dist-info/RECORD,,
+yomitoku/utils/searchable_pdf.py,sha256=taZ-XtXN4RItePMDv4q0fRVlryusdkexA3TCXzwlXRo,3497
+yomitoku/utils/visualizer.py,sha256=ycC7SGuyXGGnX9KMJecdcEe1PWq30fG-EghB0E0EmWY,5468
+yomitoku-0.10.0.dist-info/METADATA,sha256=Xd2cOvxpBFl-jSyGK61MLEwwC7CDEIEUIUAVk0L58tI,8870
+yomitoku-0.10.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+yomitoku-0.10.0.dist-info/entry_points.txt,sha256=n3c8bQSj5Be5GHAOv_NZ8cldJFmWeigQxSmteFTmu_k,96
+yomitoku-0.10.0.dist-info/RECORD,,

{yomitoku-0.9.3.dist-info → yomitoku-0.10.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{yomitoku-0.9.3.dist-info → yomitoku-0.10.0.dist-info}/entry_points.txt RENAMED Viewed

File without changes

yomitoku 0.9.3__py3-none-any.whl → 0.10.0__py3-none-any.whl

yomitoku 0.9.3__py3-none-any.whl → 0.10.0__py3-none-any.whl