yomitoku 0.9.3__py3-none-any.whl → 0.9.4__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
yomitoku/cli/main.py CHANGED
@@ -158,6 +158,7 @@ def process_single_file(args, analyzer, path, format):
158
158
  args.ignore_line_break,
159
159
  img,
160
160
  args.figure,
161
+ args.figure_letter,
161
162
  args.figure_dir,
162
163
  )
163
164
  else:
@@ -167,6 +168,7 @@ def process_single_file(args, analyzer, path, format):
167
168
  encoding=args.encoding,
168
169
  img=img,
169
170
  export_figure=args.figure,
171
+ export_figure_letter=args.figure_letter,
170
172
  figure_dir=args.figure_dir,
171
173
  )
172
174
 
@@ -63,6 +63,7 @@ def convert_csv(
63
63
  ignore_line_break,
64
64
  img=None,
65
65
  export_figure: bool = True,
66
+ export_figure_letter: bool = False,
66
67
  figure_dir="figures",
67
68
  ):
68
69
  elements = []
@@ -89,6 +90,20 @@ def convert_csv(
89
90
  }
90
91
  )
91
92
 
93
+ if export_figure_letter:
94
+ for figure in inputs.figures:
95
+ paragraphs = sorted(figure.paragraphs, key=lambda x: x.order)
96
+ for paragraph in paragraphs:
97
+ contents = paragraph_to_csv(paragraph, ignore_line_break)
98
+ elements.append(
99
+ {
100
+ "type": "paragraph",
101
+ "box": paragraph.box,
102
+ "element": contents,
103
+ "order": figure.order,
104
+ }
105
+ )
106
+
92
107
  elements = sorted(elements, key=lambda x: x["order"])
93
108
 
94
109
  if export_figure:
@@ -109,6 +124,7 @@ def export_csv(
109
124
  encoding: str = "utf-8",
110
125
  img=None,
111
126
  export_figure: bool = True,
127
+ export_figure_letter: bool = False,
112
128
  figure_dir="figures",
113
129
  ):
114
130
  elements = convert_csv(
@@ -117,6 +133,7 @@ def export_csv(
117
133
  ignore_line_break,
118
134
  img,
119
135
  export_figure,
136
+ export_figure_letter,
120
137
  figure_dir,
121
138
  )
122
139
 
@@ -72,8 +72,6 @@ def create_searchable_pdf(images, ocr_results, output_path, font_path=None):
72
72
 
73
73
  for i, (image, ocr_result) in enumerate(zip(images, ocr_results)):
74
74
  image = Image.fromarray(image[:, :, ::-1]) # Convert BGR to RGB
75
- pdfmetrics.registerFont(TTFont("MPLUS1p-Medium", FONT_PATH))
76
-
77
75
  image_path = f"tmp_{i}.png"
78
76
  image.save(image_path)
79
77
  w, h = image.size
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: yomitoku
3
- Version: 0.9.3
3
+ Version: 0.9.4
4
4
  Summary: Yomitoku is an AI-powered document image analysis package designed specifically for the Japanese language.
5
5
  Author-email: Kotaro Kinoshita <kotaro.kinoshita@mlism.com>
6
6
  License: CC BY-NC-SA 4.0
@@ -10,7 +10,7 @@ yomitoku/table_structure_recognizer.py,sha256=tHjex6deT_FjRK5ePz9bUXA_QIhgv_vYtK
10
10
  yomitoku/text_detector.py,sha256=6IwEJJKp_F8YH0Oki0QV-Mqi--P2LGbNKo-_kxBB_eo,4383
11
11
  yomitoku/text_recognizer.py,sha256=eaxozNu-Ms6iv8efbKZzn8pJNW1Wo4f86bGhzSMtv3s,5992
12
12
  yomitoku/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
- yomitoku/cli/main.py,sha256=5An9usBfBYqNiBA6QqZTCaYI4b3W1j-efAsggK_HCss,13522
13
+ yomitoku/cli/main.py,sha256=v1UYsnQdnylhLvDURuxLODU3IU-ssVGqOJT9r-TCVns,13623
14
14
  yomitoku/cli/mcp_server.py,sha256=WnWzxd13HaemC3b-5i9B9NVBGc3WGfum2nYhoBolEnk,5641
15
15
  yomitoku/configs/__init__.py,sha256=x5-ccjGiP6xxRtDPT7f1Enl7SsE0hSk0G8f7eF9V85I,886
16
16
  yomitoku/configs/cfg_layout_parser_rtdtrv2.py,sha256=8PRxB2Ar9UF7-DLtbgSokhrzdXb0veWI6Wc-X8qigRw,2329
@@ -25,7 +25,7 @@ yomitoku/data/__init__.py,sha256=KAofFc9rk9ZdTKBjemu9RM8Vj9XnKbWC2MPZ2RWtOdE,82
25
25
  yomitoku/data/dataset.py,sha256=lpBcpkMuQzRIyLJ4_mqtuhR9s2ZmzgBgc-XYuE_b2Sc,1326
26
26
  yomitoku/data/functions.py,sha256=RExCUxI3-gccIMw-H0ribX2jeGKkrJWhS4fNn_12c3Y,7878
27
27
  yomitoku/export/__init__.py,sha256=gmlikMHRXfzfJ_8q4fyDlnpGms-x1oggQOwJEWHMgBU,508
28
- yomitoku/export/export_csv.py,sha256=VY8mntUCPDbDco_dyvq5O0_Q4wga9_GTyjHCS-y4UiQ,3399
28
+ yomitoku/export/export_csv.py,sha256=4U4KQ2RcBQmyUZ9O7a4uLoB6RUw80HPL1EEJUDwQlcI,4044
29
29
  yomitoku/export/export_html.py,sha256=LQDyZgbzmI0qJ0-FEK-54r9816H3L9hD10ChMcw0KyA,5620
30
30
  yomitoku/export/export_json.py,sha256=iNG37tdIuYG2x3NiiZemKaB6-X45WrhVPZhbX7RUzRI,2410
31
31
  yomitoku/export/export_markdown.py,sha256=KrdxDmKzVP_LbTKuDNGGsT31QOPKVsNNlb6wtLEW-1Q,4705
@@ -51,9 +51,9 @@ yomitoku/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
51
51
  yomitoku/utils/graph.py,sha256=LKNB8ZhSQwOZMfeAimPMF5UCVVr2ZaUWoGDkz8z-uGU,456
52
52
  yomitoku/utils/logger.py,sha256=uOmtQDr0A0JD7wyFshedL08BiNrQorHnpktRXba8bjU,424
53
53
  yomitoku/utils/misc.py,sha256=r92x45kQR8lC5jO1MZaHBDtcCWBkQXg_WS9H4RXJzSY,4127
54
- yomitoku/utils/searchable_pdf.py,sha256=7JQCFhwpBJVV1Fx9q4p6fFGlEsJ-SmR0arddI3NzEeo,3567
54
+ yomitoku/utils/searchable_pdf.py,sha256=taZ-XtXN4RItePMDv4q0fRVlryusdkexA3TCXzwlXRo,3497
55
55
  yomitoku/utils/visualizer.py,sha256=DjDwHiAu1iFRKh96H3Egq4vuI2s_-9dLCDeykhKi8jo,5251
56
- yomitoku-0.9.3.dist-info/METADATA,sha256=0r3tOl0ohoegcYQXWM3ROCSOr5px3IK-0zwqyADc9Mc,8872
57
- yomitoku-0.9.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
58
- yomitoku-0.9.3.dist-info/entry_points.txt,sha256=n3c8bQSj5Be5GHAOv_NZ8cldJFmWeigQxSmteFTmu_k,96
59
- yomitoku-0.9.3.dist-info/RECORD,,
56
+ yomitoku-0.9.4.dist-info/METADATA,sha256=oDIp-lxMIQjIfVtrzQXBcY2PJFHlRwktVGFXndQRJZo,8872
57
+ yomitoku-0.9.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
58
+ yomitoku-0.9.4.dist-info/entry_points.txt,sha256=n3c8bQSj5Be5GHAOv_NZ8cldJFmWeigQxSmteFTmu_k,96
59
+ yomitoku-0.9.4.dist-info/RECORD,,