vlmparse 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vlmparse/benchpdf2md/bench_tests/benchmark_tsts.py +1763 -0
- vlmparse/benchpdf2md/bench_tests/utils.py +0 -0
- vlmparse/benchpdf2md/create_dataset.py +60 -0
- vlmparse/benchpdf2md/olmocrbench/katex/__init__.py +1 -0
- vlmparse/benchpdf2md/olmocrbench/katex/render.py +592 -0
- vlmparse/benchpdf2md/olmocrbench/repeatdetect.py +175 -0
- vlmparse/benchpdf2md/olmocrbench/run_olmocr_bench.py +256 -0
- vlmparse/benchpdf2md/olmocrbench/tests.py +1334 -0
- vlmparse/benchpdf2md/run_benchmark.py +296 -0
- vlmparse/benchpdf2md/st_visu_benchmark/app.py +271 -0
- vlmparse/benchpdf2md/st_visu_benchmark/highligh_text.py +117 -0
- vlmparse/benchpdf2md/st_visu_benchmark/test_form.py +95 -0
- vlmparse/benchpdf2md/st_visu_benchmark/ui_elements.py +20 -0
- vlmparse/benchpdf2md/st_visu_benchmark/utils.py +50 -0
- vlmparse/benchpdf2md/utils.py +56 -0
- vlmparse/clients/chandra.py +323 -0
- vlmparse/clients/deepseekocr.py +52 -0
- vlmparse/clients/docling.py +146 -0
- vlmparse/clients/dotsocr.py +277 -0
- vlmparse/clients/granite_docling.py +132 -0
- vlmparse/clients/hunyuanocr.py +45 -0
- vlmparse/clients/lightonocr.py +43 -0
- vlmparse/clients/mineru.py +119 -0
- vlmparse/clients/nanonetocr.py +29 -0
- vlmparse/clients/olmocr.py +46 -0
- vlmparse/clients/openai_converter.py +173 -0
- vlmparse/clients/paddleocrvl.py +48 -0
- vlmparse/clients/pipe_utils/cleaner.py +74 -0
- vlmparse/clients/pipe_utils/html_to_md_conversion.py +136 -0
- vlmparse/clients/pipe_utils/utils.py +12 -0
- vlmparse/clients/prompts.py +66 -0
- vlmparse/data_model/box.py +551 -0
- vlmparse/data_model/document.py +148 -0
- vlmparse/servers/docker_server.py +199 -0
- vlmparse/servers/utils.py +250 -0
- vlmparse/st_viewer/fs_nav.py +53 -0
- vlmparse/st_viewer/st_viewer.py +80 -0
- {vlmparse-0.1.0.dist-info → vlmparse-0.1.2.dist-info}/METADATA +11 -1
- vlmparse-0.1.2.dist-info/RECORD +50 -0
- vlmparse-0.1.0.dist-info/RECORD +0 -13
- {vlmparse-0.1.0.dist-info → vlmparse-0.1.2.dist-info}/WHEEL +0 -0
- {vlmparse-0.1.0.dist-info → vlmparse-0.1.2.dist-info}/entry_points.txt +0 -0
- {vlmparse-0.1.0.dist-info → vlmparse-0.1.2.dist-info}/licenses/LICENSE +0 -0
- {vlmparse-0.1.0.dist-info → vlmparse-0.1.2.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import subprocess
|
|
3
|
+
import sys
|
|
4
|
+
|
|
5
|
+
import streamlit as st
|
|
6
|
+
from streamlit import runtime
|
|
7
|
+
|
|
8
|
+
from vlmparse.data_model.document import Document
|
|
9
|
+
from vlmparse.st_viewer.fs_nav import file_selector
|
|
10
|
+
|
|
11
|
+
# Configure the Streamlit page with the "wide" layout at import time,
# before any other Streamlit element is rendered.
st.set_page_config(layout="wide")
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@st.cache_resource
def get_doc(file_path):
    """Load the document stored at ``file_path``.

    The archive is read with ``Document.from_zip``. The function is wrapped
    in ``st.cache_resource`` so the loaded object is cached by Streamlit.

    Args:
        file_path: Path of the zipped document to open.

    Returns:
        The ``Document`` produced by ``Document.from_zip``.
    """
    document = Document.from_zip(file_path)
    return document
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def render_sidebar_controls(doc, file_path):
    """Draw the viewer controls and collect their current values.

    Args:
        doc: Loaded document; only ``len(doc.pages)`` is read here.
        file_path: Path of the selected file (not used by this function).

    Returns:
        A dict with ``"page_no"`` (the selected page index, bounded by
        0 and the last page index) and ``"plot_layouts"`` (the checkbox
        state, checked by default).
    """
    last_page_index = len(doc.pages) - 1

    # Widgets are created in display order: page selector, then the toggle.
    selected_page = st.number_input("Page", 0, last_page_index, 0)
    show_layouts = st.checkbox("Plot layouts", value=True)

    return {"page_no": selected_page, "plot_layouts": show_layouts}
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def run_streamlit(folder: str) -> None:
    """Render the document viewer page for files found under ``folder``.

    The sidebar hosts the file picker and, once a file has been chosen, the
    page and layout controls. The main area is split in two columns: the
    selected page's text on the left, and its image on the right (with
    layout boxes drawn when "Plot layouts" is checked).

    Args:
        folder: Root folder handed to ``file_selector``.
    """
    with st.sidebar:
        file_path = file_selector(folder)

    if not file_path:
        st.info("Please select a file from the sidebar.")
        return

    doc = get_doc(file_path)

    # A document without pages would give the page selector an upper bound
    # of -1 (below its minimum of 0) and leave nothing to index, so report
    # it instead of letting the widget or the page lookup fail.
    if len(doc.pages) == 0:
        st.warning("The selected document has no pages.")
        return

    with st.sidebar:
        settings = render_sidebar_controls(doc, file_path)

    # Look the page up once; both columns display the same page.
    page = doc.pages[settings["page_no"]]

    text_col, image_col = st.columns(2)
    with text_col:
        with st.container(height=700):
            st.write(page.text)

    with image_col:
        if settings["plot_layouts"]:
            st.image(page.get_image_with_boxes(layout=True))
        else:
            st.image(page.image)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def parse_args() -> argparse.Namespace:
|
|
53
|
+
"""Parse command line arguments."""
|
|
54
|
+
parser = argparse.ArgumentParser(description="Document viewer with Streamlit")
|
|
55
|
+
parser.add_argument(
|
|
56
|
+
"folder", type=str, nargs="?", default=".", help="Root folder path"
|
|
57
|
+
)
|
|
58
|
+
return parser.parse_args()
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def main() -> None:
    """Main entry point.

    When a Streamlit runtime already exists, the viewer page is rendered
    directly. Otherwise this file is relaunched through
    ``python -m streamlit run``, forwarding the folder argument after ``--``.
    """
    folder = parse_args().folder

    # Already running inside Streamlit: just draw the page.
    if runtime.exists():
        run_streamlit(folder)
        return

    command = [sys.executable, "-m", "streamlit", "run", __file__, "--", folder]
    try:
        subprocess.run(command, check=True)
    except subprocess.CalledProcessError as e:
        print(f"Error while running Streamlit: {e}")
    except KeyboardInterrupt:
        print("\nStreamlit app terminated by user.")
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
# Script entry point: runs both for direct execution and when Streamlit
# executes this file.
if __name__ == "__main__":
    main()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: vlmparse
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.1.2
|
|
4
4
|
Requires-Python: >=3.12.0
|
|
5
5
|
Description-Content-Type: text/markdown
|
|
6
6
|
License-File: LICENSE
|
|
@@ -182,3 +182,13 @@ server.start()
|
|
|
182
182
|
|
|
183
183
|
server.stop()
|
|
184
184
|
```
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
Converter with automatic server deployment:
|
|
188
|
+
|
|
189
|
+
```python
|
|
190
|
+
from vlmparse.converter_with_server import ConverterWithServer
|
|
191
|
+
|
|
192
|
+
converter_with_server = ConverterWithServer(model="mineru2.5")
|
|
193
|
+
documents = converter_with_server.parse(inputs=["file1.pdf", "file2.pdf"], out_folder="./output")
|
|
194
|
+
```
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
vlmparse/base_model.py,sha256=4U4UPe8SNArliKnUf8pp8zQugWYsnhg9okylt7mrW1U,381
|
|
2
|
+
vlmparse/build_doc.py,sha256=9evdU6GwVAZU15dZ1Qght6hNo_QxBQN8X3gmYdU2ltg,1965
|
|
3
|
+
vlmparse/cli.py,sha256=tQma1IkOsFnqPKqqHVO1PJh18n1w82gp4ewA7oraJkE,15855
|
|
4
|
+
vlmparse/converter.py,sha256=5wTA_cFyDMDSY8YgLzZV9SVBKmHjEbJCW8KPoJjmVVA,6880
|
|
5
|
+
vlmparse/converter_with_server.py,sha256=9yoqfv8akB0xZZ7Snjq3aHW5NPNam2AgbK7_rfFqNkk,3909
|
|
6
|
+
vlmparse/registries.py,sha256=TdSR1fx1Tz3roGk4Tk5ckIK6Iz-e4UD4erWUk96fFpQ,5846
|
|
7
|
+
vlmparse/utils.py,sha256=jZWbNMwpZSZL--ZzvL8wPG_7mwpw9Pi36qTO9TjvHZU,1239
|
|
8
|
+
vlmparse/benchpdf2md/create_dataset.py,sha256=0o4I0O3pHm1W7NYOTnW1JvPmgxJM8KLElKFvAbPAIic,1855
|
|
9
|
+
vlmparse/benchpdf2md/run_benchmark.py,sha256=LMHElWyWIgB4ppBL0s-qjfMz5FZQnZOEm5mXxd0p0C8,9800
|
|
10
|
+
vlmparse/benchpdf2md/utils.py,sha256=Q62vtvLIzxOEzSi-w210d7qnaRz-q_5ykmLNTkmbs-8,1732
|
|
11
|
+
vlmparse/benchpdf2md/bench_tests/benchmark_tsts.py,sha256=XISZKlN1CiPBGDHgiDkjFkhGWxPxMFRu1GfStiOprmo,69527
|
|
12
|
+
vlmparse/benchpdf2md/bench_tests/utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
13
|
+
vlmparse/benchpdf2md/olmocrbench/repeatdetect.py,sha256=SWCfYgAVmtyNypCsEZnuj6HoNGjFjTOqPBn2fCnEsiQ,5509
|
|
14
|
+
vlmparse/benchpdf2md/olmocrbench/run_olmocr_bench.py,sha256=XnVlN5Sy7mXkr7nqWbcCgROsoavFBXqn4haSoxVsyLM,8495
|
|
15
|
+
vlmparse/benchpdf2md/olmocrbench/tests.py,sha256=bszavrGQNCB3l60gPrsMgxtcMJvDjwZBBL_0B_K1oFA,53625
|
|
16
|
+
vlmparse/benchpdf2md/olmocrbench/katex/__init__.py,sha256=DD9Knd52Ur9WiatymQGI9B1-yJ7OkL2w5E0quITDkPY,72
|
|
17
|
+
vlmparse/benchpdf2md/olmocrbench/katex/render.py,sha256=l8mYRnz2I-10RE3JoxYrMfZ2UhxECTiRFazuf-dHnqU,18219
|
|
18
|
+
vlmparse/benchpdf2md/st_visu_benchmark/app.py,sha256=ng8o_q20rY88BFQSEi62dBylklV6yyGyrrMiTrl7xtM,8921
|
|
19
|
+
vlmparse/benchpdf2md/st_visu_benchmark/highligh_text.py,sha256=vsGtd9nPrtL5uDqALMumVlsllLvVZr2E6_d9TOLQoN4,3915
|
|
20
|
+
vlmparse/benchpdf2md/st_visu_benchmark/test_form.py,sha256=qNmFZoSdbWcw1EJKesgO7rYV_LpslYPzij_gyJEuDP8,3701
|
|
21
|
+
vlmparse/benchpdf2md/st_visu_benchmark/ui_elements.py,sha256=WkKncexShO3SU-DO7dPT4DOe-8UNjsCaHlj9L1B2mkI,572
|
|
22
|
+
vlmparse/benchpdf2md/st_visu_benchmark/utils.py,sha256=JSmOJQY1DDETtWmjWv07SlQlORE6yBewiMcE5qRZI_Q,1109
|
|
23
|
+
vlmparse/clients/chandra.py,sha256=zfu-A6Slh-fIAyrtrlVoCb6QHLBimnimefap_K9YwYw,9775
|
|
24
|
+
vlmparse/clients/deepseekocr.py,sha256=iCG5wI5yPv98hIPgVJX4gkkkH1OekblZjFhh5ORVWAk,1813
|
|
25
|
+
vlmparse/clients/docling.py,sha256=K-Grl_nZiSdooEdEaflevprE56l3Keby9xSMBtFwdis,5355
|
|
26
|
+
vlmparse/clients/dotsocr.py,sha256=9ygvIVVOi9UhTUJwmrI-h6AjMV9vL9J2vMaBfUyTorY,9895
|
|
27
|
+
vlmparse/clients/granite_docling.py,sha256=EQpsv5qSJG0HtMSacmJStER2sq4TGf1EMU5_NmJsl4g,4634
|
|
28
|
+
vlmparse/clients/hunyuanocr.py,sha256=Xw0Q1l-3pQzaEgFngnfM8vrSWpnT3I99QvDaGZ8XooM,1712
|
|
29
|
+
vlmparse/clients/lightonocr.py,sha256=wx1Im8Z3wlRWwYbPqnSd3LqTtdAU8CnX5mzu1BuCUY8,1314
|
|
30
|
+
vlmparse/clients/mineru.py,sha256=OL56O6cMGksoVMVDyMYCQvwIvZEjKiAZXjEMEshqXaY,3549
|
|
31
|
+
vlmparse/clients/nanonetocr.py,sha256=BT5vaeerCsK5agvOaHK3NvLUqWd1FfDmrMmDYbp646I,1543
|
|
32
|
+
vlmparse/clients/olmocr.py,sha256=mQEDpfyLY8a80Zlps5mG0QaWytIgnNQZVEVWKWjPIjk,1849
|
|
33
|
+
vlmparse/clients/openai_converter.py,sha256=QBHomw3_K_BpMVboaMd4rlhrntsqEFT46kxpg9KqBIs,5636
|
|
34
|
+
vlmparse/clients/paddleocrvl.py,sha256=tmaqg3boV4edywiiiNiNiI3dBHi111wz4dFb52OISXw,1376
|
|
35
|
+
vlmparse/clients/prompts.py,sha256=-J60lqxgRzlkQ9VsQLxmWsIMaDt-gNqWqWoqHIw9CLc,4228
|
|
36
|
+
vlmparse/clients/pipe_utils/cleaner.py,sha256=oxBkBTOkluN1lmeNbzajRIe0_D__ZGwUOBaI_Ph0uxE,2396
|
|
37
|
+
vlmparse/clients/pipe_utils/html_to_md_conversion.py,sha256=cFFqzD2jCNw_968_eu3Wt--Ox7iJj2Rn5UoP_DZWosU,4112
|
|
38
|
+
vlmparse/clients/pipe_utils/utils.py,sha256=935ecIO446I0pstszE_1nrIPHn1Ffrxunq7fVd0dsd8,315
|
|
39
|
+
vlmparse/data_model/box.py,sha256=lJsh4qhjgYXZF5vTSJ1qMXD5GVlBi2_SBedBMlfJikU,16868
|
|
40
|
+
vlmparse/data_model/document.py,sha256=pdCZvWzRFkez53ZJpNaB4ezUW-OVUlbR3_SBmmgVzGQ,4217
|
|
41
|
+
vlmparse/servers/docker_server.py,sha256=nI7K8CEzJwSZxLY7Jg9IuYHHLR5YQpOSgY8Ln71POJA,6608
|
|
42
|
+
vlmparse/servers/utils.py,sha256=gMk5Y8FA1nlSxi7JzKxZu7XyljkYUZ5AnsTb3YFqu28,8821
|
|
43
|
+
vlmparse/st_viewer/fs_nav.py,sha256=7GNH68h2Loh5pQ64Pe72-D2cs2BLhqRXevEmKdFmPX0,1616
|
|
44
|
+
vlmparse/st_viewer/st_viewer.py,sha256=m2rQTtk5rlwErNmivNAg-4rkHkvNkvLhoJZxFQi7Dwk,2105
|
|
45
|
+
vlmparse-0.1.2.dist-info/licenses/LICENSE,sha256=3TKJHk8hPBR5dbLWZ3IpfCftl-_m-iyBwpYQGZYxj14,1080
|
|
46
|
+
vlmparse-0.1.2.dist-info/METADATA,sha256=Pnz_bFtUUXZ--3A_UHpkv-xIsESljDN7IlfNiOI7eLU,5074
|
|
47
|
+
vlmparse-0.1.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
48
|
+
vlmparse-0.1.2.dist-info/entry_points.txt,sha256=gD5berP6HwE2wNIkls-Lw5goiceA8uMgPEd7ifnFJXs,47
|
|
49
|
+
vlmparse-0.1.2.dist-info/top_level.txt,sha256=k4ni-GNH_iAX7liQEsk_KY_c3xgZgt8k9fsSs9IXLXs,9
|
|
50
|
+
vlmparse-0.1.2.dist-info/RECORD,,
|
vlmparse-0.1.0.dist-info/RECORD
DELETED
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
vlmparse/base_model.py,sha256=4U4UPe8SNArliKnUf8pp8zQugWYsnhg9okylt7mrW1U,381
|
|
2
|
-
vlmparse/build_doc.py,sha256=9evdU6GwVAZU15dZ1Qght6hNo_QxBQN8X3gmYdU2ltg,1965
|
|
3
|
-
vlmparse/cli.py,sha256=tQma1IkOsFnqPKqqHVO1PJh18n1w82gp4ewA7oraJkE,15855
|
|
4
|
-
vlmparse/converter.py,sha256=5wTA_cFyDMDSY8YgLzZV9SVBKmHjEbJCW8KPoJjmVVA,6880
|
|
5
|
-
vlmparse/converter_with_server.py,sha256=9yoqfv8akB0xZZ7Snjq3aHW5NPNam2AgbK7_rfFqNkk,3909
|
|
6
|
-
vlmparse/registries.py,sha256=TdSR1fx1Tz3roGk4Tk5ckIK6Iz-e4UD4erWUk96fFpQ,5846
|
|
7
|
-
vlmparse/utils.py,sha256=jZWbNMwpZSZL--ZzvL8wPG_7mwpw9Pi36qTO9TjvHZU,1239
|
|
8
|
-
vlmparse-0.1.0.dist-info/licenses/LICENSE,sha256=3TKJHk8hPBR5dbLWZ3IpfCftl-_m-iyBwpYQGZYxj14,1080
|
|
9
|
-
vlmparse-0.1.0.dist-info/METADATA,sha256=4xxtT6rE3pJyqfqbVjl8Llj7C5Az99TeusxXmMHzMMQ,4788
|
|
10
|
-
vlmparse-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
11
|
-
vlmparse-0.1.0.dist-info/entry_points.txt,sha256=gD5berP6HwE2wNIkls-Lw5goiceA8uMgPEd7ifnFJXs,47
|
|
12
|
-
vlmparse-0.1.0.dist-info/top_level.txt,sha256=k4ni-GNH_iAX7liQEsk_KY_c3xgZgt8k9fsSs9IXLXs,9
|
|
13
|
-
vlmparse-0.1.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|