vlmparse 0.1.0__tar.gz → 0.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {vlmparse-0.1.0/vlmparse.egg-info → vlmparse-0.1.2}/PKG-INFO +11 -1
- {vlmparse-0.1.0 → vlmparse-0.1.2}/README.md +10 -0
- {vlmparse-0.1.0 → vlmparse-0.1.2}/pyproject.toml +2 -2
- vlmparse-0.1.2/vlmparse/benchpdf2md/bench_tests/benchmark_tsts.py +1763 -0
- vlmparse-0.1.2/vlmparse/benchpdf2md/bench_tests/utils.py +0 -0
- vlmparse-0.1.2/vlmparse/benchpdf2md/create_dataset.py +60 -0
- vlmparse-0.1.2/vlmparse/benchpdf2md/olmocrbench/katex/__init__.py +1 -0
- vlmparse-0.1.2/vlmparse/benchpdf2md/olmocrbench/katex/render.py +592 -0
- vlmparse-0.1.2/vlmparse/benchpdf2md/olmocrbench/repeatdetect.py +175 -0
- vlmparse-0.1.2/vlmparse/benchpdf2md/olmocrbench/run_olmocr_bench.py +256 -0
- vlmparse-0.1.2/vlmparse/benchpdf2md/olmocrbench/tests.py +1334 -0
- vlmparse-0.1.2/vlmparse/benchpdf2md/run_benchmark.py +296 -0
- vlmparse-0.1.2/vlmparse/benchpdf2md/st_visu_benchmark/app.py +271 -0
- vlmparse-0.1.2/vlmparse/benchpdf2md/st_visu_benchmark/highligh_text.py +117 -0
- vlmparse-0.1.2/vlmparse/benchpdf2md/st_visu_benchmark/test_form.py +95 -0
- vlmparse-0.1.2/vlmparse/benchpdf2md/st_visu_benchmark/ui_elements.py +20 -0
- vlmparse-0.1.2/vlmparse/benchpdf2md/st_visu_benchmark/utils.py +50 -0
- vlmparse-0.1.2/vlmparse/benchpdf2md/utils.py +56 -0
- vlmparse-0.1.2/vlmparse/clients/chandra.py +323 -0
- vlmparse-0.1.2/vlmparse/clients/deepseekocr.py +52 -0
- vlmparse-0.1.2/vlmparse/clients/docling.py +146 -0
- vlmparse-0.1.2/vlmparse/clients/dotsocr.py +277 -0
- vlmparse-0.1.2/vlmparse/clients/granite_docling.py +132 -0
- vlmparse-0.1.2/vlmparse/clients/hunyuanocr.py +45 -0
- vlmparse-0.1.2/vlmparse/clients/lightonocr.py +43 -0
- vlmparse-0.1.2/vlmparse/clients/mineru.py +119 -0
- vlmparse-0.1.2/vlmparse/clients/nanonetocr.py +29 -0
- vlmparse-0.1.2/vlmparse/clients/olmocr.py +46 -0
- vlmparse-0.1.2/vlmparse/clients/openai_converter.py +173 -0
- vlmparse-0.1.2/vlmparse/clients/paddleocrvl.py +48 -0
- vlmparse-0.1.2/vlmparse/clients/pipe_utils/cleaner.py +74 -0
- vlmparse-0.1.2/vlmparse/clients/pipe_utils/html_to_md_conversion.py +136 -0
- vlmparse-0.1.2/vlmparse/clients/pipe_utils/utils.py +12 -0
- vlmparse-0.1.2/vlmparse/clients/prompts.py +66 -0
- vlmparse-0.1.2/vlmparse/data_model/box.py +551 -0
- vlmparse-0.1.2/vlmparse/data_model/document.py +148 -0
- vlmparse-0.1.2/vlmparse/servers/docker_server.py +199 -0
- vlmparse-0.1.2/vlmparse/servers/utils.py +250 -0
- vlmparse-0.1.2/vlmparse/st_viewer/fs_nav.py +53 -0
- vlmparse-0.1.2/vlmparse/st_viewer/st_viewer.py +80 -0
- {vlmparse-0.1.0 → vlmparse-0.1.2/vlmparse.egg-info}/PKG-INFO +11 -1
- vlmparse-0.1.2/vlmparse.egg-info/SOURCES.txt +60 -0
- vlmparse-0.1.0/vlmparse.egg-info/SOURCES.txt +0 -23
- {vlmparse-0.1.0 → vlmparse-0.1.2}/LICENSE +0 -0
- {vlmparse-0.1.0 → vlmparse-0.1.2}/setup.cfg +0 -0
- {vlmparse-0.1.0 → vlmparse-0.1.2}/tests/test_all_converters_mocked.py +0 -0
- {vlmparse-0.1.0 → vlmparse-0.1.2}/tests/test_batch_parser.py +0 -0
- {vlmparse-0.1.0 → vlmparse-0.1.2}/tests/test_benchmark_tests.py +0 -0
- {vlmparse-0.1.0 → vlmparse-0.1.2}/tests/test_cli.py +0 -0
- {vlmparse-0.1.0 → vlmparse-0.1.2}/tests/test_end2end.py +0 -0
- {vlmparse-0.1.0 → vlmparse-0.1.2}/tests/test_process_and_run_benchmark.py +0 -0
- {vlmparse-0.1.0 → vlmparse-0.1.2}/tests/test_table_tests.py +0 -0
- {vlmparse-0.1.0 → vlmparse-0.1.2}/vlmparse/base_model.py +0 -0
- {vlmparse-0.1.0 → vlmparse-0.1.2}/vlmparse/build_doc.py +0 -0
- {vlmparse-0.1.0 → vlmparse-0.1.2}/vlmparse/cli.py +0 -0
- {vlmparse-0.1.0 → vlmparse-0.1.2}/vlmparse/converter.py +0 -0
- {vlmparse-0.1.0 → vlmparse-0.1.2}/vlmparse/converter_with_server.py +0 -0
- {vlmparse-0.1.0 → vlmparse-0.1.2}/vlmparse/registries.py +0 -0
- {vlmparse-0.1.0 → vlmparse-0.1.2}/vlmparse/utils.py +0 -0
- {vlmparse-0.1.0 → vlmparse-0.1.2}/vlmparse.egg-info/dependency_links.txt +0 -0
- {vlmparse-0.1.0 → vlmparse-0.1.2}/vlmparse.egg-info/entry_points.txt +0 -0
- {vlmparse-0.1.0 → vlmparse-0.1.2}/vlmparse.egg-info/requires.txt +0 -0
- {vlmparse-0.1.0 → vlmparse-0.1.2}/vlmparse.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: vlmparse
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.1.2
|
|
4
4
|
Requires-Python: >=3.12.0
|
|
5
5
|
Description-Content-Type: text/markdown
|
|
6
6
|
License-File: LICENSE
|
|
@@ -182,3 +182,13 @@ server.start()
|
|
|
182
182
|
|
|
183
183
|
server.stop()
|
|
184
184
|
```
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
Converter with automatic server deployment:
|
|
188
|
+
|
|
189
|
+
```python
|
|
190
|
+
from vlmparse.converter_with_server import ConverterWithServer
|
|
191
|
+
|
|
192
|
+
converter_with_server = ConverterWithServer(model="mineru2.5")
|
|
193
|
+
documents = converter_with_server.parse(inputs=["file1.pdf", "file2.pdf"], out_folder="./output")
|
|
194
|
+
```
|
|
@@ -129,3 +129,13 @@ server.start()
|
|
|
129
129
|
|
|
130
130
|
server.stop()
|
|
131
131
|
```
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
Converter with automatic server deployment:
|
|
135
|
+
|
|
136
|
+
```python
|
|
137
|
+
from vlmparse.converter_with_server import ConverterWithServer
|
|
138
|
+
|
|
139
|
+
converter_with_server = ConverterWithServer(model="mineru2.5")
|
|
140
|
+
documents = converter_with_server.parse(inputs=["file1.pdf", "file2.pdf"], out_folder="./output")
|
|
141
|
+
```
|
|
@@ -8,7 +8,7 @@ version = "0.1.0"
|
|
|
8
8
|
|
|
9
9
|
[project]
|
|
10
10
|
name = "vlmparse"
|
|
11
|
-
version = "0.1.0"
|
|
11
|
+
version = "0.1.2"
|
|
12
12
|
authors = []
|
|
13
13
|
description = ""
|
|
14
14
|
readme = "README.md"
|
|
@@ -69,7 +69,7 @@ test = [
|
|
|
69
69
|
]
|
|
70
70
|
|
|
71
71
|
[tool.setuptools.packages.find]
|
|
72
|
-
include = ["vlmparse"]
|
|
72
|
+
include = ["vlmparse", "vlmparse.*"]
|
|
73
73
|
|
|
74
74
|
[project.scripts]
|
|
75
75
|
vlmparse = "vlmparse.cli:main"
|