vlmparse 0.1.4__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. vlmparse/clients/docling.py +2 -2
  2. vlmparse/clients/dotsocr.py +11 -2
  3. vlmparse/clients/mineru.py +8 -7
  4. vlmparse/clients/openai_converter.py +1 -0
  5. vlmparse/converter_with_server.py +5 -4
  6. vlmparse/registries.py +2 -4
  7. vlmparse/servers/docker_server.py +1 -1
  8. vlmparse/servers/utils.py +3 -2
  9. {vlmparse-0.1.4.dist-info → vlmparse-0.1.5.dist-info}/METADATA +17 -3
  10. vlmparse-0.1.5.dist-info/RECORD +36 -0
  11. vlmparse/benchpdf2md/bench_tests/benchmark_tsts.py +0 -1763
  12. vlmparse/benchpdf2md/bench_tests/utils.py +0 -0
  13. vlmparse/benchpdf2md/create_dataset.py +0 -60
  14. vlmparse/benchpdf2md/olmocrbench/katex/__init__.py +0 -1
  15. vlmparse/benchpdf2md/olmocrbench/katex/render.py +0 -592
  16. vlmparse/benchpdf2md/olmocrbench/repeatdetect.py +0 -175
  17. vlmparse/benchpdf2md/olmocrbench/run_olmocr_bench.py +0 -256
  18. vlmparse/benchpdf2md/olmocrbench/tests.py +0 -1334
  19. vlmparse/benchpdf2md/run_benchmark.py +0 -296
  20. vlmparse/benchpdf2md/st_visu_benchmark/app.py +0 -271
  21. vlmparse/benchpdf2md/st_visu_benchmark/highligh_text.py +0 -117
  22. vlmparse/benchpdf2md/st_visu_benchmark/test_form.py +0 -95
  23. vlmparse/benchpdf2md/st_visu_benchmark/ui_elements.py +0 -20
  24. vlmparse/benchpdf2md/st_visu_benchmark/utils.py +0 -50
  25. vlmparse/benchpdf2md/utils.py +0 -56
  26. vlmparse-0.1.4.dist-info/RECORD +0 -51
  27. {vlmparse-0.1.4.dist-info → vlmparse-0.1.5.dist-info}/WHEEL +0 -0
  28. {vlmparse-0.1.4.dist-info → vlmparse-0.1.5.dist-info}/entry_points.txt +0 -0
  29. {vlmparse-0.1.4.dist-info → vlmparse-0.1.5.dist-info}/licenses/LICENSE +0 -0
  30. {vlmparse-0.1.4.dist-info → vlmparse-0.1.5.dist-info}/top_level.txt +0 -0
@@ -34,7 +34,7 @@ class DoclingDockerServerConfig(DockerServerConfig):
34
34
  "LOG_LEVEL": "DEBUG", # Enable verbose logging
35
35
  # Performance Tuning
36
36
  # "UVICORN_WORKERS": "4", # Increase web server workers (Default: 1)
37
- # "DOCLING_SERVE_ENG_LOC_NUM_WORKERS": "4", # Increase processing workers (Default: 2)
37
+ "DOCLING_SERVE_ENG_LOC_NUM_WORKERS": "16", # Increase processing workers (Default: 2)
38
38
  "DOCLING_NUM_THREADS": "32", # Increase torch threads (Default: 4)
39
39
  }
40
40
  )
@@ -62,8 +62,8 @@ class DoclingDockerServerConfig(DockerServerConfig):
62
62
  class DoclingConverterConfig(ConverterConfig):
63
63
  """Configuration for Docling converter client."""
64
64
 
65
+ base_url: str
65
66
  model_name: str = "docling"
66
- base_url: str = "http://localhost:5001"
67
67
  timeout: int = 300
68
68
  api_kwargs: dict = {"output_format": "markdown", "image_export_mode": "referenced"}
69
69
 
@@ -8,6 +8,7 @@ from PIL import Image
8
8
  from pydantic import Field
9
9
 
10
10
  from vlmparse.clients.openai_converter import (
11
+ LLMParams,
11
12
  OpenAIConverterClient,
12
13
  OpenAIConverterConfig,
13
14
  )
@@ -28,6 +29,7 @@ class DotsOCRDockerServerConfig(DockerServerConfig):
28
29
  dockerfile_dir: str = str(DOCKERFILE_DIR / "dotsocr")
29
30
  command_args: list[str] = Field(
30
31
  default_factory=lambda: [
32
+ "/workspace/weights/DotsOCR",
31
33
  "--tensor-parallel-size",
32
34
  "1",
33
35
  "--gpu-memory-utilization",
@@ -44,12 +46,19 @@ class DotsOCRDockerServerConfig(DockerServerConfig):
44
46
  # "16384",
45
47
  ]
46
48
  )
47
- add_model_key_to_server: bool = False
49
+ add_model_key_to_server: bool = True
48
50
  aliases: list[str] = Field(default_factory=lambda: ["dotsocr"])
49
51
 
50
52
  @property
51
53
  def client_config(self):
52
- return DotsOCRConverterConfig(llm_params=self.llm_params)
54
+ return DotsOCRConverterConfig(
55
+ llm_params=LLMParams(
56
+ base_url=f"http://localhost:{self.docker_port}{self.get_base_url_suffix()}",
57
+ )
58
+ )
59
+
60
+ def get_base_url_suffix(self) -> str:
61
+ return "/v1"
53
62
 
54
63
 
55
64
  class DotsOCRConverterConfig(OpenAIConverterConfig):
@@ -1,6 +1,5 @@
1
1
  import asyncio
2
2
  import io
3
- import os
4
3
 
5
4
  import orjson
6
5
  from loguru import logger
@@ -20,18 +19,21 @@ class MinerUDockerServerConfig(DockerServerConfig):
20
19
  docker_image: str = "pulsia/mineru25apipulsia:latest"
21
20
  docker_port: int = 4299
22
21
  container_port: int = 8000
22
+ server_ready_indicators: list[str] = Field(
23
+ default_factory=lambda: ["Uvicorn running"]
24
+ )
23
25
 
24
26
  @property
25
27
  def client_config(self):
26
- return MinerUConverterConfig(api_url=f"http://localhost:{self.docker_port}")
28
+ return MinerUConverterConfig(base_url=f"http://localhost:{self.docker_port}")
27
29
 
28
30
 
29
31
  class MinerUConverterConfig(ConverterConfig):
30
32
  """Configuration for MinerU API converter."""
31
33
 
32
- base_url: str = Field(
33
- default_factory=lambda: os.getenv("MINERU_API_URL", "http://localhost:4299")
34
- )
34
+ base_url: str
35
+ model_name: str = "opendatalab/MinerU2.5-2509-1.2B"
36
+ aliases: list[str] = Field(default_factory=lambda: ["mineru25"])
35
37
  timeout: int = 600
36
38
 
37
39
  def get_client(self, **kwargs) -> "MinerUConverter":
@@ -54,13 +56,12 @@ class MinerUConverter(BaseConverter):
54
56
  super().__init__(config=config, **kwargs)
55
57
  from httpx import AsyncClient
56
58
 
57
- self.client = AsyncClient(base_url=config.api_url, timeout=config.timeout)
59
+ self.client = AsyncClient(base_url=config.base_url, timeout=config.timeout)
58
60
 
59
61
  async def _async_inference_with_api(self, image) -> list:
60
62
  """Run async inference with MinerU API."""
61
63
 
62
64
  img_byte_arr = await asyncio.to_thread(to_bytes_io, image)
63
-
64
65
  response = await self.client.post(
65
66
  "process-image",
66
67
  files={"image": ("image.png", img_byte_arr, "image/png")},
@@ -92,6 +92,7 @@ class OpenAIConverterClient(BaseConverter):
92
92
  base_url=self.config.llm_params.base_url,
93
93
  api_key=self.config.llm_params.api_key,
94
94
  timeout=self.config.llm_params.timeout,
95
+ max_retries=self.config.llm_params.max_retries,
95
96
  )
96
97
 
97
98
  async def _get_chat_completion(
@@ -42,13 +42,13 @@ class ConverterWithServer:
42
42
  docker_config = docker_config_registry.get(
43
43
  self.model, default=self.with_vllm_server
44
44
  )
45
- if self.port is not None:
46
- docker_config.docker_port = self.port
47
45
 
48
46
  if docker_config is not None:
47
+ if self.port is not None:
48
+ docker_config.docker_port = self.port
49
49
  docker_config.gpu_device_ids = gpu_device_ids
50
- server = docker_config.get_server(auto_stop=True)
51
- server.start()
50
+ self.server = docker_config.get_server(auto_stop=True)
51
+ self.server.start()
52
52
 
53
53
  self.client = docker_config.get_client()
54
54
  else:
@@ -56,6 +56,7 @@ class ConverterWithServer:
56
56
 
57
57
  else:
58
58
  client_config = converter_config_registry.get(self.model, uri=self.uri)
59
+
59
60
  self.client = client_config.get_client()
60
61
 
61
62
  def parse(
vlmparse/registries.py CHANGED
@@ -108,6 +108,7 @@ for gemini_model in [
108
108
  "gemini-2.5-flash",
109
109
  "gemini-2.5-flash-lite",
110
110
  "gemini-3-pro-preview",
111
+ "gemini-3-flash-preview",
111
112
  ]:
112
113
  converter_config_registry.register(
113
114
  gemini_model,
@@ -120,12 +121,9 @@ for gemini_model in [
120
121
  ),
121
122
  )
122
123
  for openai_model in [
123
- "gpt-5.1",
124
- "gpt-5.1-mini",
125
- "gpt-5.1-nano",
124
+ "gpt-5.2",
126
125
  "gpt-5",
127
126
  "gpt-5-mini",
128
- "gpt-5-nano",
129
127
  ]:
130
128
  converter_config_registry.register(
131
129
  openai_model,
@@ -78,7 +78,7 @@ class VLLMDockerServerConfig(DockerServerConfig):
78
78
  from vlmparse.clients.openai_converter import LLMParams
79
79
 
80
80
  return LLMParams(
81
- base_url=f"http://localhost:{self.docker_port}/v1",
81
+ base_url=f"http://localhost:{self.docker_port}{self.get_base_url_suffix()}",
82
82
  model_name=self.default_model_name,
83
83
  )
84
84
 
vlmparse/servers/utils.py CHANGED
@@ -3,9 +3,8 @@ import time
3
3
  from contextlib import contextmanager
4
4
  from pathlib import Path
5
5
 
6
- from loguru import logger
7
-
8
6
  import docker
7
+ from loguru import logger
9
8
 
10
9
 
11
10
  def _ensure_image_exists(
@@ -230,6 +229,8 @@ def get_model_from_uri(uri: str) -> str:
230
229
  for container in containers:
231
230
  c_uri = container.labels.get("vlmparse_uri")
232
231
  c_model = container.labels.get("vlmparse_model_name")
232
+ if c_uri is not None:
233
+ c_uri = c_uri.replace("localhost", "0.0.0.0")
233
234
 
234
235
  # Check if user URI matches container URI (ignoring /v1 suffix if missing)
235
236
  if c_uri and (
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: vlmparse
3
- Version: 0.1.4
4
- Requires-Python: >=3.12.0
3
+ Version: 0.1.5
4
+ Requires-Python: >=3.11.0
5
5
  Description-Content-Type: text/markdown
6
6
  License-File: LICENSE
7
7
  Requires-Dist: devtools>=0.12.2
@@ -72,6 +72,19 @@ Supported Converters:
72
72
 
73
73
  ## Installation
74
74
 
75
+ Simplest solution with only the cli:
76
+
77
+ ```bash
78
+ uv tool install vlmparse
79
+ ```
80
+
81
+ If you want to run the granite-docling model or use the streamlit viewing app:
82
+
83
+ ```bash
84
+ uv tool install vlmparse[docling_core,st_app]
85
+ ```
86
+
87
+ If you prefer cloning the repository and using the local version:
75
88
  ```bash
76
89
  uv sync
77
90
  ```
@@ -86,10 +99,11 @@ Activate the virtual environment:
86
99
  ```bash
87
100
  source .venv/bin/activate
88
101
  ```
89
- Other solution: append uv run to all the commands below.
90
102
 
91
103
  ## CLI Usage
92
104
 
105
+ Note that you can bypass the previous installation step and just add uvx before each of the commands below.
106
+
93
107
  ### Convert PDFs
94
108
 
95
109
  With a general VLM (requires setting your api key as an environment variable):
@@ -0,0 +1,36 @@
1
+ vlmparse/base_model.py,sha256=4U4UPe8SNArliKnUf8pp8zQugWYsnhg9okylt7mrW1U,381
2
+ vlmparse/build_doc.py,sha256=LAWrnFrqamN5PwJo57AUtQOPrMFGnCGw4gBjEKZ6pYo,2127
3
+ vlmparse/cli.py,sha256=tQma1IkOsFnqPKqqHVO1PJh18n1w82gp4ewA7oraJkE,15855
4
+ vlmparse/constants.py,sha256=7-47S01n4MI2ebR09bpdOo3_P16d-z-NVGsm6KJP8ls,110
5
+ vlmparse/converter.py,sha256=F0JSY9sFYUggCvaUCb27kKGJJpnZKW2FStMDVJoIOeQ,7383
6
+ vlmparse/converter_with_server.py,sha256=G393O7vU_lJz6Vz-qYVkrjFhf0Vmpjjl8OjPKQe2blU,3928
7
+ vlmparse/registries.py,sha256=6bEUKTkTjc8C7c1R1ZvAHSF5NCXmAuhNpw0qNnuQ7-A,5818
8
+ vlmparse/utils.py,sha256=rcVrtPiQVj_8HAmFQOu___72uYIapp_X89yxrMNCBow,1236
9
+ vlmparse/clients/chandra.py,sha256=zfu-A6Slh-fIAyrtrlVoCb6QHLBimnimefap_K9YwYw,9775
10
+ vlmparse/clients/deepseekocr.py,sha256=rQvaOaPPoDiZ0MzXqfqqH9BgUBfjmlfHu3NlMjSDgiQ,6501
11
+ vlmparse/clients/docling.py,sha256=SAkLsqseuWfkuiel8FWR1G0Z5s-SZU3dE2JbsOvF4SA,5328
12
+ vlmparse/clients/dotsocr.py,sha256=w2T-xkhlw1AfT-CUYoF0ectr2jDYHe9239B24XKB1UQ,10139
13
+ vlmparse/clients/granite_docling.py,sha256=EQpsv5qSJG0HtMSacmJStER2sq4TGf1EMU5_NmJsl4g,4634
14
+ vlmparse/clients/hunyuanocr.py,sha256=Xw0Q1l-3pQzaEgFngnfM8vrSWpnT3I99QvDaGZ8XooM,1712
15
+ vlmparse/clients/lightonocr.py,sha256=wx1Im8Z3wlRWwYbPqnSd3LqTtdAU8CnX5mzu1BuCUY8,1314
16
+ vlmparse/clients/mineru.py,sha256=bilDPcUoLk2rcFVqMk4q2Hx2txilc3GDUbjAEoMM_BI,3671
17
+ vlmparse/clients/nanonetocr.py,sha256=BT5vaeerCsK5agvOaHK3NvLUqWd1FfDmrMmDYbp646I,1543
18
+ vlmparse/clients/olmocr.py,sha256=mQEDpfyLY8a80Zlps5mG0QaWytIgnNQZVEVWKWjPIjk,1849
19
+ vlmparse/clients/openai_converter.py,sha256=j2H0iAQTADRRpu1Zy1b-1OFfWyXuqCvrQKy2UcwggTA,5696
20
+ vlmparse/clients/paddleocrvl.py,sha256=tmaqg3boV4edywiiiNiNiI3dBHi111wz4dFb52OISXw,1376
21
+ vlmparse/clients/prompts.py,sha256=-J60lqxgRzlkQ9VsQLxmWsIMaDt-gNqWqWoqHIw9CLc,4228
22
+ vlmparse/clients/pipe_utils/cleaner.py,sha256=oxBkBTOkluN1lmeNbzajRIe0_D__ZGwUOBaI_Ph0uxE,2396
23
+ vlmparse/clients/pipe_utils/html_to_md_conversion.py,sha256=cFFqzD2jCNw_968_eu3Wt--Ox7iJj2Rn5UoP_DZWosU,4112
24
+ vlmparse/clients/pipe_utils/utils.py,sha256=935ecIO446I0pstszE_1nrIPHn1Ffrxunq7fVd0dsd8,315
25
+ vlmparse/data_model/box.py,sha256=lJsh4qhjgYXZF5vTSJ1qMXD5GVlBi2_SBedBMlfJikU,16868
26
+ vlmparse/data_model/document.py,sha256=pdCZvWzRFkez53ZJpNaB4ezUW-OVUlbR3_SBmmgVzGQ,4217
27
+ vlmparse/servers/docker_server.py,sha256=qOoZcWSHrK7kK7tAL61RJSW-Jmee93It2SEfWG3jGrc,6633
28
+ vlmparse/servers/utils.py,sha256=qy2-rnQTCQKt6CeTV5H74tvRTXyzBV2KswQiYW8Tf-k,8908
29
+ vlmparse/st_viewer/fs_nav.py,sha256=7GNH68h2Loh5pQ64Pe72-D2cs2BLhqRXevEmKdFmPX0,1616
30
+ vlmparse/st_viewer/st_viewer.py,sha256=m2rQTtk5rlwErNmivNAg-4rkHkvNkvLhoJZxFQi7Dwk,2105
31
+ vlmparse-0.1.5.dist-info/licenses/LICENSE,sha256=3TKJHk8hPBR5dbLWZ3IpfCftl-_m-iyBwpYQGZYxj14,1080
32
+ vlmparse-0.1.5.dist-info/METADATA,sha256=LN4W1cvXJvL22hwLAgeSwd3PGTmlrt6lgqNi-tL9pes,5446
33
+ vlmparse-0.1.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
34
+ vlmparse-0.1.5.dist-info/entry_points.txt,sha256=gD5berP6HwE2wNIkls-Lw5goiceA8uMgPEd7ifnFJXs,47
35
+ vlmparse-0.1.5.dist-info/top_level.txt,sha256=k4ni-GNH_iAX7liQEsk_KY_c3xgZgt8k9fsSs9IXLXs,9
36
+ vlmparse-0.1.5.dist-info/RECORD,,