vlmparse 0.1.7__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -71,7 +71,14 @@ class ConverterWithServer:
71
71
  concurrency: int = 10,
72
72
  vllm_args: dict | None = None,
73
73
  forget_predefined_vllm_args: bool = False,
74
+ return_documents: bool = False,
74
75
  ):
76
+ if model is None and uri is None:
77
+ raise ValueError("Either 'model' or 'uri' must be provided")
78
+
79
+ if concurrency < 1:
80
+ raise ValueError("concurrency must be at least 1")
81
+
75
82
  self.model = model
76
83
  self.uri = uri
77
84
  self.port = port
@@ -80,10 +87,11 @@ class ConverterWithServer:
80
87
  self.concurrency = concurrency
81
88
  self.vllm_args = vllm_args
82
89
  self.forget_predefined_vllm_args = forget_predefined_vllm_args
90
+ self.return_documents = return_documents
83
91
  self.server = None
84
92
  self.client = None
85
93
 
86
- if self.uri is not None and self.model is None:
94
+ if self.uri is not None:
87
95
  self.model = get_model_from_uri(self.uri)
88
96
 
89
97
  def start_server_and_client(self):
@@ -101,14 +109,20 @@ class ConverterWithServer:
101
109
  )
102
110
 
103
111
  if docker_config is not None:
104
- self.client = docker_config.get_client()
112
+ self.client = docker_config.get_client(
113
+ return_documents_in_batch_mode=self.return_documents
114
+ )
105
115
  else:
106
- self.client = converter_config_registry.get(self.model).get_client()
116
+ self.client = converter_config_registry.get(self.model).get_client(
117
+ return_documents_in_batch_mode=self.return_documents
118
+ )
107
119
 
108
120
  else:
109
121
  client_config = converter_config_registry.get(self.model, uri=self.uri)
110
122
 
111
- self.client = client_config.get_client()
123
+ self.client = client_config.get_client(
124
+ return_documents_in_batch_mode=self.return_documents
125
+ )
112
126
 
113
127
  def stop_server(self):
114
128
  if self.server is not None and self.server.auto_stop:
@@ -119,16 +133,30 @@ class ConverterWithServer:
119
133
  return self
120
134
 
121
135
  def __exit__(self, exc_type, exc_value, traceback):
122
- self.stop_server()
136
+ try:
137
+ self.stop_server()
138
+ except Exception as e:
139
+ logger.warning(f"Error stopping server during cleanup: {e}")
140
+ return False # Don't suppress exceptions
123
141
 
124
142
  def parse(
125
143
  self,
126
144
  inputs: str | list[str],
127
145
  out_folder: str = ".",
128
146
  mode: Literal["document", "md", "md_page"] = "document",
147
+ conversion_mode: Literal[
148
+ "ocr",
149
+ "ocr_layout",
150
+ "table",
151
+ "image_description",
152
+ "formula",
153
+ "chart",
154
+ ]
155
+ | None = None,
129
156
  dpi: int | None = None,
130
157
  debug: bool = False,
131
158
  retrylast: bool = False,
159
+ completion_kwargs: dict | None = None,
132
160
  ):
133
161
  assert (
134
162
  self.client is not None
@@ -165,6 +193,14 @@ class ConverterWithServer:
165
193
  if dpi is not None:
166
194
  self.client.config.dpi = int(dpi)
167
195
 
196
+ if conversion_mode is not None:
197
+ self.client.config.conversion_mode = conversion_mode
198
+
199
+ if completion_kwargs is not None and hasattr(
200
+ self.client.config, "completion_kwargs"
201
+ ):
202
+ self.client.config.completion_kwargs |= completion_kwargs
203
+
168
204
  if debug:
169
205
  self.client.debug = debug
170
206
 
vlmparse/registries.py CHANGED
@@ -1,37 +1,24 @@
1
1
  import os
2
2
  from collections.abc import Callable
3
3
 
4
- from vlmparse.clients.chandra import ChandraConverterConfig, ChandraDockerServerConfig
5
- from vlmparse.clients.deepseekocr import (
6
- DeepSeekOCRConverterConfig,
7
- DeepSeekOCRDockerServerConfig,
8
- )
9
- from vlmparse.clients.docling import DoclingConverterConfig, DoclingDockerServerConfig
10
- from vlmparse.clients.dotsocr import DotsOCRConverterConfig, DotsOCRDockerServerConfig
11
- from vlmparse.clients.granite_docling import (
12
- GraniteDoclingConverterConfig,
13
- GraniteDoclingDockerServerConfig,
14
- )
15
- from vlmparse.clients.hunyuanocr import (
16
- HunyuanOCRConverterConfig,
17
- HunyuanOCRDockerServerConfig,
18
- )
4
+ from vlmparse.clients.chandra import ChandraDockerServerConfig
5
+ from vlmparse.clients.deepseekocr import DeepSeekOCRDockerServerConfig
6
+ from vlmparse.clients.docling import DoclingDockerServerConfig
7
+ from vlmparse.clients.dotsocr import DotsOCRDockerServerConfig
8
+ from vlmparse.clients.granite_docling import GraniteDoclingDockerServerConfig
9
+ from vlmparse.clients.hunyuanocr import HunyuanOCRDockerServerConfig
19
10
  from vlmparse.clients.lightonocr import (
20
- LightOnOCRConverterConfig,
11
+ LightonOCR21BServerConfig,
21
12
  LightOnOCRDockerServerConfig,
22
13
  )
23
- from vlmparse.clients.mineru import MinerUConverterConfig, MinerUDockerServerConfig
24
- from vlmparse.clients.nanonetocr import (
25
- NanonetOCR2ConverterConfig,
26
- NanonetOCR2DockerServerConfig,
27
- )
28
- from vlmparse.clients.olmocr import OlmOCRConverterConfig, OlmOCRDockerServerConfig
29
- from vlmparse.clients.openai_converter import LLMParams, OpenAIConverterConfig
30
- from vlmparse.clients.paddleocrvl import (
31
- PaddleOCRVLConverterConfig,
32
- PaddleOCRVLDockerServerConfig,
33
- )
34
- from vlmparse.servers.docker_server import DEFAULT_MODEL_NAME, docker_config_registry
14
+ from vlmparse.clients.mineru import MinerUDockerServerConfig
15
+ from vlmparse.clients.mistral_converter import MistralOCRConverterConfig
16
+ from vlmparse.clients.nanonetocr import NanonetOCR2DockerServerConfig
17
+ from vlmparse.clients.olmocr import OlmOCRDockerServerConfig
18
+ from vlmparse.clients.openai_converter import OpenAIConverterConfig
19
+ from vlmparse.clients.paddleocrvl import PaddleOCRVLDockerServerConfig
20
+ from vlmparse.converter import ConverterConfig
21
+ from vlmparse.servers.docker_server import DockerServerConfig, docker_config_registry
35
22
 
36
23
 
37
24
  def get_default(cls, field_name):
@@ -43,7 +30,8 @@ def get_default(cls, field_name):
43
30
  return field_info.default
44
31
 
45
32
 
46
- for server_config_cls in [
33
+ # All server configs - single source of truth
34
+ SERVER_CONFIGS: list[type[DockerServerConfig]] = [
47
35
  ChandraDockerServerConfig,
48
36
  LightOnOCRDockerServerConfig,
49
37
  DotsOCRDockerServerConfig,
@@ -55,7 +43,11 @@ for server_config_cls in [
55
43
  MinerUDockerServerConfig,
56
44
  DeepSeekOCRDockerServerConfig,
57
45
  GraniteDoclingDockerServerConfig,
58
- ]:
46
+ LightonOCR21BServerConfig,
47
+ ]
48
+
49
+ # Register docker server configs
50
+ for server_config_cls in SERVER_CONFIGS:
59
51
  aliases = get_default(server_config_cls, "aliases") or []
60
52
  model_name = get_default(server_config_cls, "model_name")
61
53
  names = [n for n in aliases + [model_name] if isinstance(n, str)]
@@ -64,37 +56,81 @@ for server_config_cls in [
64
56
 
65
57
 
66
58
  class ConverterConfigRegistry:
67
- """Registry for mapping model names to their Docker configurations."""
59
+ """Registry for mapping model names to their converter configurations.
60
+
61
+ Thread-safe registry that maps model names to their converter configuration factories.
62
+ """
68
63
 
69
64
  def __init__(self):
70
- self._registry = dict()
65
+ import threading
66
+
67
+ self._registry: dict[str, Callable[[str | None], ConverterConfig]] = {}
68
+ self._lock = threading.RLock()
71
69
 
72
70
  def register(
73
71
  self,
74
72
  model_name: str,
75
- config_factory: Callable[[str], OpenAIConverterConfig | None],
73
+ config_factory: Callable[[str | None], ConverterConfig],
76
74
  ):
77
- """Register a config factory for a model name."""
78
- self._registry[model_name] = config_factory
75
+ """Register a config factory for a model name (thread-safe)."""
76
+ with self._lock:
77
+ self._registry[model_name] = config_factory
79
78
 
80
- def get(self, model_name: str, uri: str | None = None) -> OpenAIConverterConfig:
81
- """Get config for a model name. Returns default if not registered."""
82
- if model_name in self._registry:
83
- return self._registry[model_name](uri=uri)
79
+ def register_from_server(
80
+ self,
81
+ server_config_cls: type[DockerServerConfig],
82
+ ):
83
+ """Register converter config derived from a server config class.
84
+
85
+ This ensures model_name and default_model_name are consistently
86
+ passed from server to client config via _create_client_kwargs.
87
+ """
88
+ aliases = get_default(server_config_cls, "aliases") or []
89
+ model_name = get_default(server_config_cls, "model_name")
90
+ names = [n for n in aliases + [model_name] if isinstance(n, str)]
91
+ # Also register short name (after last /)
92
+ if model_name and "/" in model_name:
93
+ names.append(model_name.split("/")[-1])
94
+
95
+ def factory(uri: str | None, cls=server_config_cls) -> ConverterConfig:
96
+ server = cls()
97
+ client_config = server.client_config
98
+ # Override base_url if provided
99
+ if uri is not None:
100
+ client_config = client_config.model_copy(update={"base_url": uri})
101
+ return client_config
102
+
103
+ with self._lock:
104
+ for name in names:
105
+ self._registry[name] = factory
106
+
107
+ def get(self, model_name: str, uri: str | None = None) -> ConverterConfig:
108
+ """Get config for a model name (thread-safe). Returns default if not registered."""
109
+ with self._lock:
110
+ factory = self._registry.get(model_name)
111
+
112
+ if factory is not None:
113
+ return factory(uri)
84
114
  # Fallback to OpenAIConverterConfig for unregistered models
85
115
  if uri is not None:
86
- return OpenAIConverterConfig(
87
- llm_params=LLMParams(base_url=uri, model_name=model_name)
88
- )
89
- return OpenAIConverterConfig(llm_params=LLMParams(model_name=model_name))
116
+ return OpenAIConverterConfig(base_url=uri)
117
+ return OpenAIConverterConfig(model_name=model_name)
90
118
 
91
119
  def list_models(self) -> list[str]:
92
- """List all registered model names."""
93
- return list(self._registry.keys())
120
+ """List all registered model names (thread-safe)."""
121
+ with self._lock:
122
+ return list(self._registry.keys())
94
123
 
95
124
 
96
125
  # Global registry instance
97
126
  converter_config_registry = ConverterConfigRegistry()
127
+
128
+ # Register all server-backed converters through the server config
129
+ # This ensures model_name and default_model_name are consistently passed
130
+ for server_config_cls in SERVER_CONFIGS:
131
+ converter_config_registry.register_from_server(server_config_cls)
132
+
133
+ # External API configs (no server config - these are cloud APIs)
98
134
  GOOGLE_API_BASE_URL = (
99
135
  os.getenv("GOOGLE_API_BASE_URL")
100
136
  or "https://generativelanguage.googleapis.com/v1beta/openai/"
@@ -111,11 +147,10 @@ for gemini_model in [
111
147
  converter_config_registry.register(
112
148
  gemini_model,
113
149
  lambda uri=None, model=gemini_model: OpenAIConverterConfig(
114
- llm_params=LLMParams(
115
- model_name=model,
116
- base_url=GOOGLE_API_BASE_URL if uri is None else uri,
117
- api_key=os.getenv("GOOGLE_API_KEY"),
118
- )
150
+ model_name=model,
151
+ base_url=GOOGLE_API_BASE_URL if uri is None else uri,
152
+ api_key=os.getenv("GOOGLE_API_KEY"),
153
+ default_model_name=model,
119
154
  ),
120
155
  )
121
156
  for openai_model in [
@@ -126,45 +161,18 @@ for openai_model in [
126
161
  converter_config_registry.register(
127
162
  openai_model,
128
163
  lambda uri=None, model=openai_model: OpenAIConverterConfig(
129
- llm_params=LLMParams(
130
- model_name=model,
131
- base_url=None,
132
- api_key=os.getenv("OPENAI_API_KEY"),
133
- )
164
+ model_name=model,
165
+ base_url=None,
166
+ api_key=os.getenv("OPENAI_API_KEY"),
167
+ default_model_name=model,
134
168
  ),
135
169
  )
136
170
 
137
- for converter_config_cls in [
138
- ChandraConverterConfig,
139
- LightOnOCRConverterConfig,
140
- DotsOCRConverterConfig,
141
- PaddleOCRVLConverterConfig,
142
- NanonetOCR2ConverterConfig,
143
- HunyuanOCRConverterConfig,
144
- DeepSeekOCRConverterConfig,
145
- GraniteDoclingConverterConfig,
146
- OlmOCRConverterConfig,
147
- ]:
148
- aliases = get_default(converter_config_cls, "aliases") or []
149
- model_name = get_default(converter_config_cls, "model_name")
150
- names = [n for n in aliases + [model_name] if isinstance(n, str)]
151
- for name in names:
152
- converter_config_registry.register(
153
- name,
154
- lambda uri, cls=converter_config_cls: cls(
155
- llm_params=LLMParams(
156
- base_url=uri,
157
- model_name=DEFAULT_MODEL_NAME,
158
- api_key="",
159
- )
160
- ),
161
- )
162
- for converter_config_cls in [MinerUConverterConfig, DoclingConverterConfig]:
163
- aliases = get_default(converter_config_cls, "aliases") or []
164
- model_name = get_default(converter_config_cls, "model_name")
165
- names = [n for n in aliases + [model_name] if isinstance(n, str)]
166
- for name in names:
167
- converter_config_registry.register(
168
- name,
169
- lambda uri, cls=converter_config_cls: cls(base_url=uri),
170
- )
171
+ for mistral_model in ["mistral-ocr-latest", "mistral-ocr"]:
172
+ converter_config_registry.register(
173
+ mistral_model,
174
+ lambda uri=None, model=mistral_model: MistralOCRConverterConfig(
175
+ base_url="https://api.mistral.ai/v1" if uri is None else uri,
176
+ api_key=os.getenv("MISTRAL_API_KEY"),
177
+ ),
178
+ )
@@ -2,15 +2,23 @@ import os
2
2
  from typing import Callable
3
3
 
4
4
  from loguru import logger
5
- from pydantic import BaseModel, Field
5
+ from pydantic import Field
6
6
 
7
+ from .model_identity import ModelIdentityMixin
7
8
  from .utils import docker_server
8
9
 
9
10
 
10
- class DockerServerConfig(BaseModel):
11
- """Base configuration for deploying a Docker server."""
11
+ class DockerServerConfig(ModelIdentityMixin):
12
+ """Base configuration for deploying a Docker server.
13
+
14
+ Inherits from ModelIdentityMixin which provides:
15
+ - model_name: str
16
+ - default_model_name: str | None
17
+ - aliases: list[str]
18
+ - _create_client_kwargs(base_url): Helper for creating client configs
19
+ - get_all_names(): All names this model can be referenced by
20
+ """
12
21
 
13
- model_name: str
14
22
  docker_image: str
15
23
  dockerfile_dir: str | None = None
16
24
  command_args: list[str] = Field(default_factory=list)
@@ -27,7 +35,6 @@ class DockerServerConfig(BaseModel):
27
35
  environment: dict[str, str] = Field(default_factory=dict)
28
36
  volumes: dict[str, dict] | None = None
29
37
  entrypoint: str | None = None
30
- aliases: list[str] = Field(default_factory=list)
31
38
 
32
39
  class Config:
33
40
  extra = "allow"
@@ -84,22 +91,16 @@ class VLLMDockerServerConfig(DockerServerConfig):
84
91
  hf_home_folder: str | None = os.getenv("HF_HOME", None)
85
92
  add_model_key_to_server: bool = False
86
93
  container_port: int = 8000
87
- aliases: list[str] = Field(default_factory=list)
88
-
89
- @property
90
- def llm_params(self):
91
- from vlmparse.clients.openai_converter import LLMParams
92
-
93
- return LLMParams(
94
- base_url=f"http://localhost:{self.docker_port}{self.get_base_url_suffix()}",
95
- model_name=self.default_model_name,
96
- )
97
94
 
98
95
  @property
99
96
  def client_config(self):
100
97
  from vlmparse.clients.openai_converter import OpenAIConverterConfig
101
98
 
102
- return OpenAIConverterConfig(llm_params=self.llm_params)
99
+ return OpenAIConverterConfig(
100
+ **self._create_client_kwargs(
101
+ f"http://localhost:{self.docker_port}{self.get_base_url_suffix()}"
102
+ )
103
+ )
103
104
 
104
105
  def get_command(self) -> list[str]:
105
106
  """Build VLLM-specific command."""
@@ -171,41 +172,64 @@ class ConverterServer:
171
172
  def stop(self):
172
173
  """Stop the Docker server."""
173
174
  if self._server_context is not None:
174
- self._server_context.__exit__(None, None, None)
175
- self._server_context = None
176
- self._container = None
177
- self.base_url = None
175
+ try:
176
+ self._server_context.__exit__(None, None, None)
177
+ except Exception as e:
178
+ logger.warning(f"Error during server cleanup: {e}")
179
+ finally:
180
+ self._server_context = None
181
+ self._container = None
182
+ self.base_url = None
178
183
  logger.info("Server stopped")
179
184
 
180
185
  def __del__(self):
181
- """Automatically stop server when object is destroyed if auto_stop is True."""
182
- if self.auto_stop and self._server_context is not None:
183
- self.stop()
186
+ """Automatically stop server when object is destroyed if auto_stop is True.
187
+
188
+ Note: This is a fallback mechanism. Prefer using the context manager
189
+ or explicitly calling stop() for reliable cleanup.
190
+ """
191
+ try:
192
+ if self.auto_stop and self._server_context is not None:
193
+ self.stop()
194
+ except Exception:
195
+ pass # Suppress errors during garbage collection
184
196
 
185
197
 
186
198
  class DockerConfigRegistry:
187
- """Registry for mapping model names to their Docker configurations."""
199
+ """Registry for mapping model names to their Docker configurations.
200
+
201
+ Thread-safe registry that maps model names to their Docker configuration factories.
202
+ """
188
203
 
189
204
  def __init__(self):
190
- self._registry = dict()
205
+ import threading
206
+
207
+ self._registry: dict[str, Callable[[], DockerServerConfig | None]] = {}
208
+ self._lock = threading.RLock()
191
209
 
192
210
  def register(
193
211
  self, model_name: str, config_factory: Callable[[], DockerServerConfig | None]
194
212
  ):
195
- """Register a config factory for a model name."""
196
- self._registry[model_name] = config_factory
213
+ """Register a config factory for a model name (thread-safe)."""
214
+ with self._lock:
215
+ self._registry[model_name] = config_factory
197
216
 
198
217
  def get(self, model_name: str, default=False) -> DockerServerConfig | None:
199
- """Get config for a model name. Returns default if not registered."""
200
- if model_name not in self._registry:
201
- if default:
202
- return VLLMDockerServerConfig(model_name=model_name)
203
- return None
204
- return self._registry[model_name]()
218
+ """Get config for a model name (thread-safe). Returns default if not registered."""
219
+ with self._lock:
220
+ if model_name not in self._registry:
221
+ if default:
222
+ return VLLMDockerServerConfig(
223
+ model_name=model_name, default_model_name=DEFAULT_MODEL_NAME
224
+ )
225
+ return None
226
+ factory = self._registry[model_name]
227
+ return factory()
205
228
 
206
229
  def list_models(self) -> list[str]:
207
- """List all registered model names."""
208
- return list(self._registry.keys())
230
+ """List all registered model names (thread-safe)."""
231
+ with self._lock:
232
+ return list(self._registry.keys())
209
233
 
210
234
 
211
235
  # Global registry instance
@@ -0,0 +1,48 @@
1
+ """Model identity mixin for consistent model name handling between server and client configs."""
2
+
3
+ from pydantic import BaseModel, Field
4
+
5
+
6
+ class ModelIdentityMixin(BaseModel):
7
+ """Mixin providing model identity fields with validation.
8
+
9
+ This mixin ensures that model_name and default_model_name are consistently
10
+ passed from server configs to client configs.
11
+ """
12
+
13
+ model_name: str
14
+ default_model_name: str | None = None
15
+ aliases: list[str] = Field(default_factory=list)
16
+
17
+ def get_effective_model_name(self) -> str:
18
+ """Returns the model name to use for API calls."""
19
+ return self.default_model_name if self.default_model_name else self.model_name
20
+
21
+ def _create_client_kwargs(self, base_url: str) -> dict:
22
+ """Generate kwargs for client config with model identity.
23
+
24
+ Use this method in server configs to ensure consistent passing
25
+ of model_name and default_model_name to client configs.
26
+
27
+ Args:
28
+ base_url: The base URL for the client to connect to.
29
+
30
+ Returns:
31
+ Dictionary with base_url, model_name, and default_model_name.
32
+ """
33
+ return {
34
+ "base_url": base_url,
35
+ "model_name": self.model_name,
36
+ "default_model_name": self.get_effective_model_name(),
37
+ }
38
+
39
+ def get_all_names(self) -> list[str]:
40
+ """Get all names this model can be referenced by.
41
+
42
+ Returns:
43
+ List containing model_name, aliases, and short name (after last /).
44
+ """
45
+ names = [self.model_name] + self.aliases
46
+ if "/" in self.model_name:
47
+ names.append(self.model_name.split("/")[-1])
48
+ return [n for n in names if isinstance(n, str)]
vlmparse/utils.py CHANGED
@@ -19,8 +19,19 @@ def from_base64(base64_str: str):
19
19
  return Image.open(BytesIO(image_data))
20
20
 
21
21
 
22
- def get_file_paths(inputs: str | list[str]):
23
- # Expand file paths from glob patterns
22
+ def get_file_paths(inputs: str | list[str], raise_on_empty: bool = False) -> list[str]:
23
+ """Expand file paths from glob patterns.
24
+
25
+ Args:
26
+ inputs: A string or list of strings containing file paths, glob patterns, or directories.
27
+ raise_on_empty: If True, raise FileNotFoundError when no files are found.
28
+
29
+ Returns:
30
+ List of valid file paths.
31
+
32
+ Raises:
33
+ FileNotFoundError: If raise_on_empty is True and no files are found.
34
+ """
24
35
  file_paths = []
25
36
  if isinstance(inputs, str):
26
37
  inputs = [inputs]
@@ -36,6 +47,8 @@ def get_file_paths(inputs: str | list[str]):
36
47
  file_paths = [f for f in file_paths if os.path.exists(f) and os.path.isfile(f)]
37
48
 
38
49
  if not file_paths:
50
+ if raise_on_empty:
51
+ raise FileNotFoundError("No files found matching the input patterns")
39
52
  logger.error("No PDF files found matching the inputs patterns")
40
53
 
41
54
  return file_paths
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: vlmparse
3
- Version: 0.1.7
3
+ Version: 0.1.8
4
4
  Requires-Python: >=3.11.0
5
5
  Description-Content-Type: text/markdown
6
6
  License-File: LICENSE
@@ -54,6 +54,12 @@ Dynamic: license-file
54
54
 
55
55
  # vlmparse
56
56
 
57
+ <div align="center">
58
+
59
+ [\[📜 arXiv coming soon\]] | [[Dataset (🤗Hugging Face)]](https://huggingface.co/datasets/pulsia/fr-bench-pdf2md) | [[pypi]](https://pypi.org/project/vlmparse/) | [[vlmparse]](https://github.com/ld-lab-pulsia/vlmparse) | [[Benchmark]](https://github.com/ld-lab-pulsia/benchpdf2md)
60
+
61
+ </div>
62
+
57
63
  A unified wrapper for Vision Language Models (VLM) and OCR solutions to parse PDF documents into Markdown.
58
64
 
59
65
  Features:
@@ -209,3 +215,7 @@ with ConverterWithServer(model="mineru2.5") as converter_with_server:
209
215
  ```
210
216
 
211
217
  Note that if you pass the URI of a vLLM server to `ConverterWithServer`, the model name is inferred automatically and no server is started.
218
+
219
+ ## Credits
220
+
221
+ This work was financed by La Poste and led by members of Probayes and OpenValue, two subsidiaries (filiales) of La Poste.
@@ -0,0 +1,38 @@
1
+ vlmparse/base_model.py,sha256=4U4UPe8SNArliKnUf8pp8zQugWYsnhg9okylt7mrW1U,381
2
+ vlmparse/build_doc.py,sha256=fb7awoqVN-6NBlKVkMFb1v1iTWcxne5QAyNaKYTyvM4,2275
3
+ vlmparse/cli.py,sha256=asew0JdpbgFZrZqnG-Bqh5A_DrXcP0XomLB3y3AgG6Y,12855
4
+ vlmparse/constants.py,sha256=DYaK7KtTW8p9MPb3iPvoP5H1r7ICRuIFo89P01q4uCI,184
5
+ vlmparse/converter.py,sha256=KKcXqrp3nJo3d7DXjHn3O2SklbsJ489rDY4NJ9O42Fs,8795
6
+ vlmparse/converter_with_server.py,sha256=A84l3YNal-hs2mMlER1sB29rddsO8MNOP2j9ts0ujtE,7280
7
+ vlmparse/registries.py,sha256=B4kxibP7XYbhL9bZ5gn21LQCPhHCYftAM4i0-xD9fRs,6469
8
+ vlmparse/utils.py,sha256=6Ff9OfAIVR-4_37QD5sifoNt_GmB3YUqgFwmIjuemtc,1727
9
+ vlmparse/clients/chandra.py,sha256=zAHjgI_MJ5FVGANHCG8KJQByaw6-zTS6CHXsCBA8TJI,13025
10
+ vlmparse/clients/deepseekocr.py,sha256=pKdNJD9v86BRn7YrXE6PGk_jQxnbZ_6UjgSUxgd3Su4,6859
11
+ vlmparse/clients/docling.py,sha256=BLtNAxVJR6qvPip4ZBP-se8IMNFSbJ-fWEGlTSwimK8,5310
12
+ vlmparse/clients/dotsocr.py,sha256=oAUzDMTObeW0sTy5sFl08O6GQPSTic5ITbJYh_45Z54,10414
13
+ vlmparse/clients/granite_docling.py,sha256=KYaEdgk3oD0TuYDKqTQ4o6IkXC-E3AIYJ2KYxnEsjWM,3595
14
+ vlmparse/clients/hunyuanocr.py,sha256=etpIiA28OoGW-o5pOGeBxOlUDjUQ4zcKXWnJ8ba44DU,1979
15
+ vlmparse/clients/lightonocr.py,sha256=ZWC12U6myDr_2EuOPYGyJYxpBachjOUtLrxS62A8mzg,2048
16
+ vlmparse/clients/mineru.py,sha256=9r6_JwCGB6-C-YKpoadUs7lSuDEJyBFkfjWV5WN-VrE,3599
17
+ vlmparse/clients/mistral_converter.py,sha256=_hEyK_2vM5LEwbt30bFodMrWJtavLsBDxCkVclNUF4M,2837
18
+ vlmparse/clients/nanonetocr.py,sha256=gTbD4OtuHiWd6Ack6Bx-anZM9P_aErfSHXwtymETvqM,1665
19
+ vlmparse/clients/olmocr.py,sha256=V4638WftLCTr5Q6ZRgWKKSPAhFYdpBw3izeuda6EKDQ,1966
20
+ vlmparse/clients/openai_converter.py,sha256=bckm33Pkvqul--DjfEEEI3evn4_va0CoQcigdpCCMGc,7746
21
+ vlmparse/clients/paddleocrvl.py,sha256=q3AgEWj0UyXGpSEVZISdfqv2PV_qY-uF498bL8U1tpg,1596
22
+ vlmparse/clients/prompts.py,sha256=-J60lqxgRzlkQ9VsQLxmWsIMaDt-gNqWqWoqHIw9CLc,4228
23
+ vlmparse/clients/pipe_utils/cleaner.py,sha256=oxBkBTOkluN1lmeNbzajRIe0_D__ZGwUOBaI_Ph0uxE,2396
24
+ vlmparse/clients/pipe_utils/html_to_md_conversion.py,sha256=cFFqzD2jCNw_968_eu3Wt--Ox7iJj2Rn5UoP_DZWosU,4112
25
+ vlmparse/clients/pipe_utils/utils.py,sha256=935ecIO446I0pstszE_1nrIPHn1Ffrxunq7fVd0dsd8,315
26
+ vlmparse/data_model/box.py,sha256=lJsh4qhjgYXZF5vTSJ1qMXD5GVlBi2_SBedBMlfJikU,16868
27
+ vlmparse/data_model/document.py,sha256=xheaMeStOj2c9GZKmdtxcEl_Dj44V5JyVp6JnTrSpH0,4615
28
+ vlmparse/servers/docker_server.py,sha256=FOIHU0_CDfyZ9UA285BrnUFuEMJRxbu-OzlByBa-P9s,7951
29
+ vlmparse/servers/model_identity.py,sha256=DkH7KQAAZA9Sn7eJEnaKfH54XSEI17aqD1ScqqkTBEk,1711
30
+ vlmparse/servers/utils.py,sha256=tIXhgbF9EVOJy2nYEguVq69gn9ATxtya_1F4wZSt68o,9454
31
+ vlmparse/st_viewer/fs_nav.py,sha256=7GNH68h2Loh5pQ64Pe72-D2cs2BLhqRXevEmKdFmPX0,1616
32
+ vlmparse/st_viewer/st_viewer.py,sha256=m2rQTtk5rlwErNmivNAg-4rkHkvNkvLhoJZxFQi7Dwk,2105
33
+ vlmparse-0.1.8.dist-info/licenses/LICENSE,sha256=3TKJHk8hPBR5dbLWZ3IpfCftl-_m-iyBwpYQGZYxj14,1080
34
+ vlmparse-0.1.8.dist-info/METADATA,sha256=dwu5tiTLuhVMYL-ZQCMNYW_MNlJu84V2us0aeRfrSpU,6048
35
+ vlmparse-0.1.8.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
36
+ vlmparse-0.1.8.dist-info/entry_points.txt,sha256=gD5berP6HwE2wNIkls-Lw5goiceA8uMgPEd7ifnFJXs,47
37
+ vlmparse-0.1.8.dist-info/top_level.txt,sha256=k4ni-GNH_iAX7liQEsk_KY_c3xgZgt8k9fsSs9IXLXs,9
38
+ vlmparse-0.1.8.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.9.0)
2
+ Generator: setuptools (80.10.2)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5