vlmparse-0.1.5-py3-none-any.whl → vlmparse-0.1.6-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
vlmparse/cli.py CHANGED
@@ -4,13 +4,24 @@ from loguru import logger
 
 
 class DParseCLI:
-    def serve(self, model: str, port: int | None = None, gpus: str | None = None):
+    """Parse PDFs to text using VLMs: type `vlmparse` to list the available commands, then `vlmparse <command> --help` for help on a specific command."""
+
+    def serve(
+        self,
+        model: str,
+        port: int | None = None,
+        gpus: str | None = None,
+        vllm_kwargs: dict | None = None,
+        forget_predefined_vllm_kwargs: bool = False,
+    ):
         """Deploy a VLLM server in a Docker container.
 
         Args:
            model: Model name
            port: VLLM server port (default: 8056)
            gpus: Comma-separated GPU device IDs (e.g., "0" or "0,1,2"). If not specified, all GPUs will be used.
+           vllm_kwargs: Additional keyword arguments to pass to the VLLM server.
+           forget_predefined_vllm_kwargs: If True, the predefined VLLM kwargs from the Docker config are replaced by vllm_kwargs; otherwise the predefined kwargs are updated with vllm_kwargs, with a risk of argument-name collisions.
         """
         if port is None:
             port = 8056
@@ -32,6 +43,10 @@ class DParseCLI:
             docker_config.gpu_device_ids = [g.strip() for g in str(gpus).split(",")]
         server = docker_config.get_server(auto_stop=False)
 
+        if server is None:
+            logger.error(f"Model server not found for model: {model}")
+            return
+
         # Deploy server and leave it running (cleanup=False)
         logger.info(
             f"Deploying VLLM server for {docker_config.model_name} on port {port}..."
@@ -54,6 +69,8 @@ class DParseCLI:
         with_vllm_server: bool = False,
         concurrency: int = 10,
         dpi: int | None = None,
+        vllm_kwargs: dict | None = None,
+        debug: bool = False,
     ):
         """Parse PDF documents and save results.
 
@@ -67,109 +84,22 @@ class DParseCLI:
             mode: Output mode - "document" (save as JSON zip), "md" (save as markdown file), "md_page" (save as folder of markdown pages)
             with_vllm_server: If True, a local VLLM server will be deployed if the model is not found in the registry. Note that if the model is in the registry and the uri is None, the server will be deployed anyway.
             dpi: DPI to use for the conversion. If not specified, the default DPI will be used.
+            vllm_kwargs: Additional keyword arguments to pass to the VLLM server.
+            debug: If True, run in debug mode (single-threaded, no concurrency).
         """
        from vlmparse.converter_with_server import ConverterWithServer
 
-        converter_with_server = ConverterWithServer(
+        with ConverterWithServer(
             model=model,
             uri=uri,
             gpus=gpus,
             with_vllm_server=with_vllm_server,
             concurrency=concurrency,
-        )
-
-        return converter_with_server.parse(
-            inputs=inputs, out_folder=out_folder, mode=mode, dpi=dpi
-        )
-        # from vlmparse.registries import converter_config_registry
-
-        # # Infer model from URI if provided
-        # if uri is not None and model is None:
-        #     import docker
-
-        #     try:
-        #         docker_client = docker.from_env()
-        #         containers = docker_client.containers.list()
-        #         for container in containers:
-        #             # Check both exact match and match with/without trailing slash
-        #             container_uri = container.labels.get("vlmparse_uri", "")
-        #             if container_uri and (
-        #                 container_uri == uri
-        #                 or container_uri.rstrip("/") == uri.rstrip("/")
-        #             ):
-        #                 inferred_model = container.labels.get("vlmparse_model_name")
-        #                 if inferred_model:
-        #                     logger.info(
-        #                         f"Inferred model {inferred_model} from URI {uri}"
-        #                     )
-        #                     model = inferred_model
-        #                     break
-        #     except Exception:
-        #         # If Docker is not available or fails, just proceed with provided arguments
-        #         pass
-
-        # if mode not in ["document", "md", "md_page"]:
-        #     logger.error(f"Invalid mode: {mode}. Must be one of: document, md, md_page")
-        #     return
-
-        # # Expand file paths from glob patterns
-        # file_paths = []
-        # if isinstance(inputs, str):
-        #     inputs = [inputs]
-        # for pattern in inputs:
-        #     if "*" in pattern or "?" in pattern:
-        #         file_paths.extend(glob(pattern, recursive=True))
-        #     elif os.path.isdir(pattern):
-        #         file_paths.extend(glob(os.path.join(pattern, "*.pdf"), recursive=True))
-        #     elif os.path.isfile(pattern):
-        #         file_paths.append(pattern)
-        #     else:
-        #         logger.error(f"Invalid input: {pattern}")
-
-        # # Filter to only existing PDF files
-        # file_paths = [f for f in file_paths if os.path.exists(f) and f.endswith(".pdf")]
-
-        # if not file_paths:
-        #     logger.error("No PDF files found matching the inputs patterns")
-        #     return
-
-        # logger.info(f"Processing {len(file_paths)} files with {model} converter")
-
-        # gpu_device_ids = None
-        # if gpus is not None:
-        #     gpu_device_ids = [g.strip() for g in gpus.split(",")]
-
-        # if uri is None:
-        #     from vlmparse.registries import docker_config_registry
-
-        #     docker_config = docker_config_registry.get(model, default=with_vllm_server)
-
-        #     if docker_config is not None:
-        #         docker_config.gpu_device_ids = gpu_device_ids
-        #         server = docker_config.get_server(auto_stop=True)
-        #         server.start()
-
-        #         client = docker_config.get_client(
-        #             save_folder=out_folder, save_mode=mode
-        #         )
-        #     else:
-        #         client = converter_config_registry.get(model).get_client(
-        #             save_folder=out_folder, save_mode=mode
-        #         )
-
-        # else:
-        #     client_config = converter_config_registry.get(model, uri=uri)
-        #     client = client_config.get_client(save_folder=out_folder, save_mode=mode)
-        # client.num_concurrent_files = concurrency
-        # client.num_concurrent_pages = concurrency
-        # if dpi is not None:
-        #     client.config.dpi = int(dpi)
-        # documents = client.batch(file_paths)
-
-        # if documents is not None:
-        #     logger.info(f"Processed {len(documents)} documents to {out_folder}")
-        # else:
-        #     logger.info(f"Processed {len(file_paths)} documents to {out_folder}")
+            vllm_kwargs=vllm_kwargs,
+        ) as converter_with_server:
+            return converter_with_server.parse(
+                inputs=inputs, out_folder=out_folder, mode=mode, dpi=dpi, debug=debug
+            )
 
     def list(self):
         """List all containers whose name begins with vlmparse."""
vlmparse/clients/chandra.py CHANGED
@@ -194,7 +194,7 @@ class ChandraConverterConfig(OpenAIConverterConfig):
     model_name: str = "datalab-to/chandra"
     prompt_type: str = "ocr"  # Default prompt type
     bbox_scale: int = 1024
-    max_retries: int = 6
+    max_retries: int = 0
     max_failure_retries: int = None
     completion_kwargs: dict = Field(
         default_factory=lambda: {
vlmparse/clients/deepseekocr.py CHANGED
@@ -15,6 +15,57 @@ from vlmparse.servers.docker_server import VLLMDockerServerConfig
 from vlmparse.utils import to_base64
 
 
+class DeepSeekOCRDockerServerConfig(VLLMDockerServerConfig):
+    """Configuration for DeepSeekOCR model."""
+
+    model_name: str = "deepseek-ai/DeepSeek-OCR"
+    command_args: list[str] = Field(
+        default_factory=lambda: [
+            "--limit-mm-per-prompt",
+            '{"image": 1}',
+            "--async-scheduling",
+            "--logits_processors",
+            "vllm.model_executor.models.deepseek_ocr:NGramPerReqLogitsProcessor",
+            "--no-enable-prefix-caching",
+            "--mm-processor-cache-gb",
+            "0",
+        ]
+    )
+    aliases: list[str] = Field(default_factory=lambda: ["deepseekocr"])
+
+    @property
+    def client_config(self):
+        return DeepSeekOCRConverterConfig(llm_params=self.llm_params)
+
+
+class DeepSeekOCRConverterConfig(OpenAIConverterConfig):
+    """DeepSeekOCR converter - backward compatibility alias."""
+
+    model_name: str = "deepseek-ai/DeepSeek-OCR"
+    aliases: list[str] = Field(default_factory=lambda: ["deepseekocr"])
+
+    prompt_mode: Literal["layout", "ocr"] = "ocr"
+    completion_kwargs: dict | None = {
+        "temperature": 0.0,
+        "max_tokens": 8181,
+        "extra_body": {
+            "skip_special_tokens": False,
+            # args used to control custom logits processor
+            "vllm_xargs": {
+                "ngram_size": 30,
+                "window_size": 90,
+                # whitelist: <td>, </td>
+                "whitelist_token_ids": [128821, 128822],
+            },
+        },
+    }
+    dpi: int = 200
+    aliases: list[str] = Field(default_factory=lambda: ["deepseekocr"])
+
+    def get_client(self, **kwargs) -> "DeepSeekOCRConverterClient":
+        return DeepSeekOCRConverterClient(config=self, **kwargs)
+
+
 def re_match(text):
     pattern = r"(<\|ref\|>(.*?)<\|/ref\|><\|det\|>(.*?)<\|/det\|>)"
     matches = re.findall(pattern, text, re.DOTALL)
@@ -150,54 +201,3 @@ class DeepSeekOCRConverterClient(OpenAIConverterClient):
             logger.debug(page.text)
 
         return page
-
-
-class DeepSeekOCRDockerServerConfig(VLLMDockerServerConfig):
-    """Configuration for DeepSeekOCR model."""
-
-    model_name: str = "deepseek-ai/DeepSeek-OCR"
-    command_args: list[str] = Field(
-        default_factory=lambda: [
-            "--limit-mm-per-prompt",
-            '{"image": 1}',
-            "--async-scheduling",
-            "--logits_processors",
-            "vllm.model_executor.models.deepseek_ocr:NGramPerReqLogitsProcessor",
-            "--no-enable-prefix-caching",
-            "--mm-processor-cache-gb",
-            "0",
-        ]
-    )
-    aliases: list[str] = Field(default_factory=lambda: ["deepseekocr"])
-
-    @property
-    def client_config(self):
-        return DeepSeekOCRConverterConfig(llm_params=self.llm_params)
-
-
-class DeepSeekOCRConverterConfig(OpenAIConverterConfig):
-    """DeepSeekOCR converter - backward compatibility alias."""
-
-    model_name: str = "deepseek-ai/DeepSeek-OCR"
-    aliases: list[str] = Field(default_factory=lambda: ["deepseekocr"])
-
-    prompt_mode: Literal["layout", "ocr"] = "ocr"
-    completion_kwargs: dict | None = {
-        "temperature": 0.0,
-        "max_tokens": 8181,
-        "extra_body": {
-            "skip_special_tokens": False,
-            # args used to control custom logits processor
-            "vllm_xargs": {
-                "ngram_size": 30,
-                "window_size": 90,
-                # whitelist: <td>, </td>
-                "whitelist_token_ids": [128821, 128822],
-            },
-        },
-    }
-    dpi: int = 200
-    aliases: list[str] = Field(default_factory=lambda: ["deepseekocr"])
-
-    def get_client(self, **kwargs) -> "DeepSeekOCRConverterClient":
-        return DeepSeekOCRConverterClient(config=self, **kwargs)
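The two hunks above are a pure relocation of the DeepSeekOCR classes to the top of the module, so the converter config is defined before the client code that uses it. A rough sketch of how the pair is wired together, assuming the config's defaults suffice:

```python
from vlmparse.clients.deepseekocr import DeepSeekOCRDockerServerConfig

# Server config and converter config are linked via the client_config
# property (per the class bodies above).
server_config = DeepSeekOCRDockerServerConfig()
server = server_config.get_server(auto_stop=True)  # Docker-backed VLLM
client = server_config.client_config.get_client()  # DeepSeekOCRConverterClient
```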
vlmparse/clients/dotsocr.py CHANGED
@@ -237,7 +237,7 @@ class DotsOCRConverter(OpenAIConverterClient):
        )
        prompt = self.PROMPTS[prompt_mode]
 
-        response = await self._async_inference_with_vllm(image, prompt)
+        response, usage = await self._async_inference_with_vllm(image, prompt)
 
        if prompt_mode in ["prompt_layout_all_en"]:
            try:
@@ -248,17 +248,17 @@ class DotsOCRConverter(OpenAIConverterClient):
                    image.width,
                    image.height,
                )
-                return {}, cells, False
+                return {}, cells, False, usage
            except Exception as e:
                logger.warning(f"cells post process error: {e}, returning raw response")
-                return {}, response, True
+                return {}, response, True, usage
        else:
-            return {}, response, None
+            return {}, response, None, usage
 
    async def async_call_inside_page(self, page: Page) -> Page:
        image = page.image
 
-        _, response, _ = await self._parse_image_vllm(
+        _, response, _, usage = await self._parse_image_vllm(
            image, prompt_mode=self.config.prompt_mode
        )
        logger.info("Response: " + str(response))
@@ -283,4 +283,8 @@ class DotsOCRConverter(OpenAIConverterClient):
        text = clean_response(response)
        text = html_to_md_keep_tables(text)
        page.text = text
+
+        page.completion_tokens = usage.completion_tokens
+        page.prompt_tokens = usage.prompt_tokens
+        page.reasoning_tokens = usage.reasoning_tokens
        return page
vlmparse/clients/hunyuanocr.py CHANGED
@@ -39,7 +39,8 @@ class HunyuanOCRConverterConfig(OpenAIConverterConfig):
    completion_kwargs: dict | None = {
        "temperature": 0.0,
        "extra_body": {"top_k": 1, "repetition_penalty": 1.0},
+        "max_completion_tokens": 16384,  # the technical report cites a max training token length of 32000, but in practice the model breaks down earlier
    }
-    max_image_size: int | None = 1540
    dpi: int = 200
    aliases: list[str] = Field(default_factory=lambda: ["hunyuanocr"])
+    stream: bool = True
vlmparse/clients/mineru.py CHANGED
@@ -52,24 +52,22 @@ class MinerUConverter(BaseConverter):
 
    config: MinerUConverterConfig
 
-    def __init__(self, config: MinerUConverterConfig, **kwargs):
-        super().__init__(config=config, **kwargs)
-        from httpx import AsyncClient
-
-        self.client = AsyncClient(base_url=config.base_url, timeout=config.timeout)
-
    async def _async_inference_with_api(self, image) -> list:
        """Run async inference with MinerU API."""
+        from httpx import AsyncClient
 
-        img_byte_arr = await asyncio.to_thread(to_bytes_io, image)
-        response = await self.client.post(
-            "process-image",
-            files={"image": ("image.png", img_byte_arr, "image/png")},
-        )
+        async with AsyncClient(
+            base_url=self.config.base_url, timeout=self.config.timeout
+        ) as client:
+            img_byte_arr = await asyncio.to_thread(to_bytes_io, image)
+            response = await client.post(
+                "process-image",
+                files={"image": ("image.png", img_byte_arr, "image/png")},
+            )
 
-        response.raise_for_status()
+            response.raise_for_status()
 
-        res = orjson.loads(response.content)
+            res = orjson.loads(response.content)
 
        return res
 
vlmparse/clients/olmocr.py CHANGED
@@ -41,6 +41,6 @@ class OlmOCRConverterConfig(OpenAIConverterConfig):
        "temperature": 0.1,
        "max_tokens": 8000,
    }
-    max_image_size: int | None = 1288
+    # max_image_size: int | None = 1288
    dpi: int = 200
    aliases: list[str] = Field(default_factory=lambda: ["olmocr-2-fp8"])
vlmparse/clients/openai_converter.py CHANGED
@@ -40,6 +40,8 @@ def get_llm_params(model_name: str, uri: str | None = None):
    ]:
        base_url = None
        api_key = os.getenv("OPENAI_API_KEY")
+        if api_key is None:
+            raise ValueError("OPENAI_API_KEY environment variable not set")
    else:
        if model_name in [
            "gemini-2.5-flash-lite",
@@ -48,6 +50,8 @@ def get_llm_params(model_name: str, uri: str | None = None):
        ]:
            base_url = GOOGLE_API_BASE_URL
            api_key = os.getenv("GOOGLE_API_KEY")
+            if api_key is None:
+                raise ValueError("GOOGLE_API_KEY environment variable not set")
        else:
            return None
    return LLMParams(base_url=base_url, model_name=model_name, api_key=api_key)
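These checks turn a silently-None API key into an immediate failure. A small sketch of the new behavior; the model name below is a hypothetical member of the OpenAI branch's model list, which this hunk does not show:

```python
import os

from vlmparse.clients.openai_converter import get_llm_params

# With the key unset, hosted-model lookups now raise instead of building
# LLMParams with api_key=None.
os.environ.pop("OPENAI_API_KEY", None)
try:
    get_llm_params("gpt-4o-mini")  # hypothetical OpenAI-branch model name
except ValueError as exc:
    print(exc)  # "OPENAI_API_KEY environment variable not set"
```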
@@ -97,7 +101,7 @@ class OpenAIConverterClient(BaseConverter):
 
    async def _get_chat_completion(
        self, messages: list[dict], completion_kwargs: dict | None = None
-    ) -> str:
+    ) -> tuple[str, "CompletionUsage"]:  # noqa: F821
        """Helper to handle chat completion with optional streaming."""
        if completion_kwargs is None:
            completion_kwargs = self.config.completion_kwargs
@@ -126,7 +130,8 @@ class OpenAIConverterClient(BaseConverter):
                "Response is None, finish reason: "
                + response_obj.choices[0].finish_reason
            )
-        return response_obj.choices[0].message.content
+
+        return response_obj.choices[0].message.content, response_obj.usage
 
    async def async_call_inside_page(self, page: Page) -> Page:
        """Process a single page using OpenAI-compatible API."""
@@ -163,12 +168,16 @@ class OpenAIConverterClient(BaseConverter):
            },
        ]
 
-        response = await self._get_chat_completion(messages)
-        logger.info("Response: " + str(response))
+        response, usage = await self._get_chat_completion(messages)
+        logger.debug("Response: " + str(response))
        page.raw_response = response
        text = clean_response(response)
 
        text = html_to_md_keep_tables(text)
        page.text = text
+        page.prompt_tokens = usage.prompt_tokens
+        page.completion_tokens = usage.completion_tokens
+        if hasattr(usage, "reasoning_tokens"):
+            page.reasoning_tokens = usage.reasoning_tokens
 
        return page
vlmparse/clients/paddleocrvl.py CHANGED
@@ -42,7 +42,8 @@ class PaddleOCRVLConverterConfig(OpenAIConverterConfig):
    postprompt: str | None = TASKS["ocr"]
    completion_kwargs: dict | None = {
        "temperature": 0.0,
+        "max_completion_tokens": 16384,
    }
-    max_image_size: int | None = 1540
    dpi: int = 200
    aliases: list[str] = Field(default_factory=lambda: ["paddleocrvl"])
+    stream: bool = True
vlmparse/converter_with_server.py CHANGED
@@ -12,28 +12,35 @@ from vlmparse.utils import get_file_paths
 class ConverterWithServer:
    def __init__(
        self,
-        model: str,
+        model: str | None = None,
        uri: str | None = None,
        gpus: str | None = None,
        port: int | None = None,
        with_vllm_server: bool = False,
        concurrency: int = 10,
+        vllm_kwargs: dict | None = None,
+        forget_predefined_vllm_kwargs: bool = False,
    ):
-        from vlmparse.registries import (
-            converter_config_registry,
-            docker_config_registry,
-        )
-
        self.model = model
        self.uri = uri
        self.port = port
        self.gpus = gpus
        self.with_vllm_server = with_vllm_server
        self.concurrency = concurrency
+        self.vllm_kwargs = vllm_kwargs
+        self.forget_predefined_vllm_kwargs = forget_predefined_vllm_kwargs
+        self.server = None
+        self.client = None
 
        if self.uri is not None and self.model is None:
            self.model = get_model_from_uri(self.uri)
 
+    def start_server_and_client(self):
+        from vlmparse.registries import (
+            converter_config_registry,
+            docker_config_registry,
+        )
+
        gpu_device_ids = None
        if self.gpus is not None:
            gpu_device_ids = [g.strip() for g in self.gpus.split(",")]
@@ -47,7 +54,12 @@ class ConverterWithServer:
        if self.port is not None:
            docker_config.docker_port = self.port
        docker_config.gpu_device_ids = gpu_device_ids
+        docker_config.update_command_args(
+            self.vllm_kwargs,
+            forget_predefined_vllm_kwargs=self.forget_predefined_vllm_kwargs,
+        )
        self.server = docker_config.get_server(auto_stop=True)
+
        self.server.start()
 
        self.client = docker_config.get_client()
@@ -59,6 +71,17 @@ class ConverterWithServer:
 
        self.client = client_config.get_client()
 
+    def stop_server(self):
+        if self.server is not None and self.server.auto_stop:
+            self.server.stop()
+
+    def __enter__(self):
+        self.start_server_and_client()
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.stop_server()
+
    def parse(
        self,
        inputs: str | list[str],
@@ -68,6 +91,9 @@ class ConverterWithServer:
        debug: bool = False,
        retrylast: bool = False,
    ):
+        assert (
+            self.client is not None
+        ), "Client not initialized. Call start_server_and_client() first."
        file_paths = get_file_paths(inputs)
        assert (
            out_folder is not None
@@ -119,5 +145,5 @@ class ConverterWithServer:
 
        return documents
 
-    def get_out_folder(self) -> Path:
+    def get_out_folder(self) -> str | None:
        return self.client.save_folder
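Putting the new pieces together, a hedged usage sketch: the context manager starts the server and client on `__enter__` and stops an auto-stop server on `__exit__`. The vllm_kwargs values below are illustrative, not taken from the package:

```python
from vlmparse.converter_with_server import ConverterWithServer

# vllm_kwargs is forwarded to the Docker config via update_command_args()
# before the server starts; stop_server() runs on exit.
with ConverterWithServer(
    model="deepseekocr",                          # registry alias
    vllm_kwargs={"gpu-memory-utilization": 0.8},  # assumed VLLM flag name
) as cws:
    documents = cws.parse(inputs="docs/*.pdf", out_folder="./output")
```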
vlmparse/data_model/document.py CHANGED
@@ -41,6 +41,10 @@ class Page(VLMParseBaseModel):
    buffer_image: Optional[Image.Image | str | dict] = None
    latency: Optional[float] = None
    """Time taken to process the page in seconds."""
+    prompt_tokens: Optional[int] = None
+    completion_tokens: Optional[int] = None
+    """Includes reasoning tokens."""
+    reasoning_tokens: Optional[int] = None
 
    @property
    def image(self):
@@ -66,7 +70,7 @@ class Page(VLMParseBaseModel):
 
        image = self.image
 
-        if layout:
+        if layout and image is not None:
            if self.items is None:
                return image
            items = self.items
@@ -85,6 +89,9 @@ class Page(VLMParseBaseModel):
        )
        return image
 
+    def to_markdown(self, **kwargs):
+        return self.text if self.text is not None else ""
+
 
 class Document(VLMParseBaseModel):
    file_path: str
@@ -104,6 +111,9 @@ class Document(VLMParseBaseModel):
            page.error is not None for page in self.pages
        )
 
+    def to_markdown(self, **kwargs):
+        return "\n\n".join([page.to_markdown(**kwargs) for page in self.pages])
+
    def to_zip(
        self,
        file_path,
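A small sketch of the new accessors; the constructor arguments are assumptions based on the fields visible in this diff (the real models carry more fields):

```python
from vlmparse.data_model.document import Document, Page

# Hypothetical two-page document; text is the only field set here.
doc = Document(
    file_path="example.pdf",
    pages=[Page(text="# Page one"), Page(text="Page two")],
)
print(doc.to_markdown())           # pages joined by blank lines
print(doc.pages[0].prompt_tokens)  # None until a converter fills it in
```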
vlmparse/registries.py CHANGED
@@ -77,9 +77,7 @@ class ConverterConfigRegistry:
        """Register a config factory for a model name."""
        self._registry[model_name] = config_factory
 
-    def get(
-        self, model_name: str, uri: str | None = None
-    ) -> OpenAIConverterConfig | None:
+    def get(self, model_name: str, uri: str | None = None) -> OpenAIConverterConfig:
        """Get config for a model name. Returns default if not registered."""
        if model_name in self._registry:
            return self._registry[model_name](uri=uri)
vlmparse/servers/docker_server.py CHANGED
@@ -47,6 +47,20 @@ class DockerServerConfig(BaseModel):
        """Build command for container. Override in subclasses for specific logic."""
        return self.command_args if self.command_args else None
 
+    def update_command_args(
+        self,
+        vllm_kwargs: dict | None = None,
+        forget_predefined_vllm_kwargs: bool = False,
+    ) -> list[str]:
+        if vllm_kwargs is not None:
+            new_kwargs = [f"--{k}={v}" for k, v in vllm_kwargs.items()]
+            if forget_predefined_vllm_kwargs:
+                self.command_args = new_kwargs
+            else:
+                self.command_args.extend(new_kwargs)
+
+        return self.command_args
+
    def get_volumes(self) -> dict | None:
        """Setup volumes for container. Override in subclasses for specific logic."""
        return self.volumes
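The mapping from a kwargs dict to CLI flags is mechanical: each key/value pair becomes `--key=value`. A sketch, assuming a config that can be constructed with defaults and already has a command_args list:

```python
from vlmparse.servers.docker_server import DockerServerConfig

# Hypothetical config instance; see update_command_args() above.
cfg = DockerServerConfig()  # assumes default construction is possible
cfg.update_command_args({"max-model-len": 8192})
print(cfg.command_args[-1])  # --max-model-len=8192

# With forget_predefined_vllm_kwargs=True, predefined args are discarded:
cfg.update_command_args({"dtype": "bfloat16"}, forget_predefined_vllm_kwargs=True)
print(cfg.command_args)      # ['--dtype=bfloat16']
```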
@@ -144,7 +158,7 @@ class ConverterServer:
        """Start the Docker server."""
        if self._server_context is not None:
            logger.warning("Server already started")
-            return self.base_url
+            return self.base_url, self._container
 
        # Use the generic docker_server for all server types
        self._server_context = docker_server(config=self.config, cleanup=self.auto_stop)
vlmparse-0.1.6.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: vlmparse
-Version: 0.1.5
+Version: 0.1.6
 Requires-Python: >=3.11.0
 Description-Content-Type: text/markdown
 License-File: LICENSE
@@ -199,11 +199,13 @@ server.stop()
 ```
 
 
-Converter with automatic server deployment:
+Converter with automatic server management:
 
 ```python
 from vlmparse.converter_with_server import ConverterWithServer
 
-converter_with_server = ConverterWithServer(model="mineru2.5")
-documents = converter_with_server.parse(inputs=["file1.pdf", "file2.pdf"], out_folder="./output")
+with ConverterWithServer(model="mineru2.5") as converter_with_server:
+    documents = converter_with_server.parse(inputs=["file1.pdf", "file2.pdf"], out_folder="./output")
 ```
+
+Note that if you pass the URI of a running VLLM server to `ConverterWithServer`, the model name is inferred automatically and no new server is started.
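A corresponding sketch for the URI path described in that note; the URL below is a hypothetical local endpoint:

```python
from vlmparse.converter_with_server import ConverterWithServer

# Model name is inferred from the running server; no Docker container is
# started, and nothing is stopped on exit.
with ConverterWithServer(uri="http://localhost:8056/v1") as converter_with_server:
    documents = converter_with_server.parse(inputs=["file1.pdf"], out_folder="./output")
```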
vlmparse-0.1.6.dist-info/RECORD CHANGED
@@ -1,36 +1,36 @@
 vlmparse/base_model.py,sha256=4U4UPe8SNArliKnUf8pp8zQugWYsnhg9okylt7mrW1U,381
 vlmparse/build_doc.py,sha256=LAWrnFrqamN5PwJo57AUtQOPrMFGnCGw4gBjEKZ6pYo,2127
-vlmparse/cli.py,sha256=tQma1IkOsFnqPKqqHVO1PJh18n1w82gp4ewA7oraJkE,15855
+vlmparse/cli.py,sha256=gY45YZe5LanN-ozG2vVtOaB2qyNWpjO2DvPmJeBi_wA,13045
 vlmparse/constants.py,sha256=7-47S01n4MI2ebR09bpdOo3_P16d-z-NVGsm6KJP8ls,110
 vlmparse/converter.py,sha256=F0JSY9sFYUggCvaUCb27kKGJJpnZKW2FStMDVJoIOeQ,7383
-vlmparse/converter_with_server.py,sha256=G393O7vU_lJz6Vz-qYVkrjFhf0Vmpjjl8OjPKQe2blU,3928
-vlmparse/registries.py,sha256=6bEUKTkTjc8C7c1R1ZvAHSF5NCXmAuhNpw0qNnuQ7-A,5818
+vlmparse/converter_with_server.py,sha256=62kcEp0NjzDR2vVmEfCeeLlwbb8E3sWcseb2jjK7DpM,4861
+vlmparse/registries.py,sha256=yBVrrhy61rSoLwdNV-z0C4lqIpTbLoWab3V6u7aSyNM,5797
 vlmparse/utils.py,sha256=rcVrtPiQVj_8HAmFQOu___72uYIapp_X89yxrMNCBow,1236
-vlmparse/clients/chandra.py,sha256=zfu-A6Slh-fIAyrtrlVoCb6QHLBimnimefap_K9YwYw,9775
-vlmparse/clients/deepseekocr.py,sha256=rQvaOaPPoDiZ0MzXqfqqH9BgUBfjmlfHu3NlMjSDgiQ,6501
+vlmparse/clients/chandra.py,sha256=EulsCZdwOtm0pQ6CDm320U96k8aWFN4wKqCm1Xo7VCE,9775
+vlmparse/clients/deepseekocr.py,sha256=Uw6tPvP2KVsPDlz1ZUgYdbgQSjmFPuYeFDrGMMOTBAo,6501
 vlmparse/clients/docling.py,sha256=SAkLsqseuWfkuiel8FWR1G0Z5s-SZU3dE2JbsOvF4SA,5328
-vlmparse/clients/dotsocr.py,sha256=w2T-xkhlw1AfT-CUYoF0ectr2jDYHe9239B24XKB1UQ,10139
+vlmparse/clients/dotsocr.py,sha256=uGJoYEiDkP3-rmfdkAnMeAX-T4RZyEPoh6jmow5_-J8,10336
 vlmparse/clients/granite_docling.py,sha256=EQpsv5qSJG0HtMSacmJStER2sq4TGf1EMU5_NmJsl4g,4634
-vlmparse/clients/hunyuanocr.py,sha256=Xw0Q1l-3pQzaEgFngnfM8vrSWpnT3I99QvDaGZ8XooM,1712
+vlmparse/clients/hunyuanocr.py,sha256=UFqaS4b8UM9EtizyrZIxlqcYlESmxm8xrQZP7lL6tkE,1857
 vlmparse/clients/lightonocr.py,sha256=wx1Im8Z3wlRWwYbPqnSd3LqTtdAU8CnX5mzu1BuCUY8,1314
-vlmparse/clients/mineru.py,sha256=bilDPcUoLk2rcFVqMk4q2Hx2txilc3GDUbjAEoMM_BI,3671
+vlmparse/clients/mineru.py,sha256=6jZ1sKn2kGwUvD8gVs4PqEDH7uUXYK8pAB5Fr1JeqnY,3617
 vlmparse/clients/nanonetocr.py,sha256=BT5vaeerCsK5agvOaHK3NvLUqWd1FfDmrMmDYbp646I,1543
-vlmparse/clients/olmocr.py,sha256=mQEDpfyLY8a80Zlps5mG0QaWytIgnNQZVEVWKWjPIjk,1849
-vlmparse/clients/openai_converter.py,sha256=j2H0iAQTADRRpu1Zy1b-1OFfWyXuqCvrQKy2UcwggTA,5696
-vlmparse/clients/paddleocrvl.py,sha256=tmaqg3boV4edywiiiNiNiI3dBHi111wz4dFb52OISXw,1376
+vlmparse/clients/olmocr.py,sha256=A4Vl0meYpU5QPTML_OxyyRM07xCxtfrMZedgGMYEcuU,1851
+vlmparse/clients/openai_converter.py,sha256=nMKJeWH43UxHMMLns3wjX0pYjU5Xnai6IYxFmS9I63s,6193
+vlmparse/clients/paddleocrvl.py,sha256=qFBDj_UQocyq3WCh24tUOx9Ud7S9DfSm-1n3ztikY2s,1402
 vlmparse/clients/prompts.py,sha256=-J60lqxgRzlkQ9VsQLxmWsIMaDt-gNqWqWoqHIw9CLc,4228
 vlmparse/clients/pipe_utils/cleaner.py,sha256=oxBkBTOkluN1lmeNbzajRIe0_D__ZGwUOBaI_Ph0uxE,2396
 vlmparse/clients/pipe_utils/html_to_md_conversion.py,sha256=cFFqzD2jCNw_968_eu3Wt--Ox7iJj2Rn5UoP_DZWosU,4112
 vlmparse/clients/pipe_utils/utils.py,sha256=935ecIO446I0pstszE_1nrIPHn1Ffrxunq7fVd0dsd8,315
 vlmparse/data_model/box.py,sha256=lJsh4qhjgYXZF5vTSJ1qMXD5GVlBi2_SBedBMlfJikU,16868
-vlmparse/data_model/document.py,sha256=pdCZvWzRFkez53ZJpNaB4ezUW-OVUlbR3_SBmmgVzGQ,4217
-vlmparse/servers/docker_server.py,sha256=qOoZcWSHrK7kK7tAL61RJSW-Jmee93It2SEfWG3jGrc,6633
+vlmparse/data_model/document.py,sha256=xheaMeStOj2c9GZKmdtxcEl_Dj44V5JyVp6JnTrSpH0,4615
+vlmparse/servers/docker_server.py,sha256=FBW2TvtUHUQNwj0rBP92shvoiJCGlc_oAKQBXN8260E,7114
 vlmparse/servers/utils.py,sha256=qy2-rnQTCQKt6CeTV5H74tvRTXyzBV2KswQiYW8Tf-k,8908
 vlmparse/st_viewer/fs_nav.py,sha256=7GNH68h2Loh5pQ64Pe72-D2cs2BLhqRXevEmKdFmPX0,1616
 vlmparse/st_viewer/st_viewer.py,sha256=m2rQTtk5rlwErNmivNAg-4rkHkvNkvLhoJZxFQi7Dwk,2105
-vlmparse-0.1.5.dist-info/licenses/LICENSE,sha256=3TKJHk8hPBR5dbLWZ3IpfCftl-_m-iyBwpYQGZYxj14,1080
-vlmparse-0.1.5.dist-info/METADATA,sha256=LN4W1cvXJvL22hwLAgeSwd3PGTmlrt6lgqNi-tL9pes,5446
-vlmparse-0.1.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-vlmparse-0.1.5.dist-info/entry_points.txt,sha256=gD5berP6HwE2wNIkls-Lw5goiceA8uMgPEd7ifnFJXs,47
-vlmparse-0.1.5.dist-info/top_level.txt,sha256=k4ni-GNH_iAX7liQEsk_KY_c3xgZgt8k9fsSs9IXLXs,9
-vlmparse-0.1.5.dist-info/RECORD,,
+vlmparse-0.1.6.dist-info/licenses/LICENSE,sha256=3TKJHk8hPBR5dbLWZ3IpfCftl-_m-iyBwpYQGZYxj14,1080
+vlmparse-0.1.6.dist-info/METADATA,sha256=Xad3SjAYvCzUvPo6A6GKvc3daxtf5XNs1AQjDlF7RmI,5597
+vlmparse-0.1.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+vlmparse-0.1.6.dist-info/entry_points.txt,sha256=gD5berP6HwE2wNIkls-Lw5goiceA8uMgPEd7ifnFJXs,47
+vlmparse-0.1.6.dist-info/top_level.txt,sha256=k4ni-GNH_iAX7liQEsk_KY_c3xgZgt8k9fsSs9IXLXs,9
+vlmparse-0.1.6.dist-info/RECORD,,