vlm4ocr 0.4.1__tar.gz → 0.4.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {vlm4ocr-0.4.1 → vlm4ocr-0.4.2}/PKG-INFO +2 -2
- {vlm4ocr-0.4.1 → vlm4ocr-0.4.2}/pyproject.toml +2 -2
- {vlm4ocr-0.4.1 → vlm4ocr-0.4.2}/vlm4ocr/ocr_engines.py +4 -7
- {vlm4ocr-0.4.1 → vlm4ocr-0.4.2}/README.md +0 -0
- {vlm4ocr-0.4.1 → vlm4ocr-0.4.2}/vlm4ocr/__init__.py +0 -0
- {vlm4ocr-0.4.1 → vlm4ocr-0.4.2}/vlm4ocr/assets/default_prompt_templates/ocr_HTML_system_prompt.txt +0 -0
- {vlm4ocr-0.4.1 → vlm4ocr-0.4.2}/vlm4ocr/assets/default_prompt_templates/ocr_HTML_user_prompt.txt +0 -0
- {vlm4ocr-0.4.1 → vlm4ocr-0.4.2}/vlm4ocr/assets/default_prompt_templates/ocr_JSON_system_prompt.txt +0 -0
- {vlm4ocr-0.4.1 → vlm4ocr-0.4.2}/vlm4ocr/assets/default_prompt_templates/ocr_markdown_system_prompt.txt +0 -0
- {vlm4ocr-0.4.1 → vlm4ocr-0.4.2}/vlm4ocr/assets/default_prompt_templates/ocr_markdown_user_prompt.txt +0 -0
- {vlm4ocr-0.4.1 → vlm4ocr-0.4.2}/vlm4ocr/assets/default_prompt_templates/ocr_text_system_prompt.txt +0 -0
- {vlm4ocr-0.4.1 → vlm4ocr-0.4.2}/vlm4ocr/assets/default_prompt_templates/ocr_text_user_prompt.txt +0 -0
- {vlm4ocr-0.4.1 → vlm4ocr-0.4.2}/vlm4ocr/cli.py +0 -0
- {vlm4ocr-0.4.1 → vlm4ocr-0.4.2}/vlm4ocr/data_types.py +0 -0
- {vlm4ocr-0.4.1 → vlm4ocr-0.4.2}/vlm4ocr/utils.py +0 -0
- {vlm4ocr-0.4.1 → vlm4ocr-0.4.2}/vlm4ocr/vlm_engines.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: vlm4ocr
|
|
3
|
-
Version: 0.4.1
|
|
3
|
+
Version: 0.4.2
|
|
4
4
|
Summary: Python package and Web App for OCR with vision language models.
|
|
5
5
|
License: MIT
|
|
6
6
|
Author: Enshuo (David) Hsu
|
|
@@ -12,7 +12,7 @@ Classifier: Programming Language :: Python :: 3.12
|
|
|
12
12
|
Provides-Extra: tesseract
|
|
13
13
|
Requires-Dist: colorama (>=0.4.4)
|
|
14
14
|
Requires-Dist: json-repair (>=0.30.0)
|
|
15
|
-
Requires-Dist: llm-inference-engine (>=0.1.
|
|
15
|
+
Requires-Dist: llm-inference-engine (>=0.1.5)
|
|
16
16
|
Requires-Dist: pdf2image (>=1.16.0)
|
|
17
17
|
Requires-Dist: pillow (>=10.0.0)
|
|
18
18
|
Requires-Dist: pytesseract (>=0.3.13) ; extra == "tesseract"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "vlm4ocr"
|
|
3
|
-
version = "0.4.1"
|
|
3
|
+
version = "0.4.2"
|
|
4
4
|
description = "Python package and Web App for OCR with vision language models."
|
|
5
5
|
authors = ["Enshuo (David) Hsu"]
|
|
6
6
|
license = "MIT"
|
|
@@ -18,7 +18,7 @@ pdf2image = ">=1.16.0"
|
|
|
18
18
|
colorama = ">=0.4.4"
|
|
19
19
|
pillow = ">=10.0.0"
|
|
20
20
|
json-repair = ">=0.30.0"
|
|
21
|
-
llm-inference-engine = "
|
|
21
|
+
llm-inference-engine = ">=0.1.5"
|
|
22
22
|
pytesseract = { version = ">=0.3.13", optional = true }
|
|
23
23
|
|
|
24
24
|
[tool.poetry.scripts]
|
|
@@ -126,9 +126,8 @@ class OCREngine:
|
|
|
126
126
|
few_shot_examples=few_shot_examples)
|
|
127
127
|
|
|
128
128
|
# Stream response
|
|
129
|
-
response_stream = self.vlm_engine.chat(
|
|
130
|
-
messages,
|
|
131
|
-
stream=True
|
|
129
|
+
response_stream = self.vlm_engine.chat_stream(
|
|
130
|
+
messages
|
|
132
131
|
)
|
|
133
132
|
for chunk in response_stream:
|
|
134
133
|
if chunk["type"] == "response":
|
|
@@ -163,9 +162,8 @@ class OCREngine:
|
|
|
163
162
|
image=image,
|
|
164
163
|
few_shot_examples=few_shot_examples)
|
|
165
164
|
# Stream response
|
|
166
|
-
response_stream = self.vlm_engine.chat(
|
|
167
|
-
messages,
|
|
168
|
-
stream=True
|
|
165
|
+
response_stream = self.vlm_engine.chat_stream(
|
|
166
|
+
messages
|
|
169
167
|
)
|
|
170
168
|
for chunk in response_stream:
|
|
171
169
|
if chunk["type"] == "response":
|
|
@@ -295,7 +293,6 @@ class OCREngine:
|
|
|
295
293
|
response = self.vlm_engine.chat(
|
|
296
294
|
messages,
|
|
297
295
|
verbose=verbose,
|
|
298
|
-
stream=False,
|
|
299
296
|
messages_logger=messages_logger
|
|
300
297
|
)
|
|
301
298
|
ocr_text = response["response"]
|
|
File without changes
|
|
File without changes
|
{vlm4ocr-0.4.1 → vlm4ocr-0.4.2}/vlm4ocr/assets/default_prompt_templates/ocr_HTML_system_prompt.txt
RENAMED
|
File without changes
|
{vlm4ocr-0.4.1 → vlm4ocr-0.4.2}/vlm4ocr/assets/default_prompt_templates/ocr_HTML_user_prompt.txt
RENAMED
|
File without changes
|
{vlm4ocr-0.4.1 → vlm4ocr-0.4.2}/vlm4ocr/assets/default_prompt_templates/ocr_JSON_system_prompt.txt
RENAMED
|
File without changes
|
|
File without changes
|
{vlm4ocr-0.4.1 → vlm4ocr-0.4.2}/vlm4ocr/assets/default_prompt_templates/ocr_markdown_user_prompt.txt
RENAMED
|
File without changes
|
{vlm4ocr-0.4.1 → vlm4ocr-0.4.2}/vlm4ocr/assets/default_prompt_templates/ocr_text_system_prompt.txt
RENAMED
|
File without changes
|
{vlm4ocr-0.4.1 → vlm4ocr-0.4.2}/vlm4ocr/assets/default_prompt_templates/ocr_text_user_prompt.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|