sparrow-parse 0.5.2__tar.gz → 0.5.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sparrow-parse-0.5.2 → sparrow-parse-0.5.4}/PKG-INFO +3 -3
- {sparrow-parse-0.5.2 → sparrow-parse-0.5.4}/README.md +2 -2
- {sparrow-parse-0.5.2 → sparrow-parse-0.5.4}/setup.py +1 -1
- sparrow-parse-0.5.4/sparrow_parse/__init__.py +1 -0
- {sparrow-parse-0.5.2 → sparrow-parse-0.5.4}/sparrow_parse/extractors/vllm_extractor.py +3 -3
- {sparrow-parse-0.5.2 → sparrow-parse-0.5.4}/sparrow_parse/text_extraction.py +1 -1
- {sparrow-parse-0.5.2 → sparrow-parse-0.5.4}/sparrow_parse.egg-info/PKG-INFO +3 -3
- {sparrow-parse-0.5.2 → sparrow-parse-0.5.4}/sparrow_parse.egg-info/requires.txt +3 -2
- sparrow-parse-0.5.2/sparrow_parse/__init__.py +0 -1
- {sparrow-parse-0.5.2 → sparrow-parse-0.5.4}/setup.cfg +0 -0
- {sparrow-parse-0.5.2 → sparrow-parse-0.5.4}/sparrow_parse/__main__.py +0 -0
- {sparrow-parse-0.5.2 → sparrow-parse-0.5.4}/sparrow_parse/extractors/__init__.py +0 -0
- {sparrow-parse-0.5.2 → sparrow-parse-0.5.4}/sparrow_parse/helpers/__init__.py +0 -0
- {sparrow-parse-0.5.2 → sparrow-parse-0.5.4}/sparrow_parse/helpers/image_optimizer.py +0 -0
- {sparrow-parse-0.5.2 → sparrow-parse-0.5.4}/sparrow_parse/helpers/pdf_optimizer.py +0 -0
- {sparrow-parse-0.5.2 → sparrow-parse-0.5.4}/sparrow_parse/processors/__init__.py +0 -0
- {sparrow-parse-0.5.2 → sparrow-parse-0.5.4}/sparrow_parse/processors/table_structure_processor.py +0 -0
- {sparrow-parse-0.5.2 → sparrow-parse-0.5.4}/sparrow_parse/vllm/__init__.py +0 -0
- {sparrow-parse-0.5.2 → sparrow-parse-0.5.4}/sparrow_parse/vllm/huggingface_inference.py +0 -0
- {sparrow-parse-0.5.2 → sparrow-parse-0.5.4}/sparrow_parse/vllm/inference_base.py +0 -0
- {sparrow-parse-0.5.2 → sparrow-parse-0.5.4}/sparrow_parse/vllm/inference_factory.py +0 -0
- {sparrow-parse-0.5.2 → sparrow-parse-0.5.4}/sparrow_parse/vllm/local_gpu_inference.py +0 -0
- {sparrow-parse-0.5.2 → sparrow-parse-0.5.4}/sparrow_parse/vllm/mlx_inference.py +0 -0
- {sparrow-parse-0.5.2 → sparrow-parse-0.5.4}/sparrow_parse.egg-info/SOURCES.txt +0 -0
- {sparrow-parse-0.5.2 → sparrow-parse-0.5.4}/sparrow_parse.egg-info/dependency_links.txt +0 -0
- {sparrow-parse-0.5.2 → sparrow-parse-0.5.4}/sparrow_parse.egg-info/entry_points.txt +0 -0
- {sparrow-parse-0.5.2 → sparrow-parse-0.5.4}/sparrow_parse.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: sparrow-parse
|
3
|
-
Version: 0.5.2
|
3
|
+
Version: 0.5.4
|
4
4
|
Summary: Sparrow Parse is a Python package (part of Sparrow) for parsing and extracting information from documents.
|
5
5
|
Home-page: https://github.com/katanaml/sparrow/tree/main/sparrow-data/parse
|
6
6
|
Author: Andrej Baranovskij
|
@@ -21,7 +21,7 @@ Description-Content-Type: text/markdown
|
|
21
21
|
|
22
22
|
## Description
|
23
23
|
|
24
|
-
This module implements Sparrow Parse [library](https://pypi.org/project/sparrow-parse/) library with helpful methods for data pre-processing, parsing and extracting information.
|
24
|
+
This module implements Sparrow Parse [library](https://pypi.org/project/sparrow-parse/) library with helpful methods for data pre-processing, parsing and extracting information. Library relies on Visual LLM functionality, Table Transformers and is part of Sparrow. Check main [README](https://github.com/katanaml/sparrow)
|
25
25
|
|
26
26
|
## Install
|
27
27
|
|
@@ -175,6 +175,6 @@ If your organization is seeking to utilize Sparrow under a proprietary license,
|
|
175
175
|
|
176
176
|
## License
|
177
177
|
|
178
|
-
Licensed under the GPL 3.0. Copyright 2020-
|
178
|
+
Licensed under the GPL 3.0. Copyright 2020-2025 Katana ML, Andrej Baranovskij. [Copy of the license](https://github.com/katanaml/sparrow/blob/main/LICENSE).
|
179
179
|
|
180
180
|
|
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
## Description
|
4
4
|
|
5
|
-
This module implements Sparrow Parse [library](https://pypi.org/project/sparrow-parse/) library with helpful methods for data pre-processing, parsing and extracting information.
|
5
|
+
This module implements Sparrow Parse [library](https://pypi.org/project/sparrow-parse/) library with helpful methods for data pre-processing, parsing and extracting information. Library relies on Visual LLM functionality, Table Transformers and is part of Sparrow. Check main [README](https://github.com/katanaml/sparrow)
|
6
6
|
|
7
7
|
## Install
|
8
8
|
|
@@ -156,4 +156,4 @@ If your organization is seeking to utilize Sparrow under a proprietary license,
|
|
156
156
|
|
157
157
|
## License
|
158
158
|
|
159
|
-
Licensed under the GPL 3.0. Copyright 2020-
|
159
|
+
Licensed under the GPL 3.0. Copyright 2020-2025 Katana ML, Andrej Baranovskij. [Copy of the license](https://github.com/katanaml/sparrow/blob/main/LICENSE).
|
@@ -8,7 +8,7 @@ with open("requirements.txt", "r", encoding="utf-8") as fh:
|
|
8
8
|
|
9
9
|
setup(
|
10
10
|
name="sparrow-parse",
|
11
|
-
version="0.5.2",
|
11
|
+
version="0.5.4",
|
12
12
|
author="Andrej Baranovskij",
|
13
13
|
author_email="andrejus.baranovskis@gmail.com",
|
14
14
|
description="Sparrow Parse is a Python package (part of Sparrow) for parsing and extracting information from documents.",
|
@@ -0,0 +1 @@
|
|
1
|
+
__version__ = '0.5.4'
|
@@ -198,7 +198,7 @@ if __name__ == "__main__":
|
|
198
198
|
# # export HF_TOKEN="hf_"
|
199
199
|
# config = {
|
200
200
|
# "method": "mlx", # Could be 'huggingface', 'mlx' or 'local_gpu'
|
201
|
-
# "model_name": "mlx-community/Qwen2-VL-7B-Instruct-8bit",
|
201
|
+
# "model_name": "mlx-community/Qwen2.5-VL-7B-Instruct-8bit",
|
202
202
|
# # "hf_space": "katanaml/sparrow-qwen2-vl-7b",
|
203
203
|
# # "hf_token": os.getenv('HF_TOKEN'),
|
204
204
|
# # Additional fields for local GPU inference
|
@@ -211,7 +211,7 @@ if __name__ == "__main__":
|
|
211
211
|
#
|
212
212
|
# input_data = [
|
213
213
|
# {
|
214
|
-
# "file_path": "/Users/andrejb/Work/katana-git/sparrow/sparrow-ml/llm/data/
|
214
|
+
# "file_path": "/Users/andrejb/Work/katana-git/sparrow/sparrow-ml/llm/data/bonds_table.png",
|
215
215
|
# "text_input": "retrieve document data. return response in JSON format"
|
216
216
|
# }
|
217
217
|
# ]
|
@@ -219,7 +219,7 @@ if __name__ == "__main__":
|
|
219
219
|
# # Now you can run inference without knowing which implementation is used
|
220
220
|
# results_array, num_pages = extractor.run_inference(model_inference_instance, input_data, tables_only=False,
|
221
221
|
# generic_query=False,
|
222
|
-
# crop_size=
|
222
|
+
# crop_size=0,
|
223
223
|
# debug_dir="/Users/andrejb/Work/katana-git/sparrow/sparrow-ml/llm/data/",
|
224
224
|
# debug=True,
|
225
225
|
# mode=None)
|
@@ -4,7 +4,7 @@ from mlx_vlm.utils import load_image
|
|
4
4
|
# For test purposes, we will use a sample image
|
5
5
|
|
6
6
|
# Load model and processor
|
7
|
-
qwen_vl_model, qwen_vl_processor = load("mlx-community/Qwen2-VL-7B-Instruct-8bit")
|
7
|
+
qwen_vl_model, qwen_vl_processor = load("mlx-community/Qwen2.5-VL-7B-Instruct-8bit")
|
8
8
|
qwen_vl_config = qwen_vl_model.config
|
9
9
|
|
10
10
|
image = load_image("images/graph.png")
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: sparrow-parse
|
3
|
-
Version: 0.5.2
|
3
|
+
Version: 0.5.4
|
4
4
|
Summary: Sparrow Parse is a Python package (part of Sparrow) for parsing and extracting information from documents.
|
5
5
|
Home-page: https://github.com/katanaml/sparrow/tree/main/sparrow-data/parse
|
6
6
|
Author: Andrej Baranovskij
|
@@ -21,7 +21,7 @@ Description-Content-Type: text/markdown
|
|
21
21
|
|
22
22
|
## Description
|
23
23
|
|
24
|
-
This module implements Sparrow Parse [library](https://pypi.org/project/sparrow-parse/) library with helpful methods for data pre-processing, parsing and extracting information.
|
24
|
+
This module implements Sparrow Parse [library](https://pypi.org/project/sparrow-parse/) library with helpful methods for data pre-processing, parsing and extracting information. Library relies on Visual LLM functionality, Table Transformers and is part of Sparrow. Check main [README](https://github.com/katanaml/sparrow)
|
25
25
|
|
26
26
|
## Install
|
27
27
|
|
@@ -175,6 +175,6 @@ If your organization is seeking to utilize Sparrow under a proprietary license,
|
|
175
175
|
|
176
176
|
## License
|
177
177
|
|
178
|
-
Licensed under the GPL 3.0. Copyright 2020-
|
178
|
+
Licensed under the GPL 3.0. Copyright 2020-2025 Katana ML, Andrej Baranovskij. [Copy of the license](https://github.com/katanaml/sparrow/blob/main/LICENSE).
|
179
179
|
|
180
180
|
|
@@ -1,5 +1,6 @@
|
|
1
1
|
rich
|
2
|
-
|
2
|
+
torchvision==0.21.0
|
3
|
+
torch==2.6.0
|
3
4
|
sentence-transformers==3.3.1
|
4
5
|
numpy==2.1.3
|
5
6
|
pypdf==4.3.0
|
@@ -8,4 +9,4 @@ pdf2image
|
|
8
9
|
|
9
10
|
[:sys_platform == "darwin" and platform_machine == "arm64"]
|
10
11
|
mlx>=0.22.0
|
11
|
-
mlx-vlm==0.1.
|
12
|
+
mlx-vlm==0.1.12
|
@@ -1 +0,0 @@
|
|
1
|
-
__version__ = '0.5.2'
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{sparrow-parse-0.5.2 → sparrow-parse-0.5.4}/sparrow_parse/processors/table_structure_processor.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|