sparrow-parse 0.5.2__py3-none-any.whl → 0.5.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sparrow_parse/__init__.py +1 -1
- sparrow_parse/extractors/vllm_extractor.py +3 -3
- sparrow_parse/text_extraction.py +1 -1
- {sparrow_parse-0.5.2.dist-info → sparrow_parse-0.5.3.dist-info}/METADATA +7 -5
- {sparrow_parse-0.5.2.dist-info → sparrow_parse-0.5.3.dist-info}/RECORD +8 -8
- {sparrow_parse-0.5.2.dist-info → sparrow_parse-0.5.3.dist-info}/WHEEL +0 -0
- {sparrow_parse-0.5.2.dist-info → sparrow_parse-0.5.3.dist-info}/entry_points.txt +0 -0
- {sparrow_parse-0.5.2.dist-info → sparrow_parse-0.5.3.dist-info}/top_level.txt +0 -0
sparrow_parse/__init__.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = '0.5.2'
|
1
|
+
__version__ = '0.5.3'
|
sparrow_parse/extractors/vllm_extractor.py
CHANGED
@@ -198,7 +198,7 @@ if __name__ == "__main__":
|
|
198
198
|
# # export HF_TOKEN="hf_"
|
199
199
|
# config = {
|
200
200
|
# "method": "mlx", # Could be 'huggingface', 'mlx' or 'local_gpu'
|
201
|
-
# "model_name": "mlx-community/Qwen2-VL-7B-Instruct-8bit",
|
201
|
+
# "model_name": "mlx-community/Qwen2.5-VL-7B-Instruct-8bit",
|
202
202
|
# # "hf_space": "katanaml/sparrow-qwen2-vl-7b",
|
203
203
|
# # "hf_token": os.getenv('HF_TOKEN'),
|
204
204
|
# # Additional fields for local GPU inference
|
@@ -211,7 +211,7 @@ if __name__ == "__main__":
|
|
211
211
|
#
|
212
212
|
# input_data = [
|
213
213
|
# {
|
214
|
-
# "file_path": "/Users/andrejb/Work/katana-git/sparrow/sparrow-ml/llm/data/
|
214
|
+
# "file_path": "/Users/andrejb/Work/katana-git/sparrow/sparrow-ml/llm/data/bonds_table.png",
|
215
215
|
# "text_input": "retrieve document data. return response in JSON format"
|
216
216
|
# }
|
217
217
|
# ]
|
@@ -219,7 +219,7 @@ if __name__ == "__main__":
|
|
219
219
|
# # Now you can run inference without knowing which implementation is used
|
220
220
|
# results_array, num_pages = extractor.run_inference(model_inference_instance, input_data, tables_only=False,
|
221
221
|
# generic_query=False,
|
222
|
-
# crop_size=
|
222
|
+
# crop_size=0,
|
223
223
|
# debug_dir="/Users/andrejb/Work/katana-git/sparrow/sparrow-ml/llm/data/",
|
224
224
|
# debug=True,
|
225
225
|
# mode=None)
|
sparrow_parse/text_extraction.py
CHANGED
@@ -4,7 +4,7 @@ from mlx_vlm.utils import load_image
|
|
4
4
|
# For test purposes, we will use a sample image
|
5
5
|
|
6
6
|
# Load model and processor
|
7
|
-
qwen_vl_model, qwen_vl_processor = load("mlx-community/Qwen2-VL-7B-Instruct-8bit")
|
7
|
+
qwen_vl_model, qwen_vl_processor = load("mlx-community/Qwen2.5-VL-7B-Instruct-8bit")
|
8
8
|
qwen_vl_config = qwen_vl_model.config
|
9
9
|
|
10
10
|
image = load_image("images/graph.png")
|
{sparrow_parse-0.5.2.dist-info → sparrow_parse-0.5.3.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: sparrow-parse
|
3
|
-
Version: 0.5.2
|
3
|
+
Version: 0.5.3
|
4
4
|
Summary: Sparrow Parse is a Python package (part of Sparrow) for parsing and extracting information from documents.
|
5
5
|
Home-page: https://github.com/katanaml/sparrow/tree/main/sparrow-data/parse
|
6
6
|
Author: Andrej Baranovskij
|
@@ -17,20 +17,22 @@ Classifier: Programming Language :: Python :: 3.10
|
|
17
17
|
Requires-Python: >=3.10
|
18
18
|
Description-Content-Type: text/markdown
|
19
19
|
Requires-Dist: rich
|
20
|
-
Requires-Dist: transformers==4.
|
20
|
+
Requires-Dist: transformers==4.48.2
|
21
|
+
Requires-Dist: torchvision==0.21.0
|
22
|
+
Requires-Dist: torch==2.6.0
|
21
23
|
Requires-Dist: sentence-transformers==3.3.1
|
22
24
|
Requires-Dist: numpy==2.1.3
|
23
25
|
Requires-Dist: pypdf==4.3.0
|
24
26
|
Requires-Dist: gradio-client
|
25
27
|
Requires-Dist: pdf2image
|
26
28
|
Requires-Dist: mlx>=0.22.0; sys_platform == "darwin" and platform_machine == "arm64"
|
27
|
-
Requires-Dist: mlx-vlm==0.1.
|
29
|
+
Requires-Dist: mlx-vlm==0.1.12; sys_platform == "darwin" and platform_machine == "arm64"
|
28
30
|
|
29
31
|
# Sparrow Parse
|
30
32
|
|
31
33
|
## Description
|
32
34
|
|
33
|
-
This module implements Sparrow Parse [library](https://pypi.org/project/sparrow-parse/) library with helpful methods for data pre-processing, parsing and extracting information.
|
35
|
+
This module implements Sparrow Parse [library](https://pypi.org/project/sparrow-parse/) library with helpful methods for data pre-processing, parsing and extracting information. Library relies on Visual LLM functionality, Table Transformers and is part of Sparrow. Check main [README](https://github.com/katanaml/sparrow)
|
34
36
|
|
35
37
|
## Install
|
36
38
|
|
@@ -184,6 +186,6 @@ If your organization is seeking to utilize Sparrow under a proprietary license,
|
|
184
186
|
|
185
187
|
## License
|
186
188
|
|
187
|
-
Licensed under the GPL 3.0. Copyright 2020-
|
189
|
+
Licensed under the GPL 3.0. Copyright 2020-2025 Katana ML, Andrej Baranovskij. [Copy of the license](https://github.com/katanaml/sparrow/blob/main/LICENSE).
|
188
190
|
|
189
191
|
|
{sparrow_parse-0.5.2.dist-info → sparrow_parse-0.5.3.dist-info}/RECORD
CHANGED
@@ -1,8 +1,8 @@
|
|
1
|
-
sparrow_parse/__init__.py,sha256=
|
1
|
+
sparrow_parse/__init__.py,sha256=IIIADjPr2y0W_XfgU1cH-K2HswMouXAPagGe6_twaIk,21
|
2
2
|
sparrow_parse/__main__.py,sha256=Xs1bpJV0n08KWOoQE34FBYn6EBXZA9HIYJKrE4ZdG78,153
|
3
|
-
sparrow_parse/text_extraction.py,sha256=
|
3
|
+
sparrow_parse/text_extraction.py,sha256=JtUU7swvV12xBai5S9ICxWWWrUlkpZTZqvUnbz1h5Mk,834
|
4
4
|
sparrow_parse/extractors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
|
-
sparrow_parse/extractors/vllm_extractor.py,sha256=
|
5
|
+
sparrow_parse/extractors/vllm_extractor.py,sha256=Cf2sVgxDExj2ud4G6z9JnirVclTgPIEe9YSoCfTkW4k,9563
|
6
6
|
sparrow_parse/helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
7
7
|
sparrow_parse/helpers/image_optimizer.py,sha256=gUAJuNzRAB5ipgfhxTNss4MHbCPPkV5y-BSyrEHcJ0Y,2164
|
8
8
|
sparrow_parse/helpers/pdf_optimizer.py,sha256=A2BVkb2JMqTJUz6bdfVzMmFSYaWn1QMav7UadMi0XJg,3423
|
@@ -14,8 +14,8 @@ sparrow_parse/vllm/inference_base.py,sha256=4mwGoAY63MB4cHZpV0czTkJWEzimmiTzqqzK
|
|
14
14
|
sparrow_parse/vllm/inference_factory.py,sha256=FTM65O-dW2WZchHOrNN7_Q3-FlVoAc65iSptuuUuClM,1166
|
15
15
|
sparrow_parse/vllm/local_gpu_inference.py,sha256=aHoJTejb5xrXjWDIGu5RBQWEyRCOBCB04sMvO2Wyvg8,628
|
16
16
|
sparrow_parse/vllm/mlx_inference.py,sha256=MUuW56f-aKnVmeMAATxKLxsovEMmp1qlgtlmW8J2C7M,4899
|
17
|
-
sparrow_parse-0.5.
|
18
|
-
sparrow_parse-0.5.
|
19
|
-
sparrow_parse-0.5.
|
20
|
-
sparrow_parse-0.5.
|
21
|
-
sparrow_parse-0.5.
|
17
|
+
sparrow_parse-0.5.3.dist-info/METADATA,sha256=NOwPut-aOo6gdWH44k_Ei3WP3-bvkc-Dl7qyKE3r2FQ,7239
|
18
|
+
sparrow_parse-0.5.3.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
|
19
|
+
sparrow_parse-0.5.3.dist-info/entry_points.txt,sha256=8CrvTVTTcz1YuZ8aRCYNOH15ZOAaYLlcbYX3t28HwJY,54
|
20
|
+
sparrow_parse-0.5.3.dist-info/top_level.txt,sha256=n6b-WtT91zKLyCPZTP7wvne8v_yvIahcsz-4sX8I0rY,14
|
21
|
+
sparrow_parse-0.5.3.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|