sparrow-parse 0.5.2__py3-none-any.whl → 0.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sparrow_parse/__init__.py CHANGED
@@ -1 +1 @@
1
- __version__ = '0.5.2'
1
+ __version__ = '0.5.3'
@@ -198,7 +198,7 @@ if __name__ == "__main__":
198
198
  # # export HF_TOKEN="hf_"
199
199
  # config = {
200
200
  # "method": "mlx", # Could be 'huggingface', 'mlx' or 'local_gpu'
201
- # "model_name": "mlx-community/Qwen2-VL-7B-Instruct-8bit",
201
+ # "model_name": "mlx-community/Qwen2.5-VL-7B-Instruct-8bit",
202
202
  # # "hf_space": "katanaml/sparrow-qwen2-vl-7b",
203
203
  # # "hf_token": os.getenv('HF_TOKEN'),
204
204
  # # Additional fields for local GPU inference
@@ -211,7 +211,7 @@ if __name__ == "__main__":
211
211
  #
212
212
  # input_data = [
213
213
  # {
214
- # "file_path": "/Users/andrejb/Work/katana-git/sparrow/sparrow-ml/llm/data/invoice_1.jpg",
214
+ # "file_path": "/Users/andrejb/Work/katana-git/sparrow/sparrow-ml/llm/data/bonds_table.png",
215
215
  # "text_input": "retrieve document data. return response in JSON format"
216
216
  # }
217
217
  # ]
@@ -219,7 +219,7 @@ if __name__ == "__main__":
219
219
  # # Now you can run inference without knowing which implementation is used
220
220
  # results_array, num_pages = extractor.run_inference(model_inference_instance, input_data, tables_only=False,
221
221
  # generic_query=False,
222
- # crop_size=80,
222
+ # crop_size=0,
223
223
  # debug_dir="/Users/andrejb/Work/katana-git/sparrow/sparrow-ml/llm/data/",
224
224
  # debug=True,
225
225
  # mode=None)
@@ -4,7 +4,7 @@ from mlx_vlm.utils import load_image
4
4
  # For test purposes, we will use a sample image
5
5
 
6
6
  # Load model and processor
7
- qwen_vl_model, qwen_vl_processor = load("mlx-community/Qwen2-VL-7B-Instruct-8bit")
7
+ qwen_vl_model, qwen_vl_processor = load("mlx-community/Qwen2.5-VL-7B-Instruct-8bit")
8
8
  qwen_vl_config = qwen_vl_model.config
9
9
 
10
10
  image = load_image("images/graph.png")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sparrow-parse
3
- Version: 0.5.2
3
+ Version: 0.5.3
4
4
  Summary: Sparrow Parse is a Python package (part of Sparrow) for parsing and extracting information from documents.
5
5
  Home-page: https://github.com/katanaml/sparrow/tree/main/sparrow-data/parse
6
6
  Author: Andrej Baranovskij
@@ -17,20 +17,22 @@ Classifier: Programming Language :: Python :: 3.10
17
17
  Requires-Python: >=3.10
18
18
  Description-Content-Type: text/markdown
19
19
  Requires-Dist: rich
20
- Requires-Dist: transformers==4.47.1
20
+ Requires-Dist: transformers==4.48.2
21
+ Requires-Dist: torchvision==0.21.0
22
+ Requires-Dist: torch==2.6.0
21
23
  Requires-Dist: sentence-transformers==3.3.1
22
24
  Requires-Dist: numpy==2.1.3
23
25
  Requires-Dist: pypdf==4.3.0
24
26
  Requires-Dist: gradio-client
25
27
  Requires-Dist: pdf2image
26
28
  Requires-Dist: mlx>=0.22.0; sys_platform == "darwin" and platform_machine == "arm64"
27
- Requires-Dist: mlx-vlm==0.1.11; sys_platform == "darwin" and platform_machine == "arm64"
29
+ Requires-Dist: mlx-vlm==0.1.12; sys_platform == "darwin" and platform_machine == "arm64"
28
30
 
29
31
  # Sparrow Parse
30
32
 
31
33
  ## Description
32
34
 
33
- This module implements Sparrow Parse [library](https://pypi.org/project/sparrow-parse/) library with helpful methods for data pre-processing, parsing and extracting information. This library relies on Visual LLM functionality, Table Transformers and is part of Sparrow. Check main [README](https://github.com/katanaml/sparrow)
35
+ This module implements the Sparrow Parse [library](https://pypi.org/project/sparrow-parse/), which provides helpful methods for data pre-processing, parsing and extracting information. The library relies on Visual LLM functionality and Table Transformers, and is part of Sparrow. See the main [README](https://github.com/katanaml/sparrow) for details.
34
36
 
35
37
  ## Install
36
38
 
@@ -184,6 +186,6 @@ If your organization is seeking to utilize Sparrow under a proprietary license,
184
186
 
185
187
  ## License
186
188
 
187
- Licensed under the GPL 3.0. Copyright 2020-2024 Katana ML, Andrej Baranovskij. [Copy of the license](https://github.com/katanaml/sparrow/blob/main/LICENSE).
189
+ Licensed under the GPL 3.0. Copyright 2020-2025 Katana ML, Andrej Baranovskij. [Copy of the license](https://github.com/katanaml/sparrow/blob/main/LICENSE).
188
190
 
189
191
 
@@ -1,8 +1,8 @@
1
- sparrow_parse/__init__.py,sha256=LADBavpB8x-lrtqob-7SCQZXFPfLZCu34A0lBsgTGGI,21
1
+ sparrow_parse/__init__.py,sha256=IIIADjPr2y0W_XfgU1cH-K2HswMouXAPagGe6_twaIk,21
2
2
  sparrow_parse/__main__.py,sha256=Xs1bpJV0n08KWOoQE34FBYn6EBXZA9HIYJKrE4ZdG78,153
3
- sparrow_parse/text_extraction.py,sha256=TvDjC_R0Yjjj_8zOYSpu9gXtx8C7o36CIKsuCXfu44E,832
3
+ sparrow_parse/text_extraction.py,sha256=JtUU7swvV12xBai5S9ICxWWWrUlkpZTZqvUnbz1h5Mk,834
4
4
  sparrow_parse/extractors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
- sparrow_parse/extractors/vllm_extractor.py,sha256=ecuheSYSDz7PNq5gMS4deStJ3wraJTR7JRmOjn31wpM,9560
5
+ sparrow_parse/extractors/vllm_extractor.py,sha256=Cf2sVgxDExj2ud4G6z9JnirVclTgPIEe9YSoCfTkW4k,9563
6
6
  sparrow_parse/helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
7
  sparrow_parse/helpers/image_optimizer.py,sha256=gUAJuNzRAB5ipgfhxTNss4MHbCPPkV5y-BSyrEHcJ0Y,2164
8
8
  sparrow_parse/helpers/pdf_optimizer.py,sha256=A2BVkb2JMqTJUz6bdfVzMmFSYaWn1QMav7UadMi0XJg,3423
@@ -14,8 +14,8 @@ sparrow_parse/vllm/inference_base.py,sha256=4mwGoAY63MB4cHZpV0czTkJWEzimmiTzqqzK
14
14
  sparrow_parse/vllm/inference_factory.py,sha256=FTM65O-dW2WZchHOrNN7_Q3-FlVoAc65iSptuuUuClM,1166
15
15
  sparrow_parse/vllm/local_gpu_inference.py,sha256=aHoJTejb5xrXjWDIGu5RBQWEyRCOBCB04sMvO2Wyvg8,628
16
16
  sparrow_parse/vllm/mlx_inference.py,sha256=MUuW56f-aKnVmeMAATxKLxsovEMmp1qlgtlmW8J2C7M,4899
17
- sparrow_parse-0.5.2.dist-info/METADATA,sha256=9XEpFv0kxRRiaSm2X-BHGq2OWX11nTkHQFkNRrvIAgs,7181
18
- sparrow_parse-0.5.2.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
19
- sparrow_parse-0.5.2.dist-info/entry_points.txt,sha256=8CrvTVTTcz1YuZ8aRCYNOH15ZOAaYLlcbYX3t28HwJY,54
20
- sparrow_parse-0.5.2.dist-info/top_level.txt,sha256=n6b-WtT91zKLyCPZTP7wvne8v_yvIahcsz-4sX8I0rY,14
21
- sparrow_parse-0.5.2.dist-info/RECORD,,
17
+ sparrow_parse-0.5.3.dist-info/METADATA,sha256=NOwPut-aOo6gdWH44k_Ei3WP3-bvkc-Dl7qyKE3r2FQ,7239
18
+ sparrow_parse-0.5.3.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
19
+ sparrow_parse-0.5.3.dist-info/entry_points.txt,sha256=8CrvTVTTcz1YuZ8aRCYNOH15ZOAaYLlcbYX3t28HwJY,54
20
+ sparrow_parse-0.5.3.dist-info/top_level.txt,sha256=n6b-WtT91zKLyCPZTP7wvne8v_yvIahcsz-4sX8I0rY,14
21
+ sparrow_parse-0.5.3.dist-info/RECORD,,