sparrow-parse 0.3.3__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sparrow_parse/__init__.py CHANGED
@@ -1 +1 @@
1
- __version__ = '0.3.3'
1
+ __version__ = '0.3.4'
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sparrow-parse
3
- Version: 0.3.3
3
+ Version: 0.3.4
4
4
  Summary: Sparrow Parse is a Python package for parsing and extracting information from documents.
5
5
  Home-page: https://github.com/katanaml/sparrow/tree/main/sparrow-data/parse
6
6
  Author: Andrej Baranovskij
@@ -98,6 +98,8 @@ Example:
98
98
 
99
99
  ## Parsing and extraction
100
100
 
101
+ ### HTML extractor
102
+
101
103
  ```
102
104
  from sparrow_parse.extractor.html_extractor import HTMLExtractor
103
105
 
@@ -130,6 +132,36 @@ Example:
130
132
 
131
133
  *debug* - `True`
132
134
 
135
+ ### Sparrow Parse VL (vision-language) extractor
136
+
137
+ ```
138
+ extractor = VLLMExtractor()
139
+
140
+ # export HF_TOKEN="hf_"
141
+ config = {
142
+ "method": "huggingface", # Could be 'huggingface' or 'local_gpu'
143
+ "hf_space": "katanaml/sparrow-qwen2-vl-7b",
144
+ "hf_token": os.getenv('HF_TOKEN'),
145
+ # Additional fields for local GPU inference
146
+ # "device": "cuda", "model_path": "model.pth"
147
+ }
148
+
149
+ # Use the factory to get the correct instance
150
+ factory = InferenceFactory(config)
151
+ model_inference_instance = factory.get_inference_instance()
152
+
153
+ input_data = [
154
+ {
155
+ "image": "/Users/andrejb/Documents/work/epik/bankstatement/bonds_table.png",
156
+ "text_input": "retrieve financial instruments data. return response in JSON format"
157
+ }
158
+ ]
159
+
160
+ # Now you can run inference without knowing which implementation is used
161
+ result = extractor.run_inference(model_inference_instance, input_data, generic_query=False, debug=True)
162
+ print("Inference Result:", result)
163
+ ```
164
+
133
165
  ## PDF optimization
134
166
 
135
167
  ```
@@ -1,4 +1,4 @@
1
- sparrow_parse/__init__.py,sha256=JDRpXqOC0txw4_CqkfpSl89CczeXGgyjX4XSSLChyQg,21
1
+ sparrow_parse/__init__.py,sha256=SH0xuWVUkyLHZJwWBZ8GJoeliTeYFcqA6TWJgrkLv-U,21
2
2
  sparrow_parse/__main__.py,sha256=Xs1bpJV0n08KWOoQE34FBYn6EBXZA9HIYJKrE4ZdG78,153
3
3
  sparrow_parse/temp.py,sha256=gy4_mtNW_KfXn9br_suu6jHx7JKYLKs9pIOBynh_JWY,1134
4
4
  sparrow_parse/extractors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -16,8 +16,8 @@ sparrow_parse/vllm/huggingface_inference.py,sha256=Q2Ju65LDzbO-8RWW7cXzrR-pbZ1zK
16
16
  sparrow_parse/vllm/inference_base.py,sha256=W0N2khehGdF1XHzZACG3I1UZaydHMk6BZgWNvaJD4Ck,197
17
17
  sparrow_parse/vllm/inference_factory.py,sha256=r04e95uPWG5l8Q23yeDqKmvFxLyF991aA2m0hfBTNn8,993
18
18
  sparrow_parse/vllm/local_gpu_inference.py,sha256=I_uWYiFAQhRrykOKbVz69NzftDxuemDKtAye4kWhtnU,617
19
- sparrow_parse-0.3.3.dist-info/METADATA,sha256=qFl4MsoV6lF_OqgtcfBqDRpTHX8MUJh0jeGgNr77o8w,6482
20
- sparrow_parse-0.3.3.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
21
- sparrow_parse-0.3.3.dist-info/entry_points.txt,sha256=8CrvTVTTcz1YuZ8aRCYNOH15ZOAaYLlcbYX3t28HwJY,54
22
- sparrow_parse-0.3.3.dist-info/top_level.txt,sha256=n6b-WtT91zKLyCPZTP7wvne8v_yvIahcsz-4sX8I0rY,14
23
- sparrow_parse-0.3.3.dist-info/RECORD,,
19
+ sparrow_parse-0.3.4.dist-info/METADATA,sha256=L7qXKxktk42gUQlBlZAdzHQqfORoC6vBwRCd-VSwv3Y,7444
20
+ sparrow_parse-0.3.4.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
21
+ sparrow_parse-0.3.4.dist-info/entry_points.txt,sha256=8CrvTVTTcz1YuZ8aRCYNOH15ZOAaYLlcbYX3t28HwJY,54
22
+ sparrow_parse-0.3.4.dist-info/top_level.txt,sha256=n6b-WtT91zKLyCPZTP7wvne8v_yvIahcsz-4sX8I0rY,14
23
+ sparrow_parse-0.3.4.dist-info/RECORD,,