sparrow-parse 0.3.3__tar.gz → 0.3.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. {sparrow-parse-0.3.3 → sparrow-parse-0.3.4}/PKG-INFO +33 -1
  2. {sparrow-parse-0.3.3 → sparrow-parse-0.3.4}/README.md +32 -0
  3. {sparrow-parse-0.3.3 → sparrow-parse-0.3.4}/setup.py +1 -1
  4. sparrow-parse-0.3.4/sparrow_parse/__init__.py +1 -0
  5. {sparrow-parse-0.3.3 → sparrow-parse-0.3.4}/sparrow_parse.egg-info/PKG-INFO +33 -1
  6. sparrow-parse-0.3.3/sparrow_parse/__init__.py +0 -1
  7. {sparrow-parse-0.3.3 → sparrow-parse-0.3.4}/setup.cfg +0 -0
  8. {sparrow-parse-0.3.3 → sparrow-parse-0.3.4}/sparrow_parse/__main__.py +0 -0
  9. {sparrow-parse-0.3.3 → sparrow-parse-0.3.4}/sparrow_parse/extractors/__init__.py +0 -0
  10. {sparrow-parse-0.3.3 → sparrow-parse-0.3.4}/sparrow_parse/extractors/html_extractor.py +0 -0
  11. {sparrow-parse-0.3.3 → sparrow-parse-0.3.4}/sparrow_parse/extractors/vllm_extractor.py +0 -0
  12. {sparrow-parse-0.3.3 → sparrow-parse-0.3.4}/sparrow_parse/helpers/__init__.py +0 -0
  13. {sparrow-parse-0.3.3 → sparrow-parse-0.3.4}/sparrow_parse/helpers/html_extractor_helper.py +0 -0
  14. {sparrow-parse-0.3.3 → sparrow-parse-0.3.4}/sparrow_parse/helpers/pdf_optimizer.py +0 -0
  15. {sparrow-parse-0.3.3 → sparrow-parse-0.3.4}/sparrow_parse/processors/__init__.py +0 -0
  16. {sparrow-parse-0.3.3 → sparrow-parse-0.3.4}/sparrow_parse/processors/markdown_processor.py +0 -0
  17. {sparrow-parse-0.3.3 → sparrow-parse-0.3.4}/sparrow_parse/processors/table_structure_processor.py +0 -0
  18. {sparrow-parse-0.3.3 → sparrow-parse-0.3.4}/sparrow_parse/processors/unstructured_processor.py +0 -0
  19. {sparrow-parse-0.3.3 → sparrow-parse-0.3.4}/sparrow_parse/temp.py +0 -0
  20. {sparrow-parse-0.3.3 → sparrow-parse-0.3.4}/sparrow_parse/vllm/__init__.py +0 -0
  21. {sparrow-parse-0.3.3 → sparrow-parse-0.3.4}/sparrow_parse/vllm/huggingface_inference.py +0 -0
  22. {sparrow-parse-0.3.3 → sparrow-parse-0.3.4}/sparrow_parse/vllm/inference_base.py +0 -0
  23. {sparrow-parse-0.3.3 → sparrow-parse-0.3.4}/sparrow_parse/vllm/inference_factory.py +0 -0
  24. {sparrow-parse-0.3.3 → sparrow-parse-0.3.4}/sparrow_parse/vllm/local_gpu_inference.py +0 -0
  25. {sparrow-parse-0.3.3 → sparrow-parse-0.3.4}/sparrow_parse.egg-info/SOURCES.txt +0 -0
  26. {sparrow-parse-0.3.3 → sparrow-parse-0.3.4}/sparrow_parse.egg-info/dependency_links.txt +0 -0
  27. {sparrow-parse-0.3.3 → sparrow-parse-0.3.4}/sparrow_parse.egg-info/entry_points.txt +0 -0
  28. {sparrow-parse-0.3.3 → sparrow-parse-0.3.4}/sparrow_parse.egg-info/requires.txt +0 -0
  29. {sparrow-parse-0.3.3 → sparrow-parse-0.3.4}/sparrow_parse.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sparrow-parse
3
- Version: 0.3.3
3
+ Version: 0.3.4
4
4
  Summary: Sparrow Parse is a Python package for parsing and extracting information from documents.
5
5
  Home-page: https://github.com/katanaml/sparrow/tree/main/sparrow-data/parse
6
6
  Author: Andrej Baranovskij
@@ -87,6 +87,8 @@ Example:
87
87
 
88
88
  ## Parsing and extraction
89
89
 
90
+ ### HTML extractor
91
+
90
92
  ```
91
93
  from sparrow_parse.extractor.html_extractor import HTMLExtractor
92
94
 
@@ -119,6 +121,36 @@ Example:
119
121
 
120
122
  *debug* - `True`
121
123
 
124
+ ### Sparrow Parse VL (vision-language) extractor
125
+
126
+ ```
127
+ extractor = VLLMExtractor()
128
+
129
+ # export HF_TOKEN="hf_"
130
+ config = {
131
+ "method": "huggingface", # Could be 'huggingface' or 'local_gpu'
132
+ "hf_space": "katanaml/sparrow-qwen2-vl-7b",
133
+ "hf_token": os.getenv('HF_TOKEN'),
134
+ # Additional fields for local GPU inference
135
+ # "device": "cuda", "model_path": "model.pth"
136
+ }
137
+
138
+ # Use the factory to get the correct instance
139
+ factory = InferenceFactory(config)
140
+ model_inference_instance = factory.get_inference_instance()
141
+
142
+ input_data = [
143
+ {
144
+ "image": "/Users/andrejb/Documents/work/epik/bankstatement/bonds_table.png",
145
+ "text_input": "retrieve financial instruments data. return response in JSON format"
146
+ }
147
+ ]
148
+
149
+ # Now you can run inference without knowing which implementation is used
150
+ result = extractor.run_inference(model_inference_instance, input_data, generic_query=False, debug=True)
151
+ print("Inference Result:", result)
152
+ ```
153
+
122
154
  ## PDF optimization
123
155
 
124
156
  ```
@@ -68,6 +68,8 @@ Example:
68
68
 
69
69
  ## Parsing and extraction
70
70
 
71
+ ### HTML extractor
72
+
71
73
  ```
72
74
  from sparrow_parse.extractor.html_extractor import HTMLExtractor
73
75
 
@@ -100,6 +102,36 @@ Example:
100
102
 
101
103
  *debug* - `True`
102
104
 
105
+ ### Sparrow Parse VL (vision-language) extractor
106
+
107
+ ```
108
+ extractor = VLLMExtractor()
109
+
110
+ # export HF_TOKEN="hf_"
111
+ config = {
112
+ "method": "huggingface", # Could be 'huggingface' or 'local_gpu'
113
+ "hf_space": "katanaml/sparrow-qwen2-vl-7b",
114
+ "hf_token": os.getenv('HF_TOKEN'),
115
+ # Additional fields for local GPU inference
116
+ # "device": "cuda", "model_path": "model.pth"
117
+ }
118
+
119
+ # Use the factory to get the correct instance
120
+ factory = InferenceFactory(config)
121
+ model_inference_instance = factory.get_inference_instance()
122
+
123
+ input_data = [
124
+ {
125
+ "image": "/Users/andrejb/Documents/work/epik/bankstatement/bonds_table.png",
126
+ "text_input": "retrieve financial instruments data. return response in JSON format"
127
+ }
128
+ ]
129
+
130
+ # Now you can run inference without knowing which implementation is used
131
+ result = extractor.run_inference(model_inference_instance, input_data, generic_query=False, debug=True)
132
+ print("Inference Result:", result)
133
+ ```
134
+
103
135
  ## PDF optimization
104
136
 
105
137
  ```
@@ -8,7 +8,7 @@ with open("requirements.txt", "r", encoding="utf-8") as fh:
8
8
 
9
9
  setup(
10
10
  name="sparrow-parse",
11
- version="0.3.3",
11
+ version="0.3.4",
12
12
  author="Andrej Baranovskij",
13
13
  author_email="andrejus.baranovskis@gmail.com",
14
14
  description="Sparrow Parse is a Python package for parsing and extracting information from documents.",
@@ -0,0 +1 @@
1
+ __version__ = '0.3.4'
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sparrow-parse
3
- Version: 0.3.3
3
+ Version: 0.3.4
4
4
  Summary: Sparrow Parse is a Python package for parsing and extracting information from documents.
5
5
  Home-page: https://github.com/katanaml/sparrow/tree/main/sparrow-data/parse
6
6
  Author: Andrej Baranovskij
@@ -87,6 +87,8 @@ Example:
87
87
 
88
88
  ## Parsing and extraction
89
89
 
90
+ ### HTML extractor
91
+
90
92
  ```
91
93
  from sparrow_parse.extractor.html_extractor import HTMLExtractor
92
94
 
@@ -119,6 +121,36 @@ Example:
119
121
 
120
122
  *debug* - `True`
121
123
 
124
+ ### Sparrow Parse VL (vision-language) extractor
125
+
126
+ ```
127
+ extractor = VLLMExtractor()
128
+
129
+ # export HF_TOKEN="hf_"
130
+ config = {
131
+ "method": "huggingface", # Could be 'huggingface' or 'local_gpu'
132
+ "hf_space": "katanaml/sparrow-qwen2-vl-7b",
133
+ "hf_token": os.getenv('HF_TOKEN'),
134
+ # Additional fields for local GPU inference
135
+ # "device": "cuda", "model_path": "model.pth"
136
+ }
137
+
138
+ # Use the factory to get the correct instance
139
+ factory = InferenceFactory(config)
140
+ model_inference_instance = factory.get_inference_instance()
141
+
142
+ input_data = [
143
+ {
144
+ "image": "/Users/andrejb/Documents/work/epik/bankstatement/bonds_table.png",
145
+ "text_input": "retrieve financial instruments data. return response in JSON format"
146
+ }
147
+ ]
148
+
149
+ # Now you can run inference without knowing which implementation is used
150
+ result = extractor.run_inference(model_inference_instance, input_data, generic_query=False, debug=True)
151
+ print("Inference Result:", result)
152
+ ```
153
+
122
154
  ## PDF optimization
123
155
 
124
156
  ```
@@ -1 +0,0 @@
1
- __version__ = '0.3.3'
File without changes