sparrow-parse 1.0.6__tar.gz → 1.0.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. {sparrow-parse-1.0.6 → sparrow-parse-1.0.7}/PKG-INFO +1 -1
  2. {sparrow-parse-1.0.6 → sparrow-parse-1.0.7}/setup.py +1 -1
  3. sparrow-parse-1.0.7/sparrow_parse/__init__.py +1 -0
  4. {sparrow-parse-1.0.6 → sparrow-parse-1.0.7}/sparrow_parse/vllm/mlx_inference.py +14 -19
  5. {sparrow-parse-1.0.6 → sparrow-parse-1.0.7}/sparrow_parse.egg-info/PKG-INFO +1 -1
  6. sparrow-parse-1.0.6/sparrow_parse/__init__.py +0 -1
  7. {sparrow-parse-1.0.6 → sparrow-parse-1.0.7}/README.md +0 -0
  8. {sparrow-parse-1.0.6 → sparrow-parse-1.0.7}/setup.cfg +0 -0
  9. {sparrow-parse-1.0.6 → sparrow-parse-1.0.7}/sparrow_parse/__main__.py +0 -0
  10. {sparrow-parse-1.0.6 → sparrow-parse-1.0.7}/sparrow_parse/extractors/__init__.py +0 -0
  11. {sparrow-parse-1.0.6 → sparrow-parse-1.0.7}/sparrow_parse/extractors/vllm_extractor.py +0 -0
  12. {sparrow-parse-1.0.6 → sparrow-parse-1.0.7}/sparrow_parse/helpers/__init__.py +0 -0
  13. {sparrow-parse-1.0.6 → sparrow-parse-1.0.7}/sparrow_parse/helpers/image_optimizer.py +0 -0
  14. {sparrow-parse-1.0.6 → sparrow-parse-1.0.7}/sparrow_parse/helpers/pdf_optimizer.py +0 -0
  15. {sparrow-parse-1.0.6 → sparrow-parse-1.0.7}/sparrow_parse/processors/__init__.py +0 -0
  16. {sparrow-parse-1.0.6 → sparrow-parse-1.0.7}/sparrow_parse/processors/table_structure_processor.py +0 -0
  17. {sparrow-parse-1.0.6 → sparrow-parse-1.0.7}/sparrow_parse/text_extraction.py +0 -0
  18. {sparrow-parse-1.0.6 → sparrow-parse-1.0.7}/sparrow_parse/vllm/__init__.py +0 -0
  19. {sparrow-parse-1.0.6 → sparrow-parse-1.0.7}/sparrow_parse/vllm/huggingface_inference.py +0 -0
  20. {sparrow-parse-1.0.6 → sparrow-parse-1.0.7}/sparrow_parse/vllm/inference_base.py +0 -0
  21. {sparrow-parse-1.0.6 → sparrow-parse-1.0.7}/sparrow_parse/vllm/inference_factory.py +0 -0
  22. {sparrow-parse-1.0.6 → sparrow-parse-1.0.7}/sparrow_parse/vllm/local_gpu_inference.py +0 -0
  23. {sparrow-parse-1.0.6 → sparrow-parse-1.0.7}/sparrow_parse.egg-info/SOURCES.txt +0 -0
  24. {sparrow-parse-1.0.6 → sparrow-parse-1.0.7}/sparrow_parse.egg-info/dependency_links.txt +0 -0
  25. {sparrow-parse-1.0.6 → sparrow-parse-1.0.7}/sparrow_parse.egg-info/entry_points.txt +0 -0
  26. {sparrow-parse-1.0.6 → sparrow-parse-1.0.7}/sparrow_parse.egg-info/requires.txt +0 -0
  27. {sparrow-parse-1.0.6 → sparrow-parse-1.0.7}/sparrow_parse.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sparrow-parse
3
- Version: 1.0.6
3
+ Version: 1.0.7
4
4
  Summary: Sparrow Parse is a Python package (part of Sparrow) for parsing and extracting information from documents.
5
5
  Home-page: https://github.com/katanaml/sparrow/tree/main/sparrow-data/parse
6
6
  Author: Andrej Baranovskij
@@ -8,7 +8,7 @@ with open("requirements.txt", "r", encoding="utf-8") as fh:
8
8
 
9
9
  setup(
10
10
  name="sparrow-parse",
11
- version="1.0.6",
11
+ version="1.0.7",
12
12
  author="Andrej Baranovskij",
13
13
  author_email="andrejus.baranovskis@gmail.com",
14
14
  description="Sparrow Parse is a Python package (part of Sparrow) for parsing and extracting information from documents.",
@@ -0,0 +1 @@
1
+ __version__ = '1.0.7'
@@ -188,11 +188,11 @@ class MLXInference(ModelInference):
188
188
 
189
189
  return results
190
190
 
191
-
192
191
  def transform_query_with_bbox(self, text_input):
193
192
  """
194
193
  Transform JSON schema in text_input to include value, bbox, and confidence.
195
- Works with both array and object JSON structures.
194
+ Works with formats like: "retrieve field1, field2. return response in JSON format,
195
+ by strictly following this JSON schema: [{...}]."
196
196
 
197
197
  Args:
198
198
  text_input (str): The input text containing a JSON schema
@@ -200,38 +200,33 @@ class MLXInference(ModelInference):
200
200
  Returns:
201
201
  str: Text with transformed JSON including value, bbox, and confidence
202
202
  """
203
- # Split text into parts - find the JSON portion between "retrieve" and "return response"
204
- retrieve_pattern = r'retrieve\s+'
205
- return_pattern = r'\.\s+return\s+response'
206
203
 
207
- retrieve_match = re.search(retrieve_pattern, text_input)
208
- return_match = re.search(return_pattern, text_input)
204
+ schema_pattern = r'JSON schema:\s*(\[.*?\]|\{.*?\})'
205
+ schema_match = re.search(schema_pattern, text_input, re.DOTALL)
209
206
 
210
- if not retrieve_match or not return_match:
207
+ if not schema_match:
211
208
  return text_input # Return original if pattern not found
212
209
 
213
- json_start = retrieve_match.end()
214
- json_end = return_match.start()
215
-
216
- prefix = text_input[:json_start]
217
- json_str = text_input[json_start:json_end].strip()
218
- suffix = text_input[json_end:]
210
+ # Extract the schema part and its position
211
+ schema_str = schema_match.group(1).strip()
212
+ schema_start = schema_match.start(1)
213
+ schema_end = schema_match.end(1)
219
214
 
220
215
  # Parse and transform the JSON
221
216
  try:
222
217
  # Handle single quotes if needed
223
- json_str = json_str.replace("'", '"')
218
+ schema_str = schema_str.replace("'", '"')
224
219
 
225
- json_obj = json.loads(json_str)
220
+ json_obj = json.loads(schema_str)
226
221
  transformed_json = self.transform_query_structure(json_obj)
227
222
  transformed_json_str = json.dumps(transformed_json)
228
223
 
229
- # Rebuild the text
230
- result = prefix + transformed_json_str + suffix
224
+ # Rebuild the text by replacing just the schema portion
225
+ result = text_input[:schema_start] + transformed_json_str + text_input[schema_end:]
231
226
 
232
227
  return result
233
228
  except json.JSONDecodeError as e:
234
- print(f"Error parsing JSON: {e}")
229
+ print(f"Error parsing JSON schema: {e}")
235
230
  return text_input # Return original if parsing fails
236
231
 
237
232
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sparrow-parse
3
- Version: 1.0.6
3
+ Version: 1.0.7
4
4
  Summary: Sparrow Parse is a Python package (part of Sparrow) for parsing and extracting information from documents.
5
5
  Home-page: https://github.com/katanaml/sparrow/tree/main/sparrow-data/parse
6
6
  Author: Andrej Baranovskij
@@ -1 +0,0 @@
1
- __version__ = '1.0.6'
File without changes
File without changes