sparrow-parse 1.0.7__tar.gz → 1.0.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. {sparrow-parse-1.0.7 → sparrow-parse-1.0.9}/PKG-INFO +1 -1
  2. {sparrow-parse-1.0.7 → sparrow-parse-1.0.9}/setup.py +1 -1
  3. sparrow-parse-1.0.9/sparrow_parse/__init__.py +1 -0
  4. {sparrow-parse-1.0.7 → sparrow-parse-1.0.9}/sparrow_parse/vllm/mlx_inference.py +82 -26
  5. {sparrow-parse-1.0.7 → sparrow-parse-1.0.9}/sparrow_parse.egg-info/PKG-INFO +1 -1
  6. sparrow-parse-1.0.7/sparrow_parse/__init__.py +0 -1
  7. {sparrow-parse-1.0.7 → sparrow-parse-1.0.9}/README.md +0 -0
  8. {sparrow-parse-1.0.7 → sparrow-parse-1.0.9}/setup.cfg +0 -0
  9. {sparrow-parse-1.0.7 → sparrow-parse-1.0.9}/sparrow_parse/__main__.py +0 -0
  10. {sparrow-parse-1.0.7 → sparrow-parse-1.0.9}/sparrow_parse/extractors/__init__.py +0 -0
  11. {sparrow-parse-1.0.7 → sparrow-parse-1.0.9}/sparrow_parse/extractors/vllm_extractor.py +0 -0
  12. {sparrow-parse-1.0.7 → sparrow-parse-1.0.9}/sparrow_parse/helpers/__init__.py +0 -0
  13. {sparrow-parse-1.0.7 → sparrow-parse-1.0.9}/sparrow_parse/helpers/image_optimizer.py +0 -0
  14. {sparrow-parse-1.0.7 → sparrow-parse-1.0.9}/sparrow_parse/helpers/pdf_optimizer.py +0 -0
  15. {sparrow-parse-1.0.7 → sparrow-parse-1.0.9}/sparrow_parse/processors/__init__.py +0 -0
  16. {sparrow-parse-1.0.7 → sparrow-parse-1.0.9}/sparrow_parse/processors/table_structure_processor.py +0 -0
  17. {sparrow-parse-1.0.7 → sparrow-parse-1.0.9}/sparrow_parse/text_extraction.py +0 -0
  18. {sparrow-parse-1.0.7 → sparrow-parse-1.0.9}/sparrow_parse/vllm/__init__.py +0 -0
  19. {sparrow-parse-1.0.7 → sparrow-parse-1.0.9}/sparrow_parse/vllm/huggingface_inference.py +0 -0
  20. {sparrow-parse-1.0.7 → sparrow-parse-1.0.9}/sparrow_parse/vllm/inference_base.py +0 -0
  21. {sparrow-parse-1.0.7 → sparrow-parse-1.0.9}/sparrow_parse/vllm/inference_factory.py +0 -0
  22. {sparrow-parse-1.0.7 → sparrow-parse-1.0.9}/sparrow_parse/vllm/local_gpu_inference.py +0 -0
  23. {sparrow-parse-1.0.7 → sparrow-parse-1.0.9}/sparrow_parse.egg-info/SOURCES.txt +0 -0
  24. {sparrow-parse-1.0.7 → sparrow-parse-1.0.9}/sparrow_parse.egg-info/dependency_links.txt +0 -0
  25. {sparrow-parse-1.0.7 → sparrow-parse-1.0.9}/sparrow_parse.egg-info/entry_points.txt +0 -0
  26. {sparrow-parse-1.0.7 → sparrow-parse-1.0.9}/sparrow_parse.egg-info/requires.txt +0 -0
  27. {sparrow-parse-1.0.7 → sparrow-parse-1.0.9}/sparrow_parse.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sparrow-parse
3
- Version: 1.0.7
3
+ Version: 1.0.9
4
4
  Summary: Sparrow Parse is a Python package (part of Sparrow) for parsing and extracting information from documents.
5
5
  Home-page: https://github.com/katanaml/sparrow/tree/main/sparrow-data/parse
6
6
  Author: Andrej Baranovskij
@@ -8,7 +8,7 @@ with open("requirements.txt", "r", encoding="utf-8") as fh:
8
8
 
9
9
  setup(
10
10
  name="sparrow-parse",
11
- version="1.0.7",
11
+ version="1.0.9",
12
12
  author="Andrej Baranovskij",
13
13
  author_email="andrejus.baranovskis@gmail.com",
14
14
  description="Sparrow Parse is a Python package (part of Sparrow) for parsing and extracting information from documents.",
@@ -0,0 +1 @@
1
+ __version__ = '1.0.9'
@@ -79,23 +79,56 @@ class MLXInference(ModelInference):
79
79
  def load_image_data(self, image_filepath, max_width=1250, max_height=1750):
80
80
  """
81
81
  Load and resize image while maintaining its aspect ratio.
82
-
83
- :param image_filepath: Path to the image file.
84
- :param max_width: Maximum allowed width of the image.
85
- :param max_height: Maximum allowed height of the image.
86
- :return: Tuple containing the image object and its new dimensions.
82
+ Returns both original and resized dimensions for coordinate mapping.
87
83
  """
88
- image = load_image(image_filepath) # Assuming load_image is defined elsewhere
89
- width, height = image.size
84
+ image = load_image(image_filepath)
85
+ orig_width, orig_height = image.size
90
86
 
91
87
  # Calculate new dimensions while maintaining the aspect ratio
92
- if width > max_width or height > max_height:
93
- aspect_ratio = width / height
88
+ if orig_width > max_width or orig_height > max_height:
89
+ aspect_ratio = orig_width / orig_height
94
90
  new_width = min(max_width, int(max_height * aspect_ratio))
95
91
  new_height = min(max_height, int(max_width / aspect_ratio))
96
- return image, new_width, new_height
92
+ return image, new_width, new_height, orig_width, orig_height
93
+
94
+ # No resize needed, original dimensions are used
95
+ return image, orig_width, orig_height, orig_width, orig_height
96
+
97
97
 
98
- return image, width, height
98
+ def scale_bbox_coordinates(self, json_response, orig_width, orig_height, resized_width, resized_height):
99
+ """
100
+ Scale bbox coordinates from resized image dimensions back to original image dimensions.
101
+ Only used when apply_annotation=True.
102
+ """
103
+ # Calculate scale factors
104
+ scale_x = orig_width / resized_width
105
+ scale_y = orig_height / resized_height
106
+
107
+ # No scaling needed if dimensions are the same
108
+ if scale_x == 1 and scale_y == 1:
109
+ return json_response
110
+
111
+ # Helper function to recursively process JSON objects
112
+ def process_object(obj):
113
+ if isinstance(obj, dict):
114
+ for key, value in obj.items():
115
+ if key == "bbox" and isinstance(value, list) and len(value) == 4:
116
+ # Scale the bbox coordinates
117
+ obj[key] = [
118
+ value[0] * scale_x, # x_min
119
+ value[1] * scale_y, # y_min
120
+ value[2] * scale_x, # x_max
121
+ value[3] * scale_y # y_max
122
+ ]
123
+ elif isinstance(value, (dict, list)):
124
+ process_object(value)
125
+ elif isinstance(obj, list):
126
+ for i, item in enumerate(obj):
127
+ if isinstance(item, (dict, list)):
128
+ process_object(item)
129
+ return obj
130
+
131
+ return process_object(json_response)
99
132
 
100
133
 
101
134
  def inference(self, input_data, apply_annotation=False, mode=None):
@@ -152,42 +185,65 @@ class MLXInference(ModelInference):
152
185
  print("Inference completed successfully")
153
186
  return response
154
187
 
188
+
155
189
  def _process_images(self, model, processor, config, file_paths, input_data, apply_annotation):
156
190
  """
157
191
  Process images and generate responses for each.
158
-
159
- :param model: The loaded model
160
- :param processor: The loaded processor
161
- :param config: Model configuration
162
- :param file_paths: List of image file paths
163
- :param input_data: Original input data
164
- :param apply_annotation: Flag to apply annotations
165
- :return: List of processed responses
192
+ Always resize images for memory efficiency, but scale coordinates back for annotation cases.
166
193
  """
167
194
  results = []
168
195
  for file_path in file_paths:
169
- image, width, height = self.load_image_data(file_path)
170
-
196
+ # Always get both original and resized dimensions
197
+ image, resized_width, resized_height, orig_width, orig_height = self.load_image_data(file_path)
198
+
171
199
  # Prepare messages based on model type
172
200
  messages = self._prepare_messages(input_data, apply_annotation)
173
-
174
- # Generate and process response
201
+
202
+ # Always use resize_shape for memory efficiency
175
203
  prompt = apply_chat_template(processor, config, messages)
176
204
  response, _ = generate(
177
205
  model,
178
206
  processor,
179
207
  prompt,
180
208
  image,
181
- resize_shape=(width, height),
209
+ resize_shape=(resized_width, resized_height),
182
210
  max_tokens=4000,
183
211
  temperature=0.0,
184
212
  verbose=False
185
213
  )
186
- results.append(self.process_response(response))
214
+
215
+ # Process the raw response
216
+ processed_response = self.process_response(response)
217
+
218
+ # Scale coordinates if apply_annotation is True and resizing was applied
219
+ if apply_annotation:
220
+ try:
221
+ # Parse the JSON response
222
+ json_response = json.loads(processed_response) if isinstance(processed_response,
223
+ str) else processed_response
224
+
225
+ # Apply scaling only if dimensions differ
226
+ if orig_width != resized_width or orig_height != resized_height:
227
+ json_response = self.scale_bbox_coordinates(
228
+ json_response,
229
+ orig_width,
230
+ orig_height,
231
+ resized_width,
232
+ resized_height
233
+ )
234
+
235
+ # Convert back to JSON string
236
+ processed_response = json.dumps(json_response, indent=2)
237
+ except (json.JSONDecodeError, TypeError) as e:
238
+ print(f"Warning: Could not scale coordinates - {e}")
239
+ # Keep the original response if JSON parsing fails
240
+
241
+ results.append(processed_response)
187
242
  print(f"Inference completed successfully for: {file_path}")
188
-
243
+
189
244
  return results
190
245
 
246
+
191
247
  def transform_query_with_bbox(self, text_input):
192
248
  """
193
249
  Transform JSON schema in text_input to include value, bbox, and confidence.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sparrow-parse
3
- Version: 1.0.7
3
+ Version: 1.0.9
4
4
  Summary: Sparrow Parse is a Python package (part of Sparrow) for parsing and extracting information from documents.
5
5
  Home-page: https://github.com/katanaml/sparrow/tree/main/sparrow-data/parse
6
6
  Author: Andrej Baranovskij
@@ -1 +0,0 @@
1
- __version__ = '1.0.7'
File without changes
File without changes