sparrow-parse 1.0.8__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sparrow_parse/__init__.py CHANGED
@@ -1 +1 @@
- __version__ = '1.0.8'
+ __version__ = '1.1.0'
sparrow_parse/vllm/mlx_inference.py CHANGED
@@ -75,26 +75,60 @@ class MLXInference(ModelInference):
              print(f"Failed to parse JSON: {e}")
              return output_text

+
      def load_image_data(self, image_filepath, max_width=1250, max_height=1750):
          """
          Load and resize image while maintaining its aspect ratio.
-
-         :param image_filepath: Path to the image file.
-         :param max_width: Maximum allowed width of the image.
-         :param max_height: Maximum allowed height of the image.
-         :return: Tuple containing the image object and its new dimensions.
+         Returns both original and resized dimensions for coordinate mapping.
          """
-         image = load_image(image_filepath) # Assuming load_image is defined elsewhere
-         width, height = image.size
+         image = load_image(image_filepath)
+         orig_width, orig_height = image.size

          # Calculate new dimensions while maintaining the aspect ratio
-         if width > max_width or height > max_height:
-             aspect_ratio = width / height
+         if orig_width > max_width or orig_height > max_height:
+             aspect_ratio = orig_width / orig_height
              new_width = min(max_width, int(max_height * aspect_ratio))
              new_height = min(max_height, int(max_width / aspect_ratio))
-             return image, new_width, new_height
+             return image, new_width, new_height, orig_width, orig_height
+
+         # No resize needed, original dimensions are used
+         return image, orig_width, orig_height, orig_width, orig_height

-         return image, width, height
+
+     def scale_bbox_coordinates(self, json_response, orig_width, orig_height, resized_width, resized_height):
+         """
+         Scale bbox coordinates from resized image dimensions back to original image dimensions.
+         Only used when apply_annotation=True.
+         """
+         # Calculate scale factors
+         scale_x = orig_width / resized_width
+         scale_y = orig_height / resized_height
+
+         # No scaling needed if dimensions are the same
+         if scale_x == 1 and scale_y == 1:
+             return json_response
+
+         # Helper function to recursively process JSON objects
+         def process_object(obj):
+             if isinstance(obj, dict):
+                 for key, value in obj.items():
+                     if key == "bbox" and isinstance(value, list) and len(value) == 4:
+                         # Scale the bbox coordinates
+                         obj[key] = [
+                             value[0] * scale_x, # x_min
+                             value[1] * scale_y, # y_min
+                             value[2] * scale_x, # x_max
+                             value[3] * scale_y # y_max
+                         ]
+                     elif isinstance(value, (dict, list)):
+                         process_object(value)
+             elif isinstance(obj, list):
+                 for i, item in enumerate(obj):
+                     if isinstance(item, (dict, list)):
+                         process_object(item)
+             return obj
+
+         return process_object(json_response)


      def inference(self, input_data, apply_annotation=False, mode=None):
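For reference, the new resize contract of load_image_data can be exercised with plain arithmetic. The sketch below is not package code; fit_within is a hypothetical helper and the dimensions are illustrative.

# Standalone sketch: the same bounding arithmetic as load_image_data above,
# without loading an actual image.
def fit_within(orig_width, orig_height, max_width=1250, max_height=1750):
    if orig_width > max_width or orig_height > max_height:
        aspect_ratio = orig_width / orig_height
        new_width = min(max_width, int(max_height * aspect_ratio))
        new_height = min(max_height, int(max_width / aspect_ratio))
        return new_width, new_height, orig_width, orig_height
    # No resize needed, original dimensions are used
    return orig_width, orig_height, orig_width, orig_height

print(fit_within(2480, 3508))  # e.g. an A4 scan -> (1237, 1750, 2480, 3508)
print(fit_within(800, 600))    # already within bounds -> (800, 600, 800, 600)

The resized pair is what the new code passes to the model as resize_shape, while the original pair is kept so bbox coordinates can be mapped back afterwards.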
@@ -151,63 +185,59 @@ class MLXInference(ModelInference):
          print("Inference completed successfully")
          return response

+
      def _process_images(self, model, processor, config, file_paths, input_data, apply_annotation):
          """
          Process images and generate responses for each.
-         If apply_annotation=True, don't resize to maintain accurate coordinates.
-
-         :param model: The loaded model
-         :param processor: The loaded processor
-         :param config: Model configuration
-         :param file_paths: List of image file paths
-         :param input_data: Original input data
-         :param apply_annotation: Flag to apply annotations
-         :return: List of processed responses
+         Always resize images for memory efficiency, but scale coordinates back for annotation cases.
          """
          results = []
          for file_path in file_paths:
-             # Load image differently based on annotation requirement
-             if apply_annotation:
-                 # For annotation, just load the image without resizing
-                 image = load_image(file_path)
-                 # We'll skip the resize_shape parameter when generating
-             else:
-                 # For non-annotation cases, load with potential resizing
-                 image, width, height = self.load_image_data(file_path)
-                 # We'll use resize_shape when generating
+             # Always get both original and resized dimensions
+             image, resized_width, resized_height, orig_width, orig_height = self.load_image_data(file_path)

              # Prepare messages based on model type
              messages = self._prepare_messages(input_data, apply_annotation)

-             # Generate and process response
+             # Always use resize_shape for memory efficiency
              prompt = apply_chat_template(processor, config, messages)
+             response, _ = generate(
+                 model,
+                 processor,
+                 prompt,
+                 image,
+                 resize_shape=(resized_width, resized_height),
+                 max_tokens=4000,
+                 temperature=0.0,
+                 verbose=False
+             )
+
+             # Process the raw response
+             processed_response = self.process_response(response)

+             # Scale coordinates if apply_annotation is True and resizing was applied
              if apply_annotation:
-                 # When annotation is required, don't use resize_shape
-                 # This preserves original coordinate system
-                 response, _ = generate(
-                     model,
-                     processor,
-                     prompt,
-                     image,
-                     max_tokens=4000,
-                     temperature=0.0,
-                     verbose=False
-                 )
-             else:
-                 # For non-annotation cases, use resize_shape for memory efficiency
-                 response, _ = generate(
-                     model,
-                     processor,
-                     prompt,
-                     image,
-                     resize_shape=(width, height),
-                     max_tokens=4000,
-                     temperature=0.0,
-                     verbose=False
-                 )
+                 try:
+                     # Parse the JSON response
+                     json_response = json.loads(processed_response) if isinstance(processed_response,
+                                                                                  str) else processed_response
+
+                     # Apply scaling only if dimensions differ
+                     if orig_width != resized_width or orig_height != resized_height:
+                         json_response = self.scale_bbox_coordinates(
+                             json_response,
+                             orig_width,
+                             orig_height,
+                             resized_width,
+                             resized_height
+                         )
+
+                     # Convert back to JSON string
+                     processed_response = json.dumps(json_response, indent=2)
+                 except (json.JSONDecodeError, TypeError) as e:
+                     print(f"Warning: Could not scale coordinates - {e}")
+                     # Keep the original response if JSON parsing fails

-             processed_response = self.process_response(response)
              results.append(processed_response)
              print(f"Inference completed successfully for: {file_path}")

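The annotation path above now reduces to: generate against the resized image, parse the JSON, multiply every bbox by the original/resized ratio, and re-serialize. A standalone sketch of that post-processing step, assuming a response with value/bbox/confidence entries (the field name and sizes are made up, not package output):

# Standalone sketch: rescale bboxes in a model response back to original-image pixels.
import json

def scale_bboxes(obj, scale_x, scale_y):
    # Recursively walk dicts/lists and scale any 4-element "bbox" entry in place.
    if isinstance(obj, dict):
        for key, value in obj.items():
            if key == "bbox" and isinstance(value, list) and len(value) == 4:
                obj[key] = [value[0] * scale_x, value[1] * scale_y,
                            value[2] * scale_x, value[3] * scale_y]
            elif isinstance(value, (dict, list)):
                scale_bboxes(value, scale_x, scale_y)
    elif isinstance(obj, list):
        for item in obj:
            if isinstance(item, (dict, list)):
                scale_bboxes(item, scale_x, scale_y)
    return obj

raw = '{"invoice_no": {"value": "61356291", "bbox": [100, 50, 220, 70], "confidence": 0.98}}'
orig_w, orig_h, resized_w, resized_h = 2480, 3508, 1237, 1750
scaled = scale_bboxes(json.loads(raw), orig_w / resized_w, orig_h / resized_h)
print(json.dumps(scaled, indent=2))  # bbox now in original-image pixels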
@@ -219,6 +249,7 @@ class MLXInference(ModelInference):
          Transform JSON schema in text_input to include value, bbox, and confidence.
          Works with formats like: "retrieve field1, field2. return response in JSON format,
          by strictly following this JSON schema: [{...}]."
+         Handles complex nested structures including arrays.

          Args:
              text_input (str): The input text containing a JSON schema
@@ -226,29 +257,57 @@ class MLXInference(ModelInference):
          Returns:
              str: Text with transformed JSON including value, bbox, and confidence
          """
+         # Find where the schema starts
+         schema_start_marker = "JSON schema:"
+         schema_start_pos = text_input.find(schema_start_marker)
+
+         if schema_start_pos == -1:
+             return text_input # Return original if marker not found
+
+         # Find the actual schema by tracking opening and closing braces
+         start_pos = schema_start_pos + len(schema_start_marker)
+
+         # Skip whitespace to find first opening brace or bracket
+         while start_pos < len(text_input) and text_input[start_pos] not in ['{', '[']:
+             start_pos += 1
+
+         if start_pos >= len(text_input):
+             return text_input # No opening brace found
+
+         # Determine if we're dealing with an object or array
+         is_object = text_input[start_pos] == '{'
+
+         # Now extract the full JSON schema by counting braces
+         open_char = '{' if is_object else '['
+         close_char = '}' if is_object else ']'
+         count = 1 # Already found one opening brace/bracket
+         end_pos = start_pos + 1

-         schema_pattern = r'JSON schema:\s*(\[.*?\]|\{.*?\})'
-         schema_match = re.search(schema_pattern, text_input, re.DOTALL)
+         while end_pos < len(text_input) and count > 0:
+             if text_input[end_pos] == open_char:
+                 count += 1
+             elif text_input[end_pos] == close_char:
+                 count -= 1
+             end_pos += 1

-         if not schema_match:
-             return text_input # Return original if pattern not found
+         if count != 0:
+             print("Warning: Unbalanced braces in JSON schema")
+             return text_input # Unbalanced braces, return original

-         # Extract the schema part and its position
-         schema_str = schema_match.group(1).strip()
-         schema_start = schema_match.start(1)
-         schema_end = schema_match.end(1)
+         # Extract the schema
+         schema_str = text_input[start_pos:end_pos]

-         # Parse and transform the JSON
          try:
              # Handle single quotes if needed
              schema_str = schema_str.replace("'", '"')

+             # Parse and transform the JSON
              json_obj = json.loads(schema_str)
              transformed_json = self.transform_query_structure(json_obj)
              transformed_json_str = json.dumps(transformed_json)

              # Rebuild the text by replacing just the schema portion
-             result = text_input[:schema_start] + transformed_json_str + text_input[schema_end:]
+             result = text_input[:start_pos] + transformed_json_str + text_input[end_pos:]

              return result
          except json.JSONDecodeError as e:
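The non-greedy regex removed above could stop at the first closing bracket, truncating schemas that contain nested arrays or objects; the replacement walks the string and counts matching brackets instead. A minimal sketch of the same idea outside the class (extract_schema and the prompt text are illustrative, not package API):

# Standalone sketch: extract the full JSON schema after a "JSON schema:" marker
# by counting matching brackets, so nested structures are captured in full.
def extract_schema(text, marker="JSON schema:"):
    start = text.find(marker)
    if start == -1:
        return None
    pos = start + len(marker)
    while pos < len(text) and text[pos] not in "{[":
        pos += 1
    if pos >= len(text):
        return None
    open_char = text[pos]
    close_char = "}" if open_char == "{" else "]"
    count, end = 1, pos + 1
    while end < len(text) and count > 0:
        if text[end] == open_char:
            count += 1
        elif text[end] == close_char:
            count -= 1
        end += 1
    return text[pos:end] if count == 0 else None

prompt = ('retrieve invoice_no, items. return response in JSON format, by strictly '
          'following this JSON schema: [{"invoice_no": "str", "items": [{"name": "str"}]}].')
print(extract_schema(prompt))  # -> [{"invoice_no": "str", "items": [{"name": "str"}]}]

Counting only the outer bracket type is enough for well-formed schemas, since any nested brackets of that type balance out; brackets inside string values are not expected in these prompts.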
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: sparrow-parse
- Version: 1.0.8
+ Version: 1.1.0
  Summary: Sparrow Parse is a Python package (part of Sparrow) for parsing and extracting information from documents.
  Home-page: https://github.com/katanaml/sparrow/tree/main/sparrow-data/parse
  Author: Andrej Baranovskij
@@ -1,4 +1,4 @@
- sparrow_parse/__init__.py,sha256=iCEPnhz-knfGRAO4Ep2uQaYf4xwhPIjjcgAcNjga8kc,21
+ sparrow_parse/__init__.py,sha256=XIz3qAg9G9YysQi3Ryp0CN3rtc_JiecHZ9L2vEzcM6s,21
  sparrow_parse/__main__.py,sha256=Xs1bpJV0n08KWOoQE34FBYn6EBXZA9HIYJKrE4ZdG78,153
  sparrow_parse/text_extraction.py,sha256=uhYVNK5Q2FZnw1Poa3JWjtN-aEL7cyKpvaltdn0m2II,8948
  sparrow_parse/extractors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -13,9 +13,9 @@ sparrow_parse/vllm/huggingface_inference.py,sha256=RqYmP-wh_cm_BZ271HbejnZe30S5E
  sparrow_parse/vllm/inference_base.py,sha256=AmWF1OUjJLxSEK_WCbcRpXHX3cKk8nPJJHha_X-9Gs4,844
  sparrow_parse/vllm/inference_factory.py,sha256=FTM65O-dW2WZchHOrNN7_Q3-FlVoAc65iSptuuUuClM,1166
  sparrow_parse/vllm/local_gpu_inference.py,sha256=SIyprv12fYawwfxgQ7ZOTM5WmMfQqhO_9vbereRpZdk,652
- sparrow_parse/vllm/mlx_inference.py,sha256=j4DWq6e_9iQSt7CmWuA7OD7RoXkCrxzCNq4UffBuaoQ,12882
- sparrow_parse-1.0.8.dist-info/METADATA,sha256=clalm_6WpyInHCLH10dyMGX4dgJrPHIXwSU9ltSFZKM,7229
- sparrow_parse-1.0.8.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
- sparrow_parse-1.0.8.dist-info/entry_points.txt,sha256=HV5nnQVtr2m-kn6hzY_ynp0zugNCcGovbmnfmQgOyhw,53
- sparrow_parse-1.0.8.dist-info/top_level.txt,sha256=n6b-WtT91zKLyCPZTP7wvne8v_yvIahcsz-4sX8I0rY,14
- sparrow_parse-1.0.8.dist-info/RECORD,,
+ sparrow_parse/vllm/mlx_inference.py,sha256=opTNOxcTBb6McVEStDECMRcsc_3pnzKSFUmm27h08yA,15466
+ sparrow_parse-1.1.0.dist-info/METADATA,sha256=yq1Fmcu0rmoxIiIAUR6UK-4xqrM2x5NmVAED9-DuWIw,7229
+ sparrow_parse-1.1.0.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
+ sparrow_parse-1.1.0.dist-info/entry_points.txt,sha256=HV5nnQVtr2m-kn6hzY_ynp0zugNCcGovbmnfmQgOyhw,53
+ sparrow_parse-1.1.0.dist-info/top_level.txt,sha256=n6b-WtT91zKLyCPZTP7wvne8v_yvIahcsz-4sX8I0rY,14
+ sparrow_parse-1.1.0.dist-info/RECORD,,