sparrow-parse 1.0.8__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sparrow_parse/__init__.py +1 -1
- sparrow_parse/vllm/mlx_inference.py +123 -64
- {sparrow_parse-1.0.8.dist-info → sparrow_parse-1.1.0.dist-info}/METADATA +1 -1
- {sparrow_parse-1.0.8.dist-info → sparrow_parse-1.1.0.dist-info}/RECORD +7 -7
- {sparrow_parse-1.0.8.dist-info → sparrow_parse-1.1.0.dist-info}/WHEEL +0 -0
- {sparrow_parse-1.0.8.dist-info → sparrow_parse-1.1.0.dist-info}/entry_points.txt +0 -0
- {sparrow_parse-1.0.8.dist-info → sparrow_parse-1.1.0.dist-info}/top_level.txt +0 -0
sparrow_parse/__init__.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = '1.0
|
1
|
+
__version__ = '1.1.0'
|
@@ -75,26 +75,60 @@ class MLXInference(ModelInference):
|
|
75
75
|
print(f"Failed to parse JSON: {e}")
|
76
76
|
return output_text
|
77
77
|
|
78
|
+
|
78
79
|
def load_image_data(self, image_filepath, max_width=1250, max_height=1750):
|
79
80
|
"""
|
80
81
|
Load and resize image while maintaining its aspect ratio.
|
81
|
-
|
82
|
-
:param image_filepath: Path to the image file.
|
83
|
-
:param max_width: Maximum allowed width of the image.
|
84
|
-
:param max_height: Maximum allowed height of the image.
|
85
|
-
:return: Tuple containing the image object and its new dimensions.
|
82
|
+
Returns both original and resized dimensions for coordinate mapping.
|
86
83
|
"""
|
87
|
-
image = load_image(image_filepath)
|
88
|
-
|
84
|
+
image = load_image(image_filepath)
|
85
|
+
orig_width, orig_height = image.size
|
89
86
|
|
90
87
|
# Calculate new dimensions while maintaining the aspect ratio
|
91
|
-
if
|
92
|
-
aspect_ratio =
|
88
|
+
if orig_width > max_width or orig_height > max_height:
|
89
|
+
aspect_ratio = orig_width / orig_height
|
93
90
|
new_width = min(max_width, int(max_height * aspect_ratio))
|
94
91
|
new_height = min(max_height, int(max_width / aspect_ratio))
|
95
|
-
return image, new_width, new_height
|
92
|
+
return image, new_width, new_height, orig_width, orig_height
|
93
|
+
|
94
|
+
# No resize needed, original dimensions are used
|
95
|
+
return image, orig_width, orig_height, orig_width, orig_height
|
96
96
|
|
97
|
-
|
97
|
+
|
98
|
+
def scale_bbox_coordinates(self, json_response, orig_width, orig_height, resized_width, resized_height):
|
99
|
+
"""
|
100
|
+
Scale bbox coordinates from resized image dimensions back to original image dimensions.
|
101
|
+
Only used when apply_annotation=True.
|
102
|
+
"""
|
103
|
+
# Calculate scale factors
|
104
|
+
scale_x = orig_width / resized_width
|
105
|
+
scale_y = orig_height / resized_height
|
106
|
+
|
107
|
+
# No scaling needed if dimensions are the same
|
108
|
+
if scale_x == 1 and scale_y == 1:
|
109
|
+
return json_response
|
110
|
+
|
111
|
+
# Helper function to recursively process JSON objects
|
112
|
+
def process_object(obj):
|
113
|
+
if isinstance(obj, dict):
|
114
|
+
for key, value in obj.items():
|
115
|
+
if key == "bbox" and isinstance(value, list) and len(value) == 4:
|
116
|
+
# Scale the bbox coordinates
|
117
|
+
obj[key] = [
|
118
|
+
value[0] * scale_x, # x_min
|
119
|
+
value[1] * scale_y, # y_min
|
120
|
+
value[2] * scale_x, # x_max
|
121
|
+
value[3] * scale_y # y_max
|
122
|
+
]
|
123
|
+
elif isinstance(value, (dict, list)):
|
124
|
+
process_object(value)
|
125
|
+
elif isinstance(obj, list):
|
126
|
+
for i, item in enumerate(obj):
|
127
|
+
if isinstance(item, (dict, list)):
|
128
|
+
process_object(item)
|
129
|
+
return obj
|
130
|
+
|
131
|
+
return process_object(json_response)
|
98
132
|
|
99
133
|
|
100
134
|
def inference(self, input_data, apply_annotation=False, mode=None):
|
@@ -151,63 +185,59 @@ class MLXInference(ModelInference):
|
|
151
185
|
print("Inference completed successfully")
|
152
186
|
return response
|
153
187
|
|
188
|
+
|
154
189
|
def _process_images(self, model, processor, config, file_paths, input_data, apply_annotation):
|
155
190
|
"""
|
156
191
|
Process images and generate responses for each.
|
157
|
-
|
158
|
-
|
159
|
-
:param model: The loaded model
|
160
|
-
:param processor: The loaded processor
|
161
|
-
:param config: Model configuration
|
162
|
-
:param file_paths: List of image file paths
|
163
|
-
:param input_data: Original input data
|
164
|
-
:param apply_annotation: Flag to apply annotations
|
165
|
-
:return: List of processed responses
|
192
|
+
Always resize images for memory efficiency, but scale coordinates back for annotation cases.
|
166
193
|
"""
|
167
194
|
results = []
|
168
195
|
for file_path in file_paths:
|
169
|
-
#
|
170
|
-
|
171
|
-
# For annotation, just load the image without resizing
|
172
|
-
image = load_image(file_path)
|
173
|
-
# We'll skip the resize_shape parameter when generating
|
174
|
-
else:
|
175
|
-
# For non-annotation cases, load with potential resizing
|
176
|
-
image, width, height = self.load_image_data(file_path)
|
177
|
-
# We'll use resize_shape when generating
|
196
|
+
# Always get both original and resized dimensions
|
197
|
+
image, resized_width, resized_height, orig_width, orig_height = self.load_image_data(file_path)
|
178
198
|
|
179
199
|
# Prepare messages based on model type
|
180
200
|
messages = self._prepare_messages(input_data, apply_annotation)
|
181
201
|
|
182
|
-
#
|
202
|
+
# Always use resize_shape for memory efficiency
|
183
203
|
prompt = apply_chat_template(processor, config, messages)
|
204
|
+
response, _ = generate(
|
205
|
+
model,
|
206
|
+
processor,
|
207
|
+
prompt,
|
208
|
+
image,
|
209
|
+
resize_shape=(resized_width, resized_height),
|
210
|
+
max_tokens=4000,
|
211
|
+
temperature=0.0,
|
212
|
+
verbose=False
|
213
|
+
)
|
214
|
+
|
215
|
+
# Process the raw response
|
216
|
+
processed_response = self.process_response(response)
|
184
217
|
|
218
|
+
# Scale coordinates if apply_annotation is True and resizing was applied
|
185
219
|
if apply_annotation:
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
temperature=0.0,
|
207
|
-
verbose=False
|
208
|
-
)
|
220
|
+
try:
|
221
|
+
# Parse the JSON response
|
222
|
+
json_response = json.loads(processed_response) if isinstance(processed_response,
|
223
|
+
str) else processed_response
|
224
|
+
|
225
|
+
# Apply scaling only if dimensions differ
|
226
|
+
if orig_width != resized_width or orig_height != resized_height:
|
227
|
+
json_response = self.scale_bbox_coordinates(
|
228
|
+
json_response,
|
229
|
+
orig_width,
|
230
|
+
orig_height,
|
231
|
+
resized_width,
|
232
|
+
resized_height
|
233
|
+
)
|
234
|
+
|
235
|
+
# Convert back to JSON string
|
236
|
+
processed_response = json.dumps(json_response, indent=2)
|
237
|
+
except (json.JSONDecodeError, TypeError) as e:
|
238
|
+
print(f"Warning: Could not scale coordinates - {e}")
|
239
|
+
# Keep the original response if JSON parsing fails
|
209
240
|
|
210
|
-
processed_response = self.process_response(response)
|
211
241
|
results.append(processed_response)
|
212
242
|
print(f"Inference completed successfully for: {file_path}")
|
213
243
|
|
@@ -219,6 +249,7 @@ class MLXInference(ModelInference):
|
|
219
249
|
Transform JSON schema in text_input to include value, bbox, and confidence.
|
220
250
|
Works with formats like: "retrieve field1, field2. return response in JSON format,
|
221
251
|
by strictly following this JSON schema: [{...}]."
|
252
|
+
Handles complex nested structures including arrays.
|
222
253
|
|
223
254
|
Args:
|
224
255
|
text_input (str): The input text containing a JSON schema
|
@@ -226,29 +257,57 @@ class MLXInference(ModelInference):
|
|
226
257
|
Returns:
|
227
258
|
str: Text with transformed JSON including value, bbox, and confidence
|
228
259
|
"""
|
260
|
+
# Find where the schema starts
|
261
|
+
schema_start_marker = "JSON schema:"
|
262
|
+
schema_start_pos = text_input.find(schema_start_marker)
|
263
|
+
|
264
|
+
if schema_start_pos == -1:
|
265
|
+
return text_input # Return original if marker not found
|
266
|
+
|
267
|
+
# Find the actual schema by tracking opening and closing braces
|
268
|
+
start_pos = schema_start_pos + len(schema_start_marker)
|
269
|
+
|
270
|
+
# Skip whitespace to find first opening brace or bracket
|
271
|
+
while start_pos < len(text_input) and text_input[start_pos] not in ['{', '[']:
|
272
|
+
start_pos += 1
|
273
|
+
|
274
|
+
if start_pos >= len(text_input):
|
275
|
+
return text_input # No opening brace found
|
276
|
+
|
277
|
+
# Determine if we're dealing with an object or array
|
278
|
+
is_object = text_input[start_pos] == '{'
|
279
|
+
|
280
|
+
# Now extract the full JSON schema by counting braces
|
281
|
+
open_char = '{' if is_object else '['
|
282
|
+
close_char = '}' if is_object else ']'
|
283
|
+
count = 1 # Already found one opening brace/bracket
|
284
|
+
end_pos = start_pos + 1
|
229
285
|
|
230
|
-
|
231
|
-
|
286
|
+
while end_pos < len(text_input) and count > 0:
|
287
|
+
if text_input[end_pos] == open_char:
|
288
|
+
count += 1
|
289
|
+
elif text_input[end_pos] == close_char:
|
290
|
+
count -= 1
|
291
|
+
end_pos += 1
|
232
292
|
|
233
|
-
if
|
234
|
-
|
293
|
+
if count != 0:
|
294
|
+
print("Warning: Unbalanced braces in JSON schema")
|
295
|
+
return text_input # Unbalanced braces, return original
|
235
296
|
|
236
|
-
# Extract the schema
|
237
|
-
schema_str =
|
238
|
-
schema_start = schema_match.start(1)
|
239
|
-
schema_end = schema_match.end(1)
|
297
|
+
# Extract the schema
|
298
|
+
schema_str = text_input[start_pos:end_pos]
|
240
299
|
|
241
|
-
# Parse and transform the JSON
|
242
300
|
try:
|
243
301
|
# Handle single quotes if needed
|
244
302
|
schema_str = schema_str.replace("'", '"')
|
245
303
|
|
304
|
+
# Parse and transform the JSON
|
246
305
|
json_obj = json.loads(schema_str)
|
247
306
|
transformed_json = self.transform_query_structure(json_obj)
|
248
307
|
transformed_json_str = json.dumps(transformed_json)
|
249
308
|
|
250
309
|
# Rebuild the text by replacing just the schema portion
|
251
|
-
result = text_input[:
|
310
|
+
result = text_input[:start_pos] + transformed_json_str + text_input[end_pos:]
|
252
311
|
|
253
312
|
return result
|
254
313
|
except json.JSONDecodeError as e:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: sparrow-parse
|
3
|
-
Version: 1.0
|
3
|
+
Version: 1.1.0
|
4
4
|
Summary: Sparrow Parse is a Python package (part of Sparrow) for parsing and extracting information from documents.
|
5
5
|
Home-page: https://github.com/katanaml/sparrow/tree/main/sparrow-data/parse
|
6
6
|
Author: Andrej Baranovskij
|
@@ -1,4 +1,4 @@
|
|
1
|
-
sparrow_parse/__init__.py,sha256=
|
1
|
+
sparrow_parse/__init__.py,sha256=XIz3qAg9G9YysQi3Ryp0CN3rtc_JiecHZ9L2vEzcM6s,21
|
2
2
|
sparrow_parse/__main__.py,sha256=Xs1bpJV0n08KWOoQE34FBYn6EBXZA9HIYJKrE4ZdG78,153
|
3
3
|
sparrow_parse/text_extraction.py,sha256=uhYVNK5Q2FZnw1Poa3JWjtN-aEL7cyKpvaltdn0m2II,8948
|
4
4
|
sparrow_parse/extractors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -13,9 +13,9 @@ sparrow_parse/vllm/huggingface_inference.py,sha256=RqYmP-wh_cm_BZ271HbejnZe30S5E
|
|
13
13
|
sparrow_parse/vllm/inference_base.py,sha256=AmWF1OUjJLxSEK_WCbcRpXHX3cKk8nPJJHha_X-9Gs4,844
|
14
14
|
sparrow_parse/vllm/inference_factory.py,sha256=FTM65O-dW2WZchHOrNN7_Q3-FlVoAc65iSptuuUuClM,1166
|
15
15
|
sparrow_parse/vllm/local_gpu_inference.py,sha256=SIyprv12fYawwfxgQ7ZOTM5WmMfQqhO_9vbereRpZdk,652
|
16
|
-
sparrow_parse/vllm/mlx_inference.py,sha256=
|
17
|
-
sparrow_parse-1.0.
|
18
|
-
sparrow_parse-1.0.
|
19
|
-
sparrow_parse-1.0.
|
20
|
-
sparrow_parse-1.0.
|
21
|
-
sparrow_parse-1.0.
|
16
|
+
sparrow_parse/vllm/mlx_inference.py,sha256=opTNOxcTBb6McVEStDECMRcsc_3pnzKSFUmm27h08yA,15466
|
17
|
+
sparrow_parse-1.1.0.dist-info/METADATA,sha256=yq1Fmcu0rmoxIiIAUR6UK-4xqrM2x5NmVAED9-DuWIw,7229
|
18
|
+
sparrow_parse-1.1.0.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
|
19
|
+
sparrow_parse-1.1.0.dist-info/entry_points.txt,sha256=HV5nnQVtr2m-kn6hzY_ynp0zugNCcGovbmnfmQgOyhw,53
|
20
|
+
sparrow_parse-1.1.0.dist-info/top_level.txt,sha256=n6b-WtT91zKLyCPZTP7wvne8v_yvIahcsz-4sX8I0rY,14
|
21
|
+
sparrow_parse-1.1.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|