PyPI - sparrow-parse - Versions diffs - 1.0.8__py3-none-any.whl → 1.0.9__py3-none-any.whl - Mend

sparrow-parse 1.0.8__py3-none-any.whl → 1.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

sparrow_parse/__init__.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = '1.0.8'
1	+ __version__ = '1.0.9'

sparrow_parse/vllm/mlx_inference.py CHANGED Viewed

@@ -75,26 +75,60 @@ class MLXInference(ModelInference):
             print(f"Failed to parse JSON: {e}")
             return output_text
     def load_image_data(self, image_filepath, max_width=1250, max_height=1750):
         """
         Load and resize image while maintaining its aspect ratio.
-        :param image_filepath: Path to the image file.
-        :param max_width: Maximum allowed width of the image.
-        :param max_height: Maximum allowed height of the image.
-        :return: Tuple containing the image object and its new dimensions.
+        Returns both original and resized dimensions for coordinate mapping.
         """
-        image = load_image(image_filepath)  # Assuming load_image is defined elsewhere
-        width, height = image.size
+        image = load_image(image_filepath)
+        orig_width, orig_height = image.size
         # Calculate new dimensions while maintaining the aspect ratio
-        if width > max_width or height > max_height:
-            aspect_ratio = width / height
+        if orig_width > max_width or orig_height > max_height:
+            aspect_ratio = orig_width / orig_height
             new_width = min(max_width, int(max_height * aspect_ratio))
             new_height = min(max_height, int(max_width / aspect_ratio))
-            return image, new_width, new_height
+            return image, new_width, new_height, orig_width, orig_height
+        # No resize needed, original dimensions are used
+        return image, orig_width, orig_height, orig_width, orig_height
-        return image, width, height
+    def scale_bbox_coordinates(self, json_response, orig_width, orig_height, resized_width, resized_height):
+        """
+        Scale bbox coordinates from resized image dimensions back to original image dimensions.
+        Only used when apply_annotation=True.
+        """
+        # Calculate scale factors
+        scale_x = orig_width / resized_width
+        scale_y = orig_height / resized_height
+        # No scaling needed if dimensions are the same
+        if scale_x == 1 and scale_y == 1:
+            return json_response
+        # Helper function to recursively process JSON objects
+        def process_object(obj):
+            if isinstance(obj, dict):
+                for key, value in obj.items():
+                    if key == "bbox" and isinstance(value, list) and len(value) == 4:
+                        # Scale the bbox coordinates
+                        obj[key] = [
+                            value[0] * scale_x,  # x_min
+                            value[1] * scale_y,  # y_min
+                            value[2] * scale_x,  # x_max
+                            value[3] * scale_y  # y_max
+                        ]
+                    elif isinstance(value, (dict, list)):
+                        process_object(value)
+            elif isinstance(obj, list):
+                for i, item in enumerate(obj):
+                    if isinstance(item, (dict, list)):
+                        process_object(item)
+            return obj
+        return process_object(json_response)
     def inference(self, input_data, apply_annotation=False, mode=None):
@@ -151,63 +185,59 @@ class MLXInference(ModelInference):
         print("Inference completed successfully")
         return response
     def _process_images(self, model, processor, config, file_paths, input_data, apply_annotation):
         """
         Process images and generate responses for each.
-        If apply_annotation=True, don't resize to maintain accurate coordinates.
-        :param model: The loaded model
-        :param processor: The loaded processor
-        :param config: Model configuration
-        :param file_paths: List of image file paths
-        :param input_data: Original input data
-        :param apply_annotation: Flag to apply annotations
-        :return: List of processed responses
+        Always resize images for memory efficiency, but scale coordinates back for annotation cases.
         """
         results = []
         for file_path in file_paths:
-            # Load image differently based on annotation requirement
-            if apply_annotation:
-                # For annotation, just load the image without resizing
-                image = load_image(file_path)
-                # We'll skip the resize_shape parameter when generating
-            else:
-                # For non-annotation cases, load with potential resizing
-                image, width, height = self.load_image_data(file_path)
-                # We'll use resize_shape when generating
+            # Always get both original and resized dimensions
+            image, resized_width, resized_height, orig_width, orig_height = self.load_image_data(file_path)
             # Prepare messages based on model type
             messages = self._prepare_messages(input_data, apply_annotation)
-            # Generate and process response
+            # Always use resize_shape for memory efficiency
             prompt = apply_chat_template(processor, config, messages)
+            response, _ = generate(
+                model,
+                processor,
+                prompt,
+                image,
+                resize_shape=(resized_width, resized_height),
+                max_tokens=4000,
+                temperature=0.0,
+                verbose=False
+            )
+            # Process the raw response
+            processed_response = self.process_response(response)
+            # Scale coordinates if apply_annotation is True and resizing was applied
             if apply_annotation:
-                # When annotation is required, don't use resize_shape
-                # This preserves original coordinate system
-                response, _ = generate(
-                    model,
-                    processor,
-                    prompt,
-                    image,
-                    max_tokens=4000,
-                    temperature=0.0,
-                    verbose=False
-                )
-            else:
-                # For non-annotation cases, use resize_shape for memory efficiency
-                response, _ = generate(
-                    model,
-                    processor,
-                    prompt,
-                    image,
-                    resize_shape=(width, height),
-                    max_tokens=4000,
-                    temperature=0.0,
-                    verbose=False
-                )
+                try:
+                    # Parse the JSON response
+                    json_response = json.loads(processed_response) if isinstance(processed_response,
+                                                                                 str) else processed_response
+                    # Apply scaling only if dimensions differ
+                    if orig_width != resized_width or orig_height != resized_height:
+                        json_response = self.scale_bbox_coordinates(
+                            json_response,
+                            orig_width,
+                            orig_height,
+                            resized_width,
+                            resized_height
+                        )
+                    # Convert back to JSON string
+                    processed_response = json.dumps(json_response, indent=2)
+                except (json.JSONDecodeError, TypeError) as e:
+                    print(f"Warning: Could not scale coordinates - {e}")
+                    # Keep the original response if JSON parsing fails
-            processed_response = self.process_response(response)
             results.append(processed_response)
             print(f"Inference completed successfully for: {file_path}")

{sparrow_parse-1.0.8.dist-info → sparrow_parse-1.0.9.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: sparrow-parse
-Version: 1.0.8
+Version: 1.0.9
 Summary: Sparrow Parse is a Python package (part of Sparrow) for parsing and extracting information from documents.
 Home-page: https://github.com/katanaml/sparrow/tree/main/sparrow-data/parse
 Author: Andrej Baranovskij

{sparrow_parse-1.0.8.dist-info → sparrow_parse-1.0.9.dist-info}/RECORD RENAMED Viewed

@@ -1,4 +1,4 @@
-sparrow_parse/__init__.py,sha256=iCEPnhz-knfGRAO4Ep2uQaYf4xwhPIjjcgAcNjga8kc,21
+sparrow_parse/__init__.py,sha256=mu4LFHUATfA8gJh1jUgMKeIXEjlhbQqTC5UyMGu-2Gs,21
 sparrow_parse/__main__.py,sha256=Xs1bpJV0n08KWOoQE34FBYn6EBXZA9HIYJKrE4ZdG78,153
 sparrow_parse/text_extraction.py,sha256=uhYVNK5Q2FZnw1Poa3JWjtN-aEL7cyKpvaltdn0m2II,8948
 sparrow_parse/extractors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -13,9 +13,9 @@ sparrow_parse/vllm/huggingface_inference.py,sha256=RqYmP-wh_cm_BZ271HbejnZe30S5E
 sparrow_parse/vllm/inference_base.py,sha256=AmWF1OUjJLxSEK_WCbcRpXHX3cKk8nPJJHha_X-9Gs4,844
 sparrow_parse/vllm/inference_factory.py,sha256=FTM65O-dW2WZchHOrNN7_Q3-FlVoAc65iSptuuUuClM,1166
 sparrow_parse/vllm/local_gpu_inference.py,sha256=SIyprv12fYawwfxgQ7ZOTM5WmMfQqhO_9vbereRpZdk,652
-sparrow_parse/vllm/mlx_inference.py,sha256=j4DWq6e_9iQSt7CmWuA7OD7RoXkCrxzCNq4UffBuaoQ,12882
-sparrow_parse-1.0.8.dist-info/METADATA,sha256=clalm_6WpyInHCLH10dyMGX4dgJrPHIXwSU9ltSFZKM,7229
-sparrow_parse-1.0.8.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
-sparrow_parse-1.0.8.dist-info/entry_points.txt,sha256=HV5nnQVtr2m-kn6hzY_ynp0zugNCcGovbmnfmQgOyhw,53
-sparrow_parse-1.0.8.dist-info/top_level.txt,sha256=n6b-WtT91zKLyCPZTP7wvne8v_yvIahcsz-4sX8I0rY,14
-sparrow_parse-1.0.8.dist-info/RECORD,,
+sparrow_parse/vllm/mlx_inference.py,sha256=NGh-_pQtQAVaCqxxnpaxznSIl61_8znNdKwBtGgYdvk,14331
+sparrow_parse-1.0.9.dist-info/METADATA,sha256=u9kO5dH-ow82NHgmrRalJDRKzvznYSZFwEEy_4Z_Ofw,7229
+sparrow_parse-1.0.9.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
+sparrow_parse-1.0.9.dist-info/entry_points.txt,sha256=HV5nnQVtr2m-kn6hzY_ynp0zugNCcGovbmnfmQgOyhw,53
+sparrow_parse-1.0.9.dist-info/top_level.txt,sha256=n6b-WtT91zKLyCPZTP7wvne8v_yvIahcsz-4sX8I0rY,14
+sparrow_parse-1.0.9.dist-info/RECORD,,

{sparrow_parse-1.0.8.dist-info → sparrow_parse-1.0.9.dist-info}/WHEEL RENAMED Viewed

File without changes

{sparrow_parse-1.0.8.dist-info → sparrow_parse-1.0.9.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{sparrow_parse-1.0.8.dist-info → sparrow_parse-1.0.9.dist-info}/top_level.txt RENAMED Viewed

File without changes

sparrow-parse 1.0.8__py3-none-any.whl → 1.0.9__py3-none-any.whl

sparrow-parse 1.0.8__py3-none-any.whl → 1.0.9__py3-none-any.whl