sparrow-parse 1.0.5__tar.gz → 1.0.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sparrow-parse-1.0.5 → sparrow-parse-1.0.6}/PKG-INFO +4 -4
- {sparrow-parse-1.0.5 → sparrow-parse-1.0.6}/setup.py +1 -1
- sparrow-parse-1.0.6/sparrow_parse/__init__.py +1 -0
- {sparrow-parse-1.0.5 → sparrow-parse-1.0.6}/sparrow_parse/extractors/vllm_extractor.py +19 -18
- sparrow-parse-1.0.6/sparrow_parse/text_extraction.py +216 -0
- {sparrow-parse-1.0.5 → sparrow-parse-1.0.6}/sparrow_parse/vllm/huggingface_inference.py +1 -1
- {sparrow-parse-1.0.5 → sparrow-parse-1.0.6}/sparrow_parse/vllm/inference_base.py +1 -1
- {sparrow-parse-1.0.5 → sparrow-parse-1.0.6}/sparrow_parse/vllm/local_gpu_inference.py +1 -1
- {sparrow-parse-1.0.5 → sparrow-parse-1.0.6}/sparrow_parse/vllm/mlx_inference.py +94 -9
- {sparrow-parse-1.0.5 → sparrow-parse-1.0.6}/sparrow_parse.egg-info/PKG-INFO +4 -4
- {sparrow-parse-1.0.5 → sparrow-parse-1.0.6}/sparrow_parse.egg-info/requires.txt +3 -3
- sparrow-parse-1.0.5/sparrow_parse/__init__.py +0 -1
- sparrow-parse-1.0.5/sparrow_parse/text_extraction.py +0 -35
- {sparrow-parse-1.0.5 → sparrow-parse-1.0.6}/README.md +0 -0
- {sparrow-parse-1.0.5 → sparrow-parse-1.0.6}/setup.cfg +0 -0
- {sparrow-parse-1.0.5 → sparrow-parse-1.0.6}/sparrow_parse/__main__.py +0 -0
- {sparrow-parse-1.0.5 → sparrow-parse-1.0.6}/sparrow_parse/extractors/__init__.py +0 -0
- {sparrow-parse-1.0.5 → sparrow-parse-1.0.6}/sparrow_parse/helpers/__init__.py +0 -0
- {sparrow-parse-1.0.5 → sparrow-parse-1.0.6}/sparrow_parse/helpers/image_optimizer.py +0 -0
- {sparrow-parse-1.0.5 → sparrow-parse-1.0.6}/sparrow_parse/helpers/pdf_optimizer.py +0 -0
- {sparrow-parse-1.0.5 → sparrow-parse-1.0.6}/sparrow_parse/processors/__init__.py +0 -0
- {sparrow-parse-1.0.5 → sparrow-parse-1.0.6}/sparrow_parse/processors/table_structure_processor.py +0 -0
- {sparrow-parse-1.0.5 → sparrow-parse-1.0.6}/sparrow_parse/vllm/__init__.py +0 -0
- {sparrow-parse-1.0.5 → sparrow-parse-1.0.6}/sparrow_parse/vllm/inference_factory.py +0 -0
- {sparrow-parse-1.0.5 → sparrow-parse-1.0.6}/sparrow_parse.egg-info/SOURCES.txt +0 -0
- {sparrow-parse-1.0.5 → sparrow-parse-1.0.6}/sparrow_parse.egg-info/dependency_links.txt +0 -0
- {sparrow-parse-1.0.5 → sparrow-parse-1.0.6}/sparrow_parse.egg-info/entry_points.txt +0 -0
- {sparrow-parse-1.0.5 → sparrow-parse-1.0.6}/sparrow_parse.egg-info/top_level.txt +0 -0

{sparrow-parse-1.0.5 → sparrow-parse-1.0.6}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: sparrow-parse
-Version: 1.0.5
+Version: 1.0.6
 Summary: Sparrow Parse is a Python package (part of Sparrow) for parsing and extracting information from documents.
 Home-page: https://github.com/katanaml/sparrow/tree/main/sparrow-data/parse
 Author: Andrej Baranovskij
@@ -20,11 +20,11 @@ Requires-Dist: torchvision>=0.22.0
 Requires-Dist: torch>=2.7.0
 Requires-Dist: sentence-transformers>=4.1.0
 Requires-Dist: numpy>=2.2.5
-Requires-Dist: pypdf>=5.
+Requires-Dist: pypdf>=5.5.0
 Requires-Dist: gradio_client>=1.7.2
 Requires-Dist: pdf2image>=1.17.0
-Requires-Dist: mlx>=0.25.
-Requires-Dist: mlx-vlm==0.1.
+Requires-Dist: mlx>=0.25.2; sys_platform == "darwin" and platform_machine == "arm64"
+Requires-Dist: mlx-vlm==0.1.26; sys_platform == "darwin" and platform_machine == "arm64"
 
 # Sparrow Parse
 

{sparrow-parse-1.0.5 → sparrow-parse-1.0.6}/setup.py

@@ -8,7 +8,7 @@ with open("requirements.txt", "r", encoding="utf-8") as fh:
 
 setup(
     name="sparrow-parse",
-    version="1.0.5",
+    version="1.0.6",
     author="Andrej Baranovskij",
     author_email="andrejus.baranovskis@gmail.com",
     description="Sparrow Parse is a Python package (part of Sparrow) for parsing and extracting information from documents.",

sparrow-parse-1.0.6/sparrow_parse/__init__.py

@@ -0,0 +1 @@
+__version__ = '1.0.6'

{sparrow-parse-1.0.5 → sparrow-parse-1.0.6}/sparrow_parse/extractors/vllm_extractor.py

@@ -14,13 +14,14 @@ class VLLMExtractor(object):
         pass
 
     def run_inference(self, model_inference_instance, input_data, tables_only=False,
-                      generic_query=False, crop_size=None, debug_dir=None, debug=False, mode=None):
+                      generic_query=False, crop_size=None, apply_annotation=False, debug_dir=None, debug=False, mode=None):
         """
         Main entry point for processing input data using a model inference instance.
         Handles generic queries, PDFs, and table extraction.
         """
         if generic_query:
             input_data[0]["text_input"] = "retrieve document data. return response in JSON format"
+            apply_annotation=False
 
         if debug:
             print("Input data:", input_data)
@@ -37,12 +38,12 @@ class VLLMExtractor(object):
         # Document data extraction inference (file_path exists and is not None)
         file_path = input_data[0]["file_path"]
         if self.is_pdf(file_path):
-            return self._process_pdf(model_inference_instance, input_data, tables_only, crop_size, debug, debug_dir, mode)
+            return self._process_pdf(model_inference_instance, input_data, tables_only, crop_size, apply_annotation, debug, debug_dir, mode)
         else:
-            return self._process_non_pdf(model_inference_instance, input_data, tables_only, crop_size, debug, debug_dir)
+            return self._process_non_pdf(model_inference_instance, input_data, tables_only, crop_size, apply_annotation, debug, debug_dir)
 
 
-    def _process_pdf(self, model_inference_instance, input_data, tables_only, crop_size, debug, debug_dir, mode):
+    def _process_pdf(self, model_inference_instance, input_data, tables_only, crop_size, apply_annotation, debug, debug_dir, mode):
         """
         Handles processing and inference for PDF files, including page splitting and optional table extraction.
         """
@@ -50,21 +51,21 @@ class VLLMExtractor(object):
         num_pages, output_files, temp_dir = pdf_optimizer.split_pdf_to_pages(input_data[0]["file_path"],
                                                                              debug_dir, convert_to_images=True)
 
-        results = self._process_pages(model_inference_instance, output_files, input_data, tables_only, crop_size, debug, debug_dir)
+        results = self._process_pages(model_inference_instance, output_files, input_data, tables_only, crop_size, apply_annotation, debug, debug_dir)
 
         # Clean up temporary directory
         shutil.rmtree(temp_dir, ignore_errors=True)
         return results, num_pages
 
 
-    def _process_non_pdf(self, model_inference_instance, input_data, tables_only, crop_size, debug, debug_dir):
+    def _process_non_pdf(self, model_inference_instance, input_data, tables_only, crop_size, apply_annotation, debug, debug_dir):
         """
         Handles processing and inference for non-PDF files, with optional table extraction.
         """
         file_path = input_data[0]["file_path"]
 
         if tables_only:
-            return self._extract_tables(model_inference_instance, file_path, input_data, debug, debug_dir), 1
+            return self._extract_tables(model_inference_instance, file_path, input_data, apply_annotation, debug, debug_dir), 1
         else:
             temp_dir = tempfile.mkdtemp()
 
@@ -77,13 +78,13 @@ class VLLMExtractor(object):
 
             file_path = input_data[0]["file_path"]
             input_data[0]["file_path"] = [file_path]
-            results = model_inference_instance.inference(input_data)
+            results = model_inference_instance.inference(input_data, apply_annotation)
 
             shutil.rmtree(temp_dir, ignore_errors=True)
 
             return results, 1
 
-    def _process_pages(self, model_inference_instance, output_files, input_data, tables_only, crop_size, debug, debug_dir):
+    def _process_pages(self, model_inference_instance, output_files, input_data, tables_only, crop_size, apply_annotation, debug, debug_dir):
         """
         Processes individual pages (PDF split) and handles table extraction or inference.
 
@@ -93,6 +94,7 @@ class VLLMExtractor(object):
             input_data: Input data for inference.
            tables_only: Whether to only process tables.
            crop_size: Size for cropping image borders.
+            apply_annotation: Flag to apply annotations to the output.
            debug: Debug flag for logging.
            debug_dir: Directory for saving debug information.
 
@@ -106,9 +108,7 @@ class VLLMExtractor(object):
             print(f"Processing {len(output_files)} pages for table extraction.")
             # Process each page individually for table extraction
             for i, file_path in enumerate(output_files):
-                tables_result = self._extract_tables(
-                    model_inference_instance, file_path, input_data, debug, debug_dir, page_index=i
-                )
+                tables_result = self._extract_tables( model_inference_instance, file_path, input_data, apply_annotation, debug, debug_dir, page_index=i)
                 # Since _extract_tables returns a list with one JSON string, unpack it
                 results_array.extend(tables_result) # Unpack the single JSON string
         else:
@@ -141,7 +141,7 @@ class VLLMExtractor(object):
             input_data[0]["file_path"] = output_files
 
             # Process all files at once
-            results = model_inference_instance.inference(input_data)
+            results = model_inference_instance.inference(input_data, apply_annotation)
             results_array.extend(results)
 
             # Clean up temporary directory
@@ -150,7 +150,7 @@ class VLLMExtractor(object):
         return results_array
 
 
-    def _extract_tables(self, model_inference_instance, file_path, input_data, debug, debug_dir, page_index=None):
+    def _extract_tables(self, model_inference_instance, file_path, input_data, apply_annotation, debug, debug_dir, page_index=None):
         """
         Detects and processes tables from an input file.
         """
@@ -175,7 +175,7 @@ class VLLMExtractor(object):
             table.save(output_filename, "JPEG")
 
             input_data[0]["file_path"] = [output_filename]
-            result = self._run_model_inference(model_inference_instance, input_data)
+            result = self._run_model_inference(model_inference_instance, input_data, apply_annotation)
             results_array.append(result)
 
         shutil.rmtree(temp_dir, ignore_errors=True)
@@ -191,11 +191,11 @@ class VLLMExtractor(object):
 
 
     @staticmethod
-    def _run_model_inference(model_inference_instance, input_data):
+    def _run_model_inference(model_inference_instance, input_data, apply_annotation):
         """
         Runs model inference and handles JSON decoding.
         """
-        result = model_inference_instance.inference(input_data)[0]
+        result = model_inference_instance.inference(input_data, apply_annotation)[0]
         try:
             return json.loads(result) if isinstance(result, str) else result
         except json.JSONDecodeError:
@@ -230,7 +230,7 @@ if __name__ == "__main__":
     # input_data = [
    #     {
    #         "file_path": "sparrow_parse/images/bonds_table.png",
-    #         "text_input": "retrieve
+    #         "text_input": "retrieve [{\"instrument_name\":\"str\", \"valuation\":\"int\"}]. return response in JSON format"
    #     }
    # ]
    #
@@ -245,6 +245,7 @@ if __name__ == "__main__":
     # results_array, num_pages = extractor.run_inference(model_inference_instance, input_data, tables_only=False,
    #                                                    generic_query=False,
    #                                                    crop_size=0,
+    #                                                    apply_annotation=False,
    #                                                    debug_dir="/Users/andrejb/Work/katana-git/sparrow/sparrow-ml/llm/data/",
    #                                                    debug=True,
    #                                                    mode=None)
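
The vllm_extractor.py changes above thread a new `apply_annotation` flag from `run_inference` down to every backend `inference()` call. The following is a minimal usage sketch based on the commented-out example in that file; the `InferenceFactory` class name, its config keys, and `get_inference_instance()` are assumptions inferred from the module name and are not shown in this diff.

```python
from sparrow_parse.extractors.vllm_extractor import VLLMExtractor
from sparrow_parse.vllm.inference_factory import InferenceFactory  # class name assumed

# Hypothetical backend config; the exact keys are not part of this diff.
factory = InferenceFactory({"method": "mlx", "model_name": "mlx-community/Qwen2.5-VL-72B-Instruct-4bit"})
model_inference_instance = factory.get_inference_instance()  # method name assumed

extractor = VLLMExtractor()
input_data = [
    {
        "file_path": "sparrow_parse/images/bonds_table.png",
        "text_input": "retrieve [{\"instrument_name\":\"str\", \"valuation\":\"int\"}]. return response in JSON format",
    }
]

# apply_annotation=True asks the Qwen backend for value/bbox/confidence objects
# instead of plain values; it is forced to False for generic queries (see diff).
results_array, num_pages = extractor.run_inference(
    model_inference_instance,
    input_data,
    tables_only=False,
    generic_query=False,
    crop_size=0,
    apply_annotation=True,
    debug=True,
    mode=None,
)
print(results_array, num_pages)
```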

sparrow-parse-1.0.6/sparrow_parse/text_extraction.py

@@ -0,0 +1,216 @@
+from mlx_vlm import load, apply_chat_template, generate
+from mlx_vlm.utils import load_image
+from PIL import ImageDraw, ImageFont
+import json
+
+
+# Load model and processor
+vl_model, vl_processor = load("mlx-community/Mistral-Small-3.1-24B-Instruct-2503-8bit")
+# vl_model, vl_processor = load("mlx-community/Qwen2.5-VL-72B-Instruct-4bit")
+vl_config = vl_model.config
+
+image = load_image("images/bonds_table.png")
+
+# Qwen
+# messages = [
+#     {"role": "system", "content": "You are an expert at extracting text from images. Format your response in JSON."},
+#     {"role": "user", "content": "retrieve [{\"instrument_name\":\"str\", \"valuation\":\"int\"}]. return response in JSON format"}
+# ]
+# Qwen with bbox
+# messages = [
+#     {"role": "system", "content": "You are an expert at extracting text from images. For each item in the table, provide separate bounding boxes for each field. All coordinates should be in pixels relative to the original image. Format your response in JSON."},
+#     {"role": "user", "content": "retrieve [{\"instrument_name\":{\"value\":\"str\", \"bbox\":[\"float\", \"float\", \"float\", \"float\"], \"confidence\":\"float\"}, \"valuation\":{\"value\":\"int\", \"bbox\":[\"float\", \"float\", \"float\", \"float\"], \"confidence\":\"float\"}}]. return response in JSON format"}
+# ]
+# Qwen with bbox, get all data
+# messages = [
+#     {"role": "system", "content": "You are an expert at extracting text from images. For each item in the table, provide separate bounding boxes for each field. All coordinates should be in pixels relative to the original image. Format your response in JSON."},
+#     {"role": "user", "content": "retrieve all data. return response in JSON format. For each identified field or data element, include: 1) a descriptive field name as the object key, 2) a nested object with 'value' containing the extracted content, 'bbox' array with [x_min, y_min, x_max, y_max] coordinates in pixels, and 'confidence' score between 0-1. Example structure: [{\"field_name\":{\"value\":\"extracted value\", \"bbox\":[100, 200, 300, 250], \"confidence\":0.95}}]"}
+# ]
+
+# Mistral
+# message = "retrieve all data. return response in JSON format"
+message = "retrieve [{\"instrument_name\":\"str\", \"valuation\":\"int\"}]. return response in JSON format"
+
+# Qwen
+# prompt = apply_chat_template(vl_processor, vl_config, messages)
+# Mistral
+prompt = apply_chat_template(vl_processor, vl_config, message)
+
+# Generate text
+vl_output, _ = generate(
+    vl_model,
+    vl_processor,
+    prompt,
+    image,
+    max_tokens=4000,
+    temperature=0,
+    verbose=False
+)
+
+print(vl_output)
+
+
+# Comment out below code if non Qwen model is used
+
+# # Convert to a format we can draw on
+# img_draw = image.copy()
+# draw = ImageDraw.Draw(img_draw)
+#
+# # Parse the JSON result
+# results = json.loads(vl_output.strip('```json\n').strip('```'))
+#
+# # Predefined solid colors that are highly visible
+# solid_colors = [
+#     (180, 30, 40),  # Dark red
+#     (0, 100, 140),  # Dark blue
+#     (30, 120, 40),  # Dark green
+#     (140, 60, 160),  # Purple
+#     (200, 100, 0),  # Orange
+#     (100, 80, 0),  # Brown
+#     (0, 100, 100),  # Teal
+#     (120, 40, 100)  # Magenta
+# ]
+#
+# # Determine unique field keys across all items to assign consistent colors
+# unique_fields = set()
+# for item in results:
+#     unique_fields.update(item.keys())
+#
+# # Map each unique field to a color
+# field_color_map = {}
+# for i, field in enumerate(sorted(unique_fields)):
+#     field_color_map[field] = solid_colors[i % len(solid_colors)]
+#
+# # Load font with larger size
+# font_size = 20
+# try:
+#     font = ImageFont.truetype("arial.ttf", font_size)
+# except IOError:
+#     try:
+#         font = ImageFont.truetype("DejaVuSans.ttf", font_size)
+#     except IOError:
+#         try:
+#             font = ImageFont.truetype("Helvetica.ttf", font_size)
+#         except IOError:
+#             font = ImageFont.load_default()
+#
+#
+# # Helper function to measure text width
+# def get_text_dimensions(text, font):
+#     try:
+#         # Method for newer Pillow versions
+#         left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
+#         return right - left, bottom - top
+#     except AttributeError:
+#         try:
+#             # Alternative method
+#             left, top, right, bottom = font.getbbox(text)
+#             return right - left, bottom - top
+#         except AttributeError:
+#             # Fallback approximation
+#             return len(text) * (font_size // 2), font_size + 2
+#
+#
+# # Draw bounding boxes for each item
+# for item in results:
+#     # Process each field
+#     for field_name, field_data in item.items():
+#         # Check if this field has the expected structure
+#         if isinstance(field_data, dict) and "bbox" in field_data and "value" in field_data:
+#             bbox = field_data["bbox"]
+#             value = field_data["value"]
+#             confidence = field_data.get("confidence", "N/A")
+#
+#             # Check if coordinates need to be scaled (normalized 0-1 values)
+#             if all(isinstance(coord, (int, float)) for coord in bbox):
+#                 if max(bbox) <= 1.0:  # Normalized coordinates
+#                     width, height = image.size
+#                     bbox = [
+#                         bbox[0] * width,
+#                         bbox[1] * height,
+#                         bbox[2] * width,
+#                         bbox[3] * height
+#                     ]
+#
+#             # Get color from the mapping we created
+#             color = field_color_map[field_name]
+#
+#             # Make sure bbox coordinates are integers
+#             bbox = [int(coord) for coord in bbox]
+#
+#             # Calculate the bbox width
+#             bbox_width = bbox[2] - bbox[0]
+#
+#             # Draw rectangle with appropriate thickness
+#             border_thickness = 3
+#             draw.rectangle(
+#                 [(bbox[0], bbox[1]), (bbox[2], bbox[3])],
+#                 outline=color,
+#                 width=border_thickness
+#             )
+#
+#             # Format the value and confidence
+#             value_str = str(value)
+#             confidence_str = f" [{confidence:.2f}]" if isinstance(confidence, (int, float)) else ""
+#             prefix = f"{field_name}: "
+#
+#             # First, try with full text without truncation
+#             full_label = prefix + value_str + confidence_str
+#             full_width, text_height = get_text_dimensions(full_label, font)
+#
+#             # Compare with a reasonable maximum display width
+#             min_display_width = 300  # Reasonable minimum width to display text
+#             max_display_width = max(bbox_width * 1.5, min_display_width)
+#
+#             # Only truncate if the full text exceeds our maximum display width
+#             if full_width > max_display_width:
+#                 # Calculate the space available for the value
+#                 prefix_width, _ = get_text_dimensions(prefix, font)
+#                 confidence_width, _ = get_text_dimensions(confidence_str, font)
+#                 available_value_width = max_display_width - prefix_width - confidence_width
+#
+#                 # Truncate the value to fit
+#                 truncated_value = value_str
+#                 for i in range(len(value_str) - 1, 3, -1):
+#                     truncated_value = value_str[:i] + "..."
+#                     temp_width, _ = get_text_dimensions(truncated_value, font)
+#                     if temp_width <= available_value_width:
+#                         break
+#
+#                 label = prefix + truncated_value + confidence_str
+#                 text_width, _ = get_text_dimensions(label, font)
+#             else:
+#                 # No truncation needed
+#                 label = full_label
+#                 text_width = full_width
+#
+#             # Position for text (above the bounding box)
+#             padding = 6
+#             text_position = (bbox[0], bbox[1] - text_height - (padding * 2))
+#
+#             # Ensure text doesn't go off the top of the image
+#             if text_position[1] < padding:
+#                 # If too close to top, position below the box instead
+#                 text_position = (bbox[0], bbox[3] + padding)
+#
+#             # Add a background rectangle with better contrast
+#             draw.rectangle(
+#                 [(text_position[0] - padding, text_position[1] - padding),
+#                  (text_position[0] + text_width + padding, text_position[1] + text_height + padding)],
+#                 fill=(255, 255, 255, 240),
+#                 outline=color,
+#                 width=2
+#             )
+#
+#             # Draw the text
+#             draw.text(
+#                 text_position,
+#                 label,
+#                 fill=color,
+#                 font=font
+#             )
+#
+# # Save the annotated image
+# output_path = "images/bonds_table_annotated.png"
+# img_draw.save(output_path)
+# print(f"Annotated image saved to {output_path}")
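
The commented-out annotation block in the new text_extraction.py parses the model reply by stripping literal Markdown JSON fences from `vl_output`. As a hedged aside, here is a minimal sketch of a slightly more defensive parser for that same step; the regex fallback is an assumption about how the model may wrap its output and is not part of the package.

```python
import json
import re


def parse_model_json(raw_output: str):
    """Parse a model reply that may be wrapped in a Markdown JSON code fence."""
    # Prefer the fenced payload when present; otherwise try the raw string.
    match = re.search(r"```(?:json)?\s*(.*?)\s*```", raw_output, re.DOTALL)
    payload = match.group(1) if match else raw_output.strip()
    return json.loads(payload)


# Example with a fenced reply, as Qwen-style models often produce:
print(parse_model_json('```json\n[{"instrument_name": "Bond A", "valuation": 100}]\n```'))
```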

{sparrow-parse-1.0.5 → sparrow-parse-1.0.6}/sparrow_parse/vllm/huggingface_inference.py

@@ -26,7 +26,7 @@ class HuggingFaceInference(ModelInference):
         return output_text
 
 
-    def inference(self, input_data, mode=None):
+    def inference(self, input_data, apply_annotation=False, mode=None):
         if mode == "static":
             simple_json = self.get_simple_json()
             return [simple_json]

{sparrow-parse-1.0.5 → sparrow-parse-1.0.6}/sparrow_parse/vllm/local_gpu_inference.py

@@ -8,7 +8,7 @@ class LocalGPUInference(ModelInference):
         self.device = device
         self.model.to(self.device)
 
-    def inference(self, input_data, mode=None):
+    def inference(self, input_data, apply_annotation=False, mode=None):
         self.model.eval()  # Set the model to evaluation mode
         with torch.no_grad():  # No need to calculate gradients
             input_tensor = torch.tensor(input_data).to(self.device)

{sparrow-parse-1.0.5 → sparrow-parse-1.0.6}/sparrow_parse/vllm/mlx_inference.py

@@ -3,7 +3,7 @@ from mlx_vlm.prompt_utils import apply_chat_template
 from mlx_vlm.utils import load_image
 from sparrow_parse.vllm.inference_base import ModelInference
 import os
-import json
+import json, re
 from rich import print
 
 
@@ -98,11 +98,12 @@ class MLXInference(ModelInference):
         return image, width, height
 
 
-    def inference(self, input_data, mode=None):
+    def inference(self, input_data, apply_annotation=False, mode=None):
         """
         Perform inference on input data using the specified model.
 
         :param input_data: A list of dictionaries containing image file paths and text inputs.
+        :param apply_annotation: Optional flag to apply annotations to the output.
         :param mode: Optional mode for inference ("static" for simple JSON output).
         :return: List of processed model responses.
         """
@@ -125,7 +126,7 @@ class MLXInference(ModelInference):
         else:
             # Image-based inference
             file_paths = self._extract_file_paths(input_data)
-            results = self._process_images(model, processor, config, file_paths, input_data)
+            results = self._process_images(model, processor, config, file_paths, input_data, apply_annotation)
 
         return results
 
@@ -151,7 +152,7 @@ class MLXInference(ModelInference):
         print("Inference completed successfully")
         return response
 
-    def _process_images(self, model, processor, config, file_paths, input_data):
+    def _process_images(self, model, processor, config, file_paths, input_data, apply_annotation):
         """
         Process images and generate responses for each.
 
@@ -160,6 +161,7 @@ class MLXInference(ModelInference):
         :param config: Model configuration
         :param file_paths: List of image file paths
         :param input_data: Original input data
+        :param apply_annotation: Flag to apply annotations
         :return: List of processed responses
         """
         results = []
@@ -167,11 +169,11 @@ class MLXInference(ModelInference):
             image, width, height = self.load_image_data(file_path)
 
             # Prepare messages based on model type
-            messages = self._prepare_messages(input_data,
+            messages = self._prepare_messages(input_data, apply_annotation)
 
             # Generate and process response
             prompt = apply_chat_template(processor, config, messages)
-            response = generate(
+            response, _ = generate(
                 model,
                 processor,
                 prompt,
@@ -186,21 +188,104 @@ class MLXInference(ModelInference):
 
         return results
 
-
+
+    def transform_query_with_bbox(self, text_input):
+        """
+        Transform JSON schema in text_input to include value, bbox, and confidence.
+        Works with both array and object JSON structures.
+
+        Args:
+            text_input (str): The input text containing a JSON schema
+
+        Returns:
+            str: Text with transformed JSON including value, bbox, and confidence
+        """
+        # Split text into parts - find the JSON portion between "retrieve" and "return response"
+        retrieve_pattern = r'retrieve\s+'
+        return_pattern = r'\.\s+return\s+response'
+
+        retrieve_match = re.search(retrieve_pattern, text_input)
+        return_match = re.search(return_pattern, text_input)
+
+        if not retrieve_match or not return_match:
+            return text_input  # Return original if pattern not found
+
+        json_start = retrieve_match.end()
+        json_end = return_match.start()
+
+        prefix = text_input[:json_start]
+        json_str = text_input[json_start:json_end].strip()
+        suffix = text_input[json_end:]
+
+        # Parse and transform the JSON
+        try:
+            # Handle single quotes if needed
+            json_str = json_str.replace("'", '"')
+
+            json_obj = json.loads(json_str)
+            transformed_json = self.transform_query_structure(json_obj)
+            transformed_json_str = json.dumps(transformed_json)
+
+            # Rebuild the text
+            result = prefix + transformed_json_str + suffix
+
+            return result
+        except json.JSONDecodeError as e:
+            print(f"Error parsing JSON: {e}")
+            return text_input  # Return original if parsing fails
+
+
+    def transform_query_structure(self, json_obj):
+        """
+        Transform each field in the JSON structure to include value, bbox, and confidence.
+        Handles both array and object formats recursively.
+        """
+        if isinstance(json_obj, list):
+            # Handle array format
+            return [self.transform_query_structure(item) for item in json_obj]
+        elif isinstance(json_obj, dict):
+            # Handle object format
+            result = {}
+            for key, value in json_obj.items():
+                if isinstance(value, (dict, list)):
+                    # Recursively transform nested objects or arrays
+                    result[key] = self.transform_query_structure(value)
+                else:
+                    # Transform simple value to object with value, bbox, and confidence
+                    result[key] = {
+                        "value": value,
+                        "bbox": ["float", "float", "float", "float"],
+                        "confidence": "float"
+                    }
            return result
+        else:
+            # For primitive values, no transformation needed
+            return json_obj
+
+
+    def _prepare_messages(self, input_data, apply_annotation):
         """
         Prepare the appropriate messages based on the model type.
 
         :param input_data: Original input data
-        :param
+        :param apply_annotation: Flag to apply annotations
         :return: Properly formatted messages
         """
         if "mistral" in self.model_name.lower():
             return input_data[0]["text_input"]
-
+        elif "qwen" in self.model_name.lower():
+            if apply_annotation:
+                system_prompt = {"role": "system", "content": "You are an expert at extracting text from images. "
+                                                              "For each item in the table, provide separate bounding boxes for each field. "
+                                                              "All coordinates should be in pixels relative to the original image. Format your response in JSON."}
+                user_prompt = {"role": "user", "content": self.transform_query_with_bbox(input_data[0]["text_input"])}
+                return [system_prompt, user_prompt]
             return [
                 {"role": "system", "content": "You are an expert at extracting structured text from image documents."},
                 {"role": "user", "content": input_data[0]["text_input"]},
             ]
+        else:
+            raise ValueError("Unsupported model type. Please use either Mistral or Qwen.")
 
     @staticmethod
     def _extract_file_paths(input_data):
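
To make the new query rewriting concrete, the following standalone sketch mirrors `transform_query_structure` from the diff above outside the class, purely for illustration, and shows what the bbox-annotated query schema looks like for the bonds-table example.

```python
import json
import re


def transform_query_structure(obj):
    """Standalone mirror of MLXInference.transform_query_structure (see diff above)."""
    if isinstance(obj, list):
        return [transform_query_structure(item) for item in obj]
    if isinstance(obj, dict):
        return {
            key: transform_query_structure(value)
            if isinstance(value, (dict, list))
            else {"value": value, "bbox": ["float", "float", "float", "float"], "confidence": "float"}
            for key, value in obj.items()
        }
    return obj


query = 'retrieve [{"instrument_name":"str", "valuation":"int"}]. return response in JSON format'
json_part = re.search(r"retrieve\s+(.*)\.\s+return\s+response", query).group(1)
print(json.dumps(transform_query_structure(json.loads(json_part))))
# Each plain field becomes {"value": ..., "bbox": ["float", ...], "confidence": "float"},
# which is the schema the Qwen system prompt in _prepare_messages asks the model to fill.
```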

{sparrow-parse-1.0.5 → sparrow-parse-1.0.6}/sparrow_parse.egg-info/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: sparrow-parse
-Version: 1.0.5
+Version: 1.0.6
 Summary: Sparrow Parse is a Python package (part of Sparrow) for parsing and extracting information from documents.
 Home-page: https://github.com/katanaml/sparrow/tree/main/sparrow-data/parse
 Author: Andrej Baranovskij
@@ -20,11 +20,11 @@ Requires-Dist: torchvision>=0.22.0
 Requires-Dist: torch>=2.7.0
 Requires-Dist: sentence-transformers>=4.1.0
 Requires-Dist: numpy>=2.2.5
-Requires-Dist: pypdf>=5.
+Requires-Dist: pypdf>=5.5.0
 Requires-Dist: gradio_client>=1.7.2
 Requires-Dist: pdf2image>=1.17.0
-Requires-Dist: mlx>=0.25.
-Requires-Dist: mlx-vlm==0.1.
+Requires-Dist: mlx>=0.25.2; sys_platform == "darwin" and platform_machine == "arm64"
+Requires-Dist: mlx-vlm==0.1.26; sys_platform == "darwin" and platform_machine == "arm64"
 
 # Sparrow Parse
 

{sparrow-parse-1.0.5 → sparrow-parse-1.0.6}/sparrow_parse.egg-info/requires.txt

@@ -4,10 +4,10 @@ torchvision>=0.22.0
 torch>=2.7.0
 sentence-transformers>=4.1.0
 numpy>=2.2.5
-pypdf>=5.
+pypdf>=5.5.0
 gradio_client>=1.7.2
 pdf2image>=1.17.0
 
 [:sys_platform == "darwin" and platform_machine == "arm64"]
-mlx>=0.25.
-mlx-vlm==0.1.
+mlx>=0.25.2
+mlx-vlm==0.1.26
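
With the environment markers above, pip installs mlx and mlx-vlm only on macOS Apple Silicon, so on other platforms those modules are simply absent. A minimal, hedged sketch of how downstream code might guard the optional import is shown below; the guard is illustrative and not part of sparrow-parse itself.

```python
import platform
import sys

# Mirrors the packaging marker: sys_platform == "darwin" and platform_machine == "arm64".
# Illustrative guard only; sparrow-parse may handle backend selection differently.
if sys.platform == "darwin" and platform.machine() == "arm64":
    from mlx_vlm import load  # installed only where the marker applies
else:
    load = None  # MLX backend not installed on this platform
```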

sparrow-parse-1.0.5/sparrow_parse/__init__.py

@@ -1 +0,0 @@
-__version__ = '1.0.5'

sparrow-parse-1.0.5/sparrow_parse/text_extraction.py

@@ -1,35 +0,0 @@
-from mlx_vlm import load, apply_chat_template, generate
-from mlx_vlm.utils import load_image
-
-
-# Load model and processor
-# vl_model, vl_processor = load("mlx-community/Mistral-Small-3.1-24B-Instruct-2503-8bit")
-vl_model, vl_processor = load("mlx-community/Qwen2.5-VL-7B-Instruct-8bit")
-vl_config = vl_model.config
-
-image = load_image("images/bonds_table.png")
-
-messages = [
-    {"role": "system", "content": "You are an expert at extracting text from images. Format your response in json."},
-    {"role": "user", "content": "retrieve [{\"instrument_name\":\"str\", \"valuation\":\"int\"}]. return response in JSON format"}
-]
-
-# message = "retrieve all data. return response in JSON format"
-# message = "retrieve [{\"instrument_name\":\"str\", \"valuation\":\"int\"}]. return response in JSON format"
-
-# Apply chat template
-prompt = apply_chat_template(vl_processor, vl_config, messages)
-# prompt = apply_chat_template(vl_processor, vl_config, message)
-
-# Generate text
-vl_output = generate(
-    vl_model,
-    vl_processor,
-    prompt,
-    image,
-    max_tokens=1000,
-    temperature=0,
-    verbose=False
-)
-
-print(vl_output)

All remaining files, listed above with +0 -0 (including sparrow_parse/processors/table_structure_processor.py), were renamed from sparrow-parse-1.0.5 to sparrow-parse-1.0.6 without content changes.