PyPI - sparrow-parse - Versions diffs - 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl - Mend

sparrow-parse 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

sparrow_parse/__init__.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = '0.4.1'
1	+ __version__ = '0.4.3'

sparrow_parse/extractors/vllm_extractor.py CHANGED Viewed

@@ -54,7 +54,7 @@ class VLLMExtractor(object):
         """
         file_path = input_data[0]["file_path"]
         if tables_only:
-            return [self._extract_tables(model_inference_instance, file_path, input_data, debug, debug_dir)], 1
+            return self._extract_tables(model_inference_instance, file_path, input_data, debug, debug_dir), 1
         else:
             input_data[0]["file_path"] = [file_path]
             results = model_inference_instance.inference(input_data)
@@ -85,7 +85,8 @@ class VLLMExtractor(object):
                 tables_result = self._extract_tables(
                     model_inference_instance, file_path, input_data, debug, debug_dir, page_index=i
                 )
-                results_array.append(tables_result)
+                # Since _extract_tables returns a list with one JSON string, unpack it
+                results_array.extend(tables_result)  # Unpack the single JSON string
         else:
             if debug:
                 print(f"Processing {len(output_files)} pages for inference at once.")
@@ -118,7 +119,15 @@ class VLLMExtractor(object):
             results_array.append(result)
         shutil.rmtree(temp_dir, ignore_errors=True)
-        return json.dumps(results_array, indent=4)
+        # Merge results_array elements into a single JSON structure
+        merged_results = {"page_tables": results_array}
+        # Format the merged results as a JSON string with indentation
+        formatted_results = json.dumps(merged_results, indent=4)
+        # Return the formatted JSON string wrapped in a list
+        return [formatted_results]
     @staticmethod
@@ -166,7 +175,7 @@ if __name__ == "__main__":
     # ]
     #
     # # Now you can run inference without knowing which implementation is used
-    # results_array, num_pages = extractor.run_inference(model_inference_instance, input_data, tables_only=False,
+    # results_array, num_pages = extractor.run_inference(model_inference_instance, input_data, tables_only=True,
     #                                                    generic_query=False,
     #                                                    debug_dir="/Users/andrejb/Work/katana-git/sparrow/sparrow-ml/llm/data/",
     #                                                    debug=True,

sparrow_parse/vllm/mlx_inference.py CHANGED Viewed

@@ -14,14 +14,12 @@ class MLXInference(ModelInference):
     def __init__(self, model_name):
         """
-        Initialize the inference class with the given model name and load the model once.
+        Initialize the inference class with the given model name.
         :param model_name: Name of the model to load.
         """
-        self.model, self.processor = self._load_model_and_processor(model_name)
-        self.config = self.model.config
-        print(f"Loaded model: {model_name}")
+        self.model_name = model_name
+        print(f"MLXInference initialized with model: {model_name}")
     @staticmethod
@@ -89,6 +87,10 @@ class MLXInference(ModelInference):
         if mode == "static":
             return [self.get_simple_json()]
+        # Load the model and processor
+        model, processor = self._load_model_and_processor(self.model_name)
+        config = model.config
         # Prepare absolute file paths
         file_paths = self._extract_file_paths(input_data)
@@ -103,10 +105,10 @@ class MLXInference(ModelInference):
             ]
             # Generate and process response
-            prompt = apply_chat_template(self.processor, self.config, messages)  # Assuming defined
+            prompt = apply_chat_template(processor, config, messages)  # Assuming defined
             response = generate(
-                self.model,
-                self.processor,
+                model,
+                processor,
                 image,
                 prompt,
                 resize_shape=(width, height),

{sparrow_parse-0.4.1.dist-info → sparrow_parse-0.4.3.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: sparrow-parse
-Version: 0.4.1
+Version: 0.4.3
 Summary: Sparrow Parse is a Python package (part of Sparrow) for parsing and extracting information from documents.
 Home-page: https://github.com/katanaml/sparrow/tree/main/sparrow-data/parse
 Author: Andrej Baranovskij

{sparrow_parse-0.4.1.dist-info → sparrow_parse-0.4.3.dist-info}/RECORD RENAMED Viewed

@@ -1,7 +1,7 @@
-sparrow_parse/__init__.py,sha256=8yPI9dbwQUYqhMtA3RfAi5yJOhZBnz-g8966ssrYXiU,21
+sparrow_parse/__init__.py,sha256=udnlByVnFcZDwWir50pEbTU0bIwgBrpNtAiVExFEzu0,21
 sparrow_parse/__main__.py,sha256=Xs1bpJV0n08KWOoQE34FBYn6EBXZA9HIYJKrE4ZdG78,153
 sparrow_parse/extractors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-sparrow_parse/extractors/vllm_extractor.py,sha256=QIg7AMCfw81YHQN6CutF2ipV_DZ3txSGduPIcvQRmiA,7439
+sparrow_parse/extractors/vllm_extractor.py,sha256=ybWpRpDH0YHoYpHkjIJtm7DQoHJBKNsirK2YIAlMvGo,7863
 sparrow_parse/helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sparrow_parse/helpers/pdf_optimizer.py,sha256=GIqQYWtixFeZGCRFXL0lQfQByapCDuQzzRHAkzcPwLE,3302
 sparrow_parse/processors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -11,9 +11,9 @@ sparrow_parse/vllm/huggingface_inference.py,sha256=EJnG6PesGKMc_0qGPN8ufE6pSnhAg
 sparrow_parse/vllm/inference_base.py,sha256=4mwGoAY63MB4cHZpV0czTkJWEzimmiTzqqzKmLNzgjw,820
 sparrow_parse/vllm/inference_factory.py,sha256=FTM65O-dW2WZchHOrNN7_Q3-FlVoAc65iSptuuUuClM,1166
 sparrow_parse/vllm/local_gpu_inference.py,sha256=aHoJTejb5xrXjWDIGu5RBQWEyRCOBCB04sMvO2Wyvg8,628
-sparrow_parse/vllm/mlx_inference.py,sha256=xR40qwjIR0HvrN8x58oOq6F4r1hEANRB-9kcokUQHHU,4748
-sparrow_parse-0.4.1.dist-info/METADATA,sha256=4rmJ1CURKtyTs-ZH1eyHn_VptHosJZwhQFB5Fssr5e0,6432
-sparrow_parse-0.4.1.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
-sparrow_parse-0.4.1.dist-info/entry_points.txt,sha256=8CrvTVTTcz1YuZ8aRCYNOH15ZOAaYLlcbYX3t28HwJY,54
-sparrow_parse-0.4.1.dist-info/top_level.txt,sha256=n6b-WtT91zKLyCPZTP7wvne8v_yvIahcsz-4sX8I0rY,14
-sparrow_parse-0.4.1.dist-info/RECORD,,
+sparrow_parse/vllm/mlx_inference.py,sha256=cx-PLXf1t8ro50YALddj70FiR7s0gk_Ddp-I9XlPQQU,4788
+sparrow_parse-0.4.3.dist-info/METADATA,sha256=W7zeOHa09rgn-58aIdTkNOSqBLgpziDF7sZ_059jaoo,6432
+sparrow_parse-0.4.3.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+sparrow_parse-0.4.3.dist-info/entry_points.txt,sha256=8CrvTVTTcz1YuZ8aRCYNOH15ZOAaYLlcbYX3t28HwJY,54
+sparrow_parse-0.4.3.dist-info/top_level.txt,sha256=n6b-WtT91zKLyCPZTP7wvne8v_yvIahcsz-4sX8I0rY,14
+sparrow_parse-0.4.3.dist-info/RECORD,,

{sparrow_parse-0.4.1.dist-info → sparrow_parse-0.4.3.dist-info}/WHEEL RENAMED Viewed

File without changes

{sparrow_parse-0.4.1.dist-info → sparrow_parse-0.4.3.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{sparrow_parse-0.4.1.dist-info → sparrow_parse-0.4.3.dist-info}/top_level.txt RENAMED Viewed

File without changes

sparrow-parse 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl

sparrow-parse 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl