PyPI - sparrow-parse - Versions diffs - 0.3.6__py3-none-any.whl → 0.3.7__py3-none-any.whl - Mend

sparrow-parse 0.3.6__py3-none-any.whl → 0.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

sparrow_parse/__init__.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = '0.3.6'
1	+ __version__ = '0.3.7'

sparrow_parse/extractors/vllm_extractor.py CHANGED Viewed

@@ -30,21 +30,16 @@ class VLLMExtractor(object):
                                                                                  debug_dir,
                                                                                  True)
-            # Run inference on each page
-            for page_num, output_file in enumerate(output_files):
-                input_data[0]["file_path"] = output_file
-                if debug:
-                    print(f"Running inference on page {page_num + 1}...")
+            input_data[0]["file_path"] = output_files
-                # Run inference on the page
-                result = model_inference_instance.inference(input_data, mode)
-                results_array.append(result)
+            # Run inference on the page
+            results_array = model_inference_instance.inference(input_data, mode)
             shutil.rmtree(temp_dir, ignore_errors=True)
             return results_array, num_pages
-        result = model_inference_instance.inference(input_data)
-        results_array.append(result)
+        input_data[0]["file_path"] = [input_data[0]["file_path"]]
+        results_array = model_inference_instance.inference(input_data)
         return results_array, 1
@@ -80,7 +75,7 @@ if __name__ == "__main__":
     # results_array, num_pages = extractor.run_inference(model_inference_instance, input_data, generic_query=False,
     #                                  debug_dir="/Users/andrejb/infra/shared/katana-git/sparrow/sparrow-ml/llm/data/",
     #                                  debug=True,
-    #                                  mode="static")
+    #                                  mode=None)
     #
     # for i, result in enumerate(results_array):
     #     print(f"Result for page {i + 1}:", result)

sparrow_parse/vllm/huggingface_inference.py CHANGED Viewed

@@ -1,6 +1,8 @@
 from gradio_client import Client, handle_file
 from sparrow_parse.vllm.inference_base import ModelInference
 import json
+import os
+import ast
 class HuggingFaceInference(ModelInference):
@@ -27,14 +29,32 @@ class HuggingFaceInference(ModelInference):
     def inference(self, input_data, mode=None):
         if mode == "static":
             simple_json = self.get_simple_json()
-            return simple_json
+            return [simple_json]
         client = Client(self.hf_space, hf_token=self.hf_token)
-        result = client.predict(
-            image=handle_file(input_data[0]["file_path"]),
-            text_input=input_data[0]["text_input"],
-            api_name="/run_inference"
+        # Extract and prepare the absolute paths for all file paths in input_data
+        file_paths = [
+            os.path.abspath(file_path)
+            for data in input_data
+            for file_path in data["file_path"]
+        ]
+        # Validate file existence and prepare files for the Gradio client
+        image_files = [handle_file(path) for path in file_paths if os.path.exists(path)]
+        results = client.predict(
+            input_imgs=image_files,
+            text_input=input_data[0]["text_input"],  # Single shared text input for all images
+            api_name="/run_inference"  # Specify the Gradio API endpoint
         )
-        return self.process_response(result)
+        # Convert the string into a Python list
+        parsed_results = ast.literal_eval(results)
+        results_array = []
+        for page_output in parsed_results:
+            page_result = self.process_response(page_output)
+            results_array.append(page_result)
+        return results_array

{sparrow_parse-0.3.6.dist-info → sparrow_parse-0.3.7.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: sparrow-parse
-Version: 0.3.6
+Version: 0.3.7
 Summary: Sparrow Parse is a Python package (part of Sparrow) for parsing and extracting information from documents.
 Home-page: https://github.com/katanaml/sparrow/tree/main/sparrow-data/parse
 Author: Andrej Baranovskij
@@ -73,7 +73,7 @@ input_data = [
 results_array, num_pages = extractor.run_inference(model_inference_instance, input_data, generic_query=False,
                                  debug_dir="/data/",
                                  debug=True,
-                                 mode="static")
+                                 mode=None)
 for i, result in enumerate(results_array):
     print(f"Result for page {i + 1}:", result)
@@ -82,6 +82,10 @@ print(f"Number of pages: {num_pages}")
 Use `mode="static"` if you want to simulate LLM call, without executing LLM backend.
+Method `run_inference` will return results and number of pages processed.
+Note: GPU backend `katanaml/sparrow-qwen2-vl-7b` is private, to be able to run below command, you need to create your own backend on Hugging Face space using [code](https://github.com/katanaml/sparrow/tree/main/sparrow-data/parse/sparrow_parse/vllm/infra/qwen2_vl_7b) from Sparrow Parse.
 ## PDF pre-processing
 ```

{sparrow_parse-0.3.6.dist-info → sparrow_parse-0.3.7.dist-info}/RECORD RENAMED Viewed

@@ -1,18 +1,18 @@
-sparrow_parse/__init__.py,sha256=IbpUPwvtjLOqowcOFsWQ6LKq-FH6cI19IpvfQlxufq0,21
+sparrow_parse/__init__.py,sha256=V3RDzgFfGW_qKkRklGT6eISHLybQsgfScnd9neXG7Cs,21
 sparrow_parse/__main__.py,sha256=Xs1bpJV0n08KWOoQE34FBYn6EBXZA9HIYJKrE4ZdG78,153
 sparrow_parse/extractors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-sparrow_parse/extractors/vllm_extractor.py,sha256=Wo8sOvsQt6YHd7bvB_DB8MUa71FioO9xcQOWA3PQ6eU,3415
+sparrow_parse/extractors/vllm_extractor.py,sha256=mBPgeyMuHUa6jN_OZLVE-426tD4zYnFT61oxebk7XJc,3191
 sparrow_parse/helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sparrow_parse/helpers/pdf_optimizer.py,sha256=GIqQYWtixFeZGCRFXL0lQfQByapCDuQzzRHAkzcPwLE,3302
 sparrow_parse/processors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sparrow_parse/processors/table_structure_processor.py,sha256=bG_6jx66n_KNdY_O6hrZD1D4DHX5Qy__RYcKHmrSGnc,23894
 sparrow_parse/vllm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-sparrow_parse/vllm/huggingface_inference.py,sha256=nalmPJFfrFlRnfd4yTq4HvIwDvIXjhKUlEyZ6gzMqe0,1239
+sparrow_parse/vllm/huggingface_inference.py,sha256=EJnG6PesGKMc_0qGPN8ufE6pSnhAgFu0XjCbaLCNVyM,1980
 sparrow_parse/vllm/inference_base.py,sha256=4mwGoAY63MB4cHZpV0czTkJWEzimmiTzqqzKmLNzgjw,820
 sparrow_parse/vllm/inference_factory.py,sha256=r04e95uPWG5l8Q23yeDqKmvFxLyF991aA2m0hfBTNn8,993
 sparrow_parse/vllm/local_gpu_inference.py,sha256=aHoJTejb5xrXjWDIGu5RBQWEyRCOBCB04sMvO2Wyvg8,628
-sparrow_parse-0.3.6.dist-info/METADATA,sha256=ANS8eWCx07bQOOFFnJUKwsiPo-ZT42b8DvMwP9o-jf4,5827
-sparrow_parse-0.3.6.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
-sparrow_parse-0.3.6.dist-info/entry_points.txt,sha256=8CrvTVTTcz1YuZ8aRCYNOH15ZOAaYLlcbYX3t28HwJY,54
-sparrow_parse-0.3.6.dist-info/top_level.txt,sha256=n6b-WtT91zKLyCPZTP7wvne8v_yvIahcsz-4sX8I0rY,14
-sparrow_parse-0.3.6.dist-info/RECORD,,
+sparrow_parse-0.3.7.dist-info/METADATA,sha256=ErE4fDTkcyOrVbgpc6x9AO9cU3Gf8HbEGsbKmK-F0RA,6187
+sparrow_parse-0.3.7.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
+sparrow_parse-0.3.7.dist-info/entry_points.txt,sha256=8CrvTVTTcz1YuZ8aRCYNOH15ZOAaYLlcbYX3t28HwJY,54
+sparrow_parse-0.3.7.dist-info/top_level.txt,sha256=n6b-WtT91zKLyCPZTP7wvne8v_yvIahcsz-4sX8I0rY,14
+sparrow_parse-0.3.7.dist-info/RECORD,,

{sparrow_parse-0.3.6.dist-info → sparrow_parse-0.3.7.dist-info}/WHEEL RENAMED Viewed

File without changes

{sparrow_parse-0.3.6.dist-info → sparrow_parse-0.3.7.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{sparrow_parse-0.3.6.dist-info → sparrow_parse-0.3.7.dist-info}/top_level.txt RENAMED Viewed

File without changes

sparrow-parse 0.3.6__py3-none-any.whl → 0.3.7__py3-none-any.whl

sparrow-parse 0.3.6__py3-none-any.whl → 0.3.7__py3-none-any.whl