sparrow-parse 1.0.3__py3-none-any.whl → 1.0.4a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sparrow_parse/__init__.py CHANGED
@@ -1 +1 @@
- __version__ = '1.0.3'
+ __version__ = '1.0.4a'
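Note: the source sets __version__ = '1.0.4a' while the wheel metadata reports 1.0.4a0; under PEP 440 these normalize to the same pre-release version. A quick check, using the third-party packaging library (not a dependency of this package):

from packaging.version import Version

# A bare alpha segment 'a' normalizes to 'a0' under PEP 440
assert Version("1.0.4a") == Version("1.0.4a0")
print(Version("1.0.4a"))  # -> 1.0.4a0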
sparrow_parse/vllm/inference_factory.py CHANGED
@@ -1,6 +1,6 @@
  from sparrow_parse.vllm.huggingface_inference import HuggingFaceInference
  from sparrow_parse.vllm.local_gpu_inference import LocalGPUInference
- from sparrow_parse.vllm.mlx_inference import MLXInference
+ # from sparrow_parse.vllm.mlx_inference import MLXInference
 
 
  class InferenceFactory:
@@ -14,7 +14,8 @@ class InferenceFactory:
  model = self._load_local_model() # Replace with actual model loading logic
  return LocalGPUInference(model=model, device=self.config.get("device", "cuda"))
  elif self.config["method"] == "mlx":
- return MLXInference(model_name=self.config["model_name"])
+ # return MLXInference(model_name=self.config["model_name"])
+ return None
  else:
  raise ValueError(f"Unknown method: {self.config['method']}")
 
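With the MLX import commented out, the factory now returns None for method "mlx" instead of raising or yielding an inference instance, so callers need a guard. A minimal sketch of such a guard; the factory method name and the model name are assumptions, since only the "method" and "model_name" config keys appear in the diff:

# Hypothetical call site; get_inference_instance is an assumed method name.
factory = InferenceFactory({"method": "mlx", "model_name": "some-mlx-model"})
inference = factory.get_inference_instance()
if inference is None:
    # The MLX path is disabled in 1.0.4a0, so fail loudly rather than later.
    raise RuntimeError("MLX backend is disabled in sparrow-parse 1.0.4a0")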
sparrow_parse/vllm/mlx_inference.py CHANGED
@@ -1,216 +1,217 @@
- from mlx_vlm import load, generate
- from mlx_vlm.prompt_utils import apply_chat_template
- from mlx_vlm.utils import load_image
- from sparrow_parse.vllm.inference_base import ModelInference
- import os
- import json
- from rich import print
-
-
- class MLXInference(ModelInference):
- """
- A class for performing inference using the MLX model.
- Handles image preprocessing, response formatting, and model interaction.
- """
-
- def __init__(self, model_name):
- """
- Initialize the inference class with the given model name.
-
- :param model_name: Name of the model to load.
- """
- self.model_name = model_name
- print(f"MLXInference initialized for model: {model_name}")
-
-
- @staticmethod
- def _load_model_and_processor(model_name):
- """
- Load the model and processor for inference.
-
- :param model_name: Name of the model to load.
- :return: Tuple containing the loaded model and processor.
- """
- model, processor = load(model_name)
- print(f"Loaded model: {model_name}")
- return model, processor
-
-
- def process_response(self, output_text):
- """
- Process and clean the model's raw output to format as JSON.
- """
- try:
- # Check if we have markdown code block markers
- if "```" in output_text:
- # Handle markdown-formatted output
- json_start = output_text.find("```json")
- if json_start != -1:
- # Extract content between ```json and ```
- content = output_text[json_start + 7:]
- json_end = content.rfind("```")
- if json_end != -1:
- content = content[:json_end].strip()
- formatted_json = json.loads(content)
- return json.dumps(formatted_json, indent=2)
-
- # Handle raw JSON (no markdown formatting)
- # First try to find JSON array or object patterns
- for pattern in [r'\[\s*\{.*\}\s*\]', r'\{.*\}']:
- import re
- matches = re.search(pattern, output_text, re.DOTALL)
- if matches:
- potential_json = matches.group(0)
- try:
- formatted_json = json.loads(potential_json)
- return json.dumps(formatted_json, indent=2)
- except:
- pass
-
- # Last resort: try to parse the whole text as JSON
- formatted_json = json.loads(output_text.strip())
- return json.dumps(formatted_json, indent=2)
-
- except Exception as e:
- print(f"Failed to parse JSON: {e}")
- return output_text
-
-
- def load_image_data(self, image_filepath, max_width=1250, max_height=1750):
- """
- Load and resize image while maintaining its aspect ratio.
-
- :param image_filepath: Path to the image file.
- :param max_width: Maximum allowed width of the image.
- :param max_height: Maximum allowed height of the image.
- :return: Tuple containing the image object and its new dimensions.
- """
- image = load_image(image_filepath) # Assuming load_image is defined elsewhere
- width, height = image.size
-
- # Calculate new dimensions while maintaining the aspect ratio
- if width > max_width or height > max_height:
- aspect_ratio = width / height
- new_width = min(max_width, int(max_height * aspect_ratio))
- new_height = min(max_height, int(max_width / aspect_ratio))
- return image, new_width, new_height
-
- return image, width, height
-
-
- def inference(self, input_data, mode=None):
- """
- Perform inference on input data using the specified model.
-
- :param input_data: A list of dictionaries containing image file paths and text inputs.
- :param mode: Optional mode for inference ("static" for simple JSON output).
- :return: List of processed model responses.
- """
- # Handle static mode
- if mode == "static":
- return [self.get_simple_json()]
-
- # Load the model and processor
- model, processor = self._load_model_and_processor(self.model_name)
- config = model.config
-
- # Determine if we're doing text-only or image-based inference
- is_text_only = input_data[0].get("file_path") is None
-
- if is_text_only:
- # Text-only inference
- messages = input_data[0]["text_input"]
- response = self._generate_text_response(model, processor, config, messages)
- results = [self.process_response(response)]
- print("Agent inference completed successfully")
- else:
- # Image-based inference
- file_paths = self._extract_file_paths(input_data)
- results = self._process_images(model, processor, config, file_paths, input_data)
-
- return results
-
- def _generate_text_response(self, model, processor, config, messages):
- """
- Generate a text response for text-only inputs.
-
- :param model: The loaded model
- :param processor: The loaded processor
- :param config: Model configuration
- :param messages: Input messages
- :return: Generated response
- """
- prompt = apply_chat_template(processor, config, messages)
- return generate(
- model,
- processor,
- prompt,
- max_tokens=4000,
- temperature=0.0,
- verbose=False
- )
-
- def _process_images(self, model, processor, config, file_paths, input_data):
- """
- Process images and generate responses for each.
-
- :param model: The loaded model
- :param processor: The loaded processor
- :param config: Model configuration
- :param file_paths: List of image file paths
- :param input_data: Original input data
- :return: List of processed responses
- """
- results = []
- for file_path in file_paths:
- image, width, height = self.load_image_data(file_path)
-
- # Prepare messages based on model type
- messages = self._prepare_messages(input_data, file_path)
-
- # Generate and process response
- prompt = apply_chat_template(processor, config, messages)
- response = generate(
- model,
- processor,
- prompt,
- image,
- resize_shape=(width, height),
- max_tokens=4000,
- temperature=0.0,
- verbose=False
- )
- results.append(self.process_response(response))
- print(f"Inference completed successfully for: {file_path}")
-
- return results
-
- def _prepare_messages(self, input_data, file_path):
- """
- Prepare the appropriate messages based on the model type.
-
- :param input_data: Original input data
- :param file_path: Current file path being processed
- :return: Properly formatted messages
- """
- if "mistral" in self.model_name.lower():
- return input_data[0]["text_input"]
- else:
- return [
- {"role": "system", "content": "You are an expert at extracting structured text from image documents."},
- {"role": "user", "content": input_data[0]["text_input"]},
- ]
-
- @staticmethod
- def _extract_file_paths(input_data):
- """
- Extract and resolve absolute file paths from input data.
-
- :param input_data: List of dictionaries containing image file paths.
- :return: List of absolute file paths.
- """
- return [
- os.path.abspath(file_path)
- for data in input_data
- for file_path in data.get("file_path", [])
- ]
+ # from mlx_vlm import load, generate
+ # from mlx_vlm.prompt_utils import apply_chat_template
+ # from mlx_vlm.utils import load_image
+ # from sparrow_parse.vllm.inference_base import ModelInference
+ # import os
+ # import json
+ # from rich import print
+ #
+ #
+ # class MLXInference(ModelInference):
+ # """
+ # A class for performing inference using the MLX model.
+ # Handles image preprocessing, response formatting, and model interaction.
+ # """
+ #
+ # def __init__(self, model_name):
+ # """
+ # Initialize the inference class with the given model name.
+ #
+ # :param model_name: Name of the model to load.
+ # """
+ # self.model_name = model_name
+ # print(f"MLXInference initialized for model: {model_name}")
+ #
+ #
+ # @staticmethod
+ # def _load_model_and_processor(model_name):
+ # """
+ # Load the model and processor for inference.
+ #
+ # :param model_name: Name of the model to load.
+ # :return: Tuple containing the loaded model and processor.
+ # """
+ # model, processor = load(model_name)
+ # print(f"Loaded model: {model_name}")
+ # return model, processor
+ #
+ #
+ # def process_response(self, output_text):
+ # """
+ # Process and clean the model's raw output to format as JSON.
+ # """
+ # try:
+ # # Check if we have markdown code block markers
+ # if "```" in output_text:
+ # # Handle markdown-formatted output
+ # json_start = output_text.find("```json")
+ # if json_start != -1:
+ # # Extract content between ```json and ```
+ # content = output_text[json_start + 7:]
+ # json_end = content.rfind("```")
+ # if json_end != -1:
+ # content = content[:json_end].strip()
+ # formatted_json = json.loads(content)
+ # return json.dumps(formatted_json, indent=2)
+ #
+ # # Handle raw JSON (no markdown formatting)
+ # # First try to find JSON array or object patterns
+ # for pattern in [r'\[\s*\{.*\}\s*\]', r'\{.*\}']:
+ # import re
+ # matches = re.search(pattern, output_text, re.DOTALL)
+ # if matches:
+ # potential_json = matches.group(0)
+ # try:
+ # formatted_json = json.loads(potential_json)
+ # return json.dumps(formatted_json, indent=2)
+ # except:
+ # pass
+ #
+ # # Last resort: try to parse the whole text as JSON
+ # formatted_json = json.loads(output_text.strip())
+ # return json.dumps(formatted_json, indent=2)
+ #
+ # except Exception as e:
+ # print(f"Failed to parse JSON: {e}")
+ # return output_text
+ #
+ #
+ # def load_image_data(self, image_filepath, max_width=1250, max_height=1750):
+ # """
+ # Load and resize image while maintaining its aspect ratio.
+ #
+ # :param image_filepath: Path to the image file.
+ # :param max_width: Maximum allowed width of the image.
+ # :param max_height: Maximum allowed height of the image.
+ # :return: Tuple containing the image object and its new dimensions.
+ # """
+ # image = load_image(image_filepath) # Assuming load_image is defined elsewhere
+ # width, height = image.size
+ #
+ # # Calculate new dimensions while maintaining the aspect ratio
+ # if width > max_width or height > max_height:
+ # aspect_ratio = width / height
+ # new_width = min(max_width, int(max_height * aspect_ratio))
+ # new_height = min(max_height, int(max_width / aspect_ratio))
+ # return image, new_width, new_height
+ #
+ # return image, width, height
+ #
+ #
+ # def inference(self, input_data, mode=None):
+ # """
+ # Perform inference on input data using the specified model.
+ #
+ # :param input_data: A list of dictionaries containing image file paths and text inputs.
+ # :param mode: Optional mode for inference ("static" for simple JSON output).
+ # :return: List of processed model responses.
+ # """
+ # # Handle static mode
+ # if mode == "static":
+ # return [self.get_simple_json()]
+ #
+ # # Load the model and processor
+ # model, processor = self._load_model_and_processor(self.model_name)
+ # config = model.config
+ #
+ # # Determine if we're doing text-only or image-based inference
+ # is_text_only = input_data[0].get("file_path") is None
+ #
+ # if is_text_only:
+ # # Text-only inference
+ # messages = input_data[0]["text_input"]
+ # response = self._generate_text_response(model, processor, config, messages)
+ # results = [response]
+ # else:
+ # # Image-based inference
+ # file_paths = self._extract_file_paths(input_data)
+ # results = self._process_images(model, processor, config, file_paths, input_data)
+ #
+ # return results
+ #
+ # def _generate_text_response(self, model, processor, config, messages):
+ # """
+ # Generate a text response for text-only inputs.
+ #
+ # :param model: The loaded model
+ # :param processor: The loaded processor
+ # :param config: Model configuration
+ # :param messages: Input messages
+ # :return: Generated response
+ # """
+ # prompt = apply_chat_template(processor, config, messages)
+ # response = generate(
+ # model,
+ # processor,
+ # prompt,
+ # max_tokens=4000,
+ # temperature=0.0,
+ # verbose=False
+ # )
+ # print("Inference completed successfully")
+ # return response
+ #
+ # def _process_images(self, model, processor, config, file_paths, input_data):
+ # """
+ # Process images and generate responses for each.
+ #
+ # :param model: The loaded model
+ # :param processor: The loaded processor
+ # :param config: Model configuration
+ # :param file_paths: List of image file paths
+ # :param input_data: Original input data
+ # :return: List of processed responses
+ # """
+ # results = []
+ # for file_path in file_paths:
+ # image, width, height = self.load_image_data(file_path)
+ #
+ # # Prepare messages based on model type
+ # messages = self._prepare_messages(input_data, file_path)
+ #
+ # # Generate and process response
+ # prompt = apply_chat_template(processor, config, messages)
+ # response = generate(
+ # model,
+ # processor,
+ # prompt,
+ # image,
+ # resize_shape=(width, height),
+ # max_tokens=4000,
+ # temperature=0.0,
+ # verbose=False
+ # )
+ # results.append(self.process_response(response))
+ # print(f"Inference completed successfully for: {file_path}")
+ #
+ # return results
+ #
+ # def _prepare_messages(self, input_data, file_path):
+ # """
+ # Prepare the appropriate messages based on the model type.
+ #
+ # :param input_data: Original input data
+ # :param file_path: Current file path being processed
+ # :return: Properly formatted messages
+ # """
+ # if "mistral" in self.model_name.lower():
+ # return input_data[0]["text_input"]
+ # else:
+ # return [
+ # {"role": "system", "content": "You are an expert at extracting structured text from image documents."},
+ # {"role": "user", "content": input_data[0]["text_input"]},
+ # ]
+ #
+ # @staticmethod
+ # def _extract_file_paths(input_data):
+ # """
+ # Extract and resolve absolute file paths from input data.
+ #
+ # :param input_data: List of dictionaries containing image file paths.
+ # :return: List of absolute file paths.
+ # """
+ # return [
+ # os.path.abspath(file_path)
+ # for data in input_data
+ # for file_path in data.get("file_path", [])
+ # ]
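The MLX module is commented out wholesale rather than deleted, and the darwin/arm64 environment markers for mlx and mlx-vlm are dropped from the requirements (see METADATA below). If the backend is restored later, a guarded import along these lines would keep the package importable off Apple Silicon; this is a sketch, not code from the package:

import platform
import sys

MLXInference = None
# Mirror the removed markers: sys_platform == "darwin" and platform_machine == "arm64"
if sys.platform == "darwin" and platform.machine() == "arm64":
    try:
        from sparrow_parse.vllm.mlx_inference import MLXInference
    except ImportError:
        pass  # mlx / mlx-vlm not installed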
sparrow_parse-1.0.3.dist-info/METADATA → sparrow_parse-1.0.4a0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: sparrow-parse
- Version: 1.0.3
+ Version: 1.0.4a0
  Summary: Sparrow Parse is a Python package (part of Sparrow) for parsing and extracting information from documents.
  Home-page: https://github.com/katanaml/sparrow/tree/main/sparrow-data/parse
  Author: Andrej Baranovskij
@@ -16,15 +16,13 @@ Requires-Python: >=3.10
  Description-Content-Type: text/markdown
  Requires-Dist: rich
  Requires-Dist: transformers >=4.51.3
- Requires-Dist: torchvision >=0.21.0
- Requires-Dist: torch >=2.6.0
+ Requires-Dist: torchvision >=0.22.0
+ Requires-Dist: torch >=2.7.0
  Requires-Dist: sentence-transformers >=4.1.0
- Requires-Dist: numpy >=2.2.4
+ Requires-Dist: numpy >=2.2.5
  Requires-Dist: pypdf >=5.4.0
  Requires-Dist: gradio-client >=1.7.2
  Requires-Dist: pdf2image >=1.17.0
- Requires-Dist: mlx >=0.25.0 ; sys_platform == "darwin" and platform_machine == "arm64"
- Requires-Dist: mlx-vlm ==0.1.23 ; sys_platform == "darwin" and platform_machine == "arm64"
 
  # Sparrow Parse
 
sparrow_parse-1.0.3.dist-info/RECORD → sparrow_parse-1.0.4a0.dist-info/RECORD CHANGED
@@ -1,4 +1,4 @@
- sparrow_parse/__init__.py,sha256=MpVHFFoITiYyPltTb_qFrdeX2entdTm4x0PczXi3txY,21
+ sparrow_parse/__init__.py,sha256=uaGkUYEjwal6HsB_xcaWl4f22MLGxgYjrQfnOg_f2FE,22
  sparrow_parse/__main__.py,sha256=Xs1bpJV0n08KWOoQE34FBYn6EBXZA9HIYJKrE4ZdG78,153
  sparrow_parse/text_extraction.py,sha256=lirPpvz8tnwCMGmoHPK94-vCviybuRyQM-mpvhtp3uY,1124
  sparrow_parse/extractors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -11,11 +11,11 @@ sparrow_parse/processors/table_structure_processor.py,sha256=BCYnrsqngEu0WpBORce
  sparrow_parse/vllm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  sparrow_parse/vllm/huggingface_inference.py,sha256=EJnG6PesGKMc_0qGPN8ufE6pSnhAgFu0XjCbaLCNVyM,1980
  sparrow_parse/vllm/inference_base.py,sha256=4mwGoAY63MB4cHZpV0czTkJWEzimmiTzqqzKmLNzgjw,820
- sparrow_parse/vllm/inference_factory.py,sha256=FTM65O-dW2WZchHOrNN7_Q3-FlVoAc65iSptuuUuClM,1166
+ sparrow_parse/vllm/inference_factory.py,sha256=Qd8233Xj9321ZhPEBW0bPpk4pfkIOcYnqoyyNcRCByI,1194
  sparrow_parse/vllm/local_gpu_inference.py,sha256=aHoJTejb5xrXjWDIGu5RBQWEyRCOBCB04sMvO2Wyvg8,628
- sparrow_parse/vllm/mlx_inference.py,sha256=vqIkfTd5rP8bnZ8K_CGVEWe_G3E4i3rwN9MfLBDiE3c,8000
- sparrow_parse-1.0.3.dist-info/METADATA,sha256=dIGBhBhtR5rSKj4RbT1PhyrWxKUVUq5AxbJ33FsKNlE,7229
- sparrow_parse-1.0.3.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
- sparrow_parse-1.0.3.dist-info/entry_points.txt,sha256=HV5nnQVtr2m-kn6hzY_ynp0zugNCcGovbmnfmQgOyhw,53
- sparrow_parse-1.0.3.dist-info/top_level.txt,sha256=n6b-WtT91zKLyCPZTP7wvne8v_yvIahcsz-4sX8I0rY,14
- sparrow_parse-1.0.3.dist-info/RECORD,,
+ sparrow_parse/vllm/mlx_inference.py,sha256=sLSt0qN--RuJAApWX2HgYfX0ZDiZqZbgI7LRxioy73s,8315
+ sparrow_parse-1.0.4a0.dist-info/METADATA,sha256=2Jtk-kXCCG_RjTxtq3CPGI-K-nHDZrHsuAxtcBA_nlc,7053
+ sparrow_parse-1.0.4a0.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
+ sparrow_parse-1.0.4a0.dist-info/entry_points.txt,sha256=HV5nnQVtr2m-kn6hzY_ynp0zugNCcGovbmnfmQgOyhw,53
+ sparrow_parse-1.0.4a0.dist-info/top_level.txt,sha256=n6b-WtT91zKLyCPZTP7wvne8v_yvIahcsz-4sX8I0rY,14
+ sparrow_parse-1.0.4a0.dist-info/RECORD,,
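For reference, the sha256= values in RECORD are urlsafe-base64 digests with padding stripped, followed by the file size in bytes, per the wheel spec. A minimal sketch to recompute one entry value from an unpacked wheel (the path in the usage comment is illustrative):

import base64
import hashlib
from pathlib import Path

def record_digest(path):
    """Return a RECORD-style 'sha256=...' value for a file."""
    raw = hashlib.sha256(Path(path).read_bytes()).digest()
    # urlsafe base64 with trailing '=' padding removed, as in RECORD
    return "sha256=" + base64.urlsafe_b64encode(raw).rstrip(b"=").decode("ascii")

# e.g. record_digest("sparrow_parse/__init__.py") should match the
# sha256=uaGkUYEjwal6HsB_xcaWl4f22MLGxgYjrQfnOg_f2FE entry for 1.0.4a0.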