sparrow-parse 1.0.3__tar.gz → 1.0.4a0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sparrow-parse-1.0.3 → sparrow-parse-1.0.4a0}/PKG-INFO +4 -6
- {sparrow-parse-1.0.3 → sparrow-parse-1.0.4a0}/setup.py +1 -1
- sparrow-parse-1.0.4a0/sparrow_parse/__init__.py +1 -0
- {sparrow-parse-1.0.3 → sparrow-parse-1.0.4a0}/sparrow_parse/vllm/inference_factory.py +3 -2
- sparrow-parse-1.0.4a0/sparrow_parse/vllm/mlx_inference.py +217 -0
- {sparrow-parse-1.0.3 → sparrow-parse-1.0.4a0}/sparrow_parse.egg-info/PKG-INFO +4 -6
- sparrow-parse-1.0.4a0/sparrow_parse.egg-info/requires.txt +9 -0
- sparrow-parse-1.0.3/sparrow_parse/__init__.py +0 -1
- sparrow-parse-1.0.3/sparrow_parse/vllm/mlx_inference.py +0 -216
- sparrow-parse-1.0.3/sparrow_parse.egg-info/requires.txt +0 -13
- {sparrow-parse-1.0.3 → sparrow-parse-1.0.4a0}/README.md +0 -0
- {sparrow-parse-1.0.3 → sparrow-parse-1.0.4a0}/setup.cfg +0 -0
- {sparrow-parse-1.0.3 → sparrow-parse-1.0.4a0}/sparrow_parse/__main__.py +0 -0
- {sparrow-parse-1.0.3 → sparrow-parse-1.0.4a0}/sparrow_parse/extractors/__init__.py +0 -0
- {sparrow-parse-1.0.3 → sparrow-parse-1.0.4a0}/sparrow_parse/extractors/vllm_extractor.py +0 -0
- {sparrow-parse-1.0.3 → sparrow-parse-1.0.4a0}/sparrow_parse/helpers/__init__.py +0 -0
- {sparrow-parse-1.0.3 → sparrow-parse-1.0.4a0}/sparrow_parse/helpers/image_optimizer.py +0 -0
- {sparrow-parse-1.0.3 → sparrow-parse-1.0.4a0}/sparrow_parse/helpers/pdf_optimizer.py +0 -0
- {sparrow-parse-1.0.3 → sparrow-parse-1.0.4a0}/sparrow_parse/processors/__init__.py +0 -0
- {sparrow-parse-1.0.3 → sparrow-parse-1.0.4a0}/sparrow_parse/processors/table_structure_processor.py +0 -0
- {sparrow-parse-1.0.3 → sparrow-parse-1.0.4a0}/sparrow_parse/text_extraction.py +0 -0
- {sparrow-parse-1.0.3 → sparrow-parse-1.0.4a0}/sparrow_parse/vllm/__init__.py +0 -0
- {sparrow-parse-1.0.3 → sparrow-parse-1.0.4a0}/sparrow_parse/vllm/huggingface_inference.py +0 -0
- {sparrow-parse-1.0.3 → sparrow-parse-1.0.4a0}/sparrow_parse/vllm/inference_base.py +0 -0
- {sparrow-parse-1.0.3 → sparrow-parse-1.0.4a0}/sparrow_parse/vllm/local_gpu_inference.py +0 -0
- {sparrow-parse-1.0.3 → sparrow-parse-1.0.4a0}/sparrow_parse.egg-info/SOURCES.txt +0 -0
- {sparrow-parse-1.0.3 → sparrow-parse-1.0.4a0}/sparrow_parse.egg-info/dependency_links.txt +0 -0
- {sparrow-parse-1.0.3 → sparrow-parse-1.0.4a0}/sparrow_parse.egg-info/entry_points.txt +0 -0
- {sparrow-parse-1.0.3 → sparrow-parse-1.0.4a0}/sparrow_parse.egg-info/top_level.txt +0 -0
{sparrow-parse-1.0.3 → sparrow-parse-1.0.4a0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: sparrow-parse
-Version: 1.0.3
+Version: 1.0.4a0
 Summary: Sparrow Parse is a Python package (part of Sparrow) for parsing and extracting information from documents.
 Home-page: https://github.com/katanaml/sparrow/tree/main/sparrow-data/parse
 Author: Andrej Baranovskij
@@ -16,15 +16,13 @@ Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 Requires-Dist: rich
 Requires-Dist: transformers>=4.51.3
-Requires-Dist: torchvision>=0.21.0
-Requires-Dist: torch>=2.6.0
+Requires-Dist: torchvision>=0.22.0
+Requires-Dist: torch>=2.7.0
 Requires-Dist: sentence-transformers>=4.1.0
-Requires-Dist: numpy>=2.2.4
+Requires-Dist: numpy>=2.2.5
 Requires-Dist: pypdf>=5.4.0
 Requires-Dist: gradio_client>=1.7.2
 Requires-Dist: pdf2image>=1.17.0
-Requires-Dist: mlx>=0.25.0; sys_platform == "darwin" and platform_machine == "arm64"
-Requires-Dist: mlx-vlm==0.1.23; sys_platform == "darwin" and platform_machine == "arm64"
 
 # Sparrow Parse
 
{sparrow-parse-1.0.3 → sparrow-parse-1.0.4a0}/setup.py

@@ -8,7 +8,7 @@ with open("requirements.txt", "r", encoding="utf-8") as fh:
 
 setup(
     name="sparrow-parse",
-    version="1.0.3",
+    version="1.0.4a",
     author="Andrej Baranovskij",
     author_email="andrejus.baranovskis@gmail.com",
     description="Sparrow Parse is a Python package (part of Sparrow) for parsing and extracting information from documents.",
sparrow-parse-1.0.4a0/sparrow_parse/__init__.py (new file)

@@ -0,0 +1 @@
+__version__ = '1.0.4a'
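Note: setup.py and sparrow_parse/__init__.py carry the version string '1.0.4a', while the release artifact is named 1.0.4a0. This is expected PEP 440 behavior: an alpha suffix without a number is implicitly numbered zero, so packaging tools normalize '1.0.4a' to '1.0.4a0'. A quick check (assumes the `packaging` library is installed):

```python
from packaging.version import Version

# PEP 440 normalization: an alpha tag without a number gets an implicit 0
print(Version("1.0.4a"))                        # -> 1.0.4a0
print(Version("1.0.4a") == Version("1.0.4a0"))  # -> True
```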
{sparrow-parse-1.0.3 → sparrow-parse-1.0.4a0}/sparrow_parse/vllm/inference_factory.py

@@ -1,6 +1,6 @@
 from sparrow_parse.vllm.huggingface_inference import HuggingFaceInference
 from sparrow_parse.vllm.local_gpu_inference import LocalGPUInference
-from sparrow_parse.vllm.mlx_inference import MLXInference
+# from sparrow_parse.vllm.mlx_inference import MLXInference
 
 
 class InferenceFactory:
@@ -14,7 +14,8 @@ class InferenceFactory:
             model = self._load_local_model()  # Replace with actual model loading logic
             return LocalGPUInference(model=model, device=self.config.get("device", "cuda"))
         elif self.config["method"] == "mlx":
-            return MLXInference(model_name=self.config["model_name"])
+            # return MLXInference(model_name=self.config["model_name"])
+            return None
         else:
             raise ValueError(f"Unknown method: {self.config['method']}")
 
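With the mlx_inference import commented out, the factory's "mlx" branch now returns None instead of an MLXInference instance, so callers need an explicit guard. A minimal sketch (the factory method name and the model id are assumptions for illustration; the config keys "method" and "model_name" are the ones read in the diff above):

```python
from sparrow_parse.vllm.inference_factory import InferenceFactory

# Config keys match those read in inference_factory.py
config = {
    "method": "mlx",
    "model_name": "mlx-community/Qwen2.5-VL-7B-Instruct-4bit",  # hypothetical model id
}

factory = InferenceFactory(config)
inference = factory.get_inference_instance()  # assumed factory method name

# In 1.0.4a0 the "mlx" branch returns None rather than raising,
# so the missing backend must be detected by the caller
if inference is None:
    raise RuntimeError("MLX backend is not available in this build")
```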
sparrow-parse-1.0.4a0/sparrow_parse/vllm/mlx_inference.py (new file, entire module commented out)

@@ -0,0 +1,217 @@
+# from mlx_vlm import load, generate
+# from mlx_vlm.prompt_utils import apply_chat_template
+# from mlx_vlm.utils import load_image
+# from sparrow_parse.vllm.inference_base import ModelInference
+# import os
+# import json
+# from rich import print
+#
+#
+# class MLXInference(ModelInference):
+#     """
+#     A class for performing inference using the MLX model.
+#     Handles image preprocessing, response formatting, and model interaction.
+#     """
+#
+#     def __init__(self, model_name):
+#         """
+#         Initialize the inference class with the given model name.
+#
+#         :param model_name: Name of the model to load.
+#         """
+#         self.model_name = model_name
+#         print(f"MLXInference initialized for model: {model_name}")
+#
+#
+#     @staticmethod
+#     def _load_model_and_processor(model_name):
+#         """
+#         Load the model and processor for inference.
+#
+#         :param model_name: Name of the model to load.
+#         :return: Tuple containing the loaded model and processor.
+#         """
+#         model, processor = load(model_name)
+#         print(f"Loaded model: {model_name}")
+#         return model, processor
+#
+#
+#     def process_response(self, output_text):
+#         """
+#         Process and clean the model's raw output to format as JSON.
+#         """
+#         try:
+#             # Check if we have markdown code block markers
+#             if "```" in output_text:
+#                 # Handle markdown-formatted output
+#                 json_start = output_text.find("```json")
+#                 if json_start != -1:
+#                     # Extract content between ```json and ```
+#                     content = output_text[json_start + 7:]
+#                     json_end = content.rfind("```")
+#                     if json_end != -1:
+#                         content = content[:json_end].strip()
+#                         formatted_json = json.loads(content)
+#                         return json.dumps(formatted_json, indent=2)
+#
+#             # Handle raw JSON (no markdown formatting)
+#             # First try to find JSON array or object patterns
+#             for pattern in [r'\[\s*\{.*\}\s*\]', r'\{.*\}']:
+#                 import re
+#                 matches = re.search(pattern, output_text, re.DOTALL)
+#                 if matches:
+#                     potential_json = matches.group(0)
+#                     try:
+#                         formatted_json = json.loads(potential_json)
+#                         return json.dumps(formatted_json, indent=2)
+#                     except:
+#                         pass
+#
+#             # Last resort: try to parse the whole text as JSON
+#             formatted_json = json.loads(output_text.strip())
+#             return json.dumps(formatted_json, indent=2)
+#
+#         except Exception as e:
+#             print(f"Failed to parse JSON: {e}")
+#             return output_text
+#
+#
+#     def load_image_data(self, image_filepath, max_width=1250, max_height=1750):
+#         """
+#         Load and resize image while maintaining its aspect ratio.
+#
+#         :param image_filepath: Path to the image file.
+#         :param max_width: Maximum allowed width of the image.
+#         :param max_height: Maximum allowed height of the image.
+#         :return: Tuple containing the image object and its new dimensions.
+#         """
+#         image = load_image(image_filepath)  # Assuming load_image is defined elsewhere
+#         width, height = image.size
+#
+#         # Calculate new dimensions while maintaining the aspect ratio
+#         if width > max_width or height > max_height:
+#             aspect_ratio = width / height
+#             new_width = min(max_width, int(max_height * aspect_ratio))
+#             new_height = min(max_height, int(max_width / aspect_ratio))
+#             return image, new_width, new_height
+#
+#         return image, width, height
+#
+#
+#     def inference(self, input_data, mode=None):
+#         """
+#         Perform inference on input data using the specified model.
+#
+#         :param input_data: A list of dictionaries containing image file paths and text inputs.
+#         :param mode: Optional mode for inference ("static" for simple JSON output).
+#         :return: List of processed model responses.
+#         """
+#         # Handle static mode
+#         if mode == "static":
+#             return [self.get_simple_json()]
+#
+#         # Load the model and processor
+#         model, processor = self._load_model_and_processor(self.model_name)
+#         config = model.config
+#
+#         # Determine if we're doing text-only or image-based inference
+#         is_text_only = input_data[0].get("file_path") is None
+#
+#         if is_text_only:
+#             # Text-only inference
+#             messages = input_data[0]["text_input"]
+#             response = self._generate_text_response(model, processor, config, messages)
+#             results = [response]
+#         else:
+#             # Image-based inference
+#             file_paths = self._extract_file_paths(input_data)
+#             results = self._process_images(model, processor, config, file_paths, input_data)
+#
+#         return results
+#
+#     def _generate_text_response(self, model, processor, config, messages):
+#         """
+#         Generate a text response for text-only inputs.
+#
+#         :param model: The loaded model
+#         :param processor: The loaded processor
+#         :param config: Model configuration
+#         :param messages: Input messages
+#         :return: Generated response
+#         """
+#         prompt = apply_chat_template(processor, config, messages)
+#         response = generate(
+#             model,
+#             processor,
+#             prompt,
+#             max_tokens=4000,
+#             temperature=0.0,
+#             verbose=False
+#         )
+#         print("Inference completed successfully")
+#         return response
+#
+#     def _process_images(self, model, processor, config, file_paths, input_data):
+#         """
+#         Process images and generate responses for each.
+#
+#         :param model: The loaded model
+#         :param processor: The loaded processor
+#         :param config: Model configuration
+#         :param file_paths: List of image file paths
+#         :param input_data: Original input data
+#         :return: List of processed responses
+#         """
+#         results = []
+#         for file_path in file_paths:
+#             image, width, height = self.load_image_data(file_path)
+#
+#             # Prepare messages based on model type
+#             messages = self._prepare_messages(input_data, file_path)
+#
+#             # Generate and process response
+#             prompt = apply_chat_template(processor, config, messages)
+#             response = generate(
+#                 model,
+#                 processor,
+#                 prompt,
+#                 image,
+#                 resize_shape=(width, height),
+#                 max_tokens=4000,
+#                 temperature=0.0,
+#                 verbose=False
+#             )
+#             results.append(self.process_response(response))
+#             print(f"Inference completed successfully for: {file_path}")
+#
+#         return results
+#
+#     def _prepare_messages(self, input_data, file_path):
+#         """
+#         Prepare the appropriate messages based on the model type.
+#
+#         :param input_data: Original input data
+#         :param file_path: Current file path being processed
+#         :return: Properly formatted messages
+#         """
+#         if "mistral" in self.model_name.lower():
+#             return input_data[0]["text_input"]
+#         else:
+#             return [
+#                 {"role": "system", "content": "You are an expert at extracting structured text from image documents."},
+#                 {"role": "user", "content": input_data[0]["text_input"]},
+#             ]
+#
+#     @staticmethod
+#     def _extract_file_paths(input_data):
+#         """
+#         Extract and resolve absolute file paths from input data.
+#
+#         :param input_data: List of dictionaries containing image file paths.
+#         :return: List of absolute file paths.
+#         """
+#         return [
+#             os.path.abspath(file_path)
+#             for data in input_data
+#             for file_path in data.get("file_path", [])
+#         ]
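The docstrings above describe input_data as a list of dictionaries carrying "file_path" (a list of image paths, or None for text-only runs) and "text_input". A sketch of the shape those docstrings imply, with illustrative values (the class itself is disabled in this pre-release):

```python
# Shape implied by the MLXInference docstrings; paths and prompt are illustrative
input_data = [
    {
        "file_path": ["/tmp/invoice_page_1.png"],  # hypothetical image path
        "text_input": "Extract the invoice number and total amount as JSON.",
    }
]

# Text-only variant, as detected by `input_data[0].get("file_path") is None`
text_only_input = [{"file_path": None, "text_input": "Summarize the document."}]
```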
{sparrow-parse-1.0.3 → sparrow-parse-1.0.4a0}/sparrow_parse.egg-info/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: sparrow-parse
-Version: 1.0.3
+Version: 1.0.4a0
 Summary: Sparrow Parse is a Python package (part of Sparrow) for parsing and extracting information from documents.
 Home-page: https://github.com/katanaml/sparrow/tree/main/sparrow-data/parse
 Author: Andrej Baranovskij
@@ -16,15 +16,13 @@ Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 Requires-Dist: rich
 Requires-Dist: transformers>=4.51.3
-Requires-Dist: torchvision>=0.21.0
-Requires-Dist: torch>=2.6.0
+Requires-Dist: torchvision>=0.22.0
+Requires-Dist: torch>=2.7.0
 Requires-Dist: sentence-transformers>=4.1.0
-Requires-Dist: numpy>=2.2.4
+Requires-Dist: numpy>=2.2.5
 Requires-Dist: pypdf>=5.4.0
 Requires-Dist: gradio_client>=1.7.2
 Requires-Dist: pdf2image>=1.17.0
-Requires-Dist: mlx>=0.25.0; sys_platform == "darwin" and platform_machine == "arm64"
-Requires-Dist: mlx-vlm==0.1.23; sys_platform == "darwin" and platform_machine == "arm64"
 
 # Sparrow Parse
 
sparrow-parse-1.0.3/sparrow_parse/__init__.py (deleted)

@@ -1 +0,0 @@
-__version__ = '1.0.3'
sparrow-parse-1.0.3/sparrow_parse/vllm/mlx_inference.py (deleted)

@@ -1,216 +0,0 @@
-from mlx_vlm import load, generate
-from mlx_vlm.prompt_utils import apply_chat_template
-from mlx_vlm.utils import load_image
-from sparrow_parse.vllm.inference_base import ModelInference
-import os
-import json
-from rich import print
-
-
-class MLXInference(ModelInference):
-    """
-    A class for performing inference using the MLX model.
-    Handles image preprocessing, response formatting, and model interaction.
-    """
-
-    def __init__(self, model_name):
-        """
-        Initialize the inference class with the given model name.
-
-        :param model_name: Name of the model to load.
-        """
-        self.model_name = model_name
-        print(f"MLXInference initialized for model: {model_name}")
-
-
-    @staticmethod
-    def _load_model_and_processor(model_name):
-        """
-        Load the model and processor for inference.
-
-        :param model_name: Name of the model to load.
-        :return: Tuple containing the loaded model and processor.
-        """
-        model, processor = load(model_name)
-        print(f"Loaded model: {model_name}")
-        return model, processor
-
-
-    def process_response(self, output_text):
-        """
-        Process and clean the model's raw output to format as JSON.
-        """
-        try:
-            # Check if we have markdown code block markers
-            if "```" in output_text:
-                # Handle markdown-formatted output
-                json_start = output_text.find("```json")
-                if json_start != -1:
-                    # Extract content between ```json and ```
-                    content = output_text[json_start + 7:]
-                    json_end = content.rfind("```")
-                    if json_end != -1:
-                        content = content[:json_end].strip()
-                        formatted_json = json.loads(content)
-                        return json.dumps(formatted_json, indent=2)
-
-            # Handle raw JSON (no markdown formatting)
-            # First try to find JSON array or object patterns
-            for pattern in [r'\[\s*\{.*\}\s*\]', r'\{.*\}']:
-                import re
-                matches = re.search(pattern, output_text, re.DOTALL)
-                if matches:
-                    potential_json = matches.group(0)
-                    try:
-                        formatted_json = json.loads(potential_json)
-                        return json.dumps(formatted_json, indent=2)
-                    except:
-                        pass
-
-            # Last resort: try to parse the whole text as JSON
-            formatted_json = json.loads(output_text.strip())
-            return json.dumps(formatted_json, indent=2)
-
-        except Exception as e:
-            print(f"Failed to parse JSON: {e}")
-            return output_text
-
-
-    def load_image_data(self, image_filepath, max_width=1250, max_height=1750):
-        """
-        Load and resize image while maintaining its aspect ratio.
-
-        :param image_filepath: Path to the image file.
-        :param max_width: Maximum allowed width of the image.
-        :param max_height: Maximum allowed height of the image.
-        :return: Tuple containing the image object and its new dimensions.
-        """
-        image = load_image(image_filepath)  # Assuming load_image is defined elsewhere
-        width, height = image.size
-
-        # Calculate new dimensions while maintaining the aspect ratio
-        if width > max_width or height > max_height:
-            aspect_ratio = width / height
-            new_width = min(max_width, int(max_height * aspect_ratio))
-            new_height = min(max_height, int(max_width / aspect_ratio))
-            return image, new_width, new_height
-
-        return image, width, height
-
-
-    def inference(self, input_data, mode=None):
-        """
-        Perform inference on input data using the specified model.
-
-        :param input_data: A list of dictionaries containing image file paths and text inputs.
-        :param mode: Optional mode for inference ("static" for simple JSON output).
-        :return: List of processed model responses.
-        """
-        # Handle static mode
-        if mode == "static":
-            return [self.get_simple_json()]
-
-        # Load the model and processor
-        model, processor = self._load_model_and_processor(self.model_name)
-        config = model.config
-
-        # Determine if we're doing text-only or image-based inference
-        is_text_only = input_data[0].get("file_path") is None
-
-        if is_text_only:
-            # Text-only inference
-            messages = input_data[0]["text_input"]
-            response = self._generate_text_response(model, processor, config, messages)
-            results = [self.process_response(response)]
-            print("Agent inference completed successfully")
-        else:
-            # Image-based inference
-            file_paths = self._extract_file_paths(input_data)
-            results = self._process_images(model, processor, config, file_paths, input_data)
-
-        return results
-
-    def _generate_text_response(self, model, processor, config, messages):
-        """
-        Generate a text response for text-only inputs.
-
-        :param model: The loaded model
-        :param processor: The loaded processor
-        :param config: Model configuration
-        :param messages: Input messages
-        :return: Generated response
-        """
-        prompt = apply_chat_template(processor, config, messages)
-        return generate(
-            model,
-            processor,
-            prompt,
-            max_tokens=4000,
-            temperature=0.0,
-            verbose=False
-        )
-
-    def _process_images(self, model, processor, config, file_paths, input_data):
-        """
-        Process images and generate responses for each.
-
-        :param model: The loaded model
-        :param processor: The loaded processor
-        :param config: Model configuration
-        :param file_paths: List of image file paths
-        :param input_data: Original input data
-        :return: List of processed responses
-        """
-        results = []
-        for file_path in file_paths:
-            image, width, height = self.load_image_data(file_path)
-
-            # Prepare messages based on model type
-            messages = self._prepare_messages(input_data, file_path)
-
-            # Generate and process response
-            prompt = apply_chat_template(processor, config, messages)
-            response = generate(
-                model,
-                processor,
-                prompt,
-                image,
-                resize_shape=(width, height),
-                max_tokens=4000,
-                temperature=0.0,
-                verbose=False
-            )
-            results.append(self.process_response(response))
-            print(f"Inference completed successfully for: {file_path}")
-
-        return results
-
-    def _prepare_messages(self, input_data, file_path):
-        """
-        Prepare the appropriate messages based on the model type.
-
-        :param input_data: Original input data
-        :param file_path: Current file path being processed
-        :return: Properly formatted messages
-        """
-        if "mistral" in self.model_name.lower():
-            return input_data[0]["text_input"]
-        else:
-            return [
-                {"role": "system", "content": "You are an expert at extracting structured text from image documents."},
-                {"role": "user", "content": input_data[0]["text_input"]},
-            ]
-
-    @staticmethod
-    def _extract_file_paths(input_data):
-        """
-        Extract and resolve absolute file paths from input data.
-
-        :param input_data: List of dictionaries containing image file paths.
-        :return: List of absolute file paths.
-        """
-        return [
-            os.path.abspath(file_path)
-            for data in input_data
-            for file_path in data.get("file_path", [])
-        ]
sparrow-parse-1.0.3/sparrow_parse.egg-info/requires.txt (deleted)

@@ -1,13 +0,0 @@
-rich
-transformers>=4.51.3
-torchvision>=0.21.0
-torch>=2.6.0
-sentence-transformers>=4.1.0
-numpy>=2.2.4
-pypdf>=5.4.0
-gradio_client>=1.7.2
-pdf2image>=1.17.0
-
-[:sys_platform == "darwin" and platform_machine == "arm64"]
-mlx>=0.25.0
-mlx-vlm==0.1.23
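The bracketed section at the end of this file is setuptools' serialization of PEP 508 environment markers: entries under [:sys_platform == "darwin" and platform_machine == "arm64"] correspond to the marker-qualified Requires-Dist lines seen in PKG-INFO above. A sketch of how such conditional dependencies are typically declared (illustrative only; sparrow-parse's setup.py actually reads them from requirements.txt):

```python
from setuptools import setup

# Illustrative declaration of platform-conditional dependencies; setuptools
# writes marker-qualified entries into the [:marker] section of requires.txt
setup(
    name="example-package",
    version="0.1.0",
    install_requires=[
        "numpy>=2.2.4",
        'mlx>=0.25.0; sys_platform == "darwin" and platform_machine == "arm64"',
        'mlx-vlm==0.1.23; sys_platform == "darwin" and platform_machine == "arm64"',
    ],
)
```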
All remaining files (listed above with +0 -0) were renamed from sparrow-parse-1.0.3 to sparrow-parse-1.0.4a0 without content changes.