sparrow-parse 0.4.0__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sparrow_parse/__init__.py +1 -1
- sparrow_parse/extractors/vllm_extractor.py +0 -30
- {sparrow_parse-0.4.0.dist-info → sparrow_parse-0.4.1.dist-info}/METADATA +1 -1
- {sparrow_parse-0.4.0.dist-info → sparrow_parse-0.4.1.dist-info}/RECORD +7 -7
- {sparrow_parse-0.4.0.dist-info → sparrow_parse-0.4.1.dist-info}/WHEEL +0 -0
- {sparrow_parse-0.4.0.dist-info → sparrow_parse-0.4.1.dist-info}/entry_points.txt +0 -0
- {sparrow_parse-0.4.0.dist-info → sparrow_parse-0.4.1.dist-info}/top_level.txt +0 -0
sparrow_parse/__init__.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = '0.4.0'
|
1
|
+
__version__ = '0.4.1'
|
@@ -115,7 +115,6 @@ class VLLMExtractor(object):
|
|
115
115
|
|
116
116
|
input_data[0]["file_path"] = [output_filename]
|
117
117
|
result = self._run_model_inference(model_inference_instance, input_data)
|
118
|
-
result = self.add_table_info_to_data(result, "table_nr", i + 1)
|
119
118
|
results_array.append(result)
|
120
119
|
|
121
120
|
shutil.rmtree(temp_dir, ignore_errors=True)
|
@@ -140,35 +139,6 @@ class VLLMExtractor(object):
|
|
140
139
|
return file_path.lower().endswith('.pdf')
|
141
140
|
|
142
141
|
|
143
|
-
@staticmethod
|
144
|
-
def add_table_info_to_data(data: Union[Dict, List], key: str, message: Any) -> Dict:
|
145
|
-
"""
|
146
|
-
Add a key-value pair to a dictionary or wrap a list in a dictionary.
|
147
|
-
If a 'table' key exists, add or update the key-value pair inside it.
|
148
|
-
|
149
|
-
Args:
|
150
|
-
data (Union[Dict, List]): The input data (either a dictionary or list).
|
151
|
-
key (str): The key to add.
|
152
|
-
message (Any): The value to associate with the key.
|
153
|
-
|
154
|
-
Returns:
|
155
|
-
Dict: The modified data.
|
156
|
-
"""
|
157
|
-
if isinstance(data, dict):
|
158
|
-
if "table" in data and isinstance(data["table"], list):
|
159
|
-
# Add or update the key-value pair in the existing structure
|
160
|
-
data[key] = message
|
161
|
-
else:
|
162
|
-
# Wrap the dictionary inside a `table` key and include the additional key-value pair
|
163
|
-
data = {"table": [data], key: message}
|
164
|
-
elif isinstance(data, list):
|
165
|
-
# Wrap the list in a dictionary with the additional key-value pair
|
166
|
-
data = {"table": data, key: message}
|
167
|
-
else:
|
168
|
-
raise TypeError("Data must be a dictionary or a list.")
|
169
|
-
return data
|
170
|
-
|
171
|
-
|
172
142
|
if __name__ == "__main__":
|
173
143
|
# run locally: python -m sparrow_parse.extractors.vllm_extractor
|
174
144
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: sparrow-parse
|
3
|
-
Version: 0.4.0
|
3
|
+
Version: 0.4.1
|
4
4
|
Summary: Sparrow Parse is a Python package (part of Sparrow) for parsing and extracting information from documents.
|
5
5
|
Home-page: https://github.com/katanaml/sparrow/tree/main/sparrow-data/parse
|
6
6
|
Author: Andrej Baranovskij
|
@@ -1,7 +1,7 @@
|
|
1
|
-
sparrow_parse/__init__.py,sha256=
|
1
|
+
sparrow_parse/__init__.py,sha256=8yPI9dbwQUYqhMtA3RfAi5yJOhZBnz-g8966ssrYXiU,21
|
2
2
|
sparrow_parse/__main__.py,sha256=Xs1bpJV0n08KWOoQE34FBYn6EBXZA9HIYJKrE4ZdG78,153
|
3
3
|
sparrow_parse/extractors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
|
-
sparrow_parse/extractors/vllm_extractor.py,sha256=
|
4
|
+
sparrow_parse/extractors/vllm_extractor.py,sha256=QIg7AMCfw81YHQN6CutF2ipV_DZ3txSGduPIcvQRmiA,7439
|
5
5
|
sparrow_parse/helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
6
6
|
sparrow_parse/helpers/pdf_optimizer.py,sha256=GIqQYWtixFeZGCRFXL0lQfQByapCDuQzzRHAkzcPwLE,3302
|
7
7
|
sparrow_parse/processors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -12,8 +12,8 @@ sparrow_parse/vllm/inference_base.py,sha256=4mwGoAY63MB4cHZpV0czTkJWEzimmiTzqqzK
|
|
12
12
|
sparrow_parse/vllm/inference_factory.py,sha256=FTM65O-dW2WZchHOrNN7_Q3-FlVoAc65iSptuuUuClM,1166
|
13
13
|
sparrow_parse/vllm/local_gpu_inference.py,sha256=aHoJTejb5xrXjWDIGu5RBQWEyRCOBCB04sMvO2Wyvg8,628
|
14
14
|
sparrow_parse/vllm/mlx_inference.py,sha256=xR40qwjIR0HvrN8x58oOq6F4r1hEANRB-9kcokUQHHU,4748
|
15
|
-
sparrow_parse-0.4.
|
16
|
-
sparrow_parse-0.4.
|
17
|
-
sparrow_parse-0.4.
|
18
|
-
sparrow_parse-0.4.
|
19
|
-
sparrow_parse-0.4.
|
15
|
+
sparrow_parse-0.4.1.dist-info/METADATA,sha256=4rmJ1CURKtyTs-ZH1eyHn_VptHosJZwhQFB5Fssr5e0,6432
|
16
|
+
sparrow_parse-0.4.1.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
|
17
|
+
sparrow_parse-0.4.1.dist-info/entry_points.txt,sha256=8CrvTVTTcz1YuZ8aRCYNOH15ZOAaYLlcbYX3t28HwJY,54
|
18
|
+
sparrow_parse-0.4.1.dist-info/top_level.txt,sha256=n6b-WtT91zKLyCPZTP7wvne8v_yvIahcsz-4sX8I0rY,14
|
19
|
+
sparrow_parse-0.4.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|