sparrow-parse 0.4.0__tar.gz → 0.4.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25)
  1. {sparrow-parse-0.4.0 → sparrow-parse-0.4.1}/PKG-INFO +1 -1
  2. {sparrow-parse-0.4.0 → sparrow-parse-0.4.1}/setup.py +1 -1
  3. sparrow-parse-0.4.1/sparrow_parse/__init__.py +1 -0
  4. {sparrow-parse-0.4.0 → sparrow-parse-0.4.1}/sparrow_parse/extractors/vllm_extractor.py +0 -30
  5. {sparrow-parse-0.4.0 → sparrow-parse-0.4.1}/sparrow_parse.egg-info/PKG-INFO +1 -1
  6. sparrow-parse-0.4.0/sparrow_parse/__init__.py +0 -1
  7. {sparrow-parse-0.4.0 → sparrow-parse-0.4.1}/README.md +0 -0
  8. {sparrow-parse-0.4.0 → sparrow-parse-0.4.1}/setup.cfg +0 -0
  9. {sparrow-parse-0.4.0 → sparrow-parse-0.4.1}/sparrow_parse/__main__.py +0 -0
  10. {sparrow-parse-0.4.0 → sparrow-parse-0.4.1}/sparrow_parse/extractors/__init__.py +0 -0
  11. {sparrow-parse-0.4.0 → sparrow-parse-0.4.1}/sparrow_parse/helpers/__init__.py +0 -0
  12. {sparrow-parse-0.4.0 → sparrow-parse-0.4.1}/sparrow_parse/helpers/pdf_optimizer.py +0 -0
  13. {sparrow-parse-0.4.0 → sparrow-parse-0.4.1}/sparrow_parse/processors/__init__.py +0 -0
  14. {sparrow-parse-0.4.0 → sparrow-parse-0.4.1}/sparrow_parse/processors/table_structure_processor.py +0 -0
  15. {sparrow-parse-0.4.0 → sparrow-parse-0.4.1}/sparrow_parse/vllm/__init__.py +0 -0
  16. {sparrow-parse-0.4.0 → sparrow-parse-0.4.1}/sparrow_parse/vllm/huggingface_inference.py +0 -0
  17. {sparrow-parse-0.4.0 → sparrow-parse-0.4.1}/sparrow_parse/vllm/inference_base.py +0 -0
  18. {sparrow-parse-0.4.0 → sparrow-parse-0.4.1}/sparrow_parse/vllm/inference_factory.py +0 -0
  19. {sparrow-parse-0.4.0 → sparrow-parse-0.4.1}/sparrow_parse/vllm/local_gpu_inference.py +0 -0
  20. {sparrow-parse-0.4.0 → sparrow-parse-0.4.1}/sparrow_parse/vllm/mlx_inference.py +0 -0
  21. {sparrow-parse-0.4.0 → sparrow-parse-0.4.1}/sparrow_parse.egg-info/SOURCES.txt +0 -0
  22. {sparrow-parse-0.4.0 → sparrow-parse-0.4.1}/sparrow_parse.egg-info/dependency_links.txt +0 -0
  23. {sparrow-parse-0.4.0 → sparrow-parse-0.4.1}/sparrow_parse.egg-info/entry_points.txt +0 -0
  24. {sparrow-parse-0.4.0 → sparrow-parse-0.4.1}/sparrow_parse.egg-info/requires.txt +0 -0
  25. {sparrow-parse-0.4.0 → sparrow-parse-0.4.1}/sparrow_parse.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sparrow-parse
3
- Version: 0.4.0
3
+ Version: 0.4.1
4
4
  Summary: Sparrow Parse is a Python package (part of Sparrow) for parsing and extracting information from documents.
5
5
  Home-page: https://github.com/katanaml/sparrow/tree/main/sparrow-data/parse
6
6
  Author: Andrej Baranovskij
@@ -8,7 +8,7 @@ with open("requirements.txt", "r", encoding="utf-8") as fh:
8
8
 
9
9
  setup(
10
10
  name="sparrow-parse",
11
- version="0.4.0",
11
+ version="0.4.1",
12
12
  author="Andrej Baranovskij",
13
13
  author_email="andrejus.baranovskis@gmail.com",
14
14
  description="Sparrow Parse is a Python package (part of Sparrow) for parsing and extracting information from documents.",
@@ -0,0 +1 @@
1
+ __version__ = '0.4.1'
@@ -115,7 +115,6 @@ class VLLMExtractor(object):
115
115
 
116
116
  input_data[0]["file_path"] = [output_filename]
117
117
  result = self._run_model_inference(model_inference_instance, input_data)
118
- result = self.add_table_info_to_data(result, "table_nr", i + 1)
119
118
  results_array.append(result)
120
119
 
121
120
  shutil.rmtree(temp_dir, ignore_errors=True)
@@ -140,35 +139,6 @@ class VLLMExtractor(object):
140
139
  return file_path.lower().endswith('.pdf')
141
140
 
142
141
 
143
- @staticmethod
144
- def add_table_info_to_data(data: Union[Dict, List], key: str, message: Any) -> Dict:
145
- """
146
- Add a key-value pair to a dictionary or wrap a list in a dictionary.
147
- If a 'table' key exists, add or update the key-value pair inside it.
148
-
149
- Args:
150
- data (Union[Dict, List]): The input data (either a dictionary or list).
151
- key (str): The key to add.
152
- message (Any): The value to associate with the key.
153
-
154
- Returns:
155
- Dict: The modified data.
156
- """
157
- if isinstance(data, dict):
158
- if "table" in data and isinstance(data["table"], list):
159
- # Add or update the key-value pair in the existing structure
160
- data[key] = message
161
- else:
162
- # Wrap the dictionary inside a `table` key and include the additional key-value pair
163
- data = {"table": [data], key: message}
164
- elif isinstance(data, list):
165
- # Wrap the list in a dictionary with the additional key-value pair
166
- data = {"table": data, key: message}
167
- else:
168
- raise TypeError("Data must be a dictionary or a list.")
169
- return data
170
-
171
-
172
142
  if __name__ == "__main__":
173
143
  # run locally: python -m sparrow_parse.extractors.vllm_extractor
174
144
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sparrow-parse
3
- Version: 0.4.0
3
+ Version: 0.4.1
4
4
  Summary: Sparrow Parse is a Python package (part of Sparrow) for parsing and extracting information from documents.
5
5
  Home-page: https://github.com/katanaml/sparrow/tree/main/sparrow-data/parse
6
6
  Author: Andrej Baranovskij
@@ -1 +0,0 @@
1
- __version__ = '0.4.0'
File without changes
File without changes