sparrow-parse 0.2.2__py3-none-any.whl → 0.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sparrow_parse/__init__.py CHANGED
@@ -1 +1 @@
1
- __version__ = '0.2.2'
1
+ __version__ = '0.2.4'
@@ -173,7 +173,7 @@ if __name__ == "__main__":
173
173
  # '/Users/andrejb/infra/shared/katana-git/sparrow/sparrow-ml/llm/data/invoice_1.pdf',
174
174
  # 'hi_res',
175
175
  # 'yolox',
176
- # ['tables', 'html'],
176
+ # ['tables', 'unstructured'],
177
177
  # True,
178
178
  # True)
179
179
 
sparrow_parse/temp.py CHANGED
@@ -3,7 +3,7 @@
3
3
  # 'hi_res',
4
4
  # 'yolox',
5
5
  # # 'detectron2_onnx',
6
- # ['tables', 'html'],
6
+ # ['tables', 'unstructured'],
7
7
  # True,
8
8
  # True)
9
9
 
@@ -11,18 +11,6 @@
11
11
  # '/Users/andrejb/Documents/work/epik/bankstatement/OCBC_1_1.pdf',
12
12
  # 'hi_res',
13
13
  # 'yolox',
14
- # ['tables', 'html'],
14
+ # ['tables', 'unstructured'],
15
15
  # True,
16
- # True)
17
-
18
-
19
- # content, table_content = processor.extract_data(
20
- # '/Users/andrejb/Documents/work/schreiber/invoice_data/test/2618407.pdf',
21
- # ['tables', 'markdown'],
22
- # True,
23
- # True)
24
- # content, table_content = processor.extract_data(
25
- # '/Users/andrejb/Documents/work/epik/bankstatement/POSB_2_1.pdf',
26
- # ['tables', 'markdown'],
27
- # True,
28
- # True)
16
+ # True)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sparrow-parse
3
- Version: 0.2.2
3
+ Version: 0.2.4
4
4
  Summary: Sparrow Parse is a Python package for parsing and extracting information from documents.
5
5
  Home-page: https://github.com/katanaml/sparrow/tree/main/sparrow-data/parse
6
6
  License: GPL-3.0
@@ -15,6 +15,7 @@ Classifier: Programming Language :: Python :: 3.9
15
15
  Classifier: Programming Language :: Python :: 3.10
16
16
  Classifier: Programming Language :: Python :: 3.11
17
17
  Classifier: Topic :: Software Development
18
+ Requires-Dist: numpy (==1.26.4)
18
19
  Requires-Dist: pymupdf4llm (==0.0.5)
19
20
  Requires-Dist: rich (>=13.7.1,<14.0.0)
20
21
  Requires-Dist: sentence-transformers (==3.0.1)
@@ -63,7 +64,7 @@ Example:
63
64
 
64
65
  *model_name* - `yolox`
65
66
 
66
- *options* - `['tables', 'html']`
67
+ *options* - `['tables', 'unstructured']`
67
68
 
68
69
  *local* - `True`
69
70
 
@@ -1,13 +1,13 @@
1
- sparrow_parse/__init__.py,sha256=pKh2_hmFNUwG0JxbL18qxdmi_nmFpgRbR8X6IPZr08o,21
1
+ sparrow_parse/__init__.py,sha256=qfsPKGSX4YbTkJ_mvg4kfAF60Sx0Ol77KSinJilpkVw,21
2
2
  sparrow_parse/__main__.py,sha256=Xs1bpJV0n08KWOoQE34FBYn6EBXZA9HIYJKrE4ZdG78,153
3
3
  sparrow_parse/data/invoice_1_table.txt,sha256=dsWEASxlVNidpTCQDowCM7SjaUzSqwx7DuydTfaQ7xI,1115
4
4
  sparrow_parse/extractor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
5
  sparrow_parse/extractor/extractor_helper.py,sha256=fvA7iuGdpW_WFc4jkALBzQbACqlE5x_K9ScW-E6RCoY,13357
6
6
  sparrow_parse/extractor/html_extractor.py,sha256=V_ApbQzS3LmyYM8G0adQy73FqvQob5TGZgRMbOviA9M,9507
7
7
  sparrow_parse/extractor/markdown_processor.py,sha256=dC2WUdA-v2psh7oytruftxYkXdQi72FoEYxF30ROuO0,4506
8
- sparrow_parse/extractor/unstructured_processor.py,sha256=Vx5ut2aXyIUhj7Lkoz5uONWjx_SbR-i4vNGxPrCPmDQ,6763
9
- sparrow_parse/temp.py,sha256=xiRDPkv_fsM9xCcW29TU0LushgYjMaN9_Cwur6RvY1A,859
10
- sparrow_parse-0.2.2.dist-info/METADATA,sha256=mbdNeeRvy24A4TJTCQ5irsOuAxb53GMvBAiKtkZc4AA,5582
11
- sparrow_parse-0.2.2.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
12
- sparrow_parse-0.2.2.dist-info/entry_points.txt,sha256=H507qotwq3VX4lv5pY9MZYtupKNE1RRb8gEQucPiGi0,52
13
- sparrow_parse-0.2.2.dist-info/RECORD,,
8
+ sparrow_parse/extractor/unstructured_processor.py,sha256=z46aXacMvfW_wmsACs0LtamoMc19eogGd5fVVAj4vIo,6771
9
+ sparrow_parse/temp.py,sha256=Hl1wPOEytXnfbUobU8BJgEswPsfncibbQdwrpSHtlOo,513
10
+ sparrow_parse-0.2.4.dist-info/METADATA,sha256=xMKswwVZ8T4sq9SGOdmMrglvlbJAZeKA9-2gkFQJczs,5622
11
+ sparrow_parse-0.2.4.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
12
+ sparrow_parse-0.2.4.dist-info/entry_points.txt,sha256=H507qotwq3VX4lv5pY9MZYtupKNE1RRb8gEQucPiGi0,52
13
+ sparrow_parse-0.2.4.dist-info/RECORD,,