sparrow-parse 0.3.0__tar.gz → 0.3.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sparrow_parse-0.3.0 → sparrow-parse-0.3.2}/PKG-INFO +29 -22
- {sparrow_parse-0.3.0 → sparrow-parse-0.3.2}/README.md +19 -3
- sparrow-parse-0.3.2/setup.cfg +4 -0
- sparrow-parse-0.3.2/setup.py +37 -0
- sparrow-parse-0.3.2/sparrow_parse/__init__.py +1 -0
- {sparrow_parse-0.3.0 → sparrow-parse-0.3.2}/sparrow_parse/extractor/pdf_optimizer.py +3 -3
- sparrow-parse-0.3.2/sparrow_parse.egg-info/PKG-INFO +189 -0
- sparrow-parse-0.3.2/sparrow_parse.egg-info/SOURCES.txt +17 -0
- sparrow-parse-0.3.2/sparrow_parse.egg-info/dependency_links.txt +1 -0
- sparrow-parse-0.3.2/sparrow_parse.egg-info/entry_points.txt +3 -0
- sparrow-parse-0.3.2/sparrow_parse.egg-info/requires.txt +9 -0
- sparrow-parse-0.3.2/sparrow_parse.egg-info/top_level.txt +1 -0
- sparrow_parse-0.3.0/pyproject.toml +0 -41
- sparrow_parse-0.3.0/sparrow_parse/__init__.py +0 -1
- sparrow_parse-0.3.0/sparrow_parse/data/invoice_1_table.txt +0 -9
- sparrow_parse-0.3.0/sparrow_parse/extractor/__pycache__/__init__.cpython-310.pyc +0 -0
- sparrow_parse-0.3.0/sparrow_parse/extractor/__pycache__/extractor_helper.cpython-310.pyc +0 -0
- sparrow_parse-0.3.0/sparrow_parse/extractor/__pycache__/html_extractor.cpython-310.pyc +0 -0
- {sparrow_parse-0.3.0 → sparrow-parse-0.3.2}/sparrow_parse/__main__.py +0 -0
- {sparrow_parse-0.3.0 → sparrow-parse-0.3.2}/sparrow_parse/extractor/__init__.py +0 -0
- {sparrow_parse-0.3.0 → sparrow-parse-0.3.2}/sparrow_parse/extractor/extractor_helper.py +0 -0
- {sparrow_parse-0.3.0 → sparrow-parse-0.3.2}/sparrow_parse/extractor/html_extractor.py +0 -0
- {sparrow_parse-0.3.0 → sparrow-parse-0.3.2}/sparrow_parse/extractor/markdown_processor.py +0 -0
- {sparrow_parse-0.3.0 → sparrow-parse-0.3.2}/sparrow_parse/extractor/unstructured_processor.py +0 -0
- {sparrow_parse-0.3.0 → sparrow-parse-0.3.2}/sparrow_parse/temp.py +0 -0
@@ -1,30 +1,20 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: sparrow-parse
|
3
|
-
Version: 0.3.0
|
3
|
+
Version: 0.3.2
|
4
4
|
Summary: Sparrow Parse is a Python package for parsing and extracting information from documents.
|
5
5
|
Home-page: https://github.com/katanaml/sparrow/tree/main/sparrow-data/parse
|
6
|
-
License: GPL-3.0
|
7
|
-
Keywords: llm,rag,vision
|
8
6
|
Author: Andrej Baranovskij
|
9
7
|
Author-email: andrejus.baranovskis@gmail.com
|
10
|
-
|
11
|
-
|
8
|
+
License: UNKNOWN
|
9
|
+
Project-URL: Homepage, https://github.com/katanaml/sparrow/tree/main/sparrow-data/parse
|
10
|
+
Project-URL: Repository, https://github.com/katanaml/sparrow
|
11
|
+
Keywords: llm,rag,vision
|
12
|
+
Platform: UNKNOWN
|
12
13
|
Classifier: Operating System :: OS Independent
|
13
|
-
Classifier: Programming Language :: Python :: 3
|
14
|
-
Classifier: Programming Language :: Python :: 3.9
|
15
|
-
Classifier: Programming Language :: Python :: 3.10
|
16
|
-
Classifier: Programming Language :: Python :: 3.11
|
14
|
+
Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
|
17
15
|
Classifier: Topic :: Software Development
|
18
|
-
|
19
|
-
Requires-
|
20
|
-
Requires-Dist: pymupdf4llm (==0.0.6)
|
21
|
-
Requires-Dist: rich (>=13.7.1,<14.0.0)
|
22
|
-
Requires-Dist: sentence-transformers (==3.0.1)
|
23
|
-
Requires-Dist: torch (==2.2.2)
|
24
|
-
Requires-Dist: transformers (==4.41.2)
|
25
|
-
Requires-Dist: unstructured-inference (==0.7.33)
|
26
|
-
Requires-Dist: unstructured[all-docs] (==0.14.5)
|
27
|
-
Project-URL: Repository, https://github.com/katanaml/sparrow
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
17
|
+
Requires-Python: >=3.10
|
28
18
|
Description-Content-Type: text/markdown
|
29
19
|
|
30
20
|
# Sparrow Parse
|
@@ -152,14 +142,30 @@ Example:
|
|
152
142
|
|
153
143
|
## Library build
|
154
144
|
|
145
|
+
Create Python virtual environment
|
146
|
+
|
155
147
|
```
|
156
|
-
|
148
|
+
python -m venv .env_sparrow_parse
|
157
149
|
```
|
158
150
|
|
159
|
-
|
151
|
+
Install Python libraries
|
160
152
|
|
161
153
|
```
|
162
|
-
|
154
|
+
pip install -r requirements.txt
|
155
|
+
```
|
156
|
+
|
157
|
+
Build package
|
158
|
+
|
159
|
+
```
|
160
|
+
pip install setuptools wheel
|
161
|
+
python setup.py sdist bdist_wheel
|
162
|
+
```
|
163
|
+
|
164
|
+
Upload to PyPI
|
165
|
+
|
166
|
+
```
|
167
|
+
pip install twine
|
168
|
+
twine upload dist/*
|
163
169
|
```
|
164
170
|
|
165
171
|
## Commercial usage
|
@@ -180,3 +186,4 @@ If your organization is seeking to utilize Sparrow under a proprietary license,
|
|
180
186
|
|
181
187
|
Licensed under the GPL 3.0. Copyright 2020-2024 Katana ML, Andrej Baranovskij. [Copy of the license](https://github.com/katanaml/sparrow/blob/main/LICENSE).
|
182
188
|
|
189
|
+
|
@@ -123,14 +123,30 @@ Example:
|
|
123
123
|
|
124
124
|
## Library build
|
125
125
|
|
126
|
+
Create Python virtual environment
|
127
|
+
|
128
|
+
```
|
129
|
+
python -m venv .env_sparrow_parse
|
130
|
+
```
|
131
|
+
|
132
|
+
Install Python libraries
|
133
|
+
|
134
|
+
```
|
135
|
+
pip install -r requirements.txt
|
136
|
+
```
|
137
|
+
|
138
|
+
Build package
|
139
|
+
|
126
140
|
```
|
127
|
-
|
141
|
+
pip install setuptools wheel
|
142
|
+
python setup.py sdist bdist_wheel
|
128
143
|
```
|
129
144
|
|
130
|
-
|
145
|
+
Upload to PyPI
|
131
146
|
|
132
147
|
```
|
133
|
-
|
148
|
+
pip install twine
|
149
|
+
twine upload dist/*
|
134
150
|
```
|
135
151
|
|
136
152
|
## Commercial usage
|
@@ -0,0 +1,37 @@
|
|
1
|
+
from setuptools import setup, find_packages
|
2
|
+
|
3
|
+
with open("README.md", "r", encoding="utf-8") as fh:
|
4
|
+
long_description = fh.read()
|
5
|
+
|
6
|
+
with open("requirements.txt", "r", encoding="utf-8") as fh:
|
7
|
+
requirements = fh.read().splitlines()
|
8
|
+
|
9
|
+
setup(
|
10
|
+
name="sparrow-parse",
|
11
|
+
version="0.3.2",
|
12
|
+
author="Andrej Baranovskij",
|
13
|
+
author_email="andrejus.baranovskis@gmail.com",
|
14
|
+
description="Sparrow Parse is a Python package for parsing and extracting information from documents.",
|
15
|
+
long_description=long_description,
|
16
|
+
long_description_content_type="text/markdown",
|
17
|
+
url="https://github.com/katanaml/sparrow/tree/main/sparrow-data/parse",
|
18
|
+
project_urls={
|
19
|
+
"Homepage": "https://github.com/katanaml/sparrow/tree/main/sparrow-data/parse",
|
20
|
+
"Repository": "https://github.com/katanaml/sparrow",
|
21
|
+
},
|
22
|
+
classifiers=[
|
23
|
+
"Operating System :: OS Independent",
|
24
|
+
"License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
|
25
|
+
"Topic :: Software Development",
|
26
|
+
"Programming Language :: Python :: 3.10",
|
27
|
+
],
|
28
|
+
entry_points={
|
29
|
+
'console_scripts': [
|
30
|
+
'sparrow-parse=sparrow_parse:main',
|
31
|
+
],
|
32
|
+
},
|
33
|
+
keywords="llm, rag, vision",
|
34
|
+
packages=find_packages(),
|
35
|
+
python_requires='>=3.10',
|
36
|
+
install_requires=requirements,
|
37
|
+
)
|
@@ -0,0 +1 @@
|
|
1
|
+
__version__ = '0.3.2'
|
@@ -1,4 +1,4 @@
|
|
1
|
-
import PyPDF2
|
1
|
+
import pypdf
|
2
2
|
from pdf2image import convert_from_path
|
3
3
|
import os
|
4
4
|
import tempfile
|
@@ -17,12 +17,12 @@ class PDFOptimizer(object):
|
|
17
17
|
if not convert_to_images:
|
18
18
|
# Open the PDF file
|
19
19
|
with open(file_path, 'rb') as pdf_file:
|
20
|
-
reader = PyPDF2.PdfReader(pdf_file)
|
20
|
+
reader = pypdf.PdfReader(pdf_file)
|
21
21
|
number_of_pages = len(reader.pages)
|
22
22
|
|
23
23
|
# Split the PDF into separate files per page
|
24
24
|
for page_num in range(number_of_pages):
|
25
|
-
writer = PyPDF2.PdfWriter()
|
25
|
+
writer = pypdf.PdfWriter()
|
26
26
|
writer.add_page(reader.pages[page_num])
|
27
27
|
|
28
28
|
output_filename = os.path.join(temp_dir, f'page_{page_num + 1}.pdf')
|
@@ -0,0 +1,189 @@
|
|
1
|
+
Metadata-Version: 2.1
|
2
|
+
Name: sparrow-parse
|
3
|
+
Version: 0.3.2
|
4
|
+
Summary: Sparrow Parse is a Python package for parsing and extracting information from documents.
|
5
|
+
Home-page: https://github.com/katanaml/sparrow/tree/main/sparrow-data/parse
|
6
|
+
Author: Andrej Baranovskij
|
7
|
+
Author-email: andrejus.baranovskis@gmail.com
|
8
|
+
License: UNKNOWN
|
9
|
+
Project-URL: Homepage, https://github.com/katanaml/sparrow/tree/main/sparrow-data/parse
|
10
|
+
Project-URL: Repository, https://github.com/katanaml/sparrow
|
11
|
+
Keywords: llm,rag,vision
|
12
|
+
Platform: UNKNOWN
|
13
|
+
Classifier: Operating System :: OS Independent
|
14
|
+
Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
|
15
|
+
Classifier: Topic :: Software Development
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
17
|
+
Requires-Python: >=3.10
|
18
|
+
Description-Content-Type: text/markdown
|
19
|
+
|
20
|
+
# Sparrow Parse
|
21
|
+
|
22
|
+
## Description
|
23
|
+
|
24
|
+
This module implements Sparrow Parse [library](https://pypi.org/project/sparrow-parse/) with helpful methods for data pre-processing, parsing and extracting information.
|
25
|
+
|
26
|
+
## Install
|
27
|
+
|
28
|
+
```
|
29
|
+
pip install sparrow-parse
|
30
|
+
```
|
31
|
+
|
32
|
+
## Pre-processing
|
33
|
+
|
34
|
+
### Unstructured
|
35
|
+
|
36
|
+
```
|
37
|
+
from sparrow_parse.extractor.unstructured_processor import UnstructuredProcessor
|
38
|
+
|
39
|
+
processor = UnstructuredProcessor()
|
40
|
+
|
41
|
+
content, table_content = processor.extract_data(
|
42
|
+
file_path, # file to process
|
43
|
+
strategy, # data processing strategy supported by unstructured
|
44
|
+
model_name, # model supported by unstructured
|
45
|
+
options, # table extraction into HTML format
|
46
|
+
local, # True if running from CLI, or False if running from FastAPI
|
47
|
+
debug) # Debug
|
48
|
+
```
|
49
|
+
|
50
|
+
Example:
|
51
|
+
|
52
|
+
*file_path* - `/Users/andrejb/infra/shared/katana-git/sparrow/sparrow-ml/llm/data/invoice_1.pdf`
|
53
|
+
|
54
|
+
*strategy* - `hi_res`
|
55
|
+
|
56
|
+
*model_name* - `yolox`
|
57
|
+
|
58
|
+
*options* - `['tables', 'unstructured']`
|
59
|
+
|
60
|
+
*local* - `True`
|
61
|
+
|
62
|
+
*debug* - `True`
|
63
|
+
|
64
|
+
### Markdown
|
65
|
+
|
66
|
+
```
|
67
|
+
from sparrow_parse.extractor.markdown_processor import MarkdownProcessor
|
68
|
+
|
69
|
+
processor = MarkdownProcessor()
|
70
|
+
|
71
|
+
content, table_content = processor.extract_data(
|
72
|
+
file_path, # file to process
|
73
|
+
options, # table extraction into HTML format
|
74
|
+
local, # True if running from CLI, or False if running from FastAPI
|
75
|
+
debug) # Debug
|
76
|
+
```
|
77
|
+
|
78
|
+
Example:
|
79
|
+
|
80
|
+
*file_path* - `/Users/andrejb/infra/shared/katana-git/sparrow/sparrow-ml/llm/data/invoice_1.pdf`
|
81
|
+
|
82
|
+
*options* - `['tables', 'markdown']`
|
83
|
+
|
84
|
+
*local* - `True`
|
85
|
+
|
86
|
+
*debug* - `True`
|
87
|
+
|
88
|
+
## Parsing and extraction
|
89
|
+
|
90
|
+
```
|
91
|
+
from sparrow_parse.extractor.html_extractor import HTMLExtractor
|
92
|
+
|
93
|
+
extractor = HTMLExtractor()
|
94
|
+
|
95
|
+
answer, targets_unprocessed = extractor.read_data(
|
96
|
+
target_columns, # list of table columns data to fetch
|
97
|
+
data, # list of HTML tables
|
98
|
+
column_keywords, # list of valid column names, can be empty. Useful to filter junk content
|
99
|
+
group_by_rows, # JSON result grouping
|
100
|
+
update_targets, # Set to true, if page contains multiple tables with the same columns
|
101
|
+
local, # True if running from CLI, or False if running from FastAPI
|
102
|
+
debug) # Debug
|
103
|
+
|
104
|
+
```
|
105
|
+
|
106
|
+
Example:
|
107
|
+
|
108
|
+
*target_columns* - `['description', 'qty', 'net_price', 'net_worth', 'vat', 'gross_worth']`
|
109
|
+
|
110
|
+
*data* - `list of HTML tables`
|
111
|
+
|
112
|
+
*column_keywords* - `None`
|
113
|
+
|
114
|
+
*group_by_rows* - `True`
|
115
|
+
|
116
|
+
*update_targets* - `True`
|
117
|
+
|
118
|
+
*local* - `True`
|
119
|
+
|
120
|
+
*debug* - `True`
|
121
|
+
|
122
|
+
## PDF optimization
|
123
|
+
|
124
|
+
```
|
125
|
+
from sparrow_parse.extractor.pdf_optimizer import PDFOptimizer
|
126
|
+
|
127
|
+
pdf_optimizer = PDFOptimizer()
|
128
|
+
|
129
|
+
num_pages, output_files, temp_dir = pdf_optimizer.split_pdf_to_pages(file_path,
|
130
|
+
output_directory,
|
131
|
+
convert_to_images)
|
132
|
+
|
133
|
+
```
|
134
|
+
|
135
|
+
Example:
|
136
|
+
|
137
|
+
*file_path* - `/Users/andrejb/infra/shared/katana-git/sparrow/sparrow-ml/llm/data/invoice_1.pdf`
|
138
|
+
|
139
|
+
*output_directory* - set to not `None`, for debug purposes only
|
140
|
+
|
141
|
+
*convert_to_images* - default `False`, to split into PDF files
|
142
|
+
|
143
|
+
## Library build
|
144
|
+
|
145
|
+
Create Python virtual environment
|
146
|
+
|
147
|
+
```
|
148
|
+
python -m venv .env_sparrow_parse
|
149
|
+
```
|
150
|
+
|
151
|
+
Install Python libraries
|
152
|
+
|
153
|
+
```
|
154
|
+
pip install -r requirements.txt
|
155
|
+
```
|
156
|
+
|
157
|
+
Build package
|
158
|
+
|
159
|
+
```
|
160
|
+
pip install setuptools wheel
|
161
|
+
python setup.py sdist bdist_wheel
|
162
|
+
```
|
163
|
+
|
164
|
+
Upload to PyPI
|
165
|
+
|
166
|
+
```
|
167
|
+
pip install twine
|
168
|
+
twine upload dist/*
|
169
|
+
```
|
170
|
+
|
171
|
+
## Commercial usage
|
172
|
+
|
173
|
+
Sparrow is available under the GPL 3.0 license, promoting freedom to use, modify, and distribute the software while ensuring any modifications remain open source under the same license. This aligns with our commitment to supporting the open-source community and fostering collaboration.
|
174
|
+
|
175
|
+
Additionally, we recognize the diverse needs of organizations, including small to medium-sized enterprises (SMEs). Therefore, Sparrow is also offered for free commercial use to organizations with gross revenue below $5 million USD in the past 12 months, enabling them to leverage Sparrow without the financial burden often associated with high-quality software solutions.
|
176
|
+
|
177
|
+
For businesses that exceed this revenue threshold or require usage terms not accommodated by the GPL 3.0 license—such as integrating Sparrow into proprietary software without the obligation to disclose source code modifications—we offer dual licensing options. Dual licensing allows Sparrow to be used under a separate proprietary license, offering greater flexibility for commercial applications and proprietary integrations. This model supports both the project's sustainability and the business's needs for confidentiality and customization.
|
178
|
+
|
179
|
+
If your organization is seeking to utilize Sparrow under a proprietary license, or if you are interested in custom workflows, consulting services, or dedicated support and maintenance options, please contact us at abaranovskis@redsamuraiconsulting.com. We're here to provide tailored solutions that meet your unique requirements, ensuring you can maximize the benefits of Sparrow for your projects and workflows.
|
180
|
+
|
181
|
+
## Author
|
182
|
+
|
183
|
+
[Katana ML](https://katanaml.io), [Andrej Baranovskij](https://github.com/abaranovskis-redsamurai)
|
184
|
+
|
185
|
+
## License
|
186
|
+
|
187
|
+
Licensed under the GPL 3.0. Copyright 2020-2024 Katana ML, Andrej Baranovskij. [Copy of the license](https://github.com/katanaml/sparrow/blob/main/LICENSE).
|
188
|
+
|
189
|
+
|
@@ -0,0 +1,17 @@
|
|
1
|
+
README.md
|
2
|
+
setup.py
|
3
|
+
sparrow_parse/__init__.py
|
4
|
+
sparrow_parse/__main__.py
|
5
|
+
sparrow_parse/temp.py
|
6
|
+
sparrow_parse.egg-info/PKG-INFO
|
7
|
+
sparrow_parse.egg-info/SOURCES.txt
|
8
|
+
sparrow_parse.egg-info/dependency_links.txt
|
9
|
+
sparrow_parse.egg-info/entry_points.txt
|
10
|
+
sparrow_parse.egg-info/requires.txt
|
11
|
+
sparrow_parse.egg-info/top_level.txt
|
12
|
+
sparrow_parse/extractor/__init__.py
|
13
|
+
sparrow_parse/extractor/extractor_helper.py
|
14
|
+
sparrow_parse/extractor/html_extractor.py
|
15
|
+
sparrow_parse/extractor/markdown_processor.py
|
16
|
+
sparrow_parse/extractor/pdf_optimizer.py
|
17
|
+
sparrow_parse/extractor/unstructured_processor.py
|
@@ -0,0 +1 @@
|
|
1
|
+
|
@@ -0,0 +1 @@
|
|
1
|
+
sparrow_parse
|
@@ -1,41 +0,0 @@
|
|
1
|
-
[tool.poetry]
|
2
|
-
name = "sparrow-parse"
|
3
|
-
version = "0.3.0"
|
4
|
-
description = "Sparrow Parse is a Python package for parsing and extracting information from documents."
|
5
|
-
authors = ["Andrej Baranovskij <andrejus.baranovskis@gmail.com>"]
|
6
|
-
license = "GPL-3.0"
|
7
|
-
readme = "README.md"
|
8
|
-
homepage = "https://github.com/katanaml/sparrow/tree/main/sparrow-data/parse"
|
9
|
-
repository = "https://github.com/katanaml/sparrow"
|
10
|
-
keywords = ["llm", "rag", "vision"]
|
11
|
-
classifiers = [
|
12
|
-
"Operating System :: OS Independent",
|
13
|
-
"License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
|
14
|
-
"Topic :: Software Development",
|
15
|
-
"Programming Language :: Python :: 3.10",
|
16
|
-
]
|
17
|
-
include = [
|
18
|
-
"LICENSE",
|
19
|
-
]
|
20
|
-
|
21
|
-
|
22
|
-
[tool.poetry.dependencies]
|
23
|
-
python = ">=3.9,<3.12"
|
24
|
-
torch = {version = "2.2.2", source = "pypi"}
|
25
|
-
unstructured = {version = "0.14.5", extras = ["all-docs"]}
|
26
|
-
unstructured-inference = "0.7.33"
|
27
|
-
rich = "^13.7.1"
|
28
|
-
pymupdf4llm = "0.0.6"
|
29
|
-
transformers = "4.41.2"
|
30
|
-
sentence-transformers = "3.0.1"
|
31
|
-
numpy = "1.26.4"
|
32
|
-
PyPDF2 = "3.0.1"
|
33
|
-
|
34
|
-
|
35
|
-
[tool.poetry.scripts]
|
36
|
-
sparrow-parse = 'sparrow_parse:main'
|
37
|
-
|
38
|
-
|
39
|
-
[build-system]
|
40
|
-
requires = ["poetry-core"]
|
41
|
-
build-backend = "poetry.core.masonry.api"
|
@@ -1 +0,0 @@
|
|
1
|
-
__version__ = '0.3.0'
|
@@ -1,9 +0,0 @@
|
|
1
|
-
[
|
2
|
-
'<table><thead><th>No.</th><th>Description</th><th>Qty</th><th>UM</th><th>Net price</th><th>Net worth</th><th>VAT [%]</th><th>Gross worth</th></thead><tr><td></td><td>Wine Glasses Goblets Pair Clear
|
3
|
-
Glass</td><td>5,00</td><td>eacn</td><td>12,00</td><td>60,00</td><td>10%</td><td>66,00</td></tr><tr><td></td><td>With Hooks Stemware Storage Multiple Uses Iron Wine Rack Hanging
|
4
|
-
Glass</td><td>4,00</td><td>eacn</td><td>28,08</td><td>112,32</td><td>10%</td><td>123,55</td></tr><tr><td></td><td>Replacement Corkscrew Parts Spiral Worm Wine Opener Bottle
|
5
|
-
Houdini</td><td>1,00</td><td>eacn</td><td>7,50</td><td>7,50</td><td>10%</td><td>8,25</td></tr><tr><td></td><td>HOME ESSENTIALS GRADIENT STEMLESS WINE GLASSES SET OF 4 20 FL OZ (591 ml)
|
6
|
-
NEW</td><td>1,00</td><td>eacn</td><td>12,99</td><td>12,99</td><td>10%</td><td>14,29</td></tr></table>',
|
7
|
-
'<table><thead><th>VAT</th><th>[%]</th><th>Net worth</th><th>VAT</th><th>Gross worth</th></thead><tr><td></td><td>10%</td><td>192,81</td><td>19,28</td><td>212,09</td></tr><tr><td colspan="2">Total</td><td>$ 192,81</td><td>$
|
8
|
-
19,28</td><td>$ 212,09</td></tr></table>'
|
9
|
-
]
|
Binary file
|
Binary file
|
Binary file
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{sparrow_parse-0.3.0 → sparrow-parse-0.3.2}/sparrow_parse/extractor/unstructured_processor.py
RENAMED
File without changes
|
File without changes
|