sparrow-parse 0.3.4__tar.gz → 0.3.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29) hide show
  1. {sparrow-parse-0.3.4 → sparrow-parse-0.3.5}/PKG-INFO +11 -98
  2. {sparrow-parse-0.3.4 → sparrow-parse-0.3.5}/README.md +8 -95
  3. {sparrow-parse-0.3.4 → sparrow-parse-0.3.5}/setup.py +3 -3
  4. sparrow-parse-0.3.5/sparrow_parse/__init__.py +1 -0
  5. {sparrow-parse-0.3.4 → sparrow-parse-0.3.5}/sparrow_parse.egg-info/PKG-INFO +11 -98
  6. {sparrow-parse-0.3.4 → sparrow-parse-0.3.5}/sparrow_parse.egg-info/SOURCES.txt +0 -5
  7. {sparrow-parse-0.3.4 → sparrow-parse-0.3.5}/sparrow_parse.egg-info/requires.txt +1 -4
  8. sparrow-parse-0.3.4/sparrow_parse/__init__.py +0 -1
  9. sparrow-parse-0.3.4/sparrow_parse/extractors/html_extractor.py +0 -251
  10. sparrow-parse-0.3.4/sparrow_parse/helpers/html_extractor_helper.py +0 -374
  11. sparrow-parse-0.3.4/sparrow_parse/processors/markdown_processor.py +0 -137
  12. sparrow-parse-0.3.4/sparrow_parse/processors/unstructured_processor.py +0 -178
  13. sparrow-parse-0.3.4/sparrow_parse/temp.py +0 -27
  14. {sparrow-parse-0.3.4 → sparrow-parse-0.3.5}/setup.cfg +0 -0
  15. {sparrow-parse-0.3.4 → sparrow-parse-0.3.5}/sparrow_parse/__main__.py +0 -0
  16. {sparrow-parse-0.3.4 → sparrow-parse-0.3.5}/sparrow_parse/extractors/__init__.py +0 -0
  17. {sparrow-parse-0.3.4 → sparrow-parse-0.3.5}/sparrow_parse/extractors/vllm_extractor.py +0 -0
  18. {sparrow-parse-0.3.4 → sparrow-parse-0.3.5}/sparrow_parse/helpers/__init__.py +0 -0
  19. {sparrow-parse-0.3.4 → sparrow-parse-0.3.5}/sparrow_parse/helpers/pdf_optimizer.py +0 -0
  20. {sparrow-parse-0.3.4 → sparrow-parse-0.3.5}/sparrow_parse/processors/__init__.py +0 -0
  21. {sparrow-parse-0.3.4 → sparrow-parse-0.3.5}/sparrow_parse/processors/table_structure_processor.py +0 -0
  22. {sparrow-parse-0.3.4 → sparrow-parse-0.3.5}/sparrow_parse/vllm/__init__.py +0 -0
  23. {sparrow-parse-0.3.4 → sparrow-parse-0.3.5}/sparrow_parse/vllm/huggingface_inference.py +0 -0
  24. {sparrow-parse-0.3.4 → sparrow-parse-0.3.5}/sparrow_parse/vllm/inference_base.py +0 -0
  25. {sparrow-parse-0.3.4 → sparrow-parse-0.3.5}/sparrow_parse/vllm/inference_factory.py +0 -0
  26. {sparrow-parse-0.3.4 → sparrow-parse-0.3.5}/sparrow_parse/vllm/local_gpu_inference.py +0 -0
  27. {sparrow-parse-0.3.4 → sparrow-parse-0.3.5}/sparrow_parse.egg-info/dependency_links.txt +0 -0
  28. {sparrow-parse-0.3.4 → sparrow-parse-0.3.5}/sparrow_parse.egg-info/entry_points.txt +0 -0
  29. {sparrow-parse-0.3.4 → sparrow-parse-0.3.5}/sparrow_parse.egg-info/top_level.txt +0 -0
@@ -1,14 +1,14 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sparrow-parse
3
- Version: 0.3.4
4
- Summary: Sparrow Parse is a Python package for parsing and extracting information from documents.
3
+ Version: 0.3.5
4
+ Summary: Sparrow Parse is a Python package (part of Sparrow) for parsing and extracting information from documents.
5
5
  Home-page: https://github.com/katanaml/sparrow/tree/main/sparrow-data/parse
6
6
  Author: Andrej Baranovskij
7
7
  Author-email: andrejus.baranovskis@gmail.com
8
8
  License: UNKNOWN
9
9
  Project-URL: Homepage, https://github.com/katanaml/sparrow/tree/main/sparrow-data/parse
10
10
  Project-URL: Repository, https://github.com/katanaml/sparrow
11
- Keywords: llm,rag,vision
11
+ Keywords: llm,vllm,ocr,vision
12
12
  Platform: UNKNOWN
13
13
  Classifier: Operating System :: OS Independent
14
14
  Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
@@ -21,7 +21,7 @@ Description-Content-Type: text/markdown
21
21
 
22
22
  ## Description
23
23
 
24
- This module implements Sparrow Parse [library](https://pypi.org/project/sparrow-parse/) with helpful methods for data pre-processing, parsing and extracting information.
24
+ This module implements the Sparrow Parse [library](https://pypi.org/project/sparrow-parse/) with helpful methods for data pre-processing, parsing and extracting information. This library relies on Visual LLM functionality and Table Transformers, and is part of Sparrow. Check the main [README](https://github.com/katanaml/sparrow).
25
25
 
26
26
  ## Install
27
27
 
@@ -29,101 +29,14 @@ This module implements Sparrow Parse [library](https://pypi.org/project/sparrow-
29
29
  pip install sparrow-parse
30
30
  ```
31
31
 
32
- ## Pre-processing
33
-
34
- ### Unstructured
35
-
36
- ```
37
- from sparrow_parse.extractor.unstructured_processor import UnstructuredProcessor
38
-
39
- processor = UnstructuredProcessor()
40
-
41
- content, table_content = processor.extract_data(
42
- file_path, # file to process
43
- strategy, # data processing strategy supported by unstructured
44
- model_name, # model supported by unstructured
45
- options, # table extraction into HTML format
46
- local, # True if running from CLI, or False if running from FastAPI
47
- debug) # Debug
48
- ```
49
-
50
- Example:
51
-
52
- *file_path* - `/Users/andrejb/infra/shared/katana-git/sparrow/sparrow-ml/llm/data/invoice_1.pdf`
53
-
54
- *strategy* - `hi_res`
55
-
56
- *model_name* - `yolox`
57
-
58
- *options* - `['tables', 'unstructured']`
59
-
60
- *local* - `True`
61
-
62
- *debug* - `True`
63
-
64
- ### Markdown
65
-
66
- ```
67
- from sparrow_parse.extractor.markdown_processor import MarkdownProcessor
68
-
69
- processor = MarkdownProcessor()
70
-
71
- content, table_content = processor.extract_data(
72
- file_path, # file to process
73
- options, # table extraction into HTML format
74
- local, # True if running from CLI, or False if running from FastAPI
75
- debug) # Debug
76
- ```
77
-
78
- Example:
79
-
80
- *file_path* - `/Users/andrejb/infra/shared/katana-git/sparrow/sparrow-ml/llm/data/invoice_1.pdf`
81
-
82
- *options* - `['tables', 'markdown']`
83
-
84
- *local* - `True`
85
-
86
- *debug* - `True`
87
-
88
32
  ## Parsing and extraction
89
33
 
90
- ### HTML extractor
34
+ ### Sparrow Parse VL (vision-language model) extractor with Hugging Face GPU infra
91
35
 
92
36
  ```
93
- from sparrow_parse.extractor.html_extractor import HTMLExtractor
37
+ from sparrow_parse.vllm.inference_factory import InferenceFactory
38
+ from sparrow_parse.extractors.vllm_extractor import VLLMExtractor
94
39
 
95
- extractor = HTMLExtractor()
96
-
97
- answer, targets_unprocessed = extractor.read_data(
98
- target_columns, # list of table columns data to fetch
99
- data, # list of HTML tables
100
- column_keywords, # list of valid column names, can be empty. Useful to filter junk content
101
- group_by_rows, # JSON result grouping
102
- update_targets, # Set to true, if page contains multiple tables with the same columns
103
- local, # True if running from CLI, or False if running from FastAPI
104
- debug) # Debug
105
-
106
- ```
107
-
108
- Example:
109
-
110
- *target_columns* - `['description', 'qty', 'net_price', 'net_worth', 'vat', 'gross_worth']`
111
-
112
- *data* - `list of HTML tables`
113
-
114
- *column_keywords* - `None`
115
-
116
- *group_by_rows* - `True`
117
-
118
- *update_targets* - `True`
119
-
120
- *local* - `True`
121
-
122
- *debug* - `True`
123
-
124
- ### Sparrow Parse VL (vision-language) extractor
125
-
126
- ```
127
40
  extractor = VLLMExtractor()
128
41
 
129
42
  # export HF_TOKEN="hf_"
@@ -141,8 +54,8 @@ model_inference_instance = factory.get_inference_instance()
141
54
 
142
55
  input_data = [
143
56
  {
144
- "image": "/Users/andrejb/Documents/work/epik/bankstatement/bonds_table.png",
145
- "text_input": "retrieve financial instruments data. return response in JSON format"
57
+ "image": "/data/bonds_table.png",
58
+ "text_input": "retrieve all data. return response in JSON format"
146
59
  }
147
60
  ]
148
61
 
@@ -151,7 +64,7 @@ result = extractor.run_inference(model_inference_instance, input_data, generic_q
151
64
  print("Inference Result:", result)
152
65
  ```
153
66
 
154
- ## PDF optimization
67
+ ## PDF pre-processing
155
68
 
156
69
  ```
157
70
  from sparrow_parse.extractor.pdf_optimizer import PDFOptimizer
@@ -166,7 +79,7 @@ num_pages, output_files, temp_dir = pdf_optimizer.split_pdf_to_pages(file_path,
166
79
 
167
80
  Example:
168
81
 
169
- *file_path* - `/Users/andrejb/infra/shared/katana-git/sparrow/sparrow-ml/llm/data/invoice_1.pdf`
82
+ *file_path* - `/data/invoice_1.pdf`
170
83
 
171
84
  *output_directory* - set to a value other than `None` for debug purposes only
172
85
 
@@ -2,7 +2,7 @@
2
2
 
3
3
  ## Description
4
4
 
5
- This module implements Sparrow Parse [library](https://pypi.org/project/sparrow-parse/) with helpful methods for data pre-processing, parsing and extracting information.
5
+ This module implements the Sparrow Parse [library](https://pypi.org/project/sparrow-parse/) with helpful methods for data pre-processing, parsing and extracting information. This library relies on Visual LLM functionality and Table Transformers, and is part of Sparrow. Check the main [README](https://github.com/katanaml/sparrow).
6
6
 
7
7
  ## Install
8
8
 
@@ -10,101 +10,14 @@ This module implements Sparrow Parse [library](https://pypi.org/project/sparrow-
10
10
  pip install sparrow-parse
11
11
  ```
12
12
 
13
- ## Pre-processing
14
-
15
- ### Unstructured
16
-
17
- ```
18
- from sparrow_parse.extractor.unstructured_processor import UnstructuredProcessor
19
-
20
- processor = UnstructuredProcessor()
21
-
22
- content, table_content = processor.extract_data(
23
- file_path, # file to process
24
- strategy, # data processing strategy supported by unstructured
25
- model_name, # model supported by unstructured
26
- options, # table extraction into HTML format
27
- local, # True if running from CLI, or False if running from FastAPI
28
- debug) # Debug
29
- ```
30
-
31
- Example:
32
-
33
- *file_path* - `/Users/andrejb/infra/shared/katana-git/sparrow/sparrow-ml/llm/data/invoice_1.pdf`
34
-
35
- *strategy* - `hi_res`
36
-
37
- *model_name* - `yolox`
38
-
39
- *options* - `['tables', 'unstructured']`
40
-
41
- *local* - `True`
42
-
43
- *debug* - `True`
44
-
45
- ### Markdown
46
-
47
- ```
48
- from sparrow_parse.extractor.markdown_processor import MarkdownProcessor
49
-
50
- processor = MarkdownProcessor()
51
-
52
- content, table_content = processor.extract_data(
53
- file_path, # file to process
54
- options, # table extraction into HTML format
55
- local, # True if running from CLI, or False if running from FastAPI
56
- debug) # Debug
57
- ```
58
-
59
- Example:
60
-
61
- *file_path* - `/Users/andrejb/infra/shared/katana-git/sparrow/sparrow-ml/llm/data/invoice_1.pdf`
62
-
63
- *options* - `['tables', 'markdown']`
64
-
65
- *local* - `True`
66
-
67
- *debug* - `True`
68
-
69
13
  ## Parsing and extraction
70
14
 
71
- ### HTML extractor
15
+ ### Sparrow Parse VL (vision-language model) extractor with Hugging Face GPU infra
72
16
 
73
17
  ```
74
- from sparrow_parse.extractor.html_extractor import HTMLExtractor
18
+ from sparrow_parse.vllm.inference_factory import InferenceFactory
19
+ from sparrow_parse.extractors.vllm_extractor import VLLMExtractor
75
20
 
76
- extractor = HTMLExtractor()
77
-
78
- answer, targets_unprocessed = extractor.read_data(
79
- target_columns, # list of table columns data to fetch
80
- data, # list of HTML tables
81
- column_keywords, # list of valid column names, can be empty. Useful to filter junk content
82
- group_by_rows, # JSON result grouping
83
- update_targets, # Set to true, if page contains multiple tables with the same columns
84
- local, # True if running from CLI, or False if running from FastAPI
85
- debug) # Debug
86
-
87
- ```
88
-
89
- Example:
90
-
91
- *target_columns* - `['description', 'qty', 'net_price', 'net_worth', 'vat', 'gross_worth']`
92
-
93
- *data* - `list of HTML tables`
94
-
95
- *column_keywords* - `None`
96
-
97
- *group_by_rows* - `True`
98
-
99
- *update_targets* - `True`
100
-
101
- *local* - `True`
102
-
103
- *debug* - `True`
104
-
105
- ### Sparrow Parse VL (vision-language) extractor
106
-
107
- ```
108
21
  extractor = VLLMExtractor()
109
22
 
110
23
  # export HF_TOKEN="hf_"
@@ -122,8 +35,8 @@ model_inference_instance = factory.get_inference_instance()
122
35
 
123
36
  input_data = [
124
37
  {
125
- "image": "/Users/andrejb/Documents/work/epik/bankstatement/bonds_table.png",
126
- "text_input": "retrieve financial instruments data. return response in JSON format"
38
+ "image": "/data/bonds_table.png",
39
+ "text_input": "retrieve all data. return response in JSON format"
127
40
  }
128
41
  ]
129
42
 
@@ -132,7 +45,7 @@ result = extractor.run_inference(model_inference_instance, input_data, generic_q
132
45
  print("Inference Result:", result)
133
46
  ```
134
47
 
135
- ## PDF optimization
48
+ ## PDF pre-processing
136
49
 
137
50
  ```
138
51
  from sparrow_parse.extractor.pdf_optimizer import PDFOptimizer
@@ -147,7 +60,7 @@ num_pages, output_files, temp_dir = pdf_optimizer.split_pdf_to_pages(file_path,
147
60
 
148
61
  Example:
149
62
 
150
- *file_path* - `/Users/andrejb/infra/shared/katana-git/sparrow/sparrow-ml/llm/data/invoice_1.pdf`
63
+ *file_path* - `/data/invoice_1.pdf`
151
64
 
152
65
  *output_directory* - set to a value other than `None` for debug purposes only
153
66
 
@@ -8,10 +8,10 @@ with open("requirements.txt", "r", encoding="utf-8") as fh:
8
8
 
9
9
  setup(
10
10
  name="sparrow-parse",
11
- version="0.3.4",
11
+ version="0.3.5",
12
12
  author="Andrej Baranovskij",
13
13
  author_email="andrejus.baranovskis@gmail.com",
14
- description="Sparrow Parse is a Python package for parsing and extracting information from documents.",
14
+ description="Sparrow Parse is a Python package (part of Sparrow) for parsing and extracting information from documents.",
15
15
  long_description=long_description,
16
16
  long_description_content_type="text/markdown",
17
17
  url="https://github.com/katanaml/sparrow/tree/main/sparrow-data/parse",
@@ -30,7 +30,7 @@ setup(
30
30
  'sparrow-parse=sparrow_parse:main',
31
31
  ],
32
32
  },
33
- keywords="llm, rag, vision",
33
+ keywords="llm, vllm, ocr, vision",
34
34
  packages=find_packages(),
35
35
  python_requires='>=3.10',
36
36
  install_requires=requirements,
@@ -0,0 +1 @@
1
+ __version__ = '0.3.5'
@@ -1,14 +1,14 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sparrow-parse
3
- Version: 0.3.4
4
- Summary: Sparrow Parse is a Python package for parsing and extracting information from documents.
3
+ Version: 0.3.5
4
+ Summary: Sparrow Parse is a Python package (part of Sparrow) for parsing and extracting information from documents.
5
5
  Home-page: https://github.com/katanaml/sparrow/tree/main/sparrow-data/parse
6
6
  Author: Andrej Baranovskij
7
7
  Author-email: andrejus.baranovskis@gmail.com
8
8
  License: UNKNOWN
9
9
  Project-URL: Homepage, https://github.com/katanaml/sparrow/tree/main/sparrow-data/parse
10
10
  Project-URL: Repository, https://github.com/katanaml/sparrow
11
- Keywords: llm,rag,vision
11
+ Keywords: llm,vllm,ocr,vision
12
12
  Platform: UNKNOWN
13
13
  Classifier: Operating System :: OS Independent
14
14
  Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
@@ -21,7 +21,7 @@ Description-Content-Type: text/markdown
21
21
 
22
22
  ## Description
23
23
 
24
- This module implements Sparrow Parse [library](https://pypi.org/project/sparrow-parse/) with helpful methods for data pre-processing, parsing and extracting information.
24
+ This module implements the Sparrow Parse [library](https://pypi.org/project/sparrow-parse/) with helpful methods for data pre-processing, parsing and extracting information. This library relies on Visual LLM functionality and Table Transformers, and is part of Sparrow. Check the main [README](https://github.com/katanaml/sparrow).
25
25
 
26
26
  ## Install
27
27
 
@@ -29,101 +29,14 @@ This module implements Sparrow Parse [library](https://pypi.org/project/sparrow-
29
29
  pip install sparrow-parse
30
30
  ```
31
31
 
32
- ## Pre-processing
33
-
34
- ### Unstructured
35
-
36
- ```
37
- from sparrow_parse.extractor.unstructured_processor import UnstructuredProcessor
38
-
39
- processor = UnstructuredProcessor()
40
-
41
- content, table_content = processor.extract_data(
42
- file_path, # file to process
43
- strategy, # data processing strategy supported by unstructured
44
- model_name, # model supported by unstructured
45
- options, # table extraction into HTML format
46
- local, # True if running from CLI, or False if running from FastAPI
47
- debug) # Debug
48
- ```
49
-
50
- Example:
51
-
52
- *file_path* - `/Users/andrejb/infra/shared/katana-git/sparrow/sparrow-ml/llm/data/invoice_1.pdf`
53
-
54
- *strategy* - `hi_res`
55
-
56
- *model_name* - `yolox`
57
-
58
- *options* - `['tables', 'unstructured']`
59
-
60
- *local* - `True`
61
-
62
- *debug* - `True`
63
-
64
- ### Markdown
65
-
66
- ```
67
- from sparrow_parse.extractor.markdown_processor import MarkdownProcessor
68
-
69
- processor = MarkdownProcessor()
70
-
71
- content, table_content = processor.extract_data(
72
- file_path, # file to process
73
- options, # table extraction into HTML format
74
- local, # True if running from CLI, or False if running from FastAPI
75
- debug) # Debug
76
- ```
77
-
78
- Example:
79
-
80
- *file_path* - `/Users/andrejb/infra/shared/katana-git/sparrow/sparrow-ml/llm/data/invoice_1.pdf`
81
-
82
- *options* - `['tables', 'markdown']`
83
-
84
- *local* - `True`
85
-
86
- *debug* - `True`
87
-
88
32
  ## Parsing and extraction
89
33
 
90
- ### HTML extractor
34
+ ### Sparrow Parse VL (vision-language model) extractor with Hugging Face GPU infra
91
35
 
92
36
  ```
93
- from sparrow_parse.extractor.html_extractor import HTMLExtractor
37
+ from sparrow_parse.vllm.inference_factory import InferenceFactory
38
+ from sparrow_parse.extractors.vllm_extractor import VLLMExtractor
94
39
 
95
- extractor = HTMLExtractor()
96
-
97
- answer, targets_unprocessed = extractor.read_data(
98
- target_columns, # list of table columns data to fetch
99
- data, # list of HTML tables
100
- column_keywords, # list of valid column names, can be empty. Useful to filter junk content
101
- group_by_rows, # JSON result grouping
102
- update_targets, # Set to true, if page contains multiple tables with the same columns
103
- local, # True if running from CLI, or False if running from FastAPI
104
- debug) # Debug
105
-
106
- ```
107
-
108
- Example:
109
-
110
- *target_columns* - `['description', 'qty', 'net_price', 'net_worth', 'vat', 'gross_worth']`
111
-
112
- *data* - `list of HTML tables`
113
-
114
- *column_keywords* - `None`
115
-
116
- *group_by_rows* - `True`
117
-
118
- *update_targets* - `True`
119
-
120
- *local* - `True`
121
-
122
- *debug* - `True`
123
-
124
- ### Sparrow Parse VL (vision-language) extractor
125
-
126
- ```
127
40
  extractor = VLLMExtractor()
128
41
 
129
42
  # export HF_TOKEN="hf_"
@@ -141,8 +54,8 @@ model_inference_instance = factory.get_inference_instance()
141
54
 
142
55
  input_data = [
143
56
  {
144
- "image": "/Users/andrejb/Documents/work/epik/bankstatement/bonds_table.png",
145
- "text_input": "retrieve financial instruments data. return response in JSON format"
57
+ "image": "/data/bonds_table.png",
58
+ "text_input": "retrieve all data. return response in JSON format"
146
59
  }
147
60
  ]
148
61
 
@@ -151,7 +64,7 @@ result = extractor.run_inference(model_inference_instance, input_data, generic_q
151
64
  print("Inference Result:", result)
152
65
  ```
153
66
 
154
- ## PDF optimization
67
+ ## PDF pre-processing
155
68
 
156
69
  ```
157
70
  from sparrow_parse.extractor.pdf_optimizer import PDFOptimizer
@@ -166,7 +79,7 @@ num_pages, output_files, temp_dir = pdf_optimizer.split_pdf_to_pages(file_path,
166
79
 
167
80
  Example:
168
81
 
169
- *file_path* - `/Users/andrejb/infra/shared/katana-git/sparrow/sparrow-ml/llm/data/invoice_1.pdf`
82
+ *file_path* - `/data/invoice_1.pdf`
170
83
 
171
84
  *output_directory* - set to a value other than `None` for debug purposes only
172
85
 
@@ -2,7 +2,6 @@ README.md
2
2
  setup.py
3
3
  sparrow_parse/__init__.py
4
4
  sparrow_parse/__main__.py
5
- sparrow_parse/temp.py
6
5
  sparrow_parse.egg-info/PKG-INFO
7
6
  sparrow_parse.egg-info/SOURCES.txt
8
7
  sparrow_parse.egg-info/dependency_links.txt
@@ -10,15 +9,11 @@ sparrow_parse.egg-info/entry_points.txt
10
9
  sparrow_parse.egg-info/requires.txt
11
10
  sparrow_parse.egg-info/top_level.txt
12
11
  sparrow_parse/extractors/__init__.py
13
- sparrow_parse/extractors/html_extractor.py
14
12
  sparrow_parse/extractors/vllm_extractor.py
15
13
  sparrow_parse/helpers/__init__.py
16
- sparrow_parse/helpers/html_extractor_helper.py
17
14
  sparrow_parse/helpers/pdf_optimizer.py
18
15
  sparrow_parse/processors/__init__.py
19
- sparrow_parse/processors/markdown_processor.py
20
16
  sparrow_parse/processors/table_structure_processor.py
21
- sparrow_parse/processors/unstructured_processor.py
22
17
  sparrow_parse/vllm/__init__.py
23
18
  sparrow_parse/vllm/huggingface_inference.py
24
19
  sparrow_parse/vllm/inference_base.py
@@ -1,11 +1,8 @@
1
- torch==2.2.2
2
- unstructured[all-docs]==0.14.5
3
- unstructured-inference==0.7.33
4
1
  rich
5
- pymupdf4llm==0.0.9
6
2
  transformers==4.41.2
7
3
  sentence-transformers==3.0.1
8
4
  numpy==1.26.4
9
5
  pypdf==4.3.0
10
6
  easyocr==1.7.1
11
7
  gradio_client
8
+ pdf2image
@@ -1 +0,0 @@
1
- __version__ = '0.3.4'