sunholo 0.110.4__py3-none-any.whl → 0.112.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sunholo/chunker/loaders.py +21 -12
- sunholo/database/alloydb_client.py +11 -7
- sunholo-0.112.0.dist-info/METADATA +211 -0
- {sunholo-0.110.4.dist-info → sunholo-0.112.0.dist-info}/RECORD +8 -8
- {sunholo-0.110.4.dist-info → sunholo-0.112.0.dist-info}/WHEEL +1 -1
- sunholo-0.110.4.dist-info/METADATA +0 -209
- {sunholo-0.110.4.dist-info → sunholo-0.112.0.dist-info}/LICENSE.txt +0 -0
- {sunholo-0.110.4.dist-info → sunholo-0.112.0.dist-info}/entry_points.txt +0 -0
- {sunholo-0.110.4.dist-info → sunholo-0.112.0.dist-info}/top_level.txt +0 -0
sunholo/chunker/loaders.py
CHANGED

@@ -11,9 +11,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-from
-
+try:
+    from langchain_unstructured import UnstructuredLoader
+except ImportError:
+    UnstructuredLoader = None
 
 from langchain_community.document_loaders import GitLoader
 from langchain_community.document_loaders import GoogleDriveLoader

@@ -159,10 +160,12 @@ def read_gdrive_to_document(url: str, metadata: dict = None):
 
 def read_url_to_document(url: str, metadata: dict = None):
 
+    if not UnstructuredLoader:
+        raise ImportError("UnstructuredLoader requires 'langchain_unstructured' to be installed")
     unstructured_kwargs = {"pdf_infer_table_structure": True,
                            "extract_image_block_types": ["Image", "Table"]
                            }
-    loader =
+    loader = UnstructuredLoader(web_url=url, mode="elements", unstructured_kwargs=unstructured_kwargs)
     docs = loader.load()
     if metadata is not None:
         for doc in docs:

@@ -170,7 +173,7 @@ def read_url_to_document(url: str, metadata: dict = None):
         if not doc.metadata.get("source") and doc.metadata.get("url"):
             doc.metadata["source"] = doc.metadata["url"]
 
-    log.info(f"
+    log.info(f"UnstructuredLoader docs: {docs}")
 
     return docs
 

@@ -184,18 +187,21 @@ def read_file_to_documents(gs_file: pathlib.Path, metadata: dict = None):
         log.info(f"Already uploaded to bucket, skipping {pdf_path}")
         return []
 
-    log.info(f"Sending {pdf_path} to
+    log.info(f"Sending {pdf_path} to UnstructuredLoader")
     UNSTRUCTURED_URL = os.getenv("UNSTRUCTURED_URL")
     unstructured_kwargs = {"pdf_infer_table_structure": True,
                            "extract_image_block_types": ["Image", "Table"]
                            }
 
+    if not UnstructuredLoader:
+        raise ImportError("UnstructuredLoader requires 'langchain_unstructured' to be installed")
+
     if UNSTRUCTURED_URL:
         log.debug(f"Found UNSTRUCTURED_URL: {UNSTRUCTURED_URL}")
         the_endpoint = f"{UNSTRUCTURED_URL}/general/v0/general"
         try:
-            loader =
-            pdf_path,
+            loader = UnstructuredLoader(
+                file_path=pdf_path,
                 url=the_endpoint,
                 mode="elements",
                 **unstructured_kwargs)

@@ -206,8 +212,8 @@ def read_file_to_documents(gs_file: pathlib.Path, metadata: dict = None):
             else:
                 raise err
     else:
-        loader =
-        pdf_path,
+        loader = UnstructuredLoader(
+            file_path=pdf_path,
             api_key=UNSTRUCTURED_KEY,
             mode="elements",
             **unstructured_kwargs)

@@ -216,7 +222,7 @@ def read_file_to_documents(gs_file: pathlib.Path, metadata: dict = None):
     try:
         docs = loader.load() # this takes a long time 30m+ for big PDF files
     except ValueError as e:
-        log.info(f"Error for {gs_file} from
+        log.info(f"Error for {gs_file} from UnstructuredLoader: {str(e)}")
         pdf_path = pathlib.Path(gs_file)
         if pdf_path.suffix == ".pdf":
             local_doc = read_pdf_file(pdf_path, metadata=metadata)

@@ -262,13 +268,16 @@ def read_file_to_documents(gs_file: pathlib.Path, metadata: dict = None):
 
 def convert_to_txt_and_extract(gs_file, split=False):
 
+    if not UnstructuredLoader:
+        raise ImportError("UnstructuredLoader requires 'langchain_unstructured' to be installed")
+
     log.info("trying file parsing locally via .txt conversion")
     txt_file = None
     docs = []
     try:
         # Convert the file to .txt and try again
         txt_file = convert_to_txt(gs_file)
-        loader =
+        loader = UnstructuredLoader(
             txt_file,
             mode="elements")
 
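The loaders.py changes above swap in `UnstructuredLoader` from `langchain_unstructured` and make that dependency optional: the import is wrapped in `try/except ImportError`, the name falls back to `None`, and each function that needs the loader raises an `ImportError` only when it is actually called. A condensed sketch of that guard pattern, reusing the names and constructor arguments shown in the hunks (the function body is simplified for illustration and is not the full loaders.py implementation):

```python
# Optional-dependency guard as applied in sunholo/chunker/loaders.py above.
try:
    from langchain_unstructured import UnstructuredLoader
except ImportError:
    UnstructuredLoader = None  # extra not installed; defer the failure to call time


def read_url_to_document(url: str, metadata: dict = None):
    # Fail with an actionable message only when the loader is really needed.
    if not UnstructuredLoader:
        raise ImportError("UnstructuredLoader requires 'langchain_unstructured' to be installed")

    loader = UnstructuredLoader(web_url=url, mode="elements")
    docs = loader.load()
    if metadata is not None:
        for doc in docs:
            doc.metadata.update(metadata)  # illustrative: merge caller metadata onto each chunk
    return docs
```

This keeps `import sunholo.chunker.loaders` working for users who never touch Unstructured-based loading, while giving a clear installation hint to those who do.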
sunholo/database/alloydb_client.py
CHANGED

@@ -302,7 +302,7 @@ class AlloyDBClient:
         SELECT page_content, source, langchain_metadata, images_gsurls, doc_id::text as doc_id
         FROM "{table_name}"
         WHERE source ILIKE '%{source}%'
-        LIMIT
+        LIMIT 1000;
         """
 
         return query

@@ -319,7 +319,7 @@ class AlloyDBClient:
         SELECT page_content, source, langchain_metadata, images_gsurls, doc_id::text as doc_id
         FROM "{table_name}"
         WHERE doc_id = '{doc_id}'
-        LIMIT
+        LIMIT 500;
         """
 
         return query

@@ -362,11 +362,16 @@ class AlloyDBClient:
         conditions = self._and_or_ilike(sources, search_type=search_type)
 
         query = f"""
-
+        WITH ranked_sources AS (
+            SELECT *,
+            ROW_NUMBER() OVER (PARTITION BY source ORDER BY doc_id) as chunk_num
             FROM {table_name}
             WHERE {conditions}
-
-
+        )
+        SELECT *
+        FROM ranked_sources
+        ORDER BY source ASC, chunk_num ASC
+        LIMIT 1000;
         """
 
         return query

@@ -378,10 +383,9 @@ class AlloyDBClient:
         if sources:
             conditions = self._and_or_ilike(sources, search_type=search_type)
             query = f"""
-            SELECT source AS objectId
+            SELECT DISTINCT source AS objectId
             FROM {table_name}
             WHERE {conditions}
-            GROUP BY source
             ORDER BY source ASC
             LIMIT 500;
             """
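In alloydb_client.py, the previously open-ended queries now carry explicit `LIMIT` clauses, the source listing uses `SELECT DISTINCT` instead of `GROUP BY source`, and the multi-source fetch is wrapped in a CTE that numbers chunks per source with `ROW_NUMBER() OVER (PARTITION BY source ORDER BY doc_id)`, so results come back grouped by source in a stable chunk order. A minimal sketch of that query construction, with the SQL lifted from the hunk above (the standalone function and its parameters are illustrative; in the class, the conditions string comes from `self._and_or_ilike`):

```python
def build_ranked_sources_query(table_name: str, conditions: str) -> str:
    """Build the ranked-sources query shown in the diff above.

    `conditions` is assumed to be a pre-built WHERE clause such as
    "source ILIKE '%report%' OR source ILIKE '%memo%'".
    """
    return f"""
    WITH ranked_sources AS (
        SELECT *,
            ROW_NUMBER() OVER (PARTITION BY source ORDER BY doc_id) as chunk_num
        FROM {table_name}
        WHERE {conditions}
    )
    SELECT *
    FROM ranked_sources
    ORDER BY source ASC, chunk_num ASC
    LIMIT 1000;
    """


# Example: fetch up to 1000 chunks across two matching sources, ordered per source.
print(build_ranked_sources_query("my_vectorstore", "source ILIKE '%report%' OR source ILIKE '%memo%'"))
```

The window function gives every chunk a deterministic position within its source, so callers can reassemble documents in order rather than depending on row insertion order.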
sunholo-0.112.0.dist-info/METADATA

@@ -0,0 +1,211 @@
+Metadata-Version: 2.1
+Name: sunholo
+Version: 0.112.0
+Summary: Large Language Model DevOps - a package to help deploy LLMs to the Cloud.
+Home-page: https://github.com/sunholo-data/sunholo-py
+Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.112.0.tar.gz
+Author: Holosun ApS
+Author-email: multivac@sunholo.com
+License: Apache License, Version 2.0
+Keywords: llms,devops,google_cloud_platform
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Developers
+Classifier: Topic :: Software Development :: Build Tools
+Classifier: License :: OSI Approved :: Apache Software License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Description-Content-Type: text/markdown
+License-File: LICENSE.txt
+Requires-Dist: google-auth
+Requires-Dist: ruamel.yaml
+Requires-Dist: langchain==0.2.16
+Requires-Dist: langchain_experimental==0.0.65
+Requires-Dist: langchain-community==0.2.17
+Requires-Dist: langsmith==0.1.143
+Provides-Extra: all
+Requires-Dist: anthropic[vertex]; extra == "all"
+Requires-Dist: asyncpg; extra == "all"
+Requires-Dist: azure-identity; extra == "all"
+Requires-Dist: azure-storage-blob; extra == "all"
+Requires-Dist: fastapi; extra == "all"
+Requires-Dist: flask; extra == "all"
+Requires-Dist: google-auth; extra == "all"
+Requires-Dist: google-auth-httplib2; extra == "all"
+Requires-Dist: google-auth-oauthlib; extra == "all"
+Requires-Dist: google-cloud-aiplatform>=1.58.0; extra == "all"
+Requires-Dist: google-api-python-client; extra == "all"
+Requires-Dist: google-cloud-alloydb-connector[pg8000]; extra == "all"
+Requires-Dist: google-cloud-bigquery; extra == "all"
+Requires-Dist: google-cloud-build; extra == "all"
+Requires-Dist: google-cloud-service-control; extra == "all"
+Requires-Dist: google-cloud-logging; extra == "all"
+Requires-Dist: google-cloud-storage; extra == "all"
+Requires-Dist: google-cloud-pubsub; extra == "all"
+Requires-Dist: google-cloud-discoveryengine; extra == "all"
+Requires-Dist: google-cloud-texttospeech; extra == "all"
+Requires-Dist: google-generativeai>=0.7.1; extra == "all"
+Requires-Dist: gunicorn; extra == "all"
+Requires-Dist: httpcore; extra == "all"
+Requires-Dist: httpx; extra == "all"
+Requires-Dist: jsonschema; extra == "all"
+Requires-Dist: lancedb; extra == "all"
+Requires-Dist: langchain>=0.2.16; extra == "all"
+Requires-Dist: langchain-experimental>=0.0.61; extra == "all"
+Requires-Dist: langchain-community>=0.2.11; extra == "all"
+Requires-Dist: langchain-openai==0.1.25; extra == "all"
+Requires-Dist: langchain-google-genai==1.0.10; extra == "all"
+Requires-Dist: langchain_google_alloydb_pg; extra == "all"
+Requires-Dist: langchain-anthropic==0.1.23; extra == "all"
+Requires-Dist: langchain-google-vertexai; extra == "all"
+Requires-Dist: langchain-unstructured; extra == "all"
+Requires-Dist: langfuse; extra == "all"
+Requires-Dist: numpy; extra == "all"
+Requires-Dist: pg8000; extra == "all"
+Requires-Dist: pgvector; extra == "all"
+Requires-Dist: pillow; extra == "all"
+Requires-Dist: playwright; extra == "all"
+Requires-Dist: psutil; extra == "all"
+Requires-Dist: psycopg2-binary; extra == "all"
+Requires-Dist: pypdf; extra == "all"
+Requires-Dist: python-hcl2; extra == "all"
+Requires-Dist: python-socketio; extra == "all"
+Requires-Dist: pytesseract; extra == "all"
+Requires-Dist: rich; extra == "all"
+Requires-Dist: sounddevice; extra == "all"
+Requires-Dist: supabase; extra == "all"
+Requires-Dist: tabulate; extra == "all"
+Requires-Dist: tantivy; extra == "all"
+Requires-Dist: tenacity; extra == "all"
+Requires-Dist: tiktoken; extra == "all"
+Requires-Dist: unstructured[all-docs,local-inference]; extra == "all"
+Requires-Dist: xlwings; extra == "all"
+Provides-Extra: azure
+Requires-Dist: azure-identity; extra == "azure"
+Requires-Dist: azure-storage-blob; extra == "azure"
+Provides-Extra: cli
+Requires-Dist: jsonschema>=4.21.1; extra == "cli"
+Requires-Dist: rich; extra == "cli"
+Provides-Extra: database
+Requires-Dist: asyncpg; extra == "database"
+Requires-Dist: supabase; extra == "database"
+Requires-Dist: sqlalchemy; extra == "database"
+Requires-Dist: pg8000; extra == "database"
+Requires-Dist: pgvector; extra == "database"
+Requires-Dist: psycopg2-binary; extra == "database"
+Requires-Dist: lancedb; extra == "database"
+Requires-Dist: tantivy; extra == "database"
+Provides-Extra: pipeline
+Requires-Dist: GitPython; extra == "pipeline"
+Requires-Dist: lark; extra == "pipeline"
+Requires-Dist: langchain-unstructured; extra == "pipeline"
+Requires-Dist: psutil; extra == "pipeline"
+Requires-Dist: pypdf; extra == "pipeline"
+Requires-Dist: pytesseract; extra == "pipeline"
+Requires-Dist: tabulate; extra == "pipeline"
+Requires-Dist: unstructured[all-docs,local-inference]; extra == "pipeline"
+Provides-Extra: gcp
+Requires-Dist: anthropic[vertex]; extra == "gcp"
+Requires-Dist: google-api-python-client; extra == "gcp"
+Requires-Dist: google-cloud-alloydb-connector[pg8000]; extra == "gcp"
+Requires-Dist: google-auth-httplib2; extra == "gcp"
+Requires-Dist: google-auth-oauthlib; extra == "gcp"
+Requires-Dist: google-cloud-aiplatform>=1.58.0; extra == "gcp"
+Requires-Dist: google-cloud-bigquery; extra == "gcp"
+Requires-Dist: google-cloud-build; extra == "gcp"
+Requires-Dist: google-cloud-service-control; extra == "gcp"
+Requires-Dist: google-cloud-storage; extra == "gcp"
+Requires-Dist: google-cloud-logging; extra == "gcp"
+Requires-Dist: google-cloud-pubsub; extra == "gcp"
+Requires-Dist: google-cloud-discoveryengine; extra == "gcp"
+Requires-Dist: google-cloud-texttospeech; extra == "gcp"
+Requires-Dist: google-generativeai>=0.7.1; extra == "gcp"
+Requires-Dist: langchain-google-genai==1.0.10; extra == "gcp"
+Requires-Dist: langchain_google_alloydb_pg>=0.2.2; extra == "gcp"
+Requires-Dist: langchain-google-vertexai; extra == "gcp"
+Requires-Dist: pillow; extra == "gcp"
+Provides-Extra: openai
+Requires-Dist: langchain-openai==0.1.25; extra == "openai"
+Requires-Dist: tiktoken; extra == "openai"
+Provides-Extra: anthropic
+Requires-Dist: langchain-anthropic==0.1.23; extra == "anthropic"
+Provides-Extra: tools
+Requires-Dist: openapi-spec-validator; extra == "tools"
+Requires-Dist: playwright; extra == "tools"
+Provides-Extra: http
+Requires-Dist: fastapi; extra == "http"
+Requires-Dist: flask; extra == "http"
+Requires-Dist: gunicorn; extra == "http"
+Requires-Dist: httpcore; extra == "http"
+Requires-Dist: httpx; extra == "http"
+Requires-Dist: langfuse; extra == "http"
+Requires-Dist: python-socketio; extra == "http"
+Requires-Dist: requests; extra == "http"
+Requires-Dist: tenacity; extra == "http"
+Provides-Extra: excel
+Requires-Dist: xlwings; extra == "excel"
+Requires-Dist: requests; extra == "excel"
+Requires-Dist: rich; extra == "excel"
+Provides-Extra: iac
+Requires-Dist: python-hcl2; extra == "iac"
+Provides-Extra: tts
+Requires-Dist: google-cloud-texttospeech; extra == "tts"
+Requires-Dist: numpy; extra == "tts"
+Requires-Dist: sounddevice; extra == "tts"
+
+## Introduction
+This is the Sunholo Python project, a comprehensive toolkit for working with language models and vector stores on Google Cloud Platform. It provides a wide range of functionalities and utilities to facilitate the development and deployment of language model applications.
+
+Please refer to the website for full documentation at https://dev.sunholo.com/
+
+## Listen to the audio file:
+
+A [NotebookLM](https://notebooklm.google/) generated podcast of the codebase that may help give you an overview of what the library is capable of:
+
+[Listen to the audio file from Google Drive](https://drive.google.com/file/d/1GvwRmiYDjPjN2hXQ8plhnVDByu6TmgCQ/view?usp=drive_link) or on the website at https://dev.sunholo.com/docs/
+
+> "Ever wish you could build your own AI?..."
+
+## Tests via pytest
+
+If loading from GitHub, run tests:
+
+```bash
+pip install pytest
+pip install . --use-feature=in-tree-build
+pytest tests
+```
+
+## Demos
+
+Using https://github.com/charmbracelet/vhs
+
+```sh
+vhs record > cassette.tape
+```
+
+Then make gif:
+
+```sh
+vhs docs/tapes/config-list.tape
+```
+
+
+
+```
+Copyright [2024] [Holosun ApS]
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+```
+
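On the dependency side, the 0.112.0 METADATA adds `langchain-unstructured` to the `all` and `pipeline` extras and replaces the old `unstructured[local-inference]==0.14.9` pin (visible in the removed 0.110.4 METADATA further below) with `unstructured[all-docs,local-inference]`, matching the loader changes above. A small, hypothetical pre-flight check, not part of the package, that the optional import is present before running a chunking pipeline:

```python
# Hypothetical helper (not part of sunholo): confirm that the optional dependency
# the new loaders guard against is importable, and point at a providing extra if not.
from importlib.util import find_spec


def ensure_unstructured_loader() -> None:
    if find_spec("langchain_unstructured") is None:
        raise ImportError(
            "langchain_unstructured is not installed; "
            "install an extra that provides it, e.g. pip install 'sunholo[pipeline]'"
        )


if __name__ == "__main__":
    ensure_unstructured_loader()
    print("langchain_unstructured is available")
```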
{sunholo-0.110.4.dist-info → sunholo-0.112.0.dist-info}/RECORD

@@ -34,7 +34,7 @@ sunholo/chunker/azure.py,sha256=MVF9_-QdKUoJqlpEJ49pv2sdjMDxEiMNxzmO7w5nWDQ,3270
 sunholo/chunker/doc_handling.py,sha256=UAf9BmUMpKCKRlAMl1qNZK6xDNYWk1z3ARoftWoa_54,8734
 sunholo/chunker/encode_metadata.py,sha256=hxxd9KU35Xi0Z_EL8kt_oD66pKfBLhEjBImC16ew-Eo,1919
 sunholo/chunker/images.py,sha256=id2PBu6XyGEOtgafq2v0c9_O6kxaC_pYFMnbsIitkSg,1868
-sunholo/chunker/loaders.py,sha256=
+sunholo/chunker/loaders.py,sha256=QaM-M1wmbA2iLIDvBKpC5-TPKMlQIxS01gMKj5n9RyM,10547
 sunholo/chunker/message_data.py,sha256=EaiY7_HClpcfPUAYaAm6Zk5ReeZ9s9F_jBVd0kDgI-4,10836
 sunholo/chunker/pdfs.py,sha256=njDPop751GMHi3cOwIKd2Yct-_lWR2gqcB7WykfHphs,2480
 sunholo/chunker/process_chunker_data.py,sha256=uO-YOEHIjAOy0ZMJ0vea9OMNsQBISHfhbtgoyuHiP6s,3598

@@ -59,7 +59,7 @@ sunholo/components/retriever.py,sha256=bKIVT7_18Ut3OJd0E0jyiISPnD9qkHWVjcQPT4i1_
 sunholo/components/vectorstore.py,sha256=xKk7micTRwZckaI7U6PxvFz_ZSjCH48xPTDYiDcv2tc,5913
 sunholo/database/__init__.py,sha256=bpB5Nk21kwqYj-qdVnvNgXjLsbflnH4g-San7OHMqR4,283
 sunholo/database/alloydb.py,sha256=x1zUMB-EVWbE2Zvp4nAs2Z-tB_kOZmS45H2lwVHdYnk,11678
-sunholo/database/alloydb_client.py,sha256=
+sunholo/database/alloydb_client.py,sha256=q732tmRdSDutnUk7vRUPUPpi-yU5FK5rQko8co6yke0,19132
 sunholo/database/database.py,sha256=VqhZdkXUNdvWn8sUcUV3YNby1JDVf7IykPVXWBtxo9U,7361
 sunholo/database/lancedb.py,sha256=DyfZntiFKBlVPaFooNN1Z6Pl-LAs4nxWKKuq8GBqN58,715
 sunholo/database/static_dbs.py,sha256=8cvcMwUK6c32AS2e_WguKXWMkFf5iN3g9WHzsh0C07Q,442

@@ -150,9 +150,9 @@ sunholo/vertex/init.py,sha256=1OQwcPBKZYBTDPdyU7IM4X4OmiXLdsNV30C-fee2scQ,2875
 sunholo/vertex/memory_tools.py,sha256=tBZxqVZ4InTmdBvLlOYwoSEWu4-kGquc-gxDwZCC4FA,7667
 sunholo/vertex/safety.py,sha256=S9PgQT1O_BQAkcqauWncRJaydiP8Q_Jzmu9gxYfy1VA,2482
 sunholo/vertex/type_dict_to_json.py,sha256=uTzL4o9tJRao4u-gJOFcACgWGkBOtqACmb6ihvCErL8,4694
-sunholo-0.
-sunholo-0.
-sunholo-0.
-sunholo-0.
-sunholo-0.
-sunholo-0.
+sunholo-0.112.0.dist-info/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
+sunholo-0.112.0.dist-info/METADATA,sha256=kiphk-fAQurwpMyZVdgAqVdhS4yUr8AJPLjUAZze2_I,8685
+sunholo-0.112.0.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
+sunholo-0.112.0.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
+sunholo-0.112.0.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
+sunholo-0.112.0.dist-info/RECORD,,
sunholo-0.110.4.dist-info/METADATA

@@ -1,209 +0,0 @@
-Metadata-Version: 2.1
-Name: sunholo
-Version: 0.110.4
-Summary: Large Language Model DevOps - a package to help deploy LLMs to the Cloud.
-Home-page: https://github.com/sunholo-data/sunholo-py
-Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.110.4.tar.gz
-Author: Holosun ApS
-Author-email: multivac@sunholo.com
-License: Apache License, Version 2.0
-Keywords: llms,devops,google_cloud_platform
-Classifier: Development Status :: 3 - Alpha
-Classifier: Intended Audience :: Developers
-Classifier: Topic :: Software Development :: Build Tools
-Classifier: License :: OSI Approved :: Apache Software License
-Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.10
-Classifier: Programming Language :: Python :: 3.11
-Classifier: Programming Language :: Python :: 3.12
-Description-Content-Type: text/markdown
-License-File: LICENSE.txt
-Requires-Dist: google-auth
-Requires-Dist: ruamel.yaml
-Requires-Dist: langchain ==0.2.16
-Requires-Dist: langchain-experimental ==0.0.65
-Requires-Dist: langchain-community ==0.2.17
-Requires-Dist: langsmith ==0.1.143
-Provides-Extra: all
-Requires-Dist: anthropic[vertex] ; extra == 'all'
-Requires-Dist: asyncpg ; extra == 'all'
-Requires-Dist: azure-identity ; extra == 'all'
-Requires-Dist: azure-storage-blob ; extra == 'all'
-Requires-Dist: fastapi ; extra == 'all'
-Requires-Dist: flask ; extra == 'all'
-Requires-Dist: google-auth ; extra == 'all'
-Requires-Dist: google-auth-httplib2 ; extra == 'all'
-Requires-Dist: google-auth-oauthlib ; extra == 'all'
-Requires-Dist: google-cloud-aiplatform >=1.58.0 ; extra == 'all'
-Requires-Dist: google-api-python-client ; extra == 'all'
-Requires-Dist: google-cloud-alloydb-connector[pg8000] ; extra == 'all'
-Requires-Dist: google-cloud-bigquery ; extra == 'all'
-Requires-Dist: google-cloud-build ; extra == 'all'
-Requires-Dist: google-cloud-service-control ; extra == 'all'
-Requires-Dist: google-cloud-logging ; extra == 'all'
-Requires-Dist: google-cloud-storage ; extra == 'all'
-Requires-Dist: google-cloud-pubsub ; extra == 'all'
-Requires-Dist: google-cloud-discoveryengine ; extra == 'all'
-Requires-Dist: google-cloud-texttospeech ; extra == 'all'
-Requires-Dist: google-generativeai >=0.7.1 ; extra == 'all'
-Requires-Dist: gunicorn ; extra == 'all'
-Requires-Dist: httpcore ; extra == 'all'
-Requires-Dist: httpx ; extra == 'all'
-Requires-Dist: jsonschema ; extra == 'all'
-Requires-Dist: lancedb ; extra == 'all'
-Requires-Dist: langchain >=0.2.16 ; extra == 'all'
-Requires-Dist: langchain-experimental >=0.0.61 ; extra == 'all'
-Requires-Dist: langchain-community >=0.2.11 ; extra == 'all'
-Requires-Dist: langchain-openai ==0.1.25 ; extra == 'all'
-Requires-Dist: langchain-google-genai ==1.0.10 ; extra == 'all'
-Requires-Dist: langchain-google-alloydb-pg ; extra == 'all'
-Requires-Dist: langchain-anthropic ==0.1.23 ; extra == 'all'
-Requires-Dist: langchain-google-vertexai ; extra == 'all'
-Requires-Dist: langfuse ; extra == 'all'
-Requires-Dist: numpy ; extra == 'all'
-Requires-Dist: pg8000 ; extra == 'all'
-Requires-Dist: pgvector ; extra == 'all'
-Requires-Dist: pillow ; extra == 'all'
-Requires-Dist: playwright ; extra == 'all'
-Requires-Dist: psutil ; extra == 'all'
-Requires-Dist: psycopg2-binary ; extra == 'all'
-Requires-Dist: pypdf ; extra == 'all'
-Requires-Dist: python-hcl2 ; extra == 'all'
-Requires-Dist: python-socketio ; extra == 'all'
-Requires-Dist: pytesseract ; extra == 'all'
-Requires-Dist: rich ; extra == 'all'
-Requires-Dist: sounddevice ; extra == 'all'
-Requires-Dist: supabase ; extra == 'all'
-Requires-Dist: tabulate ; extra == 'all'
-Requires-Dist: tantivy ; extra == 'all'
-Requires-Dist: tenacity ; extra == 'all'
-Requires-Dist: tiktoken ; extra == 'all'
-Requires-Dist: unstructured[local-inference] ==0.14.9 ; extra == 'all'
-Requires-Dist: xlwings ; extra == 'all'
-Provides-Extra: anthropic
-Requires-Dist: langchain-anthropic ==0.1.23 ; extra == 'anthropic'
-Provides-Extra: azure
-Requires-Dist: azure-identity ; extra == 'azure'
-Requires-Dist: azure-storage-blob ; extra == 'azure'
-Provides-Extra: cli
-Requires-Dist: jsonschema >=4.21.1 ; extra == 'cli'
-Requires-Dist: rich ; extra == 'cli'
-Provides-Extra: database
-Requires-Dist: asyncpg ; extra == 'database'
-Requires-Dist: supabase ; extra == 'database'
-Requires-Dist: sqlalchemy ; extra == 'database'
-Requires-Dist: pg8000 ; extra == 'database'
-Requires-Dist: pgvector ; extra == 'database'
-Requires-Dist: psycopg2-binary ; extra == 'database'
-Requires-Dist: lancedb ; extra == 'database'
-Requires-Dist: tantivy ; extra == 'database'
-Provides-Extra: excel
-Requires-Dist: xlwings ; extra == 'excel'
-Requires-Dist: requests ; extra == 'excel'
-Requires-Dist: rich ; extra == 'excel'
-Provides-Extra: gcp
-Requires-Dist: anthropic[vertex] ; extra == 'gcp'
-Requires-Dist: google-api-python-client ; extra == 'gcp'
-Requires-Dist: google-cloud-alloydb-connector[pg8000] ; extra == 'gcp'
-Requires-Dist: google-auth-httplib2 ; extra == 'gcp'
-Requires-Dist: google-auth-oauthlib ; extra == 'gcp'
-Requires-Dist: google-cloud-aiplatform >=1.58.0 ; extra == 'gcp'
-Requires-Dist: google-cloud-bigquery ; extra == 'gcp'
-Requires-Dist: google-cloud-build ; extra == 'gcp'
-Requires-Dist: google-cloud-service-control ; extra == 'gcp'
-Requires-Dist: google-cloud-storage ; extra == 'gcp'
-Requires-Dist: google-cloud-logging ; extra == 'gcp'
-Requires-Dist: google-cloud-pubsub ; extra == 'gcp'
-Requires-Dist: google-cloud-discoveryengine ; extra == 'gcp'
-Requires-Dist: google-cloud-texttospeech ; extra == 'gcp'
-Requires-Dist: google-generativeai >=0.7.1 ; extra == 'gcp'
-Requires-Dist: langchain-google-genai ==1.0.10 ; extra == 'gcp'
-Requires-Dist: langchain-google-alloydb-pg >=0.2.2 ; extra == 'gcp'
-Requires-Dist: langchain-google-vertexai ; extra == 'gcp'
-Requires-Dist: pillow ; extra == 'gcp'
-Provides-Extra: http
-Requires-Dist: fastapi ; extra == 'http'
-Requires-Dist: flask ; extra == 'http'
-Requires-Dist: gunicorn ; extra == 'http'
-Requires-Dist: httpcore ; extra == 'http'
-Requires-Dist: httpx ; extra == 'http'
-Requires-Dist: langfuse ; extra == 'http'
-Requires-Dist: python-socketio ; extra == 'http'
-Requires-Dist: requests ; extra == 'http'
-Requires-Dist: tenacity ; extra == 'http'
-Provides-Extra: iac
-Requires-Dist: python-hcl2 ; extra == 'iac'
-Provides-Extra: openai
-Requires-Dist: langchain-openai ==0.1.25 ; extra == 'openai'
-Requires-Dist: tiktoken ; extra == 'openai'
-Provides-Extra: pipeline
-Requires-Dist: GitPython ; extra == 'pipeline'
-Requires-Dist: lark ; extra == 'pipeline'
-Requires-Dist: psutil ; extra == 'pipeline'
-Requires-Dist: pypdf ; extra == 'pipeline'
-Requires-Dist: pytesseract ; extra == 'pipeline'
-Requires-Dist: tabulate ; extra == 'pipeline'
-Requires-Dist: unstructured[local-inference] ==0.14.9 ; extra == 'pipeline'
-Provides-Extra: tools
-Requires-Dist: openapi-spec-validator ; extra == 'tools'
-Requires-Dist: playwright ; extra == 'tools'
-Provides-Extra: tts
-Requires-Dist: google-cloud-texttospeech ; extra == 'tts'
-Requires-Dist: numpy ; extra == 'tts'
-Requires-Dist: sounddevice ; extra == 'tts'
-
-## Introduction
-This is the Sunholo Python project, a comprehensive toolkit for working with language models and vector stores on Google Cloud Platform. It provides a wide range of functionalities and utilities to facilitate the development and deployment of language model applications.
-
-Please refer to the website for full documentation at https://dev.sunholo.com/
-
-## Listen to the audio file:
-
-A [NotebookLM](https://notebooklm.google/) generated podcast of the codebase that may help give you an overview of what the library is capable of:
-
-[Listen to the audio file from Google Drive](https://drive.google.com/file/d/1GvwRmiYDjPjN2hXQ8plhnVDByu6TmgCQ/view?usp=drive_link) or on the website at https://dev.sunholo.com/docs/
-
-> "Ever wish you could build your own AI?..."
-
-## Tests via pytest
-
-If loading from GitHub, run tests:
-
-```bash
-pip install pytest
-pip install . --use-feature=in-tree-build
-pytest tests
-```
-
-## Demos
-
-Using https://github.com/charmbracelet/vhs
-
-```sh
-vhs record > cassette.tape
-```
-
-Then make gif:
-
-```sh
-vhs docs/tapes/config-list.tape
-```
-
-
-
-```
-Copyright [2024] [Holosun ApS]
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-```
-

{sunholo-0.110.4.dist-info → sunholo-0.112.0.dist-info}/LICENSE.txt
File without changes

{sunholo-0.110.4.dist-info → sunholo-0.112.0.dist-info}/entry_points.txt
File without changes

{sunholo-0.110.4.dist-info → sunholo-0.112.0.dist-info}/top_level.txt
File without changes