sunholo 0.109.1__py3-none-any.whl → 0.109.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sunholo/genai/file_handling.py +48 -12
- sunholo/senses/stream_voice.py +2 -0
- {sunholo-0.109.1.dist-info → sunholo-0.109.4.dist-info}/METADATA +6 -5
- {sunholo-0.109.1.dist-info → sunholo-0.109.4.dist-info}/RECORD +8 -8
- {sunholo-0.109.1.dist-info → sunholo-0.109.4.dist-info}/WHEEL +1 -1
- {sunholo-0.109.1.dist-info → sunholo-0.109.4.dist-info}/LICENSE.txt +0 -0
- {sunholo-0.109.1.dist-info → sunholo-0.109.4.dist-info}/entry_points.txt +0 -0
- {sunholo-0.109.1.dist-info → sunholo-0.109.4.dist-info}/top_level.txt +0 -0
sunholo/genai/file_handling.py
CHANGED
|
@@ -1,18 +1,18 @@
|
|
|
1
1
|
from ..custom_logging import log
|
|
2
2
|
from ..gcs import get_bytes_from_gcs
|
|
3
3
|
|
|
4
|
+
from functools import partial
|
|
4
5
|
import mimetypes
|
|
5
6
|
import asyncio
|
|
6
7
|
import tempfile
|
|
7
8
|
import re
|
|
9
|
+
import os
|
|
8
10
|
import traceback
|
|
9
11
|
try:
|
|
10
12
|
import google.generativeai as genai
|
|
11
|
-
from google.generativeai.types import file_types
|
|
12
13
|
except ImportError:
|
|
13
14
|
genai = None
|
|
14
|
-
|
|
15
|
-
|
|
15
|
+
|
|
16
16
|
DOCUMENT_MIMES = [
|
|
17
17
|
'application/pdf',
|
|
18
18
|
'application/x-javascript',
|
|
@@ -72,12 +72,25 @@ ALLOWED_MIME_TYPES = set(AUDIO_MIMES + VIDEO_MIMES + IMAGE_MIMES + DOCUMENT_MIME
|
|
|
72
72
|
# 'url': 'https://firebasestorage.googleapis.com/v0/b/multiv...',
|
|
73
73
|
# 'contentType': 'image/jpeg'}
|
|
74
74
|
# ]
|
|
75
|
+
|
|
76
|
+
def sanitize_file(filename):
|
|
77
|
+
# Split the filename into name and extension
|
|
78
|
+
name, extension = os.path.splitext(filename)
|
|
79
|
+
|
|
80
|
+
# Sanitize the name by removing invalid characters and converting to lowercase
|
|
81
|
+
sanitized_name = re.sub(r'[^a-z0-9-]', '', name.lower())
|
|
82
|
+
sanitized_name = re.sub(r'^-+|-+$', '', sanitized_name) # Remove leading or trailing dashes
|
|
83
|
+
|
|
84
|
+
# Return only the sanitized name (the extension split off above is not reattached)
|
|
85
|
+
return f"{sanitized_name}"
|
|
86
|
+
|
|
75
87
|
async def construct_file_content(gs_list, bucket:str):
|
|
76
88
|
"""
|
|
77
89
|
Args:
|
|
78
90
|
- gs_list: a list of dicts representing files in a bucket
|
|
79
91
|
- contentType: The content type of the file on GCS
|
|
80
92
|
- storagePath: The path in the bucket
|
|
93
|
+
- name: The name of the file
|
|
81
94
|
- bucket: The bucket the files are in
|
|
82
95
|
|
|
83
96
|
"""
|
|
@@ -102,32 +115,50 @@ async def construct_file_content(gs_list, bucket:str):
|
|
|
102
115
|
tasks = []
|
|
103
116
|
for file_info in file_list:
|
|
104
117
|
img_url = f"gs://{bucket}/{file_info['storagePath']}"
|
|
118
|
+
display_url = file_info.get('url')
|
|
105
119
|
mime_type = file_info['contentType']
|
|
106
|
-
|
|
107
|
-
|
|
120
|
+
name = sanitize_file(file_info['name'])
|
|
121
|
+
log.info(f"Processing {name=}")
|
|
122
|
+
try:
|
|
123
|
+
myfile = genai.get_file(name)
|
|
124
|
+
content.append(
|
|
125
|
+
{"role": "user", "parts": [
|
|
126
|
+
{"file_data": myfile},
|
|
127
|
+
{"text": f"You have been given the ability to work with file {name=} with {mime_type=} {display_url=}"}
|
|
128
|
+
]
|
|
129
|
+
})
|
|
130
|
+
log.info(f"Found existing genai.get_file {name=}")
|
|
131
|
+
|
|
132
|
+
except Exception as e:
|
|
133
|
+
log.info(f"Not found checking genai.get_file: '{name}' {str(e)}")
|
|
134
|
+
tasks.append(download_gcs_upload_genai(img_url, mime_type, name=name))
|
|
108
135
|
|
|
109
136
|
# Run all tasks in parallel
|
|
110
|
-
|
|
137
|
+
if tasks:
|
|
138
|
+
task_content = await asyncio.gather(*tasks)
|
|
139
|
+
content.extend(task_content)
|
|
111
140
|
|
|
112
141
|
return content
|
|
113
142
|
|
|
114
143
|
# Helper function to handle each file download with error handling
|
|
115
|
-
async def download_file_with_error_handling(img_url, mime_type):
|
|
144
|
+
async def download_file_with_error_handling(img_url, mime_type, name):
|
|
116
145
|
try:
|
|
117
|
-
return await download_gcs_upload_genai(img_url, mime_type)
|
|
146
|
+
return await download_gcs_upload_genai(img_url, mime_type, name)
|
|
118
147
|
except Exception as err:
|
|
119
148
|
msg= f"Error processing file from {img_url}: {str(err)}"
|
|
120
149
|
log.error(msg)
|
|
121
150
|
return {"role": "user", "parts": [{"text": msg}]}
|
|
122
151
|
|
|
123
|
-
async def download_gcs_upload_genai(img_url, mime_type, retries=3, delay=2):
|
|
152
|
+
async def download_gcs_upload_genai(img_url, mime_type, name=None, retries=3, delay=2):
|
|
124
153
|
import aiofiles
|
|
154
|
+
from google.generativeai.types import file_types
|
|
125
155
|
"""
|
|
126
156
|
Downloads and uploads a file with retries in case of failure.
|
|
127
157
|
|
|
128
158
|
Args:
|
|
129
159
|
- img_url: str The URL of the file to download.
|
|
130
160
|
- mime_type: str The MIME type of the file.
|
|
161
|
+
- name: str Optional name, else a random one will be created
|
|
131
162
|
- retries: int Number of retry attempts before failing.
|
|
132
163
|
- delay: int Initial delay between retries, exponentially increasing.
|
|
133
164
|
|
|
@@ -159,7 +190,7 @@ async def download_gcs_upload_genai(img_url, mime_type, retries=3, delay=2):
|
|
|
159
190
|
temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=extension)
|
|
160
191
|
downloaded_file = temp_file.name
|
|
161
192
|
|
|
162
|
-
sanitized_file =
|
|
193
|
+
sanitized_file = sanitize_file(downloaded_file)
|
|
163
194
|
|
|
164
195
|
log.info(f"Writing file {sanitized_file}")
|
|
165
196
|
async with aiofiles.open(sanitized_file, 'wb') as f:
|
|
@@ -167,8 +198,13 @@ async def download_gcs_upload_genai(img_url, mime_type, retries=3, delay=2):
|
|
|
167
198
|
|
|
168
199
|
# Upload the file and get its content reference
|
|
169
200
|
try:
|
|
170
|
-
downloaded_content: file_types.File = await asyncio.to_thread(
|
|
171
|
-
|
|
201
|
+
downloaded_content: file_types.File = await asyncio.to_thread(
|
|
202
|
+
partial(genai.upload_file, name=name, mime_type=mime_type),
|
|
203
|
+
sanitized_file
|
|
204
|
+
)
|
|
205
|
+
return {"role": "user", "parts": [{"file_data": downloaded_content},
|
|
206
|
+
{"text": f"You have been given the ability to read and work with filename '{name}' with {mime_type=}."}
|
|
207
|
+
]}
|
|
172
208
|
except Exception as err:
|
|
173
209
|
msg = f"Could not upload {sanitized_file} to genai.upload_file: {str(err)} {traceback.format_exc()}"
|
|
174
210
|
log.error(msg)
|
sunholo/senses/stream_voice.py
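CHANGED
(diff body for this file, +2 -0, was not captured)
{sunholo-0.109.1.dist-info → sunholo-0.109.4.dist-info}/METADATA
CHANGED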
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: sunholo
|
|
3
|
-
Version: 0.109.1
|
|
3
|
+
Version: 0.109.4
|
|
4
4
|
Summary: Large Language Model DevOps - a package to help deploy LLMs to the Cloud.
|
|
5
5
|
Home-page: https://github.com/sunholo-data/sunholo-py
|
|
6
|
-
Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.109.1.tar.gz
|
|
6
|
+
Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.109.4.tar.gz
|
|
7
7
|
Author: Holosun ApS
|
|
8
8
|
Author-email: multivac@sunholo.com
|
|
9
9
|
License: Apache License, Version 2.0
|
|
@@ -21,8 +21,9 @@ License-File: LICENSE.txt
|
|
|
21
21
|
Requires-Dist: google-auth
|
|
22
22
|
Requires-Dist: ruamel.yaml
|
|
23
23
|
Requires-Dist: langchain ==0.2.16
|
|
24
|
-
Requires-Dist: langchain-experimental
|
|
25
|
-
Requires-Dist: langchain-community
|
|
24
|
+
Requires-Dist: langchain-experimental ==0.0.65
|
|
25
|
+
Requires-Dist: langchain-community ==0.2.17
|
|
26
|
+
Requires-Dist: langsmith ==0.1.140
|
|
26
27
|
Provides-Extra: all
|
|
27
28
|
Requires-Dist: anthropic[vertex] ; extra == 'all'
|
|
28
29
|
Requires-Dist: asyncpg ; extra == 'all'
|
|
@@ -50,7 +51,7 @@ Requires-Dist: httpcore ; extra == 'all'
|
|
|
50
51
|
Requires-Dist: httpx ; extra == 'all'
|
|
51
52
|
Requires-Dist: jsonschema ; extra == 'all'
|
|
52
53
|
Requires-Dist: lancedb ; extra == 'all'
|
|
53
|
-
Requires-Dist: langchain >=0.2.
|
|
54
|
+
Requires-Dist: langchain >=0.2.16 ; extra == 'all'
|
|
54
55
|
Requires-Dist: langchain-experimental >=0.0.61 ; extra == 'all'
|
|
55
56
|
Requires-Dist: langchain-community >=0.2.11 ; extra == 'all'
|
|
56
57
|
Requires-Dist: langchain-openai ==0.1.25 ; extra == 'all'
|
|
{sunholo-0.109.1.dist-info → sunholo-0.109.4.dist-info}/RECORD
CHANGED
|
@@ -86,7 +86,7 @@ sunholo/gcs/download_url.py,sha256=Ul81n1rklr8WogPsuxWWD1Nr8RHU451LzHPMJNhAKzw,6
|
|
|
86
86
|
sunholo/gcs/extract_and_sign.py,sha256=paRrTCvCN5vkQwCB7OSkxWi-pfOgOtZ0bwdXE08c3Ps,1546
|
|
87
87
|
sunholo/gcs/metadata.py,sha256=oQLcXi4brsZ74aegWyC1JZmhlaEV270HS5_UWtAYYWE,898
|
|
88
88
|
sunholo/genai/__init__.py,sha256=6SWK7uV5F625J-P3xQoD6WKL59a9RSaidj-Guslyt8Q,192
|
|
89
|
-
sunholo/genai/file_handling.py,sha256=
|
|
89
|
+
sunholo/genai/file_handling.py,sha256=cGQeDvB93-3XgHRA020E7344yG7_EgX0KAsJLitQJBg,7552
|
|
90
90
|
sunholo/genai/images.py,sha256=EyjsDqt6XQw99pZUQamomCpMOoIah9bp3XY94WPU7Ms,1678
|
|
91
91
|
sunholo/genai/init.py,sha256=yG8E67TduFCTQPELo83OJuWfjwTnGZsyACospahyEaY,687
|
|
92
92
|
sunholo/genai/process_funcs_cls.py,sha256=7_RQMqIAZ3nPP-GFgCHBvS39fwuWuGtvSyuJaJN_G3E,31590
|
|
@@ -117,7 +117,7 @@ sunholo/qna/__init__.py,sha256=F8q1uR_HreoSX0IfmKY1qoSwIgXhO2Q8kuDSxh9_-EE,28
|
|
|
117
117
|
sunholo/qna/parsers.py,sha256=YpOaK5S_LxJ6FbliSYDc3AVOJ62RVduayoNnzi_p8CM,2494
|
|
118
118
|
sunholo/qna/retry.py,sha256=yMw7RTkw-RXCzfENPJOt8c32mXlpvOR589EGkvK-6yI,2028
|
|
119
119
|
sunholo/senses/__init__.py,sha256=fbWqVwwzkV5uRSb8lQzo4pn0ja_VYVWbUYapurSowBs,39
|
|
120
|
-
sunholo/senses/stream_voice.py,sha256=
|
|
120
|
+
sunholo/senses/stream_voice.py,sha256=VpCmooEKghBT1jPJe9mX7gKggGqY6qt-bpO7hwY4sPE,18122
|
|
121
121
|
sunholo/streaming/__init__.py,sha256=MpbydI2UYo_adttPQFkxNM33b-QRyNEbrKJx0C2AGPc,241
|
|
122
122
|
sunholo/streaming/content_buffer.py,sha256=0LHMwH4ctq5kjhIgMFNH0bA1RL0jMISlLVzzLcFrvv4,12766
|
|
123
123
|
sunholo/streaming/langserve.py,sha256=hi7q8WY8DPKrALl9m_dOMxWOdE-iEuk7YW05SVDFIX8,6514
|
|
@@ -150,9 +150,9 @@ sunholo/vertex/init.py,sha256=1OQwcPBKZYBTDPdyU7IM4X4OmiXLdsNV30C-fee2scQ,2875
|
|
|
150
150
|
sunholo/vertex/memory_tools.py,sha256=tBZxqVZ4InTmdBvLlOYwoSEWu4-kGquc-gxDwZCC4FA,7667
|
|
151
151
|
sunholo/vertex/safety.py,sha256=S9PgQT1O_BQAkcqauWncRJaydiP8Q_Jzmu9gxYfy1VA,2482
|
|
152
152
|
sunholo/vertex/type_dict_to_json.py,sha256=uTzL4o9tJRao4u-gJOFcACgWGkBOtqACmb6ihvCErL8,4694
|
|
153
|
-
sunholo-0.109.1.dist-info/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
|
|
154
|
-
sunholo-0.109.
|
|
155
|
-
sunholo-0.109.
|
|
156
|
-
sunholo-0.109.1.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
|
|
157
|
-
sunholo-0.109.1.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
|
|
158
|
-
sunholo-0.109.1.dist-info/RECORD,,
|
|
153
|
+
sunholo-0.109.4.dist-info/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
|
|
154
|
+
sunholo-0.109.4.dist-info/METADATA,sha256=_nE3mKEViVRioU2NglcZCIXJ0PMqdsM7lYi__xkj8ao,8705
|
|
155
|
+
sunholo-0.109.4.dist-info/WHEEL,sha256=a7TGlA-5DaHMRrarXjVbQagU3Man_dCnGIWMJr5kRWo,91
|
|
156
|
+
sunholo-0.109.4.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
|
|
157
|
+
sunholo-0.109.4.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
|
|
158
|
+
sunholo-0.109.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|